mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-22 04:36:32 +00:00
Compare commits
799 Commits
v1.12.0-rc
...
lazy-word-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a5a834f27 | ||
|
|
418fa47963 | ||
|
|
0656a0d515 | ||
|
|
e36a8c50b9 | ||
|
|
08ff135ad6 | ||
|
|
f729864466 | ||
|
|
94ea263bef | ||
|
|
0e475cb5e6 | ||
|
|
62de70b73c | ||
|
|
7707fb18dd | ||
|
|
bb2e9419d3 | ||
|
|
cf68713145 | ||
|
|
811143cbe9 | ||
|
|
c670e9a39b | ||
|
|
65f1b13475 | ||
|
|
db7ce03763 | ||
|
|
7ed9adde29 | ||
|
|
9ce7ccfbe7 | ||
|
|
3deb1ef78f | ||
|
|
5820d822c8 | ||
|
|
637bea0370 | ||
|
|
fd079c6757 | ||
|
|
182e5d5632 | ||
|
|
82aee6a9af | ||
|
|
fca947219f | ||
|
|
fb7ae9f97f | ||
|
|
cd421fea1e | ||
|
|
1ad4235beb | ||
|
|
de6c7e551e | ||
|
|
c0fe70c5f0 | ||
|
|
a09d08c7b6 | ||
|
|
2e6aa63efc | ||
|
|
f9807ba32e | ||
|
|
8c8cc59a6c | ||
|
|
f540a69ac3 | ||
|
|
7df2bdfb15 | ||
|
|
71f7456748 | ||
|
|
c98b313d03 | ||
|
|
69678ed8e1 | ||
|
|
bf144a94d8 | ||
|
|
b0b1888ef9 | ||
|
|
6ec1d2b712 | ||
|
|
cbdf80893d | ||
|
|
e2156ddfc7 | ||
|
|
49dd50dab2 | ||
|
|
13a88d6131 | ||
|
|
d9875b782d | ||
|
|
cb16baab18 | ||
|
|
2500e3c067 | ||
|
|
d3e4b2dfe7 | ||
|
|
2a46624e19 | ||
|
|
009c36a4d0 | ||
|
|
2a47e25e6d | ||
|
|
82912e191b | ||
|
|
e2d372823a | ||
|
|
1876132172 | ||
|
|
d0b0b90d17 | ||
|
|
b08544e86d | ||
|
|
d9111fe8ce | ||
|
|
41d8161017 | ||
|
|
7df5715d39 | ||
|
|
5fe02ab5e0 | ||
|
|
5ef7767429 | ||
|
|
3fad48167b | ||
|
|
a92a48b9b9 | ||
|
|
d53225bf64 | ||
|
|
20896500c2 | ||
|
|
1af520077c | ||
|
|
7e07cb9de1 | ||
|
|
a12b06d99d | ||
|
|
331dc3d241 | ||
|
|
ef9d9f8481 | ||
|
|
d3d22d8ed4 | ||
|
|
5e6abcf50c | ||
|
|
a4aaf932ba | ||
|
|
16c962eb30 | ||
|
|
55ca2c4481 | ||
|
|
fedb444e66 | ||
|
|
bef5954741 | ||
|
|
ff8cf38d6b | ||
|
|
f8ac575ec5 | ||
|
|
566b4efb06 | ||
|
|
1d499ed9b2 | ||
|
|
3bc62f0549 | ||
|
|
21bbbdec76 | ||
|
|
78ebd8dba2 | ||
|
|
34df44a002 | ||
|
|
48a27f669e | ||
|
|
e2d0ce52ba | ||
|
|
995f8962bd | ||
|
|
1cd00f37c0 | ||
|
|
1aa3375e12 | ||
|
|
60ff1b19a8 | ||
|
|
7df5e3f059 | ||
|
|
0197dc87e0 | ||
|
|
7a172b82ca | ||
|
|
eb3ff325d1 | ||
|
|
d3cd5ea689 | ||
|
|
3ed43f9097 | ||
|
|
a2a86ef4e2 | ||
|
|
d500c7f625 | ||
|
|
ea7e299663 | ||
|
|
a370b467fe | ||
|
|
8790880589 | ||
|
|
7072fe9780 | ||
|
|
d0dda78f3d | ||
|
|
fa8afc5cfd | ||
|
|
6d52c6e711 | ||
|
|
dfb8411647 | ||
|
|
6269f757ff | ||
|
|
40c5f911fd | ||
|
|
abef655849 | ||
|
|
b12ffd1356 | ||
|
|
c9a4c6ed96 | ||
|
|
aa32b719c7 | ||
|
|
41d2b1e52b | ||
|
|
54ee81bb09 | ||
|
|
689e69d6d2 | ||
|
|
9d9e0d4c54 | ||
|
|
19c9caed39 | ||
|
|
21c3b3957e | ||
|
|
f292fc9ac0 | ||
|
|
1d3c4642a6 | ||
|
|
9a282be0a2 | ||
|
|
bea28968a0 | ||
|
|
ed1dcbe0f7 | ||
|
|
5ceddbda84 | ||
|
|
ca41ce3bbd | ||
|
|
8ec0c322ea | ||
|
|
b88aa9cc76 | ||
|
|
3fd86e8d76 | ||
|
|
67f7470c83 | ||
|
|
4fab72cbea | ||
|
|
afb4b9677f | ||
|
|
73d2dbd60f | ||
|
|
57a6beee30 | ||
|
|
b190b612a3 | ||
|
|
111e77eff2 | ||
|
|
ba30747de3 | ||
|
|
25f0536f5a | ||
|
|
c8c0951c43 | ||
|
|
63e753bde0 | ||
|
|
5fa4b5c50a | ||
|
|
a7a62e5e4c | ||
|
|
683a2ac685 | ||
|
|
e751342dfb | ||
|
|
17bf82235d | ||
|
|
0401c4e511 | ||
|
|
4798c35c50 | ||
|
|
9585950e0e | ||
|
|
b8c6eb5453 | ||
|
|
02586e727e | ||
|
|
0cfc9261ba | ||
|
|
035674d56e | ||
|
|
d35470e29b | ||
|
|
23e07f1a93 | ||
|
|
f2a28a4dd7 | ||
|
|
1994494155 | ||
|
|
6dbec91d2b | ||
|
|
9a75dc6ab3 | ||
|
|
ae8d453868 | ||
|
|
95bccaf5f5 | ||
|
|
659855c88e | ||
|
|
286d310287 | ||
|
|
4f7ece2411 | ||
|
|
967033579d | ||
|
|
0200c65ebf | ||
|
|
c63c25a9a2 | ||
|
|
046bbea864 | ||
|
|
c5cb7d2f2c | ||
|
|
5e7f226ac9 | ||
|
|
754f254a00 | ||
|
|
39b5ad3c86 | ||
|
|
80adbb1bdc | ||
|
|
4b6fa1cf41 | ||
|
|
dc78d8e9c4 | ||
|
|
d4063c9dcd | ||
|
|
abebc574f6 | ||
|
|
f32ab67819 | ||
|
|
d25953f322 | ||
|
|
405bbd04c1 | ||
|
|
5d421abdc4 | ||
|
|
9f3663e768 | ||
|
|
d9642ec916 | ||
|
|
818e8b0237 | ||
|
|
4f77a7fba5 | ||
|
|
058f08dff5 | ||
|
|
9a6c1730aa | ||
|
|
91a8a97045 | ||
|
|
15788773af | ||
|
|
025b9b79bb | ||
|
|
1c60b17a37 | ||
|
|
3b2cd54b9d | ||
|
|
0833cb7d34 | ||
|
|
b0d4f9590f | ||
|
|
dfce20be21 | ||
|
|
24fe6cd205 | ||
|
|
e374b095a2 | ||
|
|
9f3e4801b1 | ||
|
|
b85180fedb | ||
|
|
3cdcc54a9e | ||
|
|
294cf39cad | ||
|
|
4a2643daa2 | ||
|
|
8d2d9066ba | ||
|
|
526476e168 | ||
|
|
ea7bae9a71 | ||
|
|
76fd5d92d7 | ||
|
|
245a55722a | ||
|
|
434fad5327 | ||
|
|
243a5fa6a8 | ||
|
|
9d314ace09 | ||
|
|
1b1172ad16 | ||
|
|
1d99c8465c | ||
|
|
05cc8c650c | ||
|
|
14e1459bf5 | ||
|
|
589bf30ec6 | ||
|
|
b367c71ad2 | ||
|
|
3ff1de0a21 | ||
|
|
1005a60fb8 | ||
|
|
e9add14189 | ||
|
|
4a058a080e | ||
|
|
11a11fc870 | ||
|
|
cd0dfa3f1b | ||
|
|
7b4ce468a6 | ||
|
|
11759c4be4 | ||
|
|
0f1aeb8eaa | ||
|
|
5e7803632d | ||
|
|
885710a07b | ||
|
|
c55fdad2c3 | ||
|
|
1caad4c4b0 | ||
|
|
8419ed52a1 | ||
|
|
a65c52cc97 | ||
|
|
49e9655c24 | ||
|
|
fa763ca5dc | ||
|
|
c7aeb554b2 | ||
|
|
88d9d47928 | ||
|
|
8e0d8d31f9 | ||
|
|
81a38099ec | ||
|
|
bd27fe7d02 | ||
|
|
41203f0931 | ||
|
|
803a699b15 | ||
|
|
246ad3b06e | ||
|
|
a21c440274 | ||
|
|
c01d26ffd7 | ||
|
|
225af069a9 | ||
|
|
70305b9f71 | ||
|
|
5dab435d13 | ||
|
|
c83c1a3c51 | ||
|
|
afc6c10a2a | ||
|
|
b83275c9c5 | ||
|
|
d7f35ee3ba | ||
|
|
1dce341bfb | ||
|
|
4876c1c8eb | ||
|
|
43c8d54501 | ||
|
|
84e2a1f836 | ||
|
|
00eb47d42e | ||
|
|
9293e7f2c1 | ||
|
|
80198aa855 | ||
|
|
fa00b42c93 | ||
|
|
6c9409edf8 | ||
|
|
acb06cb3e6 | ||
|
|
7d0d8f4445 | ||
|
|
491d115c3c | ||
|
|
55fa2dda00 | ||
|
|
c71eea8023 | ||
|
|
df40533741 | ||
|
|
4e819a6187 | ||
|
|
0c3e7fe963 | ||
|
|
45f843ccb9 | ||
|
|
35b6bca598 | ||
|
|
7f82d33597 | ||
|
|
f2185438ee | ||
|
|
8c5856007c | ||
|
|
ae1d7f4d9b | ||
|
|
792be63567 | ||
|
|
ca1ad51564 | ||
|
|
70aac71c63 | ||
|
|
a1d1e7c82a | ||
|
|
56438bdea4 | ||
|
|
a562d6abc1 | ||
|
|
33b67b82e1 | ||
|
|
b7fdd9516c | ||
|
|
5f2a1a4fd1 | ||
|
|
2b0e17ede0 | ||
|
|
37092adc71 | ||
|
|
86fcad788e | ||
|
|
2ea5c57871 | ||
|
|
7b4f2aa593 | ||
|
|
1fb96d3edb | ||
|
|
b63c64395d | ||
|
|
628119e31e | ||
|
|
78867b6852 | ||
|
|
b21b8e8f30 | ||
|
|
4a9e5ae215 | ||
|
|
6e1865b75b | ||
|
|
64409a1de7 | ||
|
|
1b81cab782 | ||
|
|
88190b5602 | ||
|
|
0b27aa5138 | ||
|
|
35160788d7 | ||
|
|
c3e5c3ba36 | ||
|
|
04ac0af54b | ||
|
|
9996533364 | ||
|
|
3f6b334fc5 | ||
|
|
b30e5a7a35 | ||
|
|
6d79cb23ba | ||
|
|
e34afca6d7 | ||
|
|
4918b9ffb6 | ||
|
|
73474e7af0 | ||
|
|
7ae6dda03f | ||
|
|
00e764b0d3 | ||
|
|
4abf0db0b4 | ||
|
|
acc885fd0a | ||
|
|
61e8cfd4bc | ||
|
|
796acd1aee | ||
|
|
cc8df5e11f | ||
|
|
ede74ccc42 | ||
|
|
e93a5719ef | ||
|
|
d34f0b606c | ||
|
|
6425451bbc | ||
|
|
acc400face | ||
|
|
fe46855462 | ||
|
|
8e7d2d25f2 | ||
|
|
a436534515 | ||
|
|
aa2327591e | ||
|
|
a6f9e0ddf0 | ||
|
|
60470bb647 | ||
|
|
294e1ba16d | ||
|
|
8e6893ddbe | ||
|
|
d018346f18 | ||
|
|
2385842537 | ||
|
|
6a70c0ec92 | ||
|
|
7a9382b115 | ||
|
|
62dabeba5f | ||
|
|
48812229a9 | ||
|
|
915cc377fb | ||
|
|
96544bfa43 | ||
|
|
09d474da63 | ||
|
|
aaefbfae1f | ||
|
|
97e17f52a1 | ||
|
|
62ced0e3f1 | ||
|
|
71bb24f17e | ||
|
|
c72f114b33 | ||
|
|
8ed39f5de0 | ||
|
|
424c5bde40 | ||
|
|
bdd3005d10 | ||
|
|
4224edea28 | ||
|
|
cb1b7513af | ||
|
|
2f89b8209f | ||
|
|
a9d0f4a002 | ||
|
|
db032079d8 | ||
|
|
a00796c46a | ||
|
|
6112bd8caa | ||
|
|
cec88cfc29 | ||
|
|
8439aeb7cf | ||
|
|
42257eec53 | ||
|
|
1beda3b9af | ||
|
|
8676e94f5c | ||
|
|
ef47a0d820 | ||
|
|
e0f0da57e2 | ||
|
|
485e3127c7 | ||
|
|
58f90b70c7 | ||
|
|
508db9020d | ||
|
|
6ff37c6fc4 | ||
|
|
f21ae1f5d1 | ||
|
|
483c52f07b | ||
|
|
f0d7ab81ad | ||
|
|
f88f415a00 | ||
|
|
19bc885b07 | ||
|
|
47f70e3d79 | ||
|
|
0f8eb3b506 | ||
|
|
4a5923a55e | ||
|
|
de98656ed1 | ||
|
|
da7469be38 | ||
|
|
df9d10ac44 | ||
|
|
528d9d6d8b | ||
|
|
4fb5c39b92 | ||
|
|
022205af90 | ||
|
|
50280bf02b | ||
|
|
9b579069df | ||
|
|
f5a4a1c8b2 | ||
|
|
5ab4cdb1f3 | ||
|
|
1f54f07f72 | ||
|
|
73d8a4eace | ||
|
|
c1e5897076 | ||
|
|
718a98fbbf | ||
|
|
86bf231d29 | ||
|
|
182c3f4b80 | ||
|
|
c1eba66443 | ||
|
|
7197ced673 | ||
|
|
4f21ee6c66 | ||
|
|
787472453d | ||
|
|
8f65f35de9 | ||
|
|
c27c923439 | ||
|
|
fd5649091d | ||
|
|
9a57736773 | ||
|
|
7740997ea8 | ||
|
|
7eb23f73ba | ||
|
|
b9e9fc376a | ||
|
|
27bf2f1298 | ||
|
|
d4d82fbd0c | ||
|
|
eda09a54da | ||
|
|
b132d70413 | ||
|
|
e41ebd3047 | ||
|
|
705d31e8bd | ||
|
|
7d95950ce6 | ||
|
|
c6b4c21c23 | ||
|
|
bf96fdb858 | ||
|
|
41eeffd88d | ||
|
|
1eb9fe8562 | ||
|
|
bac7a1623a | ||
|
|
5458850d21 | ||
|
|
20ac59c946 | ||
|
|
cfc1e193b6 | ||
|
|
0cc25c7e4c | ||
|
|
102681e384 | ||
|
|
3ef7a478cd | ||
|
|
e70ac35e02 | ||
|
|
d3654906bf | ||
|
|
e6295c9c5f | ||
|
|
b15de68831 | ||
|
|
6723700fb9 | ||
|
|
2c099b7c23 | ||
|
|
50fca8fc70 | ||
|
|
b9d92c481b | ||
|
|
d142c5e432 | ||
|
|
4d4683adb6 | ||
|
|
d6063079af | ||
|
|
2e04ab4737 | ||
|
|
d95384a636 | ||
|
|
c0690f5b9e | ||
|
|
909d84447d | ||
|
|
2cf57d584e | ||
|
|
59242b9c4f | ||
|
|
6a6212d4e1 | ||
|
|
a8006a3750 | ||
|
|
0e0e462f5b | ||
|
|
805531c90d | ||
|
|
a6470a0c37 | ||
|
|
8a54f14b8e | ||
|
|
be5e521cb0 | ||
|
|
60f20119a2 | ||
|
|
2f257fdc3d | ||
|
|
0991cb0de4 | ||
|
|
4709c638ed | ||
|
|
0776217801 | ||
|
|
9eae36ce3e | ||
|
|
3f501c9b85 | ||
|
|
c85146524b | ||
|
|
79d192fb3f | ||
|
|
a4ed36f0cc | ||
|
|
dddb51a9ca | ||
|
|
8f006eeaf3 | ||
|
|
445e5aff02 | ||
|
|
234d0c360f | ||
|
|
cd181b36c3 | ||
|
|
4cfe0dbdd8 | ||
|
|
89a4ac92eb | ||
|
|
deb90ff573 | ||
|
|
0c10063a87 | ||
|
|
87ea080c10 | ||
|
|
6d62fa061b | ||
|
|
de6cd3ac01 | ||
|
|
cb8f033130 | ||
|
|
03097e65e8 | ||
|
|
c32bec338f | ||
|
|
73d3d286d9 | ||
|
|
29eeb84ce3 | ||
|
|
d78951feb7 | ||
|
|
63c8cbae5b | ||
|
|
72ded27e98 | ||
|
|
c25781f720 | ||
|
|
c3b18fede9 | ||
|
|
4070895a21 | ||
|
|
a21711f473 | ||
|
|
f0ec8cbffe | ||
|
|
e568dbbabb | ||
|
|
8ff15b3dfb | ||
|
|
247eaed872 | ||
|
|
8b1fcfd7f8 | ||
|
|
45f289488d | ||
|
|
b0ef7701ae | ||
|
|
c9fb6c48b8 | ||
|
|
0de34aa8fa | ||
|
|
6bfcad4b05 | ||
|
|
67a0c9fff8 | ||
|
|
cc4aca78c4 | ||
|
|
5c7fa9b924 | ||
|
|
9837de271d | ||
|
|
fd251c37bb | ||
|
|
adb6bca950 | ||
|
|
42854c0bca | ||
|
|
d0bdff7b7b | ||
|
|
8650ee66c1 | ||
|
|
377fa09cb7 | ||
|
|
00a03742ff | ||
|
|
d11e359244 | ||
|
|
09d45439c7 | ||
|
|
5d92da0c73 | ||
|
|
677bb39e73 | ||
|
|
85ea77de0b | ||
|
|
03317be0bd | ||
|
|
4aa7c8f7b1 | ||
|
|
ce57a342a3 | ||
|
|
1cc6cd78e0 | ||
|
|
c204afdc79 | ||
|
|
c14967eeac | ||
|
|
f38db86120 | ||
|
|
50b155fa2d | ||
|
|
a533c8e041 | ||
|
|
e5595a05df | ||
|
|
908adee6fc | ||
|
|
7b3353252f | ||
|
|
647a10bf18 | ||
|
|
f2141a894a | ||
|
|
08c332980b | ||
|
|
7b57a44b5a | ||
|
|
fe2c0cc3d5 | ||
|
|
eecf4c53e7 | ||
|
|
cf4c3c287b | ||
|
|
71e5605daa | ||
|
|
890a5c64dd | ||
|
|
0ee4671a91 | ||
|
|
68333424c6 | ||
|
|
d4529d8c83 | ||
|
|
5e8144b0e1 | ||
|
|
3e3695445f | ||
|
|
091f989b72 | ||
|
|
dd28a3fd5a | ||
|
|
6f24b438e0 | ||
|
|
48a9ad4c17 | ||
|
|
b997039a91 | ||
|
|
0e6b6bd130 | ||
|
|
b1b0b0b67c | ||
|
|
27155f845c | ||
|
|
c6f14279d7 | ||
|
|
fa15356209 | ||
|
|
99f5e09a79 | ||
|
|
a8ef6f08e0 | ||
|
|
ae5a04e85c | ||
|
|
8ebfc9fa92 | ||
|
|
21026f0ca8 | ||
|
|
e579554c84 | ||
|
|
ff49250c1a | ||
|
|
8b95c6ae56 | ||
|
|
28162759a4 | ||
|
|
dd128656cb | ||
|
|
4456df5a46 | ||
|
|
0b104b3efa | ||
|
|
ac944f0960 | ||
|
|
5f55e88484 | ||
|
|
aab6ffec30 | ||
|
|
1dd33af8a3 | ||
|
|
8a2a1e4d27 | ||
|
|
e2686c0fce | ||
|
|
9473a2a6ca | ||
|
|
11ce3b9636 | ||
|
|
0bf4157a75 | ||
|
|
4eaa626bca | ||
|
|
668b26b641 | ||
|
|
04e4586fb3 | ||
|
|
78f6f22a80 | ||
|
|
13afdaf393 | ||
|
|
742d0ee531 | ||
|
|
4275833bab | ||
|
|
de7f8c4406 | ||
|
|
f00a285a6d | ||
|
|
43bb02e7b4 | ||
|
|
56fd4ee9bd | ||
|
|
0625d08e4e | ||
|
|
9269086fda | ||
|
|
98e3ecb86b | ||
|
|
9af9e73c45 | ||
|
|
4b107b17cb | ||
|
|
cb82b0798a | ||
|
|
7f1071943e | ||
|
|
3c1e7c7428 | ||
|
|
baeefa4817 | ||
|
|
e8ba7833ec | ||
|
|
db676aee73 | ||
|
|
1a0d8810e5 | ||
|
|
4615d86748 | ||
|
|
525e67ba93 | ||
|
|
44eb153619 | ||
|
|
195785c47f | ||
|
|
4eae92f411 | ||
|
|
d7cb319217 | ||
|
|
15062e7dba | ||
|
|
bf19f86e38 | ||
|
|
91c7ef8723 | ||
|
|
fc23a0ee52 | ||
|
|
d3491851bc | ||
|
|
886404cc4d | ||
|
|
75a7f0e26c | ||
|
|
47827ca5c1 | ||
|
|
f75d74a967 | ||
|
|
42648919c7 | ||
|
|
6987cac1ba | ||
|
|
082237863e | ||
|
|
4bcdd7a9f9 | ||
|
|
fc4b7ccb70 | ||
|
|
df9ac07922 | ||
|
|
ba27a09efe | ||
|
|
bc51d3a918 | ||
|
|
b39d4e9b50 | ||
|
|
b18cd9075d | ||
|
|
36b897858a | ||
|
|
a7b2f461cf | ||
|
|
fce132a21b | ||
|
|
9c857ff48f | ||
|
|
f27b33dabe | ||
|
|
9eb4b84abd | ||
|
|
71834787ec | ||
|
|
b004db37c7 | ||
|
|
0c04cd1d9f | ||
|
|
63ea405b3e | ||
|
|
ba11121cfc | ||
|
|
acdd5aa6ea | ||
|
|
2f3cc8cdd2 | ||
|
|
7a95fed23f | ||
|
|
961de4d34e | ||
|
|
18ce95dcbf | ||
|
|
c177210b1b | ||
|
|
1fc90fbacb | ||
|
|
6c72559457 | ||
|
|
1fdfa3f208 | ||
|
|
1a01196a80 | ||
|
|
0d0c18f519 | ||
|
|
d12364c1e0 | ||
|
|
8cd3a1aa57 | ||
|
|
08fd026ebd | ||
|
|
75d5cea624 | ||
|
|
ab9213fa94 | ||
|
|
45d5d4bf40 | ||
|
|
fa885e75b4 | ||
|
|
29fc77ee5b | ||
|
|
ad4dc70720 | ||
|
|
5d682b4700 | ||
|
|
f1beb60204 | ||
|
|
85577e70cd | ||
|
|
c5536c37b5 | ||
|
|
9245c89cfe | ||
|
|
f4ff722247 | ||
|
|
262b429a4c | ||
|
|
eaabc1af2f | ||
|
|
04a24a9239 | ||
|
|
1f54dfa883 | ||
|
|
786b0fabea | ||
|
|
26733c705d | ||
|
|
ab75f53efd | ||
|
|
867e6a8f1d | ||
|
|
6f4823fc97 | ||
|
|
df9b68f8ed | ||
|
|
0a0a5f84bf | ||
|
|
5bc6391700 | ||
|
|
eaa897d983 | ||
|
|
c06f386ac3 | ||
|
|
bfca54cc2c | ||
|
|
04a62d2b97 | ||
|
|
8c19cb0a0b | ||
|
|
5c492031d9 | ||
|
|
fb1caa4724 | ||
|
|
5622b9607d | ||
|
|
01bcc601be | ||
|
|
93fbdc06d3 | ||
|
|
69c931334f | ||
|
|
d683f5980c | ||
|
|
f8ba112f66 | ||
|
|
c614d0dd35 | ||
|
|
479607e5dd | ||
|
|
bb00e70087 | ||
|
|
2a04ecccc4 | ||
|
|
e974be9518 | ||
|
|
aeb6b74725 | ||
|
|
a751972c57 | ||
|
|
6b269795d2 | ||
|
|
d075be798a | ||
|
|
89637bcaaf | ||
|
|
866ac91be3 | ||
|
|
e610af36aa | ||
|
|
7cf6707ed3 | ||
|
|
34254b42b6 | ||
|
|
1995040846 | ||
|
|
07f42e8057 | ||
|
|
71f59749dc | ||
|
|
3b0b9967f6 | ||
|
|
123b54a178 | ||
|
|
f5dd8dfc3e | ||
|
|
6768e4ef75 | ||
|
|
bcfed70888 | ||
|
|
503ef3bbc9 | ||
|
|
08f2c696b0 | ||
|
|
54e34beac6 | ||
|
|
c0aa018c87 | ||
|
|
b75f1f4c17 | ||
|
|
95ed079761 | ||
|
|
4a082683df | ||
|
|
26be5e0733 | ||
|
|
bd5110a2fe | ||
|
|
fa8b9acdf6 | ||
|
|
2b74d1824b | ||
|
|
c77b00d3ac | ||
|
|
c77073efcc | ||
|
|
1537323eb9 | ||
|
|
a0a3b55700 | ||
|
|
214b51de87 | ||
|
|
95975944d7 | ||
|
|
9a9383643f | ||
|
|
cac355bfa7 | ||
|
|
9020a50df8 | ||
|
|
52843123d4 | ||
|
|
6298db5bea | ||
|
|
a003a0934a | ||
|
|
3a11e39c01 | ||
|
|
5f896b1050 | ||
|
|
d0c4e6da6b | ||
|
|
2da5584bb5 | ||
|
|
b7eb802ae6 | ||
|
|
2e32d0474c | ||
|
|
cb99ac6f7e | ||
|
|
be411435f5 | ||
|
|
29ef164530 | ||
|
|
739c52a3cd | ||
|
|
7a2af06b1e | ||
|
|
cb0c3a5aad | ||
|
|
8388698993 | ||
|
|
cbcf6c9ba3 | ||
|
|
bf742d81cf | ||
|
|
7458f0386c | ||
|
|
fc1df5793c | ||
|
|
3ded069042 | ||
|
|
261d2ceb06 | ||
|
|
1a17e2e572 | ||
|
|
5b8cd68abe | ||
|
|
5ce9acb0b9 | ||
|
|
953a82ca04 | ||
|
|
54341c2e80 | ||
|
|
96831ed9bb | ||
|
|
0459b1a242 | ||
|
|
8ecb726683 | ||
|
|
297e72e262 | ||
|
|
0ad2f57a92 | ||
|
|
b21d7aedf9 | ||
|
|
71d53f413f | ||
|
|
054622bd16 | ||
|
|
e905a72d73 | ||
|
|
2e879c1df8 | ||
|
|
d040aff101 | ||
|
|
5e30731cad | ||
|
|
beeb31ce41 | ||
|
|
057143214d | ||
|
|
6a1d26a60c | ||
|
|
d78f4666a0 | ||
|
|
a439fa3e1a | ||
|
|
767259be7e | ||
|
|
e9f34fb4b1 | ||
|
|
d5c07ef7b3 | ||
|
|
5e218f3f4d | ||
|
|
bcab61ab1d | ||
|
|
263c5a348e | ||
|
|
2f1a9105b9 | ||
|
|
be7d2fbe63 | ||
|
|
f7f9a131e4 | ||
|
|
5df5eb2db2 | ||
|
|
30eb0e5b5b | ||
|
|
5b860cb989 | ||
|
|
76d0623b11 | ||
|
|
db4eaf4d2d | ||
|
|
13f21206a6 | ||
|
|
27bb591331 | ||
|
|
14ee7aa84c | ||
|
|
8a35cd1743 | ||
|
|
8d33af1dff | ||
|
|
3c7ac093d3 | ||
|
|
d49d127863 | ||
|
|
b57dd5c58e | ||
|
|
90b428a8c3 | ||
|
|
096a28656e | ||
|
|
3dc87f5baa | ||
|
|
cc4bd54669 | ||
|
|
5383f41bba | ||
|
|
58eab9a018 | ||
|
|
9f36ffcbdb | ||
|
|
68c4717e21 | ||
|
|
5c488e20cc | ||
|
|
da650f834e | ||
|
|
e83534a430 | ||
|
|
98d4a2909e | ||
|
|
a514ce472a | ||
|
|
cc63802115 | ||
|
|
acec45ad7c | ||
|
|
08d6413365 | ||
|
|
70802eb7c7 | ||
|
|
6ac5b3b136 | ||
|
|
e1e76f39d0 | ||
|
|
2094ce8a9a | ||
|
|
8442db8101 | ||
|
|
79671c9faa | ||
|
|
94a1f5a8ea |
10
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
10
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
@@ -22,6 +22,16 @@ Related product discussion:
|
||||
|
||||
<!---If necessary, create a list with technical/product steps-->
|
||||
|
||||
### Reminders when modifying the API
|
||||
|
||||
- [ ] Update the openAPI file with utoipa:
|
||||
- [ ] If a new module has been introduced, create a new structure deriving [the OpenAPI proc-macro](https://docs.rs/utoipa/latest/utoipa/derive.OpenApi.html) and nest it in the main [openAPI structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L64-L78).
|
||||
- [ ] If a new route has been introduced, add the [path decorator](https://docs.rs/utoipa/latest/utoipa/attr.path.html) to it and add the route at the top of the file in its openAPI structure.
|
||||
- [ ] If a structure which is deserialized or serialized in the API has been introduced or modified, it must derive the [`schema`](https://docs.rs/utoipa/latest/utoipa/macro.schema.html) or the [`IntoParams`](https://docs.rs/utoipa/latest/utoipa/derive.IntoParams.html) proc-macro.
|
||||
If it's a **new** structure you must also add it to the big list of structures [in the main `OpenApi` structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L88).
|
||||
- [ ] Once everything is done, start Meilisearch with the swagger flag: `cargo run --features swagger`, open `http://localhost:7700/scalar` on your browser, and ensure everything works as expected.
|
||||
- For more info, refer to [this presentation](https://pitch.com/v/generating-the-openapi-file-jrn3nh).
|
||||
|
||||
### Reminders when modifying the Setting API
|
||||
|
||||
<!--- Special steps to remind when adding a new index setting -->
|
||||
|
||||
2
.github/workflows/bench-manual.yml
vendored
2
.github/workflows/bench-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
4
.github/workflows/bench-pr.yml
vendored
4
.github/workflows/bench-pr.yml
vendored
@@ -55,7 +55,7 @@ jobs:
|
||||
reaction-type: "rocket"
|
||||
repo-token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: xt0rted/pull-request-comment-branch@v2
|
||||
- uses: xt0rted/pull-request-comment-branch@v3
|
||||
id: comment-branch
|
||||
with:
|
||||
repo_token: ${{ env.GH_TOKEN }}
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
fetch-depth: 0 # fetch full history to be able to get main commit sha
|
||||
ref: ${{ steps.comment-branch.outputs.head_ref }}
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/bench-push-indexing.yml
vendored
2
.github/workflows/bench-push-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 180 # 3h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
2
.github/workflows/benchmarks-manual.yml
vendored
2
.github/workflows/benchmarks-manual.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
4
.github/workflows/benchmarks-pr.yml
vendored
4
.github/workflows/benchmarks-pr.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
reaction-type: "eyes"
|
||||
repo-token: ${{ env.GH_TOKEN }}
|
||||
|
||||
- uses: xt0rted/pull-request-comment-branch@v2
|
||||
- uses: xt0rted/pull-request-comment-branch@v3
|
||||
id: comment-branch
|
||||
with:
|
||||
repo_token: ${{ env.GH_TOKEN }}
|
||||
|
||||
@@ -16,7 +16,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
100
.github/workflows/check-valid-milestone.yml
vendored
Normal file
100
.github/workflows/check-valid-milestone.yml
vendored
Normal file
@@ -0,0 +1,100 @@
|
||||
name: PR Milestone Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
|
||||
branches:
|
||||
- "main"
|
||||
- "release-v*.*.*"
|
||||
|
||||
jobs:
|
||||
check-milestone:
|
||||
name: Check PR Milestone
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Validate PR milestone
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
// Get PR number directly from the event payload
|
||||
const prNumber = context.payload.pull_request.number;
|
||||
|
||||
// Get PR details
|
||||
const { data: prData } = await github.rest.pulls.get({
|
||||
owner: 'meilisearch',
|
||||
repo: 'meilisearch',
|
||||
pull_number: prNumber
|
||||
});
|
||||
|
||||
// Get base branch name
|
||||
const baseBranch = prData.base.ref;
|
||||
console.log(`Base branch: ${baseBranch}`);
|
||||
|
||||
// Get PR milestone
|
||||
const prMilestone = prData.milestone;
|
||||
if (!prMilestone) {
|
||||
core.setFailed('PR must have a milestone assigned');
|
||||
return;
|
||||
}
|
||||
console.log(`PR milestone: ${prMilestone.title}`);
|
||||
|
||||
// Validate milestone format: vx.y.z
|
||||
const milestoneRegex = /^v\d+\.\d+\.\d+$/;
|
||||
if (!milestoneRegex.test(prMilestone.title)) {
|
||||
core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
|
||||
return;
|
||||
}
|
||||
|
||||
// For main branch PRs, check if the milestone is the highest one
|
||||
if (baseBranch === 'main') {
|
||||
// Get all milestones
|
||||
const { data: milestones } = await github.rest.issues.listMilestones({
|
||||
owner: 'meilisearch',
|
||||
repo: 'meilisearch',
|
||||
state: 'open',
|
||||
sort: 'due_on',
|
||||
direction: 'desc'
|
||||
});
|
||||
|
||||
// Sort milestones by version number (vx.y.z)
|
||||
const sortedMilestones = milestones
|
||||
.filter(m => milestoneRegex.test(m.title))
|
||||
.sort((a, b) => {
|
||||
const versionA = a.title.substring(1).split('.').map(Number);
|
||||
const versionB = b.title.substring(1).split('.').map(Number);
|
||||
|
||||
// Compare major version
|
||||
if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
|
||||
// Compare minor version
|
||||
if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
|
||||
// Compare patch version
|
||||
return versionB[2] - versionA[2];
|
||||
});
|
||||
|
||||
if (sortedMilestones.length === 0) {
|
||||
core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
|
||||
return;
|
||||
}
|
||||
|
||||
const highestMilestone = sortedMilestones[0];
|
||||
console.log(`Highest milestone: ${highestMilestone.title}`);
|
||||
|
||||
if (prMilestone.title !== highestMilestone.title) {
|
||||
core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// For release branches, the milestone should match the branch version
|
||||
const branchVersion = baseBranch.substring(8); // remove 'release-'
|
||||
if (prMilestone.title !== branchVersion) {
|
||||
core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
console.log('PR milestone validation passed!');
|
||||
36
.github/workflows/flaky-tests.yml
vendored
36
.github/workflows/flaky-tests.yml
vendored
@@ -9,22 +9,22 @@ jobs:
|
||||
flaky:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
run: cd crates/dump; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the index-scheduler
|
||||
run: cd crates/index-scheduler; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the auth
|
||||
run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in meilisearch
|
||||
run: cd crates/meilisearch; cargo flaky -i 100 --release
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Install cargo-flaky
|
||||
run: cargo install cargo-flaky
|
||||
- name: Run cargo flaky in the dumps
|
||||
run: cd crates/dump; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the index-scheduler
|
||||
run: cd crates/index-scheduler; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in the auth
|
||||
run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
|
||||
- name: Run cargo flaky in meilisearch
|
||||
run: cd crates/meilisearch; cargo flaky -i 100 --release
|
||||
|
||||
2
.github/workflows/fuzzer-indexing.yml
vendored
2
.github/workflows/fuzzer-indexing.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
|
||||
|
||||
42
.github/workflows/publish-apt-brew-pkg.yml
vendored
42
.github/workflows/publish-apt-brew-pkg.yml
vendored
@@ -18,28 +18,28 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v3
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
asset_name: meilisearch.deb
|
||||
tag: ${{ github.ref }}
|
||||
- name: Upload debian pkg to apt repository
|
||||
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Install cargo-deb
|
||||
run: cargo install cargo-deb
|
||||
- uses: actions/checkout@v3
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
asset_name: meilisearch.deb
|
||||
tag: ${{ github.ref }}
|
||||
- name: Upload debian pkg to apt repository
|
||||
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
|
||||
|
||||
homebrew:
|
||||
name: Bump Homebrew formula
|
||||
|
||||
74
.github/workflows/publish-binaries.yml
vendored
74
.github/workflows/publish-binaries.yml
vendored
@@ -3,7 +3,7 @@ name: Publish binaries to GitHub release
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 2 * * *' # Every day at 2:00am
|
||||
- cron: "0 2 * * *" # Every day at 2:00am
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
@@ -37,26 +37,26 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
asset_name: meilisearch-linux-amd64
|
||||
tag: ${{ github.ref }}
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
asset_name: meilisearch-linux-amd64
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-windows:
|
||||
name: Publish binary for ${{ matrix.os }}
|
||||
@@ -74,19 +74,19 @@ jobs:
|
||||
artifact_name: meilisearch.exe
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-apple-silicon:
|
||||
name: Publish binary for macOS silicon
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
@@ -127,8 +127,8 @@ jobs:
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
|
||||
2
.github/workflows/sdks-tests.yml
vendored
2
.github/workflows/sdks-tests.yml
vendored
@@ -52,7 +52,7 @@ jobs:
|
||||
- name: Setup .NET Core
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: "6.0.x"
|
||||
dotnet-version: "8.0.x"
|
||||
- name: Install dependencies
|
||||
run: dotnet restore
|
||||
- name: Build
|
||||
|
||||
86
.github/workflows/test-suite.yml
vendored
86
.github/workflows/test-suite.yml
vendored
@@ -4,13 +4,9 @@ on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Everyday at 5:00am
|
||||
- cron: '0 5 * * *'
|
||||
- cron: "0 5 * * *"
|
||||
pull_request:
|
||||
push:
|
||||
# trying and staging branches are for Bors config
|
||||
branches:
|
||||
- trying
|
||||
- staging
|
||||
merge_group:
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -19,21 +15,21 @@ env:
|
||||
|
||||
jobs:
|
||||
test-linux:
|
||||
name: Tests on ubuntu-20.04
|
||||
name: Tests on ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Setup test with Rust stable
|
||||
uses: dtolnay/rust-toolchain@1.79
|
||||
uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -55,8 +51,8 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -72,8 +68,8 @@ jobs:
|
||||
name: Tests almost all features
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@@ -81,19 +77,51 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Run cargo build with almost all features
|
||||
run: |
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
- name: Run cargo test with almost all features
|
||||
run: |
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
|
||||
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
|
||||
|
||||
ollama-ubuntu:
|
||||
name: Test with Ollama
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Install Ollama
|
||||
run: |
|
||||
curl -fsSL https://ollama.com/install.sh | sudo -E sh
|
||||
- name: Start serving
|
||||
run: |
|
||||
# Run it in the background, there is no way to daemonise at the moment
|
||||
ollama serve &
|
||||
|
||||
# A short pause is required before the HTTP port is opened
|
||||
sleep 5
|
||||
|
||||
# This endpoint blocks until ready
|
||||
time curl -i http://localhost:11434
|
||||
|
||||
- name: Pull nomic-embed-text & all-minilm
|
||||
run: |
|
||||
ollama pull nomic-embed-text
|
||||
ollama pull all-minilm
|
||||
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all --features test-ollama ollama
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:20.04
|
||||
image: ubuntu:22.04
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
@@ -101,7 +129,7 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
|
||||
@@ -117,17 +145,17 @@ jobs:
|
||||
name: Run tests in debug
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-20.04 to compile with glibc 2.28
|
||||
image: ubuntu:20.04
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -139,12 +167,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -156,14 +184,14 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: nightly-2024-07-09
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.7.5
|
||||
uses: Swatinem/rust-cache@v2.7.7
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@1.79
|
||||
- uses: dtolnay/rust-toolchain@1.81
|
||||
with:
|
||||
profile: minimal
|
||||
- name: Install sd
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,6 +10,7 @@
|
||||
/dumps
|
||||
/bench
|
||||
/_xtask_benchmark.ms
|
||||
/benchmarks
|
||||
|
||||
# Snapshots
|
||||
## ... large
|
||||
|
||||
@@ -48,6 +48,27 @@ cargo xtask bench --no-dashboard -- workloads/my_workload_1.json workloads/my_wo
|
||||
|
||||
For processing the results, look at [Looking at benchmark results/Without dashboard](#without-dashboard).
|
||||
|
||||
#### Sending a workload by hand
|
||||
|
||||
Sometimes you want to visualize the metrics of a worlkoad that comes from a custom report.
|
||||
It is not quite easy to trick the benchboard in thinking that your report is legitimate but here are the commands you can run to upload your firefox report on a running benchboard.
|
||||
|
||||
```bash
|
||||
# Name this hostname whatever you want
|
||||
echo '{ "hostname": "the-best-place" }' | xh PUT 'http://127.0.0.1:9001/api/v1/machine'
|
||||
|
||||
# You'll receive an UUID from this command that we will call $invocation_uuid
|
||||
echo '{ "commit": { "sha1": "1234567", "commit_date": "2024-09-05 12:00:12.0 +00:00:00", "message": "A cool message" }, "machine_hostname": "the-best-place", "max_workloads": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/invocation'
|
||||
|
||||
# Just use UUID from the previous command
|
||||
# and you'll receive another UUID that we will call $workload_uuid
|
||||
echo '{ "invocation_uuid": "$invocation_uuid", "name": "toto", "max_runs": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/workload'
|
||||
|
||||
# And now use your $workload_uuid and the content of your firefox report
|
||||
# but don't forget to convert your firefox report from JSONLines into an object
|
||||
echo '{ "workload_uuid": "$workload_uuid", "data": $REPORT_JSON_DATA }' | xh PUT 'http://127.0.0.1:9001/api/v1/run'
|
||||
```
|
||||
|
||||
### In CI
|
||||
|
||||
We have dedicated runners to run workloads on CI. Currently, there are three ways of running the CI:
|
||||
|
||||
@@ -95,6 +95,11 @@ Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) wo
|
||||
|
||||
Run `cargo xtask --help` from the root of the repository to find out what is available.
|
||||
|
||||
#### Update the openAPI file if the API changed
|
||||
|
||||
To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
|
||||
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
|
||||
|
||||
### Logging
|
||||
|
||||
Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message.
|
||||
@@ -145,7 +150,7 @@ Some notes on GitHub PRs:
|
||||
- The PR title should be accurate and descriptive of the changes.
|
||||
- [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
|
||||
The draft PRs are recommended when you want to show that you are working on something and make your work visible.
|
||||
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
|
||||
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
|
||||
|
||||
## Release Process (for internal team only)
|
||||
|
||||
@@ -153,8 +158,7 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
|
||||
|
||||
### Automation to rebase and Merge the PRs
|
||||
|
||||
This project integrates a bot that helps us manage pull requests merging.<br>
|
||||
_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
|
||||
This project uses GitHub Merge Queues that helps us manage pull requests merging.
|
||||
|
||||
### How to Publish a new Release
|
||||
|
||||
|
||||
1600
Cargo.lock
generated
1600
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
11
Cargo.toml
11
Cargo.toml
@@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.12.0"
|
||||
version = "1.14.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
@@ -36,6 +36,12 @@ license = "MIT"
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
|
||||
# We now compile heed without the NDEBUG define for better performance.
|
||||
# However, we still enable debug assertions for a better detection of
|
||||
# disk corruption on the cloud or in OSS.
|
||||
[profile.release.package.heed]
|
||||
debug-assertions = true
|
||||
|
||||
[profile.dev.package.flate2]
|
||||
opt-level = 3
|
||||
|
||||
@@ -43,6 +49,3 @@ opt-level = 3
|
||||
opt-level = 3
|
||||
[profile.dev.package.roaring]
|
||||
opt-level = 3
|
||||
|
||||
[patch.crates-io]
|
||||
roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" }
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Compile
|
||||
FROM rust:1.79.0-alpine3.20 AS compiler
|
||||
FROM rust:1.81.0-alpine3.20 AS compiler
|
||||
|
||||
RUN apk add -q --no-cache build-base openssl-dev
|
||||
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2024 Meili SAS
|
||||
Copyright (c) 2019-2025 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
<p align="center">
|
||||
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
|
||||
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
|
||||
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
|
||||
<a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
|
||||
</p>
|
||||
|
||||
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
|
||||
@@ -58,6 +58,7 @@ See the list of all our example apps in our [demos repository](https://github.co
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **AI-ready:** works out of the box with [langchain](https://www.meilisearch.com/with/langchain) and the [model context protocol](https://github.com/meilisearch/meilisearch-mcp)
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
@@ -1403,6 +1403,104 @@
|
||||
"title": "Number of tasks by indexes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 15,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 51
|
||||
},
|
||||
"id": 29,
|
||||
"interval": "5s",
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.1.4",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"exemplar": true,
|
||||
"expr": "meilisearch_task_queue_latency_seconds{instance=\"$instance\", job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "{{value}} ",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Task queue latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"datasource": {
|
||||
|
||||
11
bors.toml
11
bors.toml
@@ -1,11 +0,0 @@
|
||||
status = [
|
||||
'Tests on ubuntu-20.04',
|
||||
'Tests on macos-13',
|
||||
'Tests on windows-2022',
|
||||
'Run Clippy',
|
||||
'Run Rustfmt',
|
||||
'Run tests in debug',
|
||||
]
|
||||
pr_status = ['Milestone Check']
|
||||
# 3 hours timeout
|
||||
timeout-sec = 10800
|
||||
@@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
anyhow = "1.0.95"
|
||||
bumpalo = "3.16.0"
|
||||
csv = "1.3.0"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.5"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.43", default-features = false }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.14.0"
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.6"
|
||||
roaring = "0.10.10"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.86"
|
||||
bytes = "1.6.0"
|
||||
anyhow = "1.0.95"
|
||||
bytes = "1.9.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.30"
|
||||
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
flate2 = "1.0.35"
|
||||
reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,9 +2,10 @@ mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use milli::{update::Settings, FilterableAttributesRule};
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
@@ -20,8 +21,10 @@ fn base_conf(builder: &mut Settings) {
|
||||
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let filterable_fields =
|
||||
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
let filterable_fields = ["_geo", "population", "elevation"]
|
||||
.iter()
|
||||
.map(|s| FilterableAttributesRule::Field(s.to_string()))
|
||||
.collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
let sortable_fields =
|
||||
|
||||
@@ -2,9 +2,10 @@ mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use milli::{update::Settings, FilterableAttributesRule};
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
@@ -21,7 +22,7 @@ fn base_conf(builder: &mut Settings) {
|
||||
|
||||
let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.map(|s| FilterableAttributesRule::Field(s.to_string()))
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
|
||||
@@ -10,8 +10,9 @@ use bumpalo::Bump;
|
||||
use criterion::BenchmarkId;
|
||||
use memmap2::Mmap;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::update::{IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
|
||||
use serde_json::Value;
|
||||
@@ -64,10 +65,11 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
}
|
||||
create_dir_all(conf.database_name).unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(100);
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
let index = Index::new(options, conf.database_name, true).unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
@@ -98,8 +100,8 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
|
||||
indexer.add_documents(&documents).unwrap();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
indexer.replace_documents(&documents).unwrap();
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, _operation_stats, primary_key) = indexer
|
||||
@@ -110,13 +112,14 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&|_progress| (),
|
||||
Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
indexer::index(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
|
||||
config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
@@ -124,7 +127,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
&document_changes,
|
||||
EmbeddingConfigs::default(),
|
||||
&|| false,
|
||||
&|_| (),
|
||||
&Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.36", features = ["parsing"] }
|
||||
time = { version = "0.3.37", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.86"
|
||||
vergen-git2 = "1.0.0"
|
||||
anyhow = "1.0.95"
|
||||
vergen-git2 = "1.0.2"
|
||||
|
||||
@@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
flate2 = "1.0.30"
|
||||
http = "1.1.0"
|
||||
anyhow = "1.0.95"
|
||||
flate2 = "1.0.35"
|
||||
http = "1.2.0"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.19.0"
|
||||
regex = "1.10.5"
|
||||
roaring = { version = "0.10.6", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tar = "0.4.41"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
once_cell = "1.20.2"
|
||||
regex = "1.11.1"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -10,8 +10,10 @@ dump
|
||||
├── instance-uid.uuid
|
||||
├── keys.jsonl
|
||||
├── metadata.json
|
||||
└── tasks
|
||||
├── update_files
|
||||
│ └── [task_id].jsonl
|
||||
├── tasks
|
||||
│ ├── update_files
|
||||
│ │ └── [task_id].jsonl
|
||||
│ └── queue.jsonl
|
||||
└── batches
|
||||
└── queue.jsonl
|
||||
```
|
||||
```
|
||||
|
||||
@@ -141,6 +141,9 @@ pub enum KindDump {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
},
|
||||
SnapshotCreation,
|
||||
UpgradeDatabase {
|
||||
from: (u32, u32, u32),
|
||||
},
|
||||
}
|
||||
|
||||
impl From<Task> for TaskDump {
|
||||
@@ -210,6 +213,9 @@ impl From<KindWithContent> for KindDump {
|
||||
KindDump::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
|
||||
KindWithContent::UpgradeDatabase { from: version } => {
|
||||
KindDump::UpgradeDatabase { from: version }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -222,14 +228,16 @@ pub(crate) mod test {
|
||||
|
||||
use big_s::S;
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, FilterableAttributesRule};
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::tasks::{Details, Status};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{Details, Kind, Status};
|
||||
use serde_json::{json, Map, Value};
|
||||
use time::macros::datetime;
|
||||
use uuid::Uuid;
|
||||
@@ -271,7 +279,10 @@ pub(crate) mod test {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
|
||||
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
|
||||
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
|
||||
filterable_attributes: Setting::Set(vec![
|
||||
FilterableAttributesRule::Field(S("race")),
|
||||
FilterableAttributesRule::Field(S("age")),
|
||||
]),
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
@@ -299,6 +310,33 @@ pub(crate) mod test {
|
||||
settings.check()
|
||||
}
|
||||
|
||||
pub fn create_test_batches() -> Vec<Batch> {
|
||||
vec![Batch {
|
||||
uid: 0,
|
||||
details: DetailsView {
|
||||
received_documents: Some(12),
|
||||
indexed_documents: Some(Some(10)),
|
||||
..DetailsView::default()
|
||||
},
|
||||
progress: None,
|
||||
stats: BatchStats {
|
||||
total_nb_tasks: 1,
|
||||
status: maplit::btreemap! { Status::Succeeded => 1 },
|
||||
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
|
||||
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
|
||||
progress_trace: Default::default(),
|
||||
write_channel_congestion: None,
|
||||
internal_database_sizes: Default::default(),
|
||||
},
|
||||
enqueued_at: Some(BatchEnqueuedAt {
|
||||
earliest: datetime!(2022-11-11 0:00 UTC),
|
||||
oldest: datetime!(2022-11-11 0:00 UTC),
|
||||
}),
|
||||
started_at: datetime!(2022-11-20 0:00 UTC),
|
||||
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
|
||||
}]
|
||||
}
|
||||
|
||||
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
|
||||
vec![
|
||||
(
|
||||
@@ -421,6 +459,15 @@ pub(crate) mod test {
|
||||
index.flush().unwrap();
|
||||
index.settings(&settings).unwrap();
|
||||
|
||||
// ========== pushing the batch queue
|
||||
let batches = create_test_batches();
|
||||
|
||||
let mut batch_queue = dump.create_batches_queue().unwrap();
|
||||
for batch in &batches {
|
||||
batch_queue.push_batch(batch).unwrap();
|
||||
}
|
||||
batch_queue.flush().unwrap();
|
||||
|
||||
// ========== pushing the task queue
|
||||
let tasks = create_test_tasks();
|
||||
|
||||
@@ -449,6 +496,10 @@ pub(crate) mod test {
|
||||
|
||||
dump.create_experimental_features(features).unwrap();
|
||||
|
||||
// ========== network
|
||||
let network = create_test_network();
|
||||
dump.create_network(network).unwrap();
|
||||
|
||||
// create the dump
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
dump.persist_to(&mut file).unwrap();
|
||||
@@ -458,7 +509,14 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
fn create_test_features() -> RuntimeTogglableFeatures {
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
RuntimeTogglableFeatures::default()
|
||||
}
|
||||
|
||||
fn create_test_network() -> Network {
|
||||
Network {
|
||||
local: Some("myself".to_string()),
|
||||
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -509,5 +567,9 @@ pub(crate) mod test {
|
||||
// ==== checking the features
|
||||
let expected = create_test_features();
|
||||
assert_eq!(dump.features().unwrap().unwrap(), expected);
|
||||
|
||||
// ==== checking the network
|
||||
let expected = create_test_network();
|
||||
assert_eq!(&expected, dump.network().unwrap().unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,6 +196,10 @@ impl CompatV5ToV6 {
|
||||
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub enum CompatIndexV5ToV6 {
|
||||
@@ -318,7 +322,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
v6::Settings {
|
||||
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
|
||||
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
|
||||
filterable_attributes: settings.filterable_attributes.into(),
|
||||
filterable_attributes: match settings.filterable_attributes {
|
||||
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
|
||||
filterable_attributes
|
||||
.into_iter()
|
||||
.map(v6::FilterableAttributesRule::Field)
|
||||
.collect(),
|
||||
),
|
||||
v5::settings::Setting::Reset => v6::Setting::Reset,
|
||||
v5::settings::Setting::NotSet => v6::Setting::NotSet,
|
||||
},
|
||||
sortable_attributes: settings.sortable_attributes.into(),
|
||||
ranking_rules: {
|
||||
match settings.ranking_rules {
|
||||
|
||||
@@ -23,6 +23,7 @@ mod v6;
|
||||
pub type Document = serde_json::Map<String, serde_json::Value>;
|
||||
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum DumpReader {
|
||||
Current(V6Reader),
|
||||
Compat(CompatV5ToV6),
|
||||
@@ -101,6 +102,13 @@ impl DumpReader {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.batches()),
|
||||
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.keys()),
|
||||
@@ -114,6 +122,13 @@ impl DumpReader {
|
||||
DumpReader::Compat(compat) => compat.features(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Result<Option<&v6::Network>> {
|
||||
match self {
|
||||
DumpReader::Current(current) => Ok(current.network()),
|
||||
DumpReader::Compat(compat) => compat.network(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<V6Reader> for DumpReader {
|
||||
@@ -219,6 +234,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -327,10 +346,8 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
assert_eq!(dump.network().unwrap(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -342,6 +359,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -373,10 +394,28 @@ pub(crate) mod test {
|
||||
|
||||
assert_eq!(test.documents().unwrap().count(), 1);
|
||||
|
||||
assert_eq!(
|
||||
dump.features().unwrap().unwrap(),
|
||||
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
|
||||
);
|
||||
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_dump_v6_network() {
|
||||
let dump = File::open("tests/assets/v6-with-network.dump").unwrap();
|
||||
let dump = DumpReader::open(dump).unwrap();
|
||||
|
||||
// top level infos
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
|
||||
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
|
||||
|
||||
// network
|
||||
|
||||
let network = dump.network().unwrap().unwrap();
|
||||
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io");
|
||||
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -388,6 +427,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -468,6 +511,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
|
||||
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -545,6 +592,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -638,6 +689,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -731,6 +786,10 @@ pub(crate) mod test {
|
||||
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
@@ -807,6 +866,10 @@ pub(crate) mod test {
|
||||
assert_eq!(dump.date(), None);
|
||||
assert_eq!(dump.instance_uid().unwrap(), None);
|
||||
|
||||
// batches didn't exists at the time
|
||||
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
|
||||
|
||||
// tasks
|
||||
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
source: crates/dump/src/reader/mod.rs
|
||||
expression: vector_index.settings().unwrap()
|
||||
---
|
||||
{
|
||||
@@ -49,6 +49,7 @@ expression: vector_index.settings().unwrap()
|
||||
"source": "huggingFace",
|
||||
"model": "BAAI/bge-base-en-v1.5",
|
||||
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
|
||||
"pooling": "forceMean",
|
||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
|
||||
}
|
||||
},
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind};
|
||||
use std::path::Path;
|
||||
|
||||
pub use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::vector::hf::OverridePooling;
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
@@ -18,8 +19,10 @@ pub type Checked = meilisearch_types::settings::Checked;
|
||||
pub type Unchecked = meilisearch_types::settings::Unchecked;
|
||||
|
||||
pub type Task = crate::TaskDump;
|
||||
pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::features::Network;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
// everything related to the tasks
|
||||
@@ -43,13 +46,17 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
|
||||
pub type Code = meilisearch_types::error::Code;
|
||||
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
|
||||
|
||||
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
|
||||
|
||||
pub struct V6Reader {
|
||||
dump: TempDir,
|
||||
instance_uid: Option<Uuid>,
|
||||
metadata: Metadata,
|
||||
tasks: BufReader<File>,
|
||||
batches: Option<BufReader<File>>,
|
||||
keys: BufReader<File>,
|
||||
features: Option<RuntimeTogglableFeatures>,
|
||||
network: Option<Network>,
|
||||
}
|
||||
|
||||
impl V6Reader {
|
||||
@@ -77,13 +84,38 @@ impl V6Reader {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
|
||||
Ok(file) => Some(BufReader::new(file)),
|
||||
// The batch file was only introduced during the v1.13, anything prior to that won't have batches
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => None,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let network_file = match fs::read(dump.path().join("network.json")) {
|
||||
Ok(network_file) => Some(network_file),
|
||||
Err(error) => match error.kind() {
|
||||
// Allows the file to be missing, this will only result in all experimental features disabled.
|
||||
ErrorKind::NotFound => {
|
||||
debug!("`network.json` not found in dump");
|
||||
None
|
||||
}
|
||||
_ => return Err(error.into()),
|
||||
},
|
||||
};
|
||||
let network = if let Some(network_file) = network_file {
|
||||
Some(serde_json::from_reader(&*network_file)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(V6Reader {
|
||||
metadata: serde_json::from_reader(&*meta_file)?,
|
||||
instance_uid,
|
||||
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
|
||||
batches,
|
||||
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
|
||||
features,
|
||||
network,
|
||||
dump,
|
||||
})
|
||||
}
|
||||
@@ -124,7 +156,7 @@ impl V6Reader {
|
||||
&mut self,
|
||||
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
|
||||
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
|
||||
let task: Task = serde_json::from_str(&line?).unwrap();
|
||||
let task: Task = serde_json::from_str(&line?)?;
|
||||
|
||||
let update_file_path = self
|
||||
.dump
|
||||
@@ -136,8 +168,7 @@ impl V6Reader {
|
||||
if update_file_path.exists() {
|
||||
Ok((
|
||||
task,
|
||||
Some(Box::new(UpdateFile::new(&update_file_path).unwrap())
|
||||
as Box<super::UpdateFile>),
|
||||
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
|
||||
))
|
||||
} else {
|
||||
Ok((task, None))
|
||||
@@ -145,6 +176,16 @@ impl V6Reader {
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
|
||||
match self.batches.as_mut() {
|
||||
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
|
||||
let batch = serde_json::from_str(&line?)?;
|
||||
Ok(batch)
|
||||
})),
|
||||
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
|
||||
Box::new(
|
||||
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
|
||||
@@ -154,6 +195,10 @@ impl V6Reader {
|
||||
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
|
||||
self.features
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Option<&Network> {
|
||||
self.network.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
@@ -210,7 +255,29 @@ impl V6IndexReader {
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
patch_embedders(&mut settings);
|
||||
Ok(settings.check())
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_embedders(settings: &mut Settings<Unchecked>) {
|
||||
if let Setting::Set(embedders) = &mut settings.embedders {
|
||||
for settings in embedders.values_mut() {
|
||||
let Setting::Set(settings) = &mut settings.inner else {
|
||||
continue;
|
||||
};
|
||||
if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
settings.pooling = match settings.pooling {
|
||||
Setting::Set(pooling) => Setting::Set(pooling),
|
||||
// if the pooling for a hugging face embedder is not set, force it to `forceMean`
|
||||
// for backward compatibility with v1.13
|
||||
// dumps created in v1.14 and up will have the setting set for hugging face embedders
|
||||
Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,8 @@ use std::path::PathBuf;
|
||||
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use serde_json::{Map, Value};
|
||||
@@ -54,6 +55,10 @@ impl DumpWriter {
|
||||
TaskWriter::new(self.dir.path().join("tasks"))
|
||||
}
|
||||
|
||||
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
|
||||
BatchWriter::new(self.dir.path().join("batches"))
|
||||
}
|
||||
|
||||
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
|
||||
Ok(std::fs::write(
|
||||
self.dir.path().join("experimental-features.json"),
|
||||
@@ -61,6 +66,10 @@ impl DumpWriter {
|
||||
)?)
|
||||
}
|
||||
|
||||
pub fn create_network(&self, network: Network) -> Result<()> {
|
||||
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
|
||||
}
|
||||
|
||||
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
|
||||
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
|
||||
let mut tar_encoder = tar::Builder::new(gz_encoder);
|
||||
@@ -84,7 +93,7 @@ impl KeyWriter {
|
||||
}
|
||||
|
||||
pub fn push_key(&mut self, key: &Key) -> Result<()> {
|
||||
self.keys.write_all(&serde_json::to_vec(key)?)?;
|
||||
serde_json::to_writer(&mut self.keys, &key)?;
|
||||
self.keys.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -114,7 +123,7 @@ impl TaskWriter {
|
||||
/// Pushes tasks in the dump.
|
||||
/// If the tasks has an associated `update_file` it'll use the `task_id` as its name.
|
||||
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
|
||||
self.queue.write_all(&serde_json::to_vec(task)?)?;
|
||||
serde_json::to_writer(&mut self.queue, &task)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
|
||||
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
|
||||
@@ -126,6 +135,30 @@ impl TaskWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BatchWriter {
|
||||
queue: BufWriter<File>,
|
||||
}
|
||||
|
||||
impl BatchWriter {
|
||||
pub(crate) fn new(path: PathBuf) -> Result<Self> {
|
||||
std::fs::create_dir(&path)?;
|
||||
let queue = File::create(path.join("queue.jsonl"))?;
|
||||
Ok(BatchWriter { queue: BufWriter::new(queue) })
|
||||
}
|
||||
|
||||
/// Pushes batches in the dump.
|
||||
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
|
||||
serde_json::to_writer(&mut self.queue, &batch)?;
|
||||
self.queue.write_all(b"\n")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn flush(mut self) -> Result<()> {
|
||||
self.queue.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
writer: Option<BufWriter<File>>,
|
||||
@@ -137,8 +170,8 @@ impl UpdateFile {
|
||||
}
|
||||
|
||||
pub fn push_document(&mut self, document: &Document) -> Result<()> {
|
||||
if let Some(writer) = self.writer.as_mut() {
|
||||
writer.write_all(&serde_json::to_vec(document)?)?;
|
||||
if let Some(mut writer) = self.writer.as_mut() {
|
||||
serde_json::to_writer(&mut writer, &document)?;
|
||||
writer.write_all(b"\n")?;
|
||||
} else {
|
||||
let file = File::create(&self.path).unwrap();
|
||||
@@ -205,8 +238,8 @@ pub(crate) mod test {
|
||||
use super::*;
|
||||
use crate::reader::Document;
|
||||
use crate::test::{
|
||||
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
|
||||
create_test_settings, create_test_tasks,
|
||||
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
|
||||
create_test_instance_uid, create_test_settings, create_test_tasks,
|
||||
};
|
||||
|
||||
fn create_directory_hierarchy(dir: &Path) -> String {
|
||||
@@ -281,8 +314,10 @@ pub(crate) mod test {
|
||||
let dump_path = dump.path();
|
||||
|
||||
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
|
||||
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
|
||||
.
|
||||
├---- batches/
|
||||
│ └---- queue.jsonl
|
||||
├---- indexes/
|
||||
│ └---- doggos/
|
||||
│ │ ├---- documents.jsonl
|
||||
@@ -295,8 +330,9 @@ pub(crate) mod test {
|
||||
├---- experimental-features.json
|
||||
├---- instance_uid.uuid
|
||||
├---- keys.jsonl
|
||||
└---- metadata.json
|
||||
"###);
|
||||
├---- metadata.json
|
||||
└---- network.json
|
||||
");
|
||||
|
||||
// ==== checking the top level infos
|
||||
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
|
||||
@@ -349,6 +385,16 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
// ==== checking the batch queue
|
||||
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
|
||||
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
|
||||
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
|
||||
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
|
||||
batch.details.settings = None;
|
||||
}
|
||||
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
|
||||
}
|
||||
|
||||
// ==== checking the keys
|
||||
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
|
||||
for (key, expected) in keys.lines().zip(create_test_api_keys()) {
|
||||
|
||||
BIN
crates/dump/tests/assets/v6-with-network.dump
Normal file
BIN
crates/dump/tests/assets/v6-with-network.dump
Normal file
Binary file not shown.
@@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
@@ -136,6 +136,14 @@ pub struct File {
|
||||
}
|
||||
|
||||
impl File {
|
||||
pub fn from_parts(path: PathBuf, file: Option<NamedTempFile>) -> Self {
|
||||
Self { path, file }
|
||||
}
|
||||
|
||||
pub fn into_parts(self) -> (PathBuf, Option<NamedTempFile>) {
|
||||
(self.path, self.file)
|
||||
}
|
||||
|
||||
pub fn dry_file() -> Result<Self> {
|
||||
Ok(Self { path: PathBuf::new(), file: None })
|
||||
}
|
||||
|
||||
@@ -17,4 +17,5 @@ nom_locate = "4.2.0"
|
||||
unescaper = "0.1.5"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.39.0"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = "=1.39.0"
|
||||
|
||||
@@ -30,6 +30,25 @@ pub enum Condition<'a> {
|
||||
StartsWith { keyword: Token<'a>, word: Token<'a> },
|
||||
}
|
||||
|
||||
impl Condition<'_> {
|
||||
pub fn operator(&self) -> &str {
|
||||
match self {
|
||||
Condition::GreaterThan(_) => ">",
|
||||
Condition::GreaterThanOrEqual(_) => ">=",
|
||||
Condition::Equal(_) => "=",
|
||||
Condition::NotEqual(_) => "!=",
|
||||
Condition::Null => "IS NULL",
|
||||
Condition::Empty => "IS EMPTY",
|
||||
Condition::Exists => "EXISTS",
|
||||
Condition::LowerThan(_) => "<",
|
||||
Condition::LowerThanOrEqual(_) => "<=",
|
||||
Condition::Between { .. } => "TO",
|
||||
Condition::Contains { .. } => "CONTAINS",
|
||||
Condition::StartsWith { .. } => "STARTS WITH",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// condition = value ("==" | ">" ...) value
|
||||
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
|
||||
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
||||
|
||||
@@ -179,6 +179,26 @@ impl<'a> FilterCondition<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
|
||||
if depth == 0 {
|
||||
return Box::new(std::iter::empty());
|
||||
}
|
||||
match self {
|
||||
FilterCondition::Condition { fid, .. } | FilterCondition::In { fid, .. } => {
|
||||
Box::new(std::iter::once(fid))
|
||||
}
|
||||
FilterCondition::Not(filter) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
filter.fids(depth)
|
||||
}
|
||||
FilterCondition::And(subfilters) | FilterCondition::Or(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
Box::new(subfilters.iter().flat_map(move |f| f.fids(depth)))
|
||||
}
|
||||
_ => Box::new(std::iter::empty()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first token found at the specified depth, `None` if no token at this depth.
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
|
||||
match self {
|
||||
@@ -978,6 +998,43 @@ pub mod tests {
|
||||
assert!(filter.token_at_depth(3).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fids() {
|
||||
let filter = Fc::parse("field = value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field IN [1, 2, 3]").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field != value").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 AND field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let filter = Fc::parse("field1 = value1 OR field2 = value2").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
|
||||
assert_eq!(fids.len(), 2);
|
||||
assert!(fids[0].value() == "field1");
|
||||
assert!(fids[1].value() == "field2");
|
||||
|
||||
let depth = 2;
|
||||
let filter =
|
||||
Fc::parse("field1 = value1 AND (field2 = value2 OR field3 = value3)").unwrap().unwrap();
|
||||
let fids: Vec<_> = filter.fids(depth).collect();
|
||||
assert_eq!(fids.len(), 1);
|
||||
assert_eq!(fids[0].value(), "field1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_from_str() {
|
||||
let s = "test string that should not be parsed";
|
||||
|
||||
@@ -11,12 +11,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3.2", features = ["derive"] }
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.16.0"
|
||||
clap = { version = "4.5.9", features = ["derive"] }
|
||||
clap = { version = "4.5.24", features = ["derive"] }
|
||||
either = "1.13.0"
|
||||
fastrand = "2.1.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tempfile = "3.10.1"
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tempfile = "3.15.0"
|
||||
|
||||
@@ -10,8 +10,9 @@ use either::Either;
|
||||
use fuzzers::Operation;
|
||||
use milli::documents::mmap_from_objects;
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig};
|
||||
use milli::update::IndexerConfig;
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::Index;
|
||||
use serde_json::Value;
|
||||
@@ -56,13 +57,14 @@ fn main() {
|
||||
let opt = opt.clone();
|
||||
|
||||
let handle = std::thread::spawn(move || {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(1024 * 1024 * 1024 * 1024);
|
||||
let tempdir = match opt.path {
|
||||
Some(path) => TempDir::new_in(path).unwrap(),
|
||||
None => TempDir::new().unwrap(),
|
||||
};
|
||||
let index = Index::new(options, tempdir.path()).unwrap();
|
||||
let index = Index::new(options, tempdir.path(), true).unwrap();
|
||||
let indexer_config = IndexerConfig::default();
|
||||
|
||||
std::thread::scope(|s| {
|
||||
@@ -88,9 +90,7 @@ fn main() {
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let mut indexer = indexer::DocumentOperation::new(
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
for op in batch.0 {
|
||||
@@ -114,7 +114,7 @@ fn main() {
|
||||
for op in &operations {
|
||||
match op {
|
||||
Either::Left(documents) => {
|
||||
indexer.add_documents(documents).unwrap()
|
||||
indexer.replace_documents(documents).unwrap()
|
||||
}
|
||||
Either::Right(ids) => indexer.delete_documents(ids),
|
||||
}
|
||||
@@ -128,13 +128,14 @@ fn main() {
|
||||
None,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
&|_progress| (),
|
||||
Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
indexer::index(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
@@ -142,7 +143,7 @@ fn main() {
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| false,
|
||||
&|_| (),
|
||||
&Progress::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -11,41 +11,44 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
anyhow = "1.0.95"
|
||||
bincode = "1.3.3"
|
||||
csv = "1.3.0"
|
||||
derive_builder = "0.20.0"
|
||||
byte-unit = "5.1.6"
|
||||
bumpalo = "3.16.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.3.1"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.30"
|
||||
flate2 = "1.0.35"
|
||||
indexmap = "2.7.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.5"
|
||||
page_size = "0.6.0"
|
||||
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.6", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.138", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
memmap2 = "0.9.4"
|
||||
time = { version = "0.3.36", features = [
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tracing = "0.1.40"
|
||||
ureq = "2.10.0"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
bumpalo = "3.16.0"
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.5.0"
|
||||
big_s = "1.0.2"
|
||||
crossbeam = "0.8.4"
|
||||
insta = { version = "1.39.0", features = ["json", "redactions"] }
|
||||
crossbeam-channel = "0.5.14"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
304
crates/index-scheduler/src/dump.rs
Normal file
304
crates/index-scheduler/src/dump.rs
Normal file
@@ -0,0 +1,304 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
use dump::{KindDump, TaskDump, UpdateFile};
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::RwTxn;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{utils, Error, IndexScheduler, Result};
|
||||
|
||||
pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a>,
|
||||
|
||||
batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
kinds: HashMap<Kind, RoaringBitmap>,
|
||||
|
||||
batch_indexes: HashMap<String, RoaringBitmap>,
|
||||
batch_statuses: HashMap<Status, RoaringBitmap>,
|
||||
batch_kinds: HashMap<Kind, RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'a> Dump<'a> {
|
||||
pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
|
||||
// While loading a dump no one should be able to access the scheduler thus I can block everything.
|
||||
let wtxn = index_scheduler.env.write_txn()?;
|
||||
|
||||
Ok(Dump {
|
||||
index_scheduler,
|
||||
wtxn,
|
||||
batch_to_task_mapping: HashMap::new(),
|
||||
indexes: HashMap::new(),
|
||||
statuses: HashMap::new(),
|
||||
kinds: HashMap::new(),
|
||||
batch_indexes: HashMap::new(),
|
||||
batch_statuses: HashMap::new(),
|
||||
batch_kinds: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Register a new batch coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
|
||||
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
|
||||
if let Some(enqueued_at) = batch.enqueued_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.earliest,
|
||||
batch.uid,
|
||||
)?;
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.enqueued_at,
|
||||
enqueued_at.oldest,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch.uid,
|
||||
)?;
|
||||
}
|
||||
|
||||
for index in batch.stats.index_uids.keys() {
|
||||
match self.batch_indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(batch.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(batch.uid);
|
||||
self.batch_indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for status in batch.stats.status.keys() {
|
||||
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
|
||||
}
|
||||
for kind in batch.stats.types.keys() {
|
||||
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(
|
||||
&mut self,
|
||||
task: TaskDump,
|
||||
content_file: Option<Box<UpdateFile>>,
|
||||
) -> Result<Task> {
|
||||
let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
|
||||
|
||||
let content_uuid = match content_file {
|
||||
Some(content_file) if task.status == Status::Enqueued => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
let mut writer = io::BufWriter::new(file);
|
||||
for doc in content_file {
|
||||
let doc = doc?;
|
||||
serde_json::to_writer(&mut writer, &doc).map_err(|e| {
|
||||
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
|
||||
})?;
|
||||
}
|
||||
let file = writer.into_inner().map_err(|e| e.into_error())?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
|
||||
// in case we try to open it later.
|
||||
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
|
||||
None if task.status == Status::Enqueued && task_has_no_docs => {
|
||||
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let task = Task {
|
||||
uid: task.uid,
|
||||
batch_uid: task.batch_uid,
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
error: task.error,
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
method,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
} => KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
method,
|
||||
content_file: content_uuid.ok_or(Error::CorruptedDump)?,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
},
|
||||
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
|
||||
documents_ids,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::DocumentDeletionByFilter { filter } => {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
filter_expr: filter,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentEdition { filter, context, function } => {
|
||||
KindWithContent::DocumentEdition {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
filter_expr: filter,
|
||||
context,
|
||||
function,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::Settings { settings, is_deletion, allow_index_creation } => {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
new_settings: settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
KindDump::IndexDeletion => KindWithContent::IndexDeletion {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
|
||||
KindDump::TaskCancelation { query, tasks } => {
|
||||
KindWithContent::TaskCancelation { query, tasks }
|
||||
}
|
||||
KindDump::TasksDeletion { query, tasks } => {
|
||||
KindWithContent::TaskDeletion { query, tasks }
|
||||
}
|
||||
KindDump::DumpCreation { keys, instance_uid } => {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
|
||||
},
|
||||
};
|
||||
|
||||
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
|
||||
if let Some(batch_id) = task.batch_uid {
|
||||
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
|
||||
}
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(task.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(task.uid);
|
||||
self.indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
|
||||
// we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change
|
||||
if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.queue.tasks.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.statuses.entry(task.status).or_default().insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Commit all the changes and exit the importing dump state
|
||||
pub fn finish(mut self) -> Result<()> {
|
||||
for (batch_id, task_ids) in self.batch_to_task_mapping {
|
||||
self.index_scheduler.queue.batch_to_tasks_mapping.put(
|
||||
&mut self.wtxn,
|
||||
&batch_id,
|
||||
&task_ids,
|
||||
)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.indexes {
|
||||
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.statuses {
|
||||
self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.kinds {
|
||||
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
for (index, bitmap) in self.batch_indexes {
|
||||
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.batch_statuses {
|
||||
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.batch_kinds {
|
||||
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
self.wtxn.commit()?;
|
||||
self.index_scheduler.scheduler.wake_up.signal();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -104,11 +104,13 @@ pub enum Error {
|
||||
)]
|
||||
InvalidTaskCanceledBy { canceled_by: String },
|
||||
#[error(
|
||||
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
|
||||
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
|
||||
)]
|
||||
InvalidIndexUid { index_uid: String },
|
||||
#[error("Task `{0}` not found.")]
|
||||
TaskNotFound(TaskId),
|
||||
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
|
||||
TaskFileNotFound(TaskId),
|
||||
#[error("Batch `{0}` not found.")]
|
||||
BatchNotFound(BatchId),
|
||||
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
|
||||
@@ -122,10 +124,13 @@ pub enum Error {
|
||||
Dump(#[from] dump::Error),
|
||||
#[error(transparent)]
|
||||
Heed(#[from] heed::Error),
|
||||
#[error(transparent)]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("An unexpected crash occurred when processing the task.")]
|
||||
ProcessBatchPanicked,
|
||||
#[error("{}", match .index_uid {
|
||||
Some(uid) if !uid.is_empty() => format!("Index `{}`: {error}", uid),
|
||||
_ => format!("{error}")
|
||||
})]
|
||||
Milli { error: milli::Error, index_uid: Option<String> },
|
||||
#[error("An unexpected crash occurred when processing the task: {0}")]
|
||||
ProcessBatchPanicked(String),
|
||||
#[error(transparent)]
|
||||
FileStore(#[from] file_store::Error),
|
||||
#[error(transparent)]
|
||||
@@ -144,7 +149,9 @@ pub enum Error {
|
||||
#[error("Corrupted task queue.")]
|
||||
CorruptedTaskQueue,
|
||||
#[error(transparent)]
|
||||
TaskDatabaseUpdate(Box<Self>),
|
||||
DatabaseUpgrade(Box<Self>),
|
||||
#[error(transparent)]
|
||||
UnrecoverableError(Box<Self>),
|
||||
#[error(transparent)]
|
||||
HeedTransaction(heed::Error),
|
||||
|
||||
@@ -184,14 +191,15 @@ impl Error {
|
||||
| Error::InvalidTaskCanceledBy { .. }
|
||||
| Error::InvalidIndexUid { .. }
|
||||
| Error::TaskNotFound(_)
|
||||
| Error::TaskFileNotFound(_)
|
||||
| Error::BatchNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::AbortedTask
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli(_)
|
||||
| Error::ProcessBatchPanicked
|
||||
| Error::Milli { .. }
|
||||
| Error::ProcessBatchPanicked(_)
|
||||
| Error::FileStore(_)
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
@@ -199,7 +207,8 @@ impl Error {
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
| Error::TaskDatabaseUpdate(_)
|
||||
| Error::DatabaseUpgrade(_)
|
||||
| Error::UnrecoverableError(_)
|
||||
| Error::HeedTransaction(_) => false,
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => false,
|
||||
@@ -209,6 +218,20 @@ impl Error {
|
||||
pub fn with_custom_error_code(self, code: Code) -> Self {
|
||||
Self::WithCustomErrorCode(code, Box::new(self))
|
||||
}
|
||||
|
||||
pub fn from_milli(err: milli::Error, index_uid: Option<String>) -> Self {
|
||||
match err {
|
||||
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||
Self::Milli { error: err, index_uid }
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
}
|
||||
milli::Error::UserError(milli::UserError::InvalidFilterExpression { .. }) => {
|
||||
Self::Milli { error: err, index_uid }
|
||||
.with_custom_error_code(Code::InvalidDocumentFilter)
|
||||
}
|
||||
_ => Self::Milli { error: err, index_uid },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
@@ -230,14 +253,15 @@ impl ErrorCode for Error {
|
||||
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
|
||||
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
|
||||
Error::TaskNotFound(_) => Code::TaskNotFound,
|
||||
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
|
||||
Error::BatchNotFound(_) => Code::BatchNotFound,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
// TODO: not sure of the Code to use
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli(e) => e.error_code(),
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
Error::Milli { error, .. } => error.error_code(),
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
Error::FileStore(e) => e.error_code(),
|
||||
@@ -249,7 +273,8 @@ impl ErrorCode for Error {
|
||||
Error::Anyhow(_) => Code::Internal,
|
||||
Error::CorruptedTaskQueue => Code::Internal,
|
||||
Error::CorruptedDump => Code::Internal,
|
||||
Error::TaskDatabaseUpdate(_) => Code::Internal,
|
||||
Error::DatabaseUpgrade(_) => Code::Internal,
|
||||
Error::UnrecoverableError(_) => Code::Internal,
|
||||
Error::CreateBatch(_) => Code::Internal,
|
||||
|
||||
// This one should never be seen by the end user
|
||||
|
||||
@@ -1,18 +1,29 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
|
||||
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
|
||||
const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
/// The number of database used by features
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `FeatureData`.
|
||||
mod db_name {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
}
|
||||
|
||||
mod db_keys {
|
||||
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
|
||||
pub const NETWORK: &str = "network";
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct FeatureData {
|
||||
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
|
||||
network: Arc<RwLock<Network>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@@ -56,19 +67,6 @@ impl RoFeatures {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.vector_store {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "vector store",
|
||||
issue_link: "https://github.com/meilisearch/product/discussions/677",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.edit_documents_by_function {
|
||||
Ok(())
|
||||
@@ -94,17 +92,62 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_network(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.network {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "network",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/805",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_get_task_documents_route(&self) -> Result<()> {
|
||||
if self.runtime.get_task_documents_route {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action: "Getting the documents of an enqueued task",
|
||||
feature: "get task documents route",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.composite_embedders {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "composite embedders",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
|
||||
wtxn.commit()?;
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
instance_features: InstanceTogglableFeatures,
|
||||
) -> Result<Self> {
|
||||
let runtime_features_db =
|
||||
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
|
||||
|
||||
let txn = env.read_txn()?;
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: metrics || persisted_features.metrics,
|
||||
@@ -113,7 +156,14 @@ impl FeatureData {
|
||||
..persisted_features
|
||||
}));
|
||||
|
||||
Ok(Self { persisted: runtime_features_db, runtime })
|
||||
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
|
||||
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
|
||||
|
||||
Ok(Self {
|
||||
persisted: runtime_features_db,
|
||||
runtime,
|
||||
network: Arc::new(RwLock::new(network)),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(
|
||||
@@ -121,7 +171,7 @@ impl FeatureData {
|
||||
mut wtxn: RwTxn,
|
||||
features: RuntimeTogglableFeatures,
|
||||
) -> Result<()> {
|
||||
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
|
||||
self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// safe to unwrap, the lock will only fail if:
|
||||
@@ -142,4 +192,21 @@ impl FeatureData {
|
||||
pub fn features(&self) -> RoFeatures {
|
||||
RoFeatures::new(self)
|
||||
}
|
||||
|
||||
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
|
||||
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
|
||||
&mut wtxn,
|
||||
db_keys::NETWORK,
|
||||
&new_network,
|
||||
)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
let mut network = self.network.write().unwrap();
|
||||
*network = new_network;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn network(&self) -> Network {
|
||||
Network::clone(&*self.network.read().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::env::VarError;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
|
||||
use meilisearch_types::milli::Index;
|
||||
use meilisearch_types::milli::{Index, Result};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing};
|
||||
use crate::clamp_to_page_size;
|
||||
use crate::lru::{InsertionOutcome, LruMap};
|
||||
use crate::{clamp_to_page_size, Result};
|
||||
|
||||
/// Keep an internally consistent view of the open indexes in memory.
|
||||
///
|
||||
/// This view is made of an LRU cache that will evict the least frequently used indexes when new indexes are opened.
|
||||
@@ -103,7 +104,7 @@ impl ReopenableIndex {
|
||||
return Ok(());
|
||||
}
|
||||
map.unavailable.remove(&self.uuid);
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
|
||||
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size, false)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -172,11 +173,12 @@ impl IndexMap {
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
if !matches!(self.get_unavailable(uuid), Missing) {
|
||||
panic!("Attempt to open an index that was unavailable");
|
||||
}
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
|
||||
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size, creation)?;
|
||||
match self.available.insert(*uuid, index.clone()) {
|
||||
InsertionOutcome::InsertedNew => (),
|
||||
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
|
||||
@@ -300,18 +302,31 @@ fn create_or_open_index(
|
||||
date: Option<(OffsetDateTime, OffsetDateTime)>,
|
||||
enable_mdb_writemap: bool,
|
||||
map_size: usize,
|
||||
creation: bool,
|
||||
) -> Result<Index> {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(clamp_to_page_size(map_size));
|
||||
options.max_readers(1024);
|
||||
|
||||
// You can find more details about this experimental
|
||||
// environment variable on the following GitHub discussion:
|
||||
// <https://github.com/orgs/meilisearch/discussions/806>
|
||||
let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
|
||||
Ok(value) => u32::from_str(&value).unwrap(),
|
||||
Err(VarError::NotPresent) => 1024,
|
||||
Err(VarError::NotUnicode(value)) => panic!(
|
||||
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
|
||||
),
|
||||
};
|
||||
options.max_readers(max_readers);
|
||||
if enable_mdb_writemap {
|
||||
unsafe { options.flags(EnvFlags::WRITE_MAP) };
|
||||
}
|
||||
|
||||
if let Some((created, updated)) = date {
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
|
||||
Ok(Index::new_with_creation_dates(options, path, created, updated, creation)?)
|
||||
} else {
|
||||
Ok(Index::new(options, path)?)
|
||||
Ok(Index::new(options, path, creation)?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -319,17 +334,17 @@ fn create_or_open_index(
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use meilisearch_types::heed::Env;
|
||||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::Index;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::super::IndexMapper;
|
||||
use crate::tests::IndexSchedulerHandle;
|
||||
use crate::test_utils::IndexSchedulerHandle;
|
||||
use crate::utils::clamp_to_page_size;
|
||||
use crate::IndexScheduler;
|
||||
|
||||
impl IndexMapper {
|
||||
fn test() -> (Self, Env, IndexSchedulerHandle) {
|
||||
fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
|
||||
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
|
||||
(index_scheduler.index_mapper, index_scheduler.env, handle)
|
||||
}
|
||||
|
||||
@@ -4,7 +4,9 @@ use std::time::Duration;
|
||||
use std::{fs, thread};
|
||||
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::database_stats::DatabaseStats;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{FieldDistribution, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -15,12 +17,17 @@ use uuid::Uuid;
|
||||
use self::index_map::IndexMap;
|
||||
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
||||
use crate::uuid_codec::UuidCodec;
|
||||
use crate::{Error, Result};
|
||||
use crate::{Error, IndexBudget, IndexSchedulerOptions, Result};
|
||||
|
||||
mod index_map;
|
||||
|
||||
const INDEX_MAPPING: &str = "index-mapping";
|
||||
const INDEX_STATS: &str = "index-stats";
|
||||
/// The number of database used by index mapper
|
||||
const NUMBER_OF_DATABASES: u32 = 2;
|
||||
/// Database const names for the `IndexMapper`.
|
||||
mod db_name {
|
||||
pub const INDEX_MAPPING: &str = "index-mapping";
|
||||
pub const INDEX_STATS: &str = "index-stats";
|
||||
}
|
||||
|
||||
/// Structure managing meilisearch's indexes.
|
||||
///
|
||||
@@ -92,19 +99,32 @@ pub enum IndexStatus {
|
||||
/// The statistics that can be computed from an `Index` object.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IndexStats {
|
||||
/// Number of documents in the index.
|
||||
pub number_of_documents: u64,
|
||||
/// Stats of the documents database.
|
||||
#[serde(default)]
|
||||
pub documents_database_stats: DatabaseStats,
|
||||
|
||||
#[serde(default, skip_serializing)]
|
||||
pub number_of_documents: Option<u64>,
|
||||
|
||||
/// Size taken up by the index' DB, in bytes.
|
||||
///
|
||||
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
|
||||
/// are not returned to the disk after a deletion, this number is typically larger than
|
||||
/// `used_database_size` that only includes the size of the used pages.
|
||||
pub database_size: u64,
|
||||
/// Number of embeddings in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embeddings: Option<u64>,
|
||||
/// Number of embedded documents in the index.
|
||||
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
|
||||
pub number_of_embedded_documents: Option<u64>,
|
||||
/// Size taken by the used pages of the index' DB, in bytes.
|
||||
///
|
||||
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
|
||||
/// this value is typically smaller than `database_size`.
|
||||
pub used_database_size: u64,
|
||||
/// The primary key of the index
|
||||
pub primary_key: Option<String>,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
pub field_distribution: FieldDistribution,
|
||||
/// Creation date of the index.
|
||||
@@ -121,11 +141,16 @@ impl IndexStats {
|
||||
/// # Parameters
|
||||
///
|
||||
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
|
||||
let arroy_stats = index.arroy_stats(rtxn)?;
|
||||
Ok(IndexStats {
|
||||
number_of_documents: index.number_of_documents(rtxn)?,
|
||||
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
|
||||
number_of_embedded_documents: Some(arroy_stats.documents.len()),
|
||||
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
|
||||
number_of_documents: None,
|
||||
database_size: index.on_disk_size()?,
|
||||
used_database_size: index.used_size()?,
|
||||
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
|
||||
field_distribution: index.field_distribution(rtxn)?,
|
||||
created_at: index.created_at(rtxn)?,
|
||||
updated_at: index.updated_at(rtxn)?,
|
||||
@@ -134,29 +159,25 @@ impl IndexStats {
|
||||
}
|
||||
|
||||
impl IndexMapper {
|
||||
pub fn new(
|
||||
env: &Env,
|
||||
base_path: PathBuf,
|
||||
index_base_map_size: usize,
|
||||
index_growth_amount: usize,
|
||||
index_count: usize,
|
||||
enable_mdb_writemap: bool,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
|
||||
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
|
||||
wtxn.commit()?;
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
budget: IndexBudget,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
|
||||
index_mapping,
|
||||
index_stats,
|
||||
base_path,
|
||||
index_base_map_size,
|
||||
index_growth_amount,
|
||||
enable_mdb_writemap,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))),
|
||||
index_mapping: env.create_database(wtxn, Some(db_name::INDEX_MAPPING))?,
|
||||
index_stats: env.create_database(wtxn, Some(db_name::INDEX_STATS))?,
|
||||
base_path: options.indexes_path.clone(),
|
||||
index_base_map_size: budget.map_size,
|
||||
index_growth_amount: options.index_growth_amount,
|
||||
enable_mdb_writemap: options.enable_mdb_writemap,
|
||||
indexer_config: options.indexer_config.clone(),
|
||||
currently_updating_index: Default::default(),
|
||||
})
|
||||
}
|
||||
@@ -183,13 +204,24 @@ impl IndexMapper {
|
||||
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
|
||||
// This is very unlikely to happen in practice.
|
||||
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
|
||||
let index = self.index_map.write().unwrap().create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
date,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)?;
|
||||
let index = self
|
||||
.index_map
|
||||
.write()
|
||||
.unwrap()
|
||||
.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
date,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
true,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
|
||||
self.store_stats_of(&mut wtxn, name, &stats)?;
|
||||
drop(index_rtxn);
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -357,7 +389,9 @@ impl IndexMapper {
|
||||
};
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
// take the lock to reopen the environment.
|
||||
reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?;
|
||||
reopen
|
||||
.reopen(&mut self.index_map.write().unwrap(), &index_path)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
continue;
|
||||
}
|
||||
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
|
||||
@@ -372,13 +406,16 @@ impl IndexMapper {
|
||||
Missing => {
|
||||
let index_path = self.base_path.join(uuid.to_string());
|
||||
|
||||
break index_map.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
None,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
)?;
|
||||
break index_map
|
||||
.create(
|
||||
&uuid,
|
||||
&index_path,
|
||||
None,
|
||||
self.enable_mdb_writemap,
|
||||
self.index_base_map_size,
|
||||
false,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
|
||||
}
|
||||
Available(index) => break index,
|
||||
Closing(_) => {
|
||||
@@ -460,6 +497,7 @@ impl IndexMapper {
|
||||
let index = self.index(rtxn, index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Details, Task};
|
||||
use meilisearch_types::tasks::{Details, Kind, Status, Task};
|
||||
use meilisearch_types::versioning;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::{IndexScheduler, Kind, Status, BEI128};
|
||||
use crate::{IndexScheduler, BEI128};
|
||||
|
||||
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
// Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run.
|
||||
@@ -18,41 +19,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
scheduler.assert_internally_consistent();
|
||||
|
||||
let IndexScheduler {
|
||||
autobatching_enabled,
|
||||
cleanup_enabled: _,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
file_store,
|
||||
env,
|
||||
all_tasks,
|
||||
all_batches,
|
||||
batch_to_tasks_mapping,
|
||||
// task reverse index
|
||||
status,
|
||||
kind,
|
||||
index_tasks,
|
||||
canceled_by,
|
||||
enqueued_at,
|
||||
started_at,
|
||||
finished_at,
|
||||
|
||||
// batch reverse index
|
||||
batch_status,
|
||||
batch_kind,
|
||||
batch_index_tasks,
|
||||
batch_enqueued_at,
|
||||
batch_started_at,
|
||||
batch_finished_at,
|
||||
version,
|
||||
queue,
|
||||
scheduler,
|
||||
|
||||
index_mapper,
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
auth_path: _,
|
||||
version_file_path: _,
|
||||
webhook_url: _,
|
||||
webhook_authorization_header: _,
|
||||
test_breakpoint_sdr: _,
|
||||
@@ -65,8 +40,18 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
|
||||
let mut snap = String::new();
|
||||
|
||||
let indx_sched_version = version.get_version(&rtxn).unwrap();
|
||||
let latest_version = (
|
||||
versioning::VERSION_MAJOR.parse().unwrap(),
|
||||
versioning::VERSION_MINOR.parse().unwrap(),
|
||||
versioning::VERSION_PATCH.parse().unwrap(),
|
||||
);
|
||||
if indx_sched_version != Some(latest_version) {
|
||||
snap.push_str(&format!("index scheduler running on version {indx_sched_version:?}\n"));
|
||||
}
|
||||
|
||||
let processing = processing_tasks.read().unwrap().clone();
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled));
|
||||
snap.push_str(&format!(
|
||||
"### Processing batch {:?}:\n",
|
||||
processing.batch.as_ref().map(|batch| batch.uid)
|
||||
@@ -79,19 +64,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Tasks:\n");
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks));
|
||||
snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, *status));
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.tasks.status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, *kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Index Mapper:\n");
|
||||
@@ -99,55 +84,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Canceled By:\n");
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by));
|
||||
snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Batches:\n");
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, *all_batches));
|
||||
snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batch to tasks mapping:\n");
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping));
|
||||
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Status:\n");
|
||||
snap.push_str(&snapshot_status(&rtxn, *batch_status));
|
||||
snap.push_str(&snapshot_status(&rtxn, queue.batches.status));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Kind:\n");
|
||||
snap.push_str(&snapshot_kind(&rtxn, *batch_kind));
|
||||
snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Index Tasks:\n");
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks));
|
||||
snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Enqueued At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Started At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### Batches Finished At:\n");
|
||||
snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at));
|
||||
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at));
|
||||
snap.push_str("----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### File Store:\n");
|
||||
snap.push_str(&snapshot_file_store(file_store));
|
||||
snap.push_str(&snapshot_file_store(&queue.file_store));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap
|
||||
@@ -306,6 +291,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
Details::IndexSwap { swaps } => {
|
||||
format!("{{ swaps: {swaps:?} }}")
|
||||
}
|
||||
Details::UpgradeDatabase { from, to } => {
|
||||
format!("{{ from: {from:?}, to: {to:?} }}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -353,14 +341,24 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
|
||||
|
||||
pub fn snapshot_batch(batch: &Batch) -> String {
|
||||
let mut snap = String::new();
|
||||
let Batch { uid, details, stats, started_at, finished_at } = batch;
|
||||
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
|
||||
let stats = BatchStats {
|
||||
progress_trace: Default::default(),
|
||||
internal_database_sizes: Default::default(),
|
||||
write_channel_congestion: None,
|
||||
..stats.clone()
|
||||
};
|
||||
if let Some(finished_at) = finished_at {
|
||||
assert!(finished_at > started_at);
|
||||
}
|
||||
let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap();
|
||||
assert!(*started_at > earliest);
|
||||
assert!(earliest >= oldest);
|
||||
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(stats).unwrap()));
|
||||
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
|
||||
snap.push('}');
|
||||
snap
|
||||
}
|
||||
@@ -373,7 +371,8 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
|
||||
let stats = mapper.stats_of(rtxn, &name).unwrap();
|
||||
s.push_str(&format!(
|
||||
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
|
||||
stats.number_of_documents, stats.field_distribution
|
||||
stats.documents_database_stats.number_of_entries(),
|
||||
stats.field_distribution
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
296
crates/index-scheduler/src/processing.rs
Normal file
296
crates/index-scheduler/src/processing.rs
Normal file
@@ -0,0 +1,296 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView};
|
||||
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::utils::ProcessingBatch;
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct ProcessingTasks {
|
||||
pub batch: Option<Arc<ProcessingBatch>>,
|
||||
/// The list of tasks ids that are currently running.
|
||||
pub processing: Arc<RoaringBitmap>,
|
||||
/// The progress on processing tasks
|
||||
pub progress: Option<Progress>,
|
||||
}
|
||||
|
||||
impl ProcessingTasks {
|
||||
/// Creates an empty `ProcessingAt` struct.
|
||||
pub fn new() -> ProcessingTasks {
|
||||
ProcessingTasks::default()
|
||||
}
|
||||
|
||||
pub fn get_progress_view(&self) -> Option<ProgressView> {
|
||||
Some(self.progress.as_ref()?.as_progress_view())
|
||||
}
|
||||
|
||||
/// Stores the currently processing tasks, and the date time at which it started.
|
||||
pub fn start_processing(
|
||||
&mut self,
|
||||
processing_batch: ProcessingBatch,
|
||||
processing: RoaringBitmap,
|
||||
) -> Progress {
|
||||
self.batch = Some(Arc::new(processing_batch));
|
||||
self.processing = Arc::new(processing);
|
||||
let progress = Progress::default();
|
||||
progress.update_progress(BatchProgress::ProcessingTasks);
|
||||
self.progress = Some(progress.clone());
|
||||
|
||||
progress
|
||||
}
|
||||
|
||||
/// Set the processing tasks to an empty list
|
||||
pub fn stop_processing(&mut self) -> Self {
|
||||
self.progress = None;
|
||||
|
||||
Self {
|
||||
batch: std::mem::take(&mut self.batch),
|
||||
processing: std::mem::take(&mut self.processing),
|
||||
progress: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
||||
pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
|
||||
!self.processing.is_disjoint(canceled_tasks)
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum BatchProgress {
|
||||
ProcessingTasks,
|
||||
WritingTasksToDisk,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum FinalizingIndexStep {
|
||||
Committing,
|
||||
ComputingStats,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum TaskCancelationProgress {
|
||||
RetrievingTasks,
|
||||
UpdatingTasks,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum TaskDeletionProgress {
|
||||
DeletingTasksDateTime,
|
||||
DeletingTasksMetadata,
|
||||
DeletingTasks,
|
||||
DeletingBatches,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum SnapshotCreationProgress {
|
||||
StartTheSnapshotCreation,
|
||||
SnapshotTheIndexScheduler,
|
||||
SnapshotTheUpdateFiles,
|
||||
SnapshotTheIndexes,
|
||||
SnapshotTheApiKeys,
|
||||
CreateTheTarball,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DumpCreationProgress {
|
||||
StartTheDumpCreation,
|
||||
DumpTheApiKeys,
|
||||
DumpTheTasks,
|
||||
DumpTheBatches,
|
||||
DumpTheIndexes,
|
||||
DumpTheExperimentalFeatures,
|
||||
CompressTheDump,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum CreateIndexProgress {
|
||||
CreatingTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum UpdateIndexProgress {
|
||||
UpdatingTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DeleteIndexProgress {
|
||||
DeletingTheIndex,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum SwappingTheIndexes {
|
||||
EnsuringCorrectnessOfTheSwap,
|
||||
SwappingTheIndexes,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum InnerSwappingTwoIndexes {
|
||||
RetrieveTheTasks,
|
||||
UpdateTheTasks,
|
||||
UpdateTheIndexesMetadata,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DocumentOperationProgress {
|
||||
RetrievingConfig,
|
||||
ComputingDocumentChanges,
|
||||
Indexing,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DocumentEditionProgress {
|
||||
RetrievingConfig,
|
||||
ComputingDocumentChanges,
|
||||
Indexing,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum DocumentDeletionProgress {
|
||||
RetrievingConfig,
|
||||
DeleteDocuments,
|
||||
Indexing,
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum SettingsProgress {
|
||||
RetrievingAndMergingTheSettings,
|
||||
ApplyTheSettings,
|
||||
}
|
||||
}
|
||||
|
||||
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
|
||||
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn one_level() {
|
||||
let mut processing = ProcessingTasks::new();
|
||||
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "processing tasks",
|
||||
"finished": 0,
|
||||
"total": 2
|
||||
}
|
||||
],
|
||||
"percentage": 0.0
|
||||
}
|
||||
"#);
|
||||
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "writing tasks to disk",
|
||||
"finished": 1,
|
||||
"total": 2
|
||||
}
|
||||
],
|
||||
"percentage": 50.0
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn task_progress() {
|
||||
let mut processing = ProcessingTasks::new();
|
||||
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
|
||||
let (atomic, tasks) = AtomicTaskStep::new(10);
|
||||
processing.progress.as_ref().unwrap().update_progress(tasks);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "processing tasks",
|
||||
"finished": 0,
|
||||
"total": 2
|
||||
},
|
||||
{
|
||||
"currentStep": "task",
|
||||
"finished": 0,
|
||||
"total": 10
|
||||
}
|
||||
],
|
||||
"percentage": 0.0
|
||||
}
|
||||
"#);
|
||||
atomic.fetch_add(6, Ordering::Relaxed);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "processing tasks",
|
||||
"finished": 0,
|
||||
"total": 2
|
||||
},
|
||||
{
|
||||
"currentStep": "task",
|
||||
"finished": 6,
|
||||
"total": 10
|
||||
}
|
||||
],
|
||||
"percentage": 30.000002
|
||||
}
|
||||
"#);
|
||||
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "writing tasks to disk",
|
||||
"finished": 1,
|
||||
"total": 2
|
||||
}
|
||||
],
|
||||
"percentage": 50.0
|
||||
}
|
||||
"#);
|
||||
let (atomic, tasks) = AtomicTaskStep::new(5);
|
||||
processing.progress.as_ref().unwrap().update_progress(tasks);
|
||||
atomic.fetch_add(4, Ordering::Relaxed);
|
||||
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||
{
|
||||
"steps": [
|
||||
{
|
||||
"currentStep": "writing tasks to disk",
|
||||
"finished": 1,
|
||||
"total": 2
|
||||
},
|
||||
{
|
||||
"currentStep": "task",
|
||||
"finished": 4,
|
||||
"total": 5
|
||||
}
|
||||
],
|
||||
"percentage": 90.0
|
||||
}
|
||||
"#);
|
||||
}
|
||||
}
|
||||
603
crates/index-scheduler/src/queue/batches.rs
Normal file
603
crates/index-scheduler/src/queue/batches.rs
Normal file
@@ -0,0 +1,603 @@
|
||||
use std::collections::HashSet;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use meilisearch_types::batches::{Batch, BatchId};
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
insert_task_datetime, keep_ids_within_datetimes, map_bound,
|
||||
remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, Result, BEI128};
|
||||
|
||||
/// The number of database used by the batch queue
|
||||
const NUMBER_OF_DATABASES: u32 = 7;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const ALL_BATCHES: &str = "all-batches";
|
||||
|
||||
pub const BATCH_STATUS: &str = "batch-status";
|
||||
pub const BATCH_KIND: &str = "batch-kind";
|
||||
pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks";
|
||||
pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at";
|
||||
pub const BATCH_STARTED_AT: &str = "batch-started-at";
|
||||
pub const BATCH_FINISHED_AT: &str = "batch-finished-at";
|
||||
}
|
||||
|
||||
pub struct BatchQueue {
|
||||
/// Contains all the batches accessible by their Id.
|
||||
pub(crate) all_batches: Database<BEU32, SerdeJson<Batch>>,
|
||||
|
||||
/// All the batches containing a task matching the selected status.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the batches ids grouped by the kind of their task.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the batches associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing finished tasks started at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the batches containing tasks finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
}
|
||||
|
||||
impl BatchQueue {
|
||||
pub(crate) fn private_clone(&self) -> BatchQueue {
|
||||
BatchQueue {
|
||||
all_batches: self.all_batches,
|
||||
status: self.status,
|
||||
kind: self.kind,
|
||||
index_tasks: self.index_tasks,
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
|
||||
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
|
||||
kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?,
|
||||
index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?,
|
||||
enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?,
|
||||
started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?,
|
||||
finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result<BatchId> {
|
||||
Ok(self
|
||||
.all_batches
|
||||
.remap_data_type::<DecodeIgnore>()
|
||||
.last(rtxn)?
|
||||
.map(|(k, _)| k + 1)
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<Option<Batch>> {
|
||||
Ok(self.all_batches.get(rtxn, &batch_id)?)
|
||||
}
|
||||
|
||||
/// Returns the whole set of batches that belongs to this index.
|
||||
pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn update_index(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut batches = self.index_batches(wtxn, index)?;
|
||||
f(&mut batches);
|
||||
if batches.is_empty() {
|
||||
self.index_tasks.delete(wtxn, index)?;
|
||||
} else {
|
||||
self.index_tasks.put(wtxn, index, &batches)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
self.put_status(wtxn, status, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
self.put_kind(wtxn, kind, &tasks)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
|
||||
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
|
||||
|
||||
self.all_batches.put(
|
||||
wtxn,
|
||||
&batch.uid,
|
||||
&Batch {
|
||||
uid: batch.uid,
|
||||
progress: None,
|
||||
details: batch.details,
|
||||
stats: batch.stats,
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
enqueued_at: batch.enqueued_at,
|
||||
},
|
||||
)?;
|
||||
|
||||
// Update the statuses
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
for status in old_batch.stats.status.keys() {
|
||||
self.update_status(wtxn, *status, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for status in batch.statuses {
|
||||
self.update_status(wtxn, status, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the kinds / types
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let kinds: HashSet<_> = old_batch.stats.types.keys().cloned().collect();
|
||||
for kind in kinds.difference(&batch.kinds) {
|
||||
self.update_kind(wtxn, *kind, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for kind in batch.kinds {
|
||||
self.update_kind(wtxn, kind, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the indexes
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
let indexes: HashSet<_> = old_batch.stats.index_uids.keys().cloned().collect();
|
||||
for index in indexes.difference(&batch.indexes) {
|
||||
self.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.remove(batch.uid);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
for index in batch.indexes {
|
||||
self.update_index(wtxn, &index, |bitmap| {
|
||||
bitmap.insert(batch.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
// Update the enqueued_at: we cannot retrieve the previous enqueued at from the previous batch, and
|
||||
// must instead go through the db looking for it. We cannot look at the task contained in this batch either
|
||||
// because they may have been removed.
|
||||
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
|
||||
// to the DB per batches.
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
if let Some(enqueued_at) = old_batch.enqueued_at {
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
|
||||
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.enqueued_at,
|
||||
old_batch.started_at,
|
||||
old_batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
old_batch.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// A finished batch MUST contains at least one task and have an enqueued_at
|
||||
let enqueued_at = batch.enqueued_at.as_ref().unwrap();
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;
|
||||
|
||||
// Update the started at and finished at
|
||||
if let Some(ref old_batch) = old_batch {
|
||||
remove_task_datetime(wtxn, self.started_at, old_batch.started_at, old_batch.uid)?;
|
||||
if let Some(finished_at) = old_batch.finished_at {
|
||||
remove_task_datetime(wtxn, self.finished_at, finished_at, old_batch.uid)?;
|
||||
}
|
||||
}
|
||||
insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?;
|
||||
insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of batches. The batches MUST exist or a
|
||||
/// `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_batches(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
tasks: impl IntoIterator<Item = BatchId>,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<Vec<Batch>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|batch_id| {
|
||||
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
let mut batch = processing.batch.as_ref().unwrap().to_batch();
|
||||
batch.progress = processing.get_progress_view();
|
||||
Ok(batch)
|
||||
} else {
|
||||
self.get_batch(rtxn, batch_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||
}
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
/// Return the batch ids matched by the given query from the index scheduler's point of view.
|
||||
pub(crate) fn get_batch_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let Query {
|
||||
limit,
|
||||
from,
|
||||
reverse,
|
||||
uids,
|
||||
batch_uids,
|
||||
statuses,
|
||||
types,
|
||||
index_uids,
|
||||
canceled_by,
|
||||
before_enqueued_at,
|
||||
after_enqueued_at,
|
||||
before_started_at,
|
||||
after_started_at,
|
||||
before_finished_at,
|
||||
after_finished_at,
|
||||
} = query;
|
||||
|
||||
let mut batches = self.batches.all_batch_ids(rtxn)?;
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
batches.insert(batch_id);
|
||||
}
|
||||
|
||||
if let Some(from) = from {
|
||||
let range = if reverse.unwrap_or_default() {
|
||||
u32::MIN..*from
|
||||
} else {
|
||||
from.saturating_add(1)..u32::MAX
|
||||
};
|
||||
batches.remove_range(range);
|
||||
}
|
||||
|
||||
if let Some(batch_uids) = &batch_uids {
|
||||
let batches_uids = RoaringBitmap::from_iter(batch_uids);
|
||||
batches &= batches_uids;
|
||||
}
|
||||
|
||||
if let Some(status) = &statuses {
|
||||
let mut status_batches = RoaringBitmap::new();
|
||||
for status in status {
|
||||
match status {
|
||||
// special case for Processing batches
|
||||
Status::Processing => {
|
||||
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
|
||||
status_batches.insert(batch_id);
|
||||
}
|
||||
}
|
||||
// Enqueued tasks are not stored in batches
|
||||
Status::Enqueued => (),
|
||||
status => status_batches |= &self.batches.get_status(rtxn, *status)?,
|
||||
};
|
||||
}
|
||||
if !status.contains(&Status::Processing) {
|
||||
if let Some(ref batch) = processing.batch {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
batches &= status_batches;
|
||||
}
|
||||
|
||||
if let Some(task_uids) = &uids {
|
||||
let mut batches_by_task_uids = RoaringBitmap::new();
|
||||
for task_uid in task_uids {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? {
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
batches_by_task_uids.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
batches &= batches_by_task_uids;
|
||||
}
|
||||
|
||||
// There is no database for this query, we must retrieve the task queried by the client and ensure it's valid
|
||||
if let Some(canceled_by) = &canceled_by {
|
||||
let mut all_canceled_batches = RoaringBitmap::new();
|
||||
for cancel_uid in canceled_by {
|
||||
if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? {
|
||||
if task.kind.as_kind() == Kind::TaskCancelation
|
||||
&& task.status == Status::Succeeded
|
||||
{
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
all_canceled_batches.insert(batch_uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the canceled_by has been specified but no batch
|
||||
// matches then we prefer matching zero than all batches.
|
||||
if all_canceled_batches.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
} else {
|
||||
batches &= all_canceled_batches;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(kind) = &types {
|
||||
let mut kind_batches = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_batches |= self.batches.get_kind(rtxn, *kind)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid))
|
||||
{
|
||||
kind_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &kind_batches;
|
||||
}
|
||||
|
||||
if let Some(index) = &index_uids {
|
||||
let mut index_batches = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_batches |= self.batches.index_batches(rtxn, index)?;
|
||||
if let Some(uid) = processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.and_then(|batch| batch.indexes.contains(index).then_some(batch.uid))
|
||||
{
|
||||
index_batches.insert(uid);
|
||||
}
|
||||
}
|
||||
batches &= &index_batches;
|
||||
}
|
||||
|
||||
// For the started_at filter, we need to treat the part of the batches that are processing from the part of the
|
||||
// batches that are not processing. The non-processing ones are filtered normally while the processing ones
|
||||
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
|
||||
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
|
||||
batches = {
|
||||
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
|
||||
(&batches - &*processing.processing, &batches & &*processing.processing);
|
||||
|
||||
// special case for Processing batches
|
||||
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
|
||||
let mut clear_filtered_processing_batches =
|
||||
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let is_within_dates = RangeBounds::contains(
|
||||
&(start, end),
|
||||
&processing
|
||||
.batch
|
||||
.as_ref()
|
||||
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
|
||||
.unix_timestamp_nanos(),
|
||||
);
|
||||
if !is_within_dates {
|
||||
filtered_processing_batches.clear();
|
||||
}
|
||||
};
|
||||
match (after_started_at, before_started_at) {
|
||||
(None, None) => (),
|
||||
(None, Some(before)) => {
|
||||
clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before))
|
||||
}
|
||||
(Some(after), None) => {
|
||||
clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded)
|
||||
}
|
||||
(Some(after), Some(before)) => clear_filtered_processing_batches(
|
||||
Bound::Excluded(*after),
|
||||
Bound::Excluded(*before),
|
||||
),
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut filtered_non_processing_batches,
|
||||
self.batches.started_at,
|
||||
*after_started_at,
|
||||
*before_started_at,
|
||||
)?;
|
||||
filtered_non_processing_batches | filtered_processing_batches
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.enqueued_at,
|
||||
*after_enqueued_at,
|
||||
*before_enqueued_at,
|
||||
)?;
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut batches,
|
||||
self.batches.finished_at,
|
||||
*after_finished_at,
|
||||
*before_finished_at,
|
||||
)?;
|
||||
|
||||
if let Some(limit) = limit {
|
||||
batches = if query.reverse.unwrap_or_default() {
|
||||
batches.into_iter().take(*limit as usize).collect()
|
||||
} else {
|
||||
batches.into_iter().rev().take(*limit as usize).collect()
|
||||
};
|
||||
}
|
||||
|
||||
Ok(batches)
|
||||
}
|
||||
|
||||
/// Return the batch ids matching the query along with the total number of batches
|
||||
/// by ignoring the from and limit parameters from the user's point of view.
|
||||
///
|
||||
/// There are two differences between an internal query and a query executed by
|
||||
/// the user.
|
||||
///
|
||||
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
|
||||
/// with many indexes internally.
|
||||
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
|
||||
pub(crate) fn get_batch_ids_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(RoaringBitmap, u64)> {
|
||||
// compute all batches matching the filter by ignoring the limits, to find the number of batches matching
|
||||
// the filter.
|
||||
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
|
||||
// us from modifying the underlying implementation, and the performance remains sufficient.
|
||||
// Should this change, we would modify `get_batch_ids` to directly return the number of matching batches.
|
||||
let total_batches =
|
||||
self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?;
|
||||
let mut batches = self.get_batch_ids(rtxn, query, processing)?;
|
||||
|
||||
// If the query contains a list of index uid or there is a finite list of authorized indexes,
|
||||
// then we must exclude all the batches that only contains tasks associated to multiple indexes.
|
||||
// This works because we don't autobatch tasks associated to multiple indexes with tasks associated
|
||||
// to a single index. e.g: IndexSwap cannot be batched with IndexCreation.
|
||||
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
|
||||
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
|
||||
batches -= self.tasks.get_kind(rtxn, kind)?;
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
if batch.kinds.contains(&kind) {
|
||||
batches.remove(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Any batch that is internally associated with at least one authorized index
|
||||
// must be returned.
|
||||
if !filters.all_indexes_authorized() {
|
||||
let mut valid_indexes = RoaringBitmap::new();
|
||||
let mut forbidden_indexes = RoaringBitmap::new();
|
||||
|
||||
let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?;
|
||||
for result in all_indexes_iter {
|
||||
let (index, index_tasks) = result?;
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes |= index_tasks;
|
||||
} else {
|
||||
forbidden_indexes |= index_tasks;
|
||||
}
|
||||
}
|
||||
if let Some(batch) = processing.batch.as_ref() {
|
||||
for index in &batch.indexes {
|
||||
if filters.is_index_authorized(index) {
|
||||
valid_indexes.insert(batch.uid);
|
||||
} else {
|
||||
forbidden_indexes.insert(batch.uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a batch had ONE valid task then it should be returned
|
||||
let invalid_batches = forbidden_indexes - valid_indexes;
|
||||
|
||||
batches -= invalid_batches;
|
||||
}
|
||||
|
||||
Ok((batches, total_batches.len()))
|
||||
}
|
||||
|
||||
pub(crate) fn get_batches_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<(Vec<Batch>, u64)> {
|
||||
let (batches, total) =
|
||||
self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?;
|
||||
let batches = if query.reverse.unwrap_or_default() {
|
||||
Box::new(batches.into_iter()) as Box<dyn Iterator<Item = u32>>
|
||||
} else {
|
||||
Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
|
||||
};
|
||||
|
||||
let batches = self.batches.get_existing_batches(
|
||||
rtxn,
|
||||
batches.take(query.limit.unwrap_or(u32::MAX) as usize),
|
||||
processing,
|
||||
)?;
|
||||
|
||||
Ok((batches, total))
|
||||
}
|
||||
}
|
||||
476
crates/index-scheduler/src/queue/batches_test.rs
Normal file
476
crates/index-scheduler/src/queue/batches_test.rs
Normal file
@@ -0,0 +1,476 @@
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, FailureLocation};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn query_batches_from_and_limit() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
let kind = index_creation_task("whalo", "plankton");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
let kind = index_creation_task("catto", "his_own_vomit");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_n_successful_batches(3);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
|
||||
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let query = Query { limit: Some(0), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query { limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query { limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query { from: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_simple() {
|
||||
let start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (mut batches, _) = index_scheduler
|
||||
.get_batches_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
||||
assert!(batches[0].enqueued_at.is_some());
|
||||
batches[0].enqueued_at = None;
|
||||
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
||||
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
||||
snapshot!(batch, @r#"
|
||||
{
|
||||
"uid": 0,
|
||||
"details": {
|
||||
"primaryKey": "mouse"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
"status": {
|
||||
"processing": 1
|
||||
},
|
||||
"types": {
|
||||
"indexCreation": 1
|
||||
},
|
||||
"indexUids": {
|
||||
"catto": 1
|
||||
}
|
||||
},
|
||||
"startedAt": "1970-01-01T00:00:00Z",
|
||||
"finishedAt": null,
|
||||
"enqueuedAt": null
|
||||
}
|
||||
"#);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks
|
||||
|
||||
let query =
|
||||
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test, which should excludes the enqueued tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should excludes all of them
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should exclude the enqueued tasks and include the only processing task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit");
|
||||
|
||||
let second_start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should include all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the second part of the test and before one minute after the
|
||||
// second start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// we run the same query to verify that, and indeed find that the last task is matched
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// enqueued, succeeded, or processing tasks started after the second part of the test, should
|
||||
// again only return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// now the last task should have failed
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// so running the last query should return nothing
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![1]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with an invalid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![2]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with a valid uid
|
||||
snapshot!(snapshot_bitmap(&batches), @"[2,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_special_rules() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,]");
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
|
||||
// associated with doggo -> empty result
|
||||
snapshot!(snapshot_bitmap(&batches), @"[]");
|
||||
|
||||
drop(rtxn);
|
||||
// We're going to advance and process all the batches for the next query to actually hit the db
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
]);
|
||||
handle.advance_one_successful_batch();
|
||||
handle.advance_n_failed_batches(2);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything");
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
|
||||
// -> only the index creation of doggo should be returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![
|
||||
IndexUidPattern::new_unchecked("catto"),
|
||||
IndexUidPattern::new_unchecked("doggo"),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
|
||||
// -> all tasks except the swap of catto with whalo are returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (batches, _) = index_scheduler
|
||||
.queue
|
||||
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// we asked for all the tasks with all index authorized -> all tasks returned
|
||||
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_batches_canceled_by() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
let kind = KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_string(),
|
||||
tasks: [0, 1, 2, 3].into_iter().collect(),
|
||||
};
|
||||
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// The batch zero was the index creation task, the 1 is the task cancellation
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (batches, _) = index_scheduler
|
||||
.get_batch_ids_from_authorized_indexes(
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
// Return only 1 because the user is not authorized to see task 2
|
||||
snapshot!(snapshot_bitmap(&batches), @"[1,]");
|
||||
}
|
||||
391
crates/index-scheduler/src/queue/mod.rs
Normal file
391
crates/index-scheduler/src/queue/mod.rs
Normal file
@@ -0,0 +1,391 @@
|
||||
mod batches;
|
||||
#[cfg(test)]
|
||||
mod batches_test;
|
||||
mod tasks;
|
||||
#[cfg(test)]
|
||||
mod tasks_test;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File as StdFile;
|
||||
use std::time::Duration;
|
||||
|
||||
use file_store::FileStore;
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub(crate) use self::batches::BatchQueue;
|
||||
pub(crate) use self::tasks::TaskQueue;
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexSchedulerOptions, Result, TaskId};
|
||||
|
||||
/// The number of database used by queue itself
|
||||
const NUMBER_OF_DATABASES: u32 = 1;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
|
||||
}
|
||||
|
||||
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
|
||||
///
|
||||
/// An empty/default query (where each field is set to `None`) matches all tasks.
|
||||
/// Each non-null field restricts the set of tasks further.
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Query {
|
||||
/// The maximum number of tasks to be matched
|
||||
pub limit: Option<u32>,
|
||||
/// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub from: Option<u32>,
|
||||
/// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
|
||||
pub reverse: Option<bool>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched
|
||||
pub uids: Option<Vec<TaskId>>,
|
||||
/// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched
|
||||
pub batch_uids: Option<Vec<BatchId>>,
|
||||
/// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls
|
||||
pub statuses: Option<Vec<Status>>,
|
||||
/// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks.
|
||||
///
|
||||
/// The kind of a task is given by:
|
||||
/// ```
|
||||
/// # use meilisearch_types::tasks::{Task, Kind};
|
||||
/// # fn doc_func(task: Task) -> Kind {
|
||||
/// task.kind.as_kind()
|
||||
/// # }
|
||||
/// ```
|
||||
pub types: Option<Vec<Kind>>,
|
||||
/// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks
|
||||
pub index_uids: Option<Vec<String>>,
|
||||
/// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks
|
||||
/// that canceled the matched tasks.
|
||||
pub canceled_by: Option<Vec<TaskId>>,
|
||||
/// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub before_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
|
||||
pub after_enqueued_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub before_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
|
||||
pub after_started_at: Option<OffsetDateTime>,
|
||||
/// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub before_finished_at: Option<OffsetDateTime>,
|
||||
/// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
|
||||
pub after_finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
/// Return `true` if every field of the query is set to `None`, such that the query
|
||||
/// matches all tasks.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Query {
|
||||
limit: None,
|
||||
from: None,
|
||||
reverse: None,
|
||||
uids: None,
|
||||
batch_uids: None,
|
||||
statuses: None,
|
||||
types: None,
|
||||
index_uids: None,
|
||||
canceled_by: None,
|
||||
before_enqueued_at: None,
|
||||
after_enqueued_at: None,
|
||||
before_started_at: None,
|
||||
after_started_at: None,
|
||||
before_finished_at: None,
|
||||
after_finished_at: None,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
|
||||
pub fn with_index(self, index_uid: String) -> Self {
|
||||
let mut index_vec = self.index_uids.unwrap_or_default();
|
||||
index_vec.push(index_uid);
|
||||
Self { index_uids: Some(index_vec), ..self }
|
||||
}
|
||||
|
||||
// Removes the `from` and `limit` restrictions from the query.
|
||||
// Useful to get the total number of tasks matching a filter.
|
||||
pub fn without_limits(self) -> Self {
|
||||
Query { limit: None, from: None, ..self }
|
||||
}
|
||||
}
|
||||
|
||||
/// Structure which holds meilisearch's indexes and schedules the tasks
|
||||
/// to be performed on them.
|
||||
pub struct Queue {
|
||||
pub(crate) tasks: tasks::TaskQueue,
|
||||
pub(crate) batches: batches::BatchQueue,
|
||||
|
||||
/// Matches a batch id with the associated task ids.
|
||||
pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
|
||||
|
||||
/// The list of files referenced by the tasks.
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
pub(crate) fn private_clone(&self) -> Queue {
|
||||
Queue {
|
||||
tasks: self.tasks.private_clone(),
|
||||
batches: self.batches.private_clone(),
|
||||
batch_to_tasks_mapping: self.batch_to_tasks_mapping,
|
||||
file_store: self.file_store.clone(),
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
tasks::TaskQueue::nb_db() + batches::BatchQueue::nb_db() + NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
/// Create an index scheduler and start its run loop.
|
||||
pub(crate) fn new(
|
||||
env: &Env<WithoutTls>,
|
||||
wtxn: &mut RwTxn,
|
||||
options: &IndexSchedulerOptions,
|
||||
) -> Result<Self> {
|
||||
// allow unreachable_code to get rids of the warning in the case of a test build.
|
||||
Ok(Self {
|
||||
file_store: FileStore::new(&options.update_file_path)?,
|
||||
batch_to_tasks_mapping: env
|
||||
.create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?,
|
||||
tasks: TaskQueue::new(env, wtxn)?,
|
||||
batches: BatchQueue::new(env, wtxn)?,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this batch.
|
||||
pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<RoaringBitmap> {
|
||||
Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks.
|
||||
///
|
||||
/// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_tasks_for_processing_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing_batch: &mut ProcessingBatch,
|
||||
tasks: impl IntoIterator<Item = TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|task_id| {
|
||||
let mut task = self
|
||||
.tasks
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue));
|
||||
processing_batch.processing(&mut task);
|
||||
task
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
|
||||
pub(crate) fn write_batch(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
batch: ProcessingBatch,
|
||||
tasks: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?;
|
||||
self.batches.write_batch(wtxn, batch)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
|
||||
match task.content_uuid() {
|
||||
Some(content_file) => self.delete_update_file(content_file),
|
||||
None => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Open and returns the task's content File.
|
||||
pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
|
||||
self.file_store.get_update(uuid)
|
||||
}
|
||||
|
||||
/// Delete a file from the index scheduler.
|
||||
///
|
||||
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
|
||||
pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> {
|
||||
Ok(self.file_store.delete(uuid)?)
|
||||
}
|
||||
|
||||
/// Create a file and register it in the index scheduler.
|
||||
///
|
||||
/// The returned file and uuid can be used to associate
|
||||
/// some data to a task. The file will be kept until
|
||||
/// the task has been fully processed.
|
||||
pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> {
|
||||
if dry_run {
|
||||
Ok((Uuid::nil(), file_store::File::dry_file()?))
|
||||
} else {
|
||||
Ok(self.file_store.new_update()?)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> {
|
||||
Ok(self.file_store.new_update_with_uuid(uuid)?)
|
||||
}
|
||||
|
||||
/// The size on disk taken by all the updates files contained in the `IndexScheduler`, in bytes.
|
||||
pub fn compute_update_file_size(&self) -> Result<u64> {
|
||||
Ok(self.file_store.compute_total_size()?)
|
||||
}
|
||||
|
||||
pub fn register(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: &KindWithContent,
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
) -> Result<Task> {
|
||||
let next_task_id = self.tasks.next_task_id(wtxn)?;
|
||||
|
||||
if let Some(uid) = task_id {
|
||||
if uid < next_task_id {
|
||||
return Err(Error::BadTaskId { received: uid, expected: next_task_id });
|
||||
}
|
||||
}
|
||||
|
||||
let mut task = Task {
|
||||
uid: task_id.unwrap_or(next_task_id),
|
||||
// The batch is defined once we starts processing the task
|
||||
batch_uid: None,
|
||||
enqueued_at: OffsetDateTime::now_utc(),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
error: None,
|
||||
canceled_by: None,
|
||||
details: kind.default_details(),
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
// don't attempt to delete/cancel tasks that are newer than themselves.
|
||||
filter_out_references_to_newer_tasks(&mut task);
|
||||
// If the register task is an index swap task, verify that it is well-formed
|
||||
// (that it does not contain duplicate indexes).
|
||||
check_index_swap_validity(&task)?;
|
||||
|
||||
// At this point the task is going to be registered and no further checks will be done
|
||||
if dry_run {
|
||||
return Ok(task);
|
||||
}
|
||||
|
||||
// Get rid of the mutability.
|
||||
let task = task;
|
||||
self.tasks.register(wtxn, &task)?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Register a task to cleanup the task queue if needed
|
||||
pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> {
|
||||
let nb_tasks = self.tasks.all_task_ids(wtxn)?.len();
|
||||
// if we have less than 1M tasks everything is fine
|
||||
if nb_tasks < self.max_number_of_tasks as u64 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
|
||||
|
||||
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
|
||||
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
if to_delete.len() < 2 {
|
||||
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
// the only thing we can do is hope that the user tasks are going to finish
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"The task queue is almost full. Deleting the oldest {} finished tasks.",
|
||||
to_delete.len()
|
||||
);
|
||||
|
||||
// it's safe to unwrap here because we checked the len above
|
||||
let newest_task_id = to_delete.iter().last().unwrap();
|
||||
let last_task_to_delete =
|
||||
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
|
||||
let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);
|
||||
|
||||
self.register(
|
||||
wtxn,
|
||||
&KindWithContent::TaskDeletion {
|
||||
query: format!(
|
||||
"?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
|
||||
delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
|
||||
),
|
||||
tasks: to_delete,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_stats(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
processing: &ProcessingTasks,
|
||||
) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
|
||||
let mut res = BTreeMap::new();
|
||||
let processing_tasks = processing.processing.len();
|
||||
|
||||
res.insert(
|
||||
"statuses".to_string(),
|
||||
enum_iterator::all::<Status>()
|
||||
.map(|s| {
|
||||
let tasks = self.tasks.get_status(rtxn, s)?.len();
|
||||
match s {
|
||||
Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
|
||||
Status::Processing => Ok((s.to_string(), processing_tasks)),
|
||||
s => Ok((s.to_string(), tasks)),
|
||||
}
|
||||
})
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"types".to_string(),
|
||||
enum_iterator::all::<Kind>()
|
||||
.map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len())))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
"indexes".to_string(),
|
||||
self.tasks
|
||||
.index_tasks
|
||||
.iter(rtxn)?
|
||||
.map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,11 +1,11 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch Some(1):
|
||||
[1,]
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, }
|
||||
{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/batches_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/tasks_test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: crates/index-scheduler/src/lib.rs
|
||||
source: crates/index-scheduler/src/queue/test.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
{
|
||||
541
crates/index-scheduler/src/queue/tasks.rs
Normal file
541
crates/index-scheduler/src/queue/tasks.rs
Normal file
@@ -0,0 +1,541 @@
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, Status, Task};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Query, Queue};
|
||||
use crate::processing::ProcessingTasks;
|
||||
use crate::utils::{
|
||||
self, insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
|
||||
};
|
||||
use crate::{Error, Result, TaskId, BEI128};
|
||||
|
||||
/// The number of database used by the task queue
|
||||
const NUMBER_OF_DATABASES: u32 = 8;
|
||||
/// Database const names for the `IndexScheduler`.
|
||||
mod db_name {
|
||||
pub const ALL_TASKS: &str = "all-tasks";
|
||||
|
||||
pub const STATUS: &str = "status";
|
||||
pub const KIND: &str = "kind";
|
||||
pub const INDEX_TASKS: &str = "index-tasks";
|
||||
pub const CANCELED_BY: &str = "canceled_by";
|
||||
pub const ENQUEUED_AT: &str = "enqueued-at";
|
||||
pub const STARTED_AT: &str = "started-at";
|
||||
pub const FINISHED_AT: &str = "finished-at";
|
||||
}
|
||||
|
||||
pub struct TaskQueue {
|
||||
/// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
// TODO we should not be able to serialize a `Status::Processing` in this database.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the tasks ids grouped by their kind.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
/// Store the tasks that were canceled by a task uid
|
||||
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
|
||||
/// Store the task ids of tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the task ids of finished tasks which started being processed at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
/// Store the task ids of tasks which finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
}
|
||||
|
||||
impl TaskQueue {
|
||||
pub(crate) fn private_clone(&self) -> TaskQueue {
|
||||
TaskQueue {
|
||||
all_tasks: self.all_tasks,
|
||||
status: self.status,
|
||||
kind: self.kind,
|
||||
index_tasks: self.index_tasks,
|
||||
canceled_by: self.canceled_by,
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn nb_db() -> u32 {
|
||||
NUMBER_OF_DATABASES
|
||||
}
|
||||
|
||||
pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
|
||||
status: env.create_database(wtxn, Some(db_name::STATUS))?,
|
||||
kind: env.create_database(wtxn, Some(db_name::KIND))?,
|
||||
index_tasks: env.create_database(wtxn, Some(db_name::INDEX_TASKS))?,
|
||||
canceled_by: env.create_database(wtxn, Some(db_name::CANCELED_BY))?,
|
||||
enqueued_at: env.create_database(wtxn, Some(db_name::ENQUEUED_AT))?,
|
||||
started_at: env.create_database(wtxn, Some(db_name::STARTED_AT))?,
|
||||
finished_at: env.create_database(wtxn, Some(db_name::FINISHED_AT))?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
|
||||
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
|
||||
}
|
||||
|
||||
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
|
||||
Ok(self.last_task_id(rtxn)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
|
||||
Ok(self.all_tasks.get(rtxn, &task_id)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
let reprocessing = old_task.status != Status::Enqueued;
|
||||
|
||||
debug_assert!(old_task != *task);
|
||||
debug_assert_eq!(old_task.uid, task.uid);
|
||||
|
||||
// If we're processing a task that failed it may already contains a batch_uid
|
||||
debug_assert!(
|
||||
reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
|
||||
"\n==> old: {old_task:?}\n==> new: {task:?}"
|
||||
);
|
||||
|
||||
if old_task.status != task.status {
|
||||
self.update_status(wtxn, old_task.status, |bitmap| {
|
||||
bitmap.remove(task.uid);
|
||||
})?;
|
||||
self.update_status(wtxn, task.status, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
if old_task.kind.as_kind() != task.kind.as_kind() {
|
||||
self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| {
|
||||
bitmap.remove(task.uid);
|
||||
})?;
|
||||
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
old_task.enqueued_at, task.enqueued_at,
|
||||
"Cannot update a task's enqueued_at time"
|
||||
);
|
||||
if old_task.started_at != task.started_at {
|
||||
assert!(
|
||||
reprocessing || old_task.started_at.is_none(),
|
||||
"Cannot update a task's started_at time"
|
||||
);
|
||||
if let Some(started_at) = old_task.started_at {
|
||||
remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
|
||||
}
|
||||
if let Some(started_at) = task.started_at {
|
||||
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
|
||||
}
|
||||
}
|
||||
if old_task.finished_at != task.finished_at {
|
||||
assert!(
|
||||
reprocessing || old_task.finished_at.is_none(),
|
||||
"Cannot update a task's finished_at time"
|
||||
);
|
||||
if let Some(finished_at) = old_task.finished_at {
|
||||
remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this index.
|
||||
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn update_index(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.index_tasks(wtxn, index)?;
|
||||
f(&mut tasks);
|
||||
if tasks.is_empty() {
|
||||
self.index_tasks.delete(wtxn, index)?;
|
||||
} else {
|
||||
self.index_tasks.put(wtxn, index, &tasks)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_status(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
self.put_status(wtxn, status, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub(crate) fn put_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_kind(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
self.put_kind(wtxn, kind, &tasks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
|
||||
/// `CorruptedTaskQueue` error will be thrown.
|
||||
pub(crate) fn get_existing_tasks(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
tasks: impl IntoIterator<Item = TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
tasks
|
||||
.into_iter()
|
||||
.map(|task_id| {
|
||||
self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||
})
|
||||
.collect::<Result<_>>()
|
||||
}
|
||||
|
||||
pub(crate) fn register(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
self.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
self.update_status(wtxn, Status::Enqueued, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
utils::insert_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Queue {
|
||||
/// Return the task ids matched by the given query from the index scheduler's point of view.
|
||||
pub(crate) fn get_task_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } =
|
||||
processing_tasks;
|
||||
let Query {
|
||||
limit,
|
||||
from,
|
||||
reverse,
|
||||
uids,
|
||||
batch_uids,
|
||||
statuses,
|
||||
types,
|
||||
index_uids,
|
||||
canceled_by,
|
||||
before_enqueued_at,
|
||||
after_enqueued_at,
|
||||
before_started_at,
|
||||
after_started_at,
|
||||
before_finished_at,
|
||||
after_finished_at,
|
||||
} = query;
|
||||
|
||||
let mut tasks = self.tasks.all_task_ids(rtxn)?;
|
||||
|
||||
if let Some(from) = from {
|
||||
let range = if reverse.unwrap_or_default() {
|
||||
u32::MIN..*from
|
||||
} else {
|
||||
from.saturating_add(1)..u32::MAX
|
||||
};
|
||||
tasks.remove_range(range);
|
||||
}
|
||||
|
||||
if let Some(batch_uids) = batch_uids {
|
||||
let mut batch_tasks = RoaringBitmap::new();
|
||||
for batch_uid in batch_uids {
|
||||
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
|
||||
batch_tasks |= &**processing_tasks;
|
||||
} else {
|
||||
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
|
||||
}
|
||||
}
|
||||
tasks &= batch_tasks;
|
||||
}
|
||||
|
||||
if let Some(status) = statuses {
|
||||
let mut status_tasks = RoaringBitmap::new();
|
||||
for status in status {
|
||||
match status {
|
||||
// special case for Processing tasks
|
||||
Status::Processing => {
|
||||
status_tasks |= &**processing_tasks;
|
||||
}
|
||||
status => status_tasks |= &self.tasks.get_status(rtxn, *status)?,
|
||||
};
|
||||
}
|
||||
if !status.contains(&Status::Processing) {
|
||||
tasks -= &**processing_tasks;
|
||||
}
|
||||
tasks &= status_tasks;
|
||||
}
|
||||
|
||||
if let Some(uids) = uids {
|
||||
let uids = RoaringBitmap::from_iter(uids);
|
||||
tasks &= &uids;
|
||||
}
|
||||
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
let mut all_canceled_tasks = RoaringBitmap::new();
|
||||
for cancel_task_uid in canceled_by {
|
||||
if let Some(canceled_by_uid) = self.tasks.canceled_by.get(rtxn, cancel_task_uid)? {
|
||||
all_canceled_tasks |= canceled_by_uid;
|
||||
}
|
||||
}
|
||||
|
||||
// if the canceled_by has been specified but no task
|
||||
// matches then we prefer matching zero than all tasks.
|
||||
if all_canceled_tasks.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
} else {
|
||||
tasks &= all_canceled_tasks;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(kind) = types {
|
||||
let mut kind_tasks = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_tasks |= self.tasks.get_kind(rtxn, *kind)?;
|
||||
}
|
||||
tasks &= &kind_tasks;
|
||||
}
|
||||
|
||||
if let Some(index) = index_uids {
|
||||
let mut index_tasks = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_tasks |= self.tasks.index_tasks(rtxn, index)?;
|
||||
}
|
||||
tasks &= &index_tasks;
|
||||
}
|
||||
|
||||
// For the started_at filter, we need to treat the part of the tasks that are processing from the part of the
|
||||
// tasks that are not processing. The non-processing ones are filtered normally while the processing ones
|
||||
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
|
||||
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
|
||||
tasks = {
|
||||
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
|
||||
(&tasks - &**processing_tasks, &tasks & &**processing_tasks);
|
||||
|
||||
// special case for Processing tasks
|
||||
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
|
||||
let mut clear_filtered_processing_tasks =
|
||||
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
|
||||
let start = map_bound(start, |b| b.unix_timestamp_nanos());
|
||||
let end = map_bound(end, |b| b.unix_timestamp_nanos());
|
||||
let is_within_dates = RangeBounds::contains(
|
||||
&(start, end),
|
||||
&processing_batch
|
||||
.as_ref()
|
||||
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
|
||||
.unix_timestamp_nanos(),
|
||||
);
|
||||
if !is_within_dates {
|
||||
filtered_processing_tasks.clear();
|
||||
}
|
||||
};
|
||||
match (after_started_at, before_started_at) {
|
||||
(None, None) => (),
|
||||
(None, Some(before)) => {
|
||||
clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before))
|
||||
}
|
||||
(Some(after), None) => {
|
||||
clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded)
|
||||
}
|
||||
(Some(after), Some(before)) => clear_filtered_processing_tasks(
|
||||
Bound::Excluded(*after),
|
||||
Bound::Excluded(*before),
|
||||
),
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut filtered_non_processing_tasks,
|
||||
self.tasks.started_at,
|
||||
*after_started_at,
|
||||
*before_started_at,
|
||||
)?;
|
||||
filtered_non_processing_tasks | filtered_processing_tasks
|
||||
};
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut tasks,
|
||||
self.tasks.enqueued_at,
|
||||
*after_enqueued_at,
|
||||
*before_enqueued_at,
|
||||
)?;
|
||||
|
||||
keep_ids_within_datetimes(
|
||||
rtxn,
|
||||
&mut tasks,
|
||||
self.tasks.finished_at,
|
||||
*after_finished_at,
|
||||
*before_finished_at,
|
||||
)?;
|
||||
|
||||
if let Some(limit) = limit {
|
||||
tasks = if query.reverse.unwrap_or_default() {
|
||||
tasks.into_iter().take(*limit as usize).collect()
|
||||
} else {
|
||||
tasks.into_iter().rev().take(*limit as usize).collect()
|
||||
};
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub(crate) fn get_task_ids_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<(RoaringBitmap, u64)> {
|
||||
// compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching
|
||||
// the filter.
|
||||
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
|
||||
// us from modifying the underlying implementation, and the performance remains sufficient.
|
||||
// Should this change, we would modify `get_task_ids` to directly return the number of matching tasks.
|
||||
let total_tasks =
|
||||
self.get_task_ids(rtxn, &query.clone().without_limits(), processing_tasks)?;
|
||||
let mut tasks = self.get_task_ids(rtxn, query, processing_tasks)?;
|
||||
|
||||
// If the query contains a list of index uid or there is a finite list of authorized indexes,
|
||||
// then we must exclude all the kinds that aren't associated to one and only one index.
|
||||
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
|
||||
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
|
||||
tasks -= self.tasks.get_kind(rtxn, kind)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Any task that is internally associated with a non-authorized index
|
||||
// must be discarded.
|
||||
if !filters.all_indexes_authorized() {
|
||||
let all_indexes_iter = self.tasks.index_tasks.iter(rtxn)?;
|
||||
for result in all_indexes_iter {
|
||||
let (index, index_tasks) = result?;
|
||||
if !filters.is_index_authorized(index) {
|
||||
tasks -= index_tasks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((tasks, total_tasks.len()))
|
||||
}
|
||||
|
||||
pub(crate) fn get_tasks_from_authorized_indexes(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
query: &Query,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
processing_tasks: &ProcessingTasks,
|
||||
) -> Result<(Vec<Task>, u64)> {
|
||||
let (tasks, total) =
|
||||
self.get_task_ids_from_authorized_indexes(rtxn, query, filters, processing_tasks)?;
|
||||
let tasks = if query.reverse.unwrap_or_default() {
|
||||
Box::new(tasks.into_iter()) as Box<dyn Iterator<Item = u32>>
|
||||
} else {
|
||||
Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
|
||||
};
|
||||
let tasks = self
|
||||
.tasks
|
||||
.get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
|
||||
|
||||
let ProcessingTasks { batch, processing, progress: _ } = processing_tasks;
|
||||
|
||||
let ret = tasks.into_iter();
|
||||
if processing.is_empty() || batch.is_none() {
|
||||
Ok((ret.collect(), total))
|
||||
} else {
|
||||
// Safe because we ensured there was a batch in the previous branch
|
||||
let batch = batch.as_ref().unwrap();
|
||||
Ok((
|
||||
ret.map(|task| {
|
||||
if processing.contains(task.uid) {
|
||||
Task {
|
||||
status: Status::Processing,
|
||||
batch_uid: Some(batch.uid),
|
||||
started_at: Some(batch.started_at),
|
||||
..task
|
||||
}
|
||||
} else {
|
||||
task
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
total,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
441
crates/index-scheduler/src/queue/tasks_test.rs
Normal file
441
crates/index-scheduler/src/queue/tasks_test.rs
Normal file
@@ -0,0 +1,441 @@
|
||||
use meili_snap::snapshot;
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
|
||||
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, FailureLocation};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn query_tasks_from_and_limit() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("doggo", "bone");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
let kind = index_creation_task("whalo", "plankton");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
let kind = index_creation_task("catto", "his_own_vomit");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
|
||||
|
||||
handle.advance_n_successful_batches(3);
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let processing = index_scheduler.processing_tasks.read().unwrap();
|
||||
let query = Query { limit: Some(0), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query { limit: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query { limit: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query { from: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
|
||||
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_simple() {
|
||||
let start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
|
||||
|
||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
|
||||
|
||||
let query =
|
||||
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test, which should excludes the enqueued tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should excludes all of them
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should exclude the enqueued tasks and include the only processing task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let second_start_time = OffsetDateTime::now_utc();
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(start_time),
|
||||
before_started_at: Some(start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the test and before one minute after the start of the test,
|
||||
// which should include all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Succeeded, Status::Processing]),
|
||||
before_started_at: Some(start_time),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes before the start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
|
||||
// that comes after the start of the second part of the test and before one minute after the
|
||||
// second start of the test, which should exclude all tasks
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// we run the same query to verify that, and indeed find that the last task is matched
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// enqueued, succeeded, or processing tasks started after the second part of the test, should
|
||||
// again only return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
|
||||
|
||||
// now the last task should have failed
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// so running the last query should return nothing
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// but the same query on failed tasks should return the last task
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![1]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with an invalid uid
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query {
|
||||
statuses: Some(vec![Status::Failed]),
|
||||
uids: Some(vec![2]),
|
||||
after_started_at: Some(second_start_time),
|
||||
before_started_at: Some(second_start_time + Duration::minutes(1)),
|
||||
..Default::default()
|
||||
};
|
||||
let (tasks, _) = index_scheduler
|
||||
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
|
||||
.unwrap();
|
||||
// same query but with a valid uid
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_special_rules() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
|
||||
|
||||
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
|
||||
// associated with doggo -> empty result
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
|
||||
// -> only the index creation of doggo should be returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![
|
||||
IndexUidPattern::new_unchecked("catto"),
|
||||
IndexUidPattern::new_unchecked("doggo"),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
|
||||
// -> all tasks except the swap of catto with whalo are returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
|
||||
|
||||
let query = Query::default();
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// we asked for all the tasks with all index authorized -> all tasks returned
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_tasks_canceled_by() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _ = index_scheduler.register(kind, None, false).unwrap();
|
||||
let kind = KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
|
||||
};
|
||||
let _task = index_scheduler.register(kind, None, false).unwrap();
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
let kind = KindWithContent::TaskCancelation {
|
||||
query: "test_query".to_string(),
|
||||
tasks: [0, 1, 2, 3].into_iter().collect(),
|
||||
};
|
||||
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
|
||||
handle.advance_n_successful_batches(1);
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
|
||||
|
||||
let rtxn = index_scheduler.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
|
||||
.unwrap();
|
||||
// 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
|
||||
// taskCancelation itself
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
|
||||
|
||||
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
|
||||
let (tasks, _) = index_scheduler
|
||||
.queue
|
||||
.get_task_ids_from_authorized_indexes(
|
||||
&rtxn,
|
||||
&query,
|
||||
&AuthFilter::with_allowed_indexes(
|
||||
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
|
||||
),
|
||||
&proc,
|
||||
)
|
||||
.unwrap();
|
||||
// Return only 1 because the user is not authorized to see task 2
|
||||
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
|
||||
}
|
||||
398
crates/index-scheduler/src/queue/test.rs
Normal file
398
crates/index-scheduler/src/queue/test.rs
Normal file
@@ -0,0 +1,398 @@
|
||||
use big_s::S;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_types::error::ErrorCode;
|
||||
use meilisearch_types::tasks::{KindWithContent, Status};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
use crate::test_utils::{index_creation_task, replace_document_import_task};
|
||||
use crate::{IndexScheduler, Query};
|
||||
|
||||
#[test]
|
||||
fn register() {
|
||||
// In this test, the handle doesn't make any progress, we only check that the tasks are registered
|
||||
let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kinds = [
|
||||
index_creation_task("catto", "mouse"),
|
||||
replace_document_import_task("catto", None, 0, 12),
|
||||
replace_document_import_task("catto", None, 1, 50),
|
||||
replace_document_import_task("doggo", Some("bone"), 2, 5000),
|
||||
];
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
|
||||
file.persist().unwrap();
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(1).unwrap();
|
||||
file.persist().unwrap();
|
||||
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(2).unwrap();
|
||||
file.persist().unwrap();
|
||||
|
||||
for (idx, kind) in kinds.into_iter().enumerate() {
|
||||
let k = kind.as_kind();
|
||||
let task = index_scheduler.register(kind, None, false).unwrap();
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
assert_eq!(task.uid, idx as u32);
|
||||
assert_eq!(task.status, Status::Enqueued);
|
||||
assert_eq!(task.kind.as_kind(), k);
|
||||
}
|
||||
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dry_run() {
|
||||
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, None, true).unwrap();
|
||||
snapshot!(task.uid, @"0");
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, Some(12), true).unwrap();
|
||||
snapshot!(task.uid, @"12");
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
|
||||
### Autobatching Enabled = true
|
||||
### Processing batch None:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Status:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Kind:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_set_taskid() {
|
||||
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, None, false).unwrap();
|
||||
snapshot!(task.uid, @"0");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let task = index_scheduler.register(kind, Some(12), false).unwrap();
|
||||
snapshot!(task.uid, @"12");
|
||||
|
||||
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
|
||||
let error = index_scheduler.register(kind, Some(5), false).unwrap_err();
|
||||
snapshot!(error, @"Received bad task id: 5 should be >= to 13.");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_disable_auto_deletion_of_tasks() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
config.cleanup_enabled = false;
|
||||
config.max_number_of_tasks = 2;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// at this point the max number of tasks is reached
|
||||
// we can still enqueue multiple tasks
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function no new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_deletion_of_tasks() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
config.max_number_of_tasks = 2;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// at this point the max number of tasks is reached
|
||||
// we can still enqueue multiple tasks
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function a new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
// a new task deletion has been enqueued
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let proc = index_scheduler.processing_tasks.read().unwrap();
|
||||
let tasks =
|
||||
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
|
||||
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
drop(rtxn);
|
||||
drop(proc);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_task_queue_is_full() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
// that's the minimum map size possible
|
||||
config.task_db_size = 1048576 * 3;
|
||||
None
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
// on average this task takes ~600 bytes
|
||||
loop {
|
||||
let result = index_scheduler.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
);
|
||||
if result.is_err() {
|
||||
break;
|
||||
}
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
// at this point the task DB shoud have reached its limit and we should not be able to register new tasks
|
||||
let result = index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// Even the task deletion that doesn't delete anything shouldn't be accepted
|
||||
let result = index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// But a task deletion that delete something should works
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
// Now we should be able to enqueue a few tasks again
|
||||
index_scheduler
|
||||
.register(
|
||||
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
474
crates/index-scheduler/src/scheduler/autobatcher.rs
Normal file
474
crates/index-scheduler/src/scheduler/autobatcher.rs
Normal file
@@ -0,0 +1,474 @@
|
||||
/*!
|
||||
The autobatcher is responsible for combining the next enqueued
|
||||
tasks affecting a single index into a [batch](crate::batch::Batch).
|
||||
|
||||
The main function of the autobatcher is [`next_autobatch`].
|
||||
*/
|
||||
|
||||
use meilisearch_types::tasks::TaskId;
|
||||
use std::ops::ControlFlow::{self, Break, Continue};
|
||||
|
||||
use crate::KindWithContent;
|
||||
|
||||
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
|
||||
/// for the purpose of simplifying the implementation of the autobatcher.
|
||||
///
|
||||
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`]
|
||||
enum AutobatchKind {
|
||||
DocumentImport { allow_index_creation: bool, primary_key: Option<String> },
|
||||
DocumentEdition,
|
||||
DocumentDeletion { by_filter: bool },
|
||||
DocumentClear,
|
||||
Settings { allow_index_creation: bool },
|
||||
IndexCreation,
|
||||
IndexDeletion,
|
||||
IndexUpdate,
|
||||
IndexSwap,
|
||||
}
|
||||
|
||||
impl AutobatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { allow_index_creation, .. }
|
||||
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<KindWithContent> for AutobatchKind {
|
||||
fn from(kind: KindWithContent) -> Self {
|
||||
match kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
allow_index_creation, primary_key, ..
|
||||
} => AutobatchKind::DocumentImport { allow_index_creation, primary_key },
|
||||
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
|
||||
KindWithContent::DocumentDeletion { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: false }
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => {
|
||||
AutobatchKind::DocumentDeletion { by_filter: true }
|
||||
}
|
||||
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
|
||||
AutobatchKind::Settings {
|
||||
allow_index_creation: allow_index_creation && !is_deletion,
|
||||
}
|
||||
}
|
||||
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
|
||||
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
|
||||
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
|
||||
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BatchKind {
|
||||
DocumentClear {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentOperation {
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<String>,
|
||||
operation_ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentEdition {
|
||||
id: TaskId,
|
||||
},
|
||||
DocumentDeletion {
|
||||
deletion_ids: Vec<TaskId>,
|
||||
includes_by_filter: bool,
|
||||
},
|
||||
ClearAndSettings {
|
||||
other: Vec<TaskId>,
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
settings_ids: Vec<TaskId>,
|
||||
},
|
||||
IndexDeletion {
|
||||
ids: Vec<TaskId>,
|
||||
},
|
||||
IndexCreation {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexUpdate {
|
||||
id: TaskId,
|
||||
},
|
||||
IndexSwap {
|
||||
id: TaskId,
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
#[rustfmt::skip]
|
||||
fn allow_index_creation(&self) -> Option<bool> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { allow_index_creation, .. }
|
||||
| BatchKind::ClearAndSettings { allow_index_creation, .. }
|
||||
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn primary_key(&self) -> Option<Option<&str>> {
|
||||
match self {
|
||||
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BatchKind {
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
// TODO use an AutoBatchKind as input
|
||||
pub fn new(
|
||||
task_id: TaskId,
|
||||
kind: KindWithContent,
|
||||
primary_key: Option<&str>,
|
||||
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match AutobatchKind::from(kind) {
|
||||
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
|
||||
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
|
||||
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
|
||||
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
|
||||
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
|
||||
K::DocumentImport { allow_index_creation, primary_key: pk }
|
||||
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
|
||||
{
|
||||
(
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key: pk,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
)
|
||||
}
|
||||
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
|
||||
K::DocumentImport { allow_index_creation, primary_key } => (
|
||||
Break(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: vec![task_id],
|
||||
}),
|
||||
allow_index_creation,
|
||||
),
|
||||
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
|
||||
K::DocumentDeletion { by_filter: includes_by_filter } => (
|
||||
Continue(BatchKind::DocumentDeletion {
|
||||
deletion_ids: vec![task_id],
|
||||
includes_by_filter,
|
||||
}),
|
||||
false,
|
||||
),
|
||||
K::Settings { allow_index_creation } => (
|
||||
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
|
||||
allow_index_creation,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a `ControlFlow::Break` if you must stop right now.
|
||||
/// The boolean tell you if an index has been created by the batched task.
|
||||
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
|
||||
/// but false can't be returned if you needs to create an index.
|
||||
#[rustfmt::skip]
|
||||
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
|
||||
use AutobatchKind as K;
|
||||
|
||||
match (self, kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break(this)
|
||||
},
|
||||
// NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue.
|
||||
// I wrote it this way because it's easier to understand than the other way around.
|
||||
(this, kind) if !(
|
||||
// 1. If both task don't interact with primary key -> we can continue
|
||||
(this.primary_key().is_none() && kind.primary_key().is_none()) ||
|
||||
// 2. Else ->
|
||||
(
|
||||
// 2.1 If we already have a primary-key ->
|
||||
(
|
||||
primary_key.is_some() &&
|
||||
// 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key
|
||||
// 2.1.2 If the task don't have a primary-key -> we can continue
|
||||
kind.primary_key().map_or(true, |pk| pk == primary_key)
|
||||
) ||
|
||||
// 2.2 If we don't have a primary-key ->
|
||||
(
|
||||
// 2.2.1 If both the batch and the task have a primary key they should be equal
|
||||
// 2.2.2 If the batch is set to Some(None), the task should be too
|
||||
// 2.2.3 If the batch is set to None -> we can continue
|
||||
this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind)
|
||||
)
|
||||
)
|
||||
|
||||
) // closing the negation
|
||||
|
||||
=> {
|
||||
Break(this)
|
||||
},
|
||||
// The index deletion can batch with everything but must stop after
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids }
|
||||
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
|
||||
| BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, operation_ids: mut ids }
|
||||
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
|
||||
K::IndexDeletion,
|
||||
) => {
|
||||
ids.push(id);
|
||||
ids.append(&mut other);
|
||||
Break(BatchKind::IndexDeletion { ids })
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::DocumentClear { mut ids },
|
||||
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
|
||||
) => {
|
||||
ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids })
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentClear { .. },
|
||||
K::DocumentImport { .. } | K::Settings { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, mut operation_ids },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: operation_ids })
|
||||
}
|
||||
|
||||
// we can autobatch different kind of document operations and mix replacements with updates
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation, primary_key: _, mut operation_ids },
|
||||
K::DocumentImport { primary_key: pk, .. },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
operation_ids,
|
||||
primary_key: pk,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { allow_index_creation, primary_key, mut operation_ids },
|
||||
K::DocumentDeletion { by_filter: false },
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
// We can't batch a document operation with a delete by filter
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentDeletion { by_filter: true },
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::Settings { .. },
|
||||
) => Break(this),
|
||||
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: deletion_ids })
|
||||
}
|
||||
// we can't autobatch the deletion and import if the document deletion contained a filter
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
|
||||
K::DocumentImport { .. }
|
||||
) => Break(this),
|
||||
// we can autobatch the deletion and import if the index already exists
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { allow_index_creation, primary_key }
|
||||
) if index_already_exists => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can autobatch the deletion and import if both can't create an index
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
|
||||
K::DocumentImport { allow_index_creation, primary_key }
|
||||
) if !allow_index_creation => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { .. },
|
||||
K::DocumentImport { .. }
|
||||
) => {
|
||||
Break(this)
|
||||
}
|
||||
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
|
||||
}
|
||||
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
|
||||
|
||||
(
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => Continue(BatchKind::ClearAndSettings {
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
other: vec![id],
|
||||
}),
|
||||
(
|
||||
this @ BatchKind::Settings { .. },
|
||||
K::DocumentImport { .. } | K::DocumentDeletion { .. },
|
||||
) => Break(this),
|
||||
(
|
||||
BatchKind::Settings { mut settings_ids, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::Settings {
|
||||
allow_index_creation,
|
||||
settings_ids,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
|
||||
K::DocumentClear,
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
|
||||
(
|
||||
BatchKind::ClearAndSettings {
|
||||
mut other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
},
|
||||
K::DocumentDeletion { .. },
|
||||
) => {
|
||||
other.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
|
||||
K::Settings { .. },
|
||||
) => {
|
||||
settings_ids.push(id);
|
||||
Continue(BatchKind::ClearAndSettings {
|
||||
other,
|
||||
settings_ids,
|
||||
allow_index_creation,
|
||||
})
|
||||
}
|
||||
|
||||
(
|
||||
BatchKind::IndexCreation { .. }
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::DocumentEdition { .. },
|
||||
_,
|
||||
) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a batch from an ordered list of tasks.
|
||||
///
|
||||
/// ## Preconditions
|
||||
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
|
||||
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
|
||||
/// 3. The tasks must all be related to the same index
|
||||
///
|
||||
/// ## Return
|
||||
/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents
|
||||
/// a subset of the given tasks.
|
||||
pub fn autobatch(
|
||||
enqueued: Vec<(TaskId, KindWithContent)>,
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
let mut enqueued = enqueued.into_iter();
|
||||
let (id, kind) = enqueued.next()?;
|
||||
|
||||
// index_exist will keep track of if the index should exist at this point after the tasks we batched.
|
||||
let mut index_exist = index_already_exists;
|
||||
|
||||
let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) {
|
||||
(Continue(acc), create) => (acc, create),
|
||||
(Break(acc), create) => return Some((acc, create)),
|
||||
};
|
||||
|
||||
// if an index has been created in the previous step we can consider it as existing.
|
||||
index_exist |= must_create_index;
|
||||
|
||||
for (id, kind) in enqueued {
|
||||
acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) {
|
||||
Continue(acc) => acc,
|
||||
Break(acc) => return Some((acc, must_create_index)),
|
||||
};
|
||||
}
|
||||
|
||||
Some((acc, must_create_index))
|
||||
}
|
||||
395
crates/index-scheduler/src/scheduler/autobatcher_test.rs
Normal file
395
crates/index-scheduler/src/scheduler/autobatcher_test.rs
Normal file
@@ -0,0 +1,395 @@
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||
self, ReplaceDocuments, UpdateDocuments,
|
||||
};
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
|
||||
use uuid::Uuid;
|
||||
|
||||
use self::autobatcher::{autobatch, BatchKind};
|
||||
use super::*;
|
||||
use crate::TaskId;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! debug_snapshot {
|
||||
($value:expr, @$snapshot:literal) => {{
|
||||
let value = format!("{:?}", $value);
|
||||
meili_snap::snapshot!(value, @$snapshot);
|
||||
}};
|
||||
}
|
||||
|
||||
fn autobatch_from(
|
||||
index_already_exists: bool,
|
||||
primary_key: Option<&str>,
|
||||
input: impl IntoIterator<Item = KindWithContent>,
|
||||
) -> Option<(BatchKind, bool)> {
|
||||
autobatch(
|
||||
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(),
|
||||
index_already_exists,
|
||||
primary_key,
|
||||
)
|
||||
}
|
||||
|
||||
fn doc_imp(
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
primary_key: Option<&str>,
|
||||
) -> KindWithContent {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
primary_key: primary_key.map(|pk| pk.to_string()),
|
||||
method,
|
||||
content_file: Uuid::new_v4(),
|
||||
documents_count: 0,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletion {
|
||||
index_uid: String::from("doggo"),
|
||||
documents_ids: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_del_fil() -> KindWithContent {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: String::from("doggo"),
|
||||
filter_expr: serde_json::json!("cuteness > 100"),
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_clr() -> KindWithContent {
|
||||
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn settings(allow_index_creation: bool) -> KindWithContent {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: String::from("doggo"),
|
||||
new_settings: Default::default(),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
|
||||
fn idx_create() -> KindWithContent {
|
||||
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_update() -> KindWithContent {
|
||||
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
|
||||
}
|
||||
|
||||
fn idx_del() -> KindWithContent {
|
||||
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
|
||||
}
|
||||
|
||||
fn idx_swap() -> KindWithContent {
|
||||
KindWithContent::IndexSwap {
|
||||
swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_simple_operation_together() {
|
||||
// we can autobatch one or multiple `ReplaceDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// we can autobatch one or multiple `UpdateDocuments` together.
|
||||
// if the index exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// if it doesn't exists.
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletion together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
|
||||
|
||||
// we can autobatch one or multiple DocumentDeletionByFilter together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
|
||||
|
||||
// we can autobatch one or multiple Settings together
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// We can autobatch document addition with document deletion
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
|
||||
// But we can't autobatch document addition with document deletion by filter
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_different_document_operations_autobatch_together() {
|
||||
// addition and updates with deletion by filter can't batch together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_doesnt_batch_with_settings() {
|
||||
// simple case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// multiple settings and doc addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
|
||||
// addition and setting unordered
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
// Doesn't batch with other forbidden operations
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions() {
|
||||
// these two doesn't need to batch
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
// Basic use case
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// This batch kind doesn't mix with other document addition
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
|
||||
|
||||
// But you can batch multiple clear together
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn clear_and_additions_and_settings() {
|
||||
// A clear don't need to autobatch the settings that happens AFTER there is no documents
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn anything_and_index_deletion() {
|
||||
// The `IndexDeletion` doesn't batch with anything that happens AFTER.
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
|
||||
|
||||
// The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`.
|
||||
// First, the basic cases
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn allowed_and_disallowed_index_creation() {
|
||||
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
|
||||
// batch deletion and addition
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn autobatch_primary_key() {
|
||||
// ==> If I have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// ==> If I don't have a pk
|
||||
// With a single update
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
|
||||
|
||||
// With a multiple updates
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
|
||||
}
|
||||
567
crates/index-scheduler/src/scheduler/create_batch.rs
Normal file
567
crates/index-scheduler/src/scheduler/create_batch.rs
Normal file
@@ -0,0 +1,567 @@
|
||||
use std::fmt;
|
||||
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::autobatcher::{self, BatchKind};
|
||||
use crate::utils::ProcessingBatch;
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
/// Represents a combination of tasks that can all be processed at the same time.
|
||||
///
|
||||
/// A batch contains the set of tasks that it represents (accessible through
|
||||
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
|
||||
/// be processed.
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum Batch {
|
||||
TaskCancelation {
|
||||
/// The task cancelation itself.
|
||||
task: Task,
|
||||
},
|
||||
TaskDeletions(Vec<Task>),
|
||||
SnapshotCreation(Vec<Task>),
|
||||
Dump(Task),
|
||||
IndexOperation {
|
||||
op: IndexOperation,
|
||||
must_create_index: bool,
|
||||
},
|
||||
IndexCreation {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
task: Task,
|
||||
},
|
||||
IndexUpdate {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
task: Task,
|
||||
},
|
||||
IndexDeletion {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
index_has_been_created: bool,
|
||||
},
|
||||
IndexSwap {
|
||||
task: Task,
|
||||
},
|
||||
UpgradeDatabase {
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum DocumentOperation {
|
||||
Replace(Uuid),
|
||||
Update(Uuid),
|
||||
Delete(Vec<String>),
|
||||
}
|
||||
|
||||
/// A [batch](Batch) that combines multiple tasks operating on an index.
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum IndexOperation {
|
||||
DocumentOperation {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
operations: Vec<DocumentOperation>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentEdition {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
},
|
||||
DocumentDeletion {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentClear {
|
||||
index_uid: String,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
Settings {
|
||||
index_uid: String,
|
||||
// The boolean indicates if it's a settings deletion or creation.
|
||||
settings: Vec<(bool, Settings<Unchecked>)>,
|
||||
tasks: Vec<Task>,
|
||||
},
|
||||
DocumentClearAndSetting {
|
||||
index_uid: String,
|
||||
cleared_tasks: Vec<Task>,
|
||||
|
||||
// The boolean indicates if it's a settings deletion or creation.
|
||||
settings: Vec<(bool, Settings<Unchecked>)>,
|
||||
settings_tasks: Vec<Task>,
|
||||
},
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
/// Return the task ids associated with this batch.
|
||||
pub fn ids(&self) -> RoaringBitmap {
|
||||
match self {
|
||||
Batch::TaskCancelation { task, .. }
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::IndexUpdate { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
Batch::SnapshotCreation(tasks)
|
||||
| Batch::TaskDeletions(tasks)
|
||||
| Batch::UpgradeDatabase { tasks }
|
||||
| Batch::IndexDeletion { tasks, .. } => {
|
||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||
}
|
||||
Batch::IndexOperation { op, .. } => match op {
|
||||
IndexOperation::DocumentOperation { tasks, .. }
|
||||
| IndexOperation::Settings { tasks, .. }
|
||||
| IndexOperation::DocumentDeletion { tasks, .. }
|
||||
| IndexOperation::DocumentClear { tasks, .. } => {
|
||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||
}
|
||||
IndexOperation::DocumentEdition { task, .. } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
IndexOperation::DocumentClearAndSetting {
|
||||
cleared_tasks: tasks,
|
||||
settings_tasks: other,
|
||||
..
|
||||
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
|
||||
},
|
||||
Batch::IndexSwap { task } => {
|
||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the index UID associated with this batch
|
||||
pub fn index_uid(&self) -> Option<&str> {
|
||||
use Batch::*;
|
||||
match self {
|
||||
TaskCancelation { .. }
|
||||
| TaskDeletions(_)
|
||||
| SnapshotCreation(_)
|
||||
| Dump(_)
|
||||
| UpgradeDatabase { .. }
|
||||
| IndexSwap { .. } => None,
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Batch {
|
||||
/// A text used when we debug the profiling reports.
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let index_uid = self.index_uid();
|
||||
let tasks = self.ids();
|
||||
match self {
|
||||
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
|
||||
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
|
||||
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
|
||||
Batch::Dump(_) => f.write_str("Dump")?,
|
||||
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
|
||||
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
|
||||
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
|
||||
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
|
||||
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
|
||||
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
|
||||
};
|
||||
match index_uid {
|
||||
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
|
||||
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexOperation {
|
||||
pub fn index_uid(&self) -> &str {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { index_uid, .. }
|
||||
| IndexOperation::DocumentEdition { index_uid, .. }
|
||||
| IndexOperation::DocumentDeletion { index_uid, .. }
|
||||
| IndexOperation::DocumentClear { index_uid, .. }
|
||||
| IndexOperation::Settings { index_uid, .. }
|
||||
| IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for IndexOperation {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
IndexOperation::DocumentOperation { .. } => {
|
||||
f.write_str("IndexOperation::DocumentOperation")
|
||||
}
|
||||
IndexOperation::DocumentEdition { .. } => {
|
||||
f.write_str("IndexOperation::DocumentEdition")
|
||||
}
|
||||
IndexOperation::DocumentDeletion { .. } => {
|
||||
f.write_str("IndexOperation::DocumentDeletion")
|
||||
}
|
||||
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
|
||||
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
|
||||
IndexOperation::DocumentClearAndSetting { .. } => {
|
||||
f.write_str("IndexOperation::DocumentClearAndSetting")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
|
||||
///
|
||||
/// ## Arguments
|
||||
/// - `rtxn`: read transaction
|
||||
/// - `index_uid`: name of the index affected by the operations of the autobatch
|
||||
/// - `batch`: the result of the autobatcher
|
||||
pub(crate) fn create_next_batch_index(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
index_uid: String,
|
||||
batch: BatchKind,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
must_create_index: bool,
|
||||
) -> Result<Option<Batch>> {
|
||||
match batch {
|
||||
BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClear {
|
||||
tasks: self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
ids,
|
||||
)?,
|
||||
index_uid,
|
||||
},
|
||||
must_create_index,
|
||||
})),
|
||||
BatchKind::DocumentEdition { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
match &task.kind {
|
||||
KindWithContent::DocumentEdition { index_uid, .. } => {
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentEdition {
|
||||
index_uid: index_uid.clone(),
|
||||
task,
|
||||
},
|
||||
must_create_index: false,
|
||||
}))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
BatchKind::DocumentOperation { operation_ids, .. } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
operation_ids,
|
||||
)?;
|
||||
let primary_key = tasks
|
||||
.iter()
|
||||
.find_map(|task| match task.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
|
||||
// we want to stop on the first document addition
|
||||
Some(primary_key.clone())
|
||||
}
|
||||
KindWithContent::DocumentDeletion { .. } => None,
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.flatten();
|
||||
|
||||
let mut operations = Vec::new();
|
||||
|
||||
for task in tasks.iter() {
|
||||
match task.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
content_file, method, ..
|
||||
} => match method {
|
||||
IndexDocumentsMethod::ReplaceDocuments => {
|
||||
operations.push(DocumentOperation::Replace(content_file))
|
||||
}
|
||||
IndexDocumentsMethod::UpdateDocuments => {
|
||||
operations.push(DocumentOperation::Update(content_file))
|
||||
}
|
||||
_ => unreachable!("Unknown document merging method"),
|
||||
},
|
||||
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
|
||||
operations.push(DocumentOperation::Delete(documents_ids.clone()));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentOperation {
|
||||
index_uid,
|
||||
primary_key,
|
||||
operations,
|
||||
tasks,
|
||||
},
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
deletion_ids,
|
||||
)?;
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentDeletion { index_uid, tasks },
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::Settings { settings_ids, .. } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
settings_ids,
|
||||
)?;
|
||||
|
||||
let mut settings = Vec::new();
|
||||
for task in &tasks {
|
||||
match task.kind {
|
||||
KindWithContent::SettingsUpdate {
|
||||
ref new_settings, is_deletion, ..
|
||||
} => settings.push((is_deletion, *new_settings.clone())),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::Settings { index_uid, settings, tasks },
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => {
|
||||
let (index_uid, settings, settings_tasks) = match self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_uid,
|
||||
BatchKind::Settings { settings_ids, allow_index_creation },
|
||||
current_batch,
|
||||
must_create_index,
|
||||
)?
|
||||
.unwrap()
|
||||
{
|
||||
Batch::IndexOperation {
|
||||
op: IndexOperation::Settings { index_uid, settings, tasks, .. },
|
||||
..
|
||||
} => (index_uid, settings, tasks),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let (index_uid, cleared_tasks) = match self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_uid,
|
||||
BatchKind::DocumentClear { ids: other },
|
||||
current_batch,
|
||||
must_create_index,
|
||||
)?
|
||||
.unwrap()
|
||||
{
|
||||
Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClear { index_uid, tasks },
|
||||
..
|
||||
} => (index_uid, tasks),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok(Some(Batch::IndexOperation {
|
||||
op: IndexOperation::DocumentClearAndSetting {
|
||||
index_uid,
|
||||
cleared_tasks,
|
||||
settings,
|
||||
settings_tasks,
|
||||
},
|
||||
must_create_index,
|
||||
}))
|
||||
}
|
||||
BatchKind::IndexCreation { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
let (index_uid, primary_key) = match &task.kind {
|
||||
KindWithContent::IndexCreation { index_uid, primary_key } => {
|
||||
(index_uid.clone(), primary_key.clone())
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Ok(Some(Batch::IndexCreation { index_uid, primary_key, task }))
|
||||
}
|
||||
BatchKind::IndexUpdate { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
let primary_key = match &task.kind {
|
||||
KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task }))
|
||||
}
|
||||
BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion {
|
||||
index_uid,
|
||||
index_has_been_created: must_create_index,
|
||||
tasks: self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
ids,
|
||||
)?,
|
||||
})),
|
||||
BatchKind::IndexSwap { id } => {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
Ok(Some(Batch::IndexSwap { task }))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create the next batch to be processed;
|
||||
/// 1. We get the *last* task to cancel.
|
||||
/// 2. We get the *next* task to delete.
|
||||
/// 3. We get the *next* snapshot to process.
|
||||
/// 4. We get the *next* dump to process.
|
||||
/// 5. We get the *next* tasks to process for a specific index.
|
||||
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
|
||||
pub(crate) fn create_next_batch(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
) -> Result<Option<(Batch, ProcessingBatch)>> {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
|
||||
|
||||
let batch_id = self.queue.batches.next_batch_id(rtxn)?;
|
||||
let mut current_batch = ProcessingBatch::new(batch_id);
|
||||
|
||||
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
|
||||
|
||||
// 0. The priority over everything is to upgrade the instance
|
||||
// There shouldn't be multiple upgrade tasks but just in case we're going to batch all of them at the same time
|
||||
let upgrade = self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)? & (enqueued | failed);
|
||||
if !upgrade.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, upgrade)?;
|
||||
// In the case of an upgrade database batch, we want to find back the original batch that tried processing it
|
||||
// and re-use its id
|
||||
if let Some(batch_uid) = tasks.last().unwrap().batch_uid {
|
||||
current_batch.uid = batch_uid;
|
||||
}
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::UpgradeDatabase { tasks }, current_batch)));
|
||||
}
|
||||
|
||||
// 1. we get the last task to cancel.
|
||||
let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
||||
if let Some(task_id) = to_cancel.max() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::TaskCancelation { task }, current_batch)));
|
||||
}
|
||||
|
||||
// 2. we get the next task to delete
|
||||
let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
|
||||
if !to_delete.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 3. we batch the snapshot.
|
||||
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
|
||||
if !to_snapshot.is_empty() {
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
|
||||
current_batch.processing(&mut tasks);
|
||||
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
|
||||
}
|
||||
|
||||
// 4. we batch the dumps.
|
||||
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
|
||||
if let Some(to_dump) = to_dump.min() {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::Dump(task), current_batch)));
|
||||
}
|
||||
|
||||
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
|
||||
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
// If the task is not associated with any index, verify that it is an index swap and
|
||||
// create the batch directly. Otherwise, get the index name associated with the task
|
||||
// and use the autobatcher to batch the enqueued tasks associated with it
|
||||
|
||||
let index_name = if let Some(&index_name) = task.indexes().first() {
|
||||
index_name
|
||||
} else {
|
||||
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
|
||||
current_batch.processing(Some(&mut task));
|
||||
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
|
||||
};
|
||||
|
||||
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
|
||||
let mut primary_key = None;
|
||||
if index_already_exists {
|
||||
let index = self.index_mapper.index(rtxn, index_name)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
}
|
||||
|
||||
let index_tasks = self.queue.tasks.index_tasks(rtxn, index_name)? & enqueued;
|
||||
|
||||
// If autobatching is disabled we only take one task at a time.
|
||||
// Otherwise, we take only a maximum of tasks to create batches.
|
||||
let tasks_limit = if self.scheduler.autobatching_enabled {
|
||||
self.scheduler.max_number_of_batched_tasks
|
||||
} else {
|
||||
1
|
||||
};
|
||||
|
||||
let mut enqueued = Vec::new();
|
||||
let mut total_size: u64 = 0;
|
||||
for task_id in index_tasks.into_iter().take(tasks_limit) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(rtxn, task_id)
|
||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
|
||||
|
||||
if let Some(uuid) = task.content_uuid() {
|
||||
let content_size = self.queue.file_store.compute_size(uuid)?;
|
||||
total_size = total_size.saturating_add(content_size);
|
||||
}
|
||||
|
||||
if total_size > self.scheduler.batched_tasks_size_limit && !enqueued.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
enqueued.push((task.uid, task.kind));
|
||||
}
|
||||
|
||||
if let Some((batchkind, create_index)) =
|
||||
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
|
||||
{
|
||||
return Ok(self
|
||||
.create_next_batch_index(
|
||||
rtxn,
|
||||
index_name.to_string(),
|
||||
batchkind,
|
||||
&mut current_batch,
|
||||
create_index,
|
||||
)?
|
||||
.map(|batch| (batch, current_batch)));
|
||||
}
|
||||
|
||||
// If we found no tasks then we were notified for something that got autobatched
|
||||
// somehow and there is nothing to do.
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
453
crates/index-scheduler/src/scheduler/mod.rs
Normal file
453
crates/index-scheduler/src/scheduler/mod.rs
Normal file
@@ -0,0 +1,453 @@
|
||||
mod autobatcher;
|
||||
#[cfg(test)]
|
||||
mod autobatcher_test;
|
||||
mod create_batch;
|
||||
mod process_batch;
|
||||
mod process_dump_creation;
|
||||
mod process_index_operation;
|
||||
mod process_snapshot_creation;
|
||||
mod process_upgrade;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
#[cfg(test)]
|
||||
mod test_document_addition;
|
||||
#[cfg(test)]
|
||||
mod test_embedders;
|
||||
#[cfg(test)]
|
||||
mod test_failure;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use convert_case::{Case, Casing as _};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::Status;
|
||||
use process_batch::ProcessBatchInfo;
|
||||
use rayon::current_num_threads;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use roaring::RoaringBitmap;
|
||||
use synchronoise::SignalEvent;
|
||||
|
||||
use crate::processing::{AtomicTaskStep, BatchProgress};
|
||||
use crate::{Error, IndexScheduler, IndexSchedulerOptions, Result, TickOutcome};
|
||||
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct MustStopProcessing(Arc<AtomicBool>);
|
||||
|
||||
impl MustStopProcessing {
|
||||
pub fn get(&self) -> bool {
|
||||
self.0.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn must_stop(&self) {
|
||||
self.0.store(true, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn reset(&self) {
|
||||
self.0.store(false, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scheduler {
|
||||
/// A boolean that can be set to true to stop the currently processing tasks.
|
||||
pub must_stop_processing: MustStopProcessing,
|
||||
|
||||
/// Get a signal when a batch needs to be processed.
|
||||
pub(crate) wake_up: Arc<SignalEvent>,
|
||||
|
||||
/// Whether auto-batching is enabled or not.
|
||||
pub(crate) autobatching_enabled: bool,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) max_number_of_batched_tasks: usize,
|
||||
|
||||
/// The maximum size, in bytes, of tasks in a batch.
|
||||
pub(crate) batched_tasks_size_limit: u64,
|
||||
|
||||
/// The path used to create the dumps.
|
||||
pub(crate) dumps_path: PathBuf,
|
||||
|
||||
/// The path used to create the snapshots.
|
||||
pub(crate) snapshots_path: PathBuf,
|
||||
|
||||
/// The path to the folder containing the auth LMDB env.
|
||||
pub(crate) auth_env: Env<WithoutTls>,
|
||||
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
|
||||
/// The maximal number of entries in the search query cache of an embedder.
|
||||
///
|
||||
/// 0 disables the cache.
|
||||
pub(crate) embedding_cache_cap: usize,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub(crate) fn private_clone(&self) -> Scheduler {
|
||||
Scheduler {
|
||||
must_stop_processing: self.must_stop_processing.clone(),
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: self.batched_tasks_size_limit,
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
auth_env: self.auth_env.clone(),
|
||||
version_file_path: self.version_file_path.clone(),
|
||||
embedding_cache_cap: self.embedding_cache_cap,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
|
||||
Scheduler {
|
||||
must_stop_processing: MustStopProcessing::default(),
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: options.batched_tasks_size_limit,
|
||||
dumps_path: options.dumps_path.clone(),
|
||||
snapshots_path: options.snapshots_path.clone(),
|
||||
auth_env,
|
||||
version_file_path: options.version_file_path.clone(),
|
||||
embedding_cache_cap: options.embedding_cache_cap,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Perform one iteration of the run loop.
|
||||
///
|
||||
/// 1. See if we need to cleanup the task queue
|
||||
/// 2. Find the next batch of tasks to be processed.
|
||||
/// 3. Update the information of these tasks following the start of their processing.
|
||||
/// 4. Update the in-memory list of processed tasks accordingly.
|
||||
/// 5. Process the batch:
|
||||
/// - perform the actions of each batched task
|
||||
/// - update the information of each batched task following the end
|
||||
/// of their processing.
|
||||
/// 6. Reset the in-memory list of processed tasks.
|
||||
///
|
||||
/// Returns the number of processed tasks.
|
||||
pub(crate) fn tick(&self) -> Result<TickOutcome> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
*self.run_loop_iteration.write().unwrap() += 1;
|
||||
self.breakpoint(crate::test_utils::Breakpoint::Start);
|
||||
}
|
||||
|
||||
if self.cleanup_enabled {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.queue.cleanup_task_queue(&mut wtxn)?;
|
||||
wtxn.commit()?;
|
||||
}
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
let (batch, mut processing_batch) =
|
||||
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
|
||||
Some(batch) => batch,
|
||||
None => return Ok(TickOutcome::WaitForSignal),
|
||||
};
|
||||
let index_uid = batch.index_uid().map(ToOwned::to_owned);
|
||||
drop(rtxn);
|
||||
|
||||
// 1. store the starting date with the bitmap of processing tasks.
|
||||
let mut ids = batch.ids();
|
||||
let processed_tasks = ids.len();
|
||||
|
||||
// We reset the must_stop flag to be sure that we don't stop processing tasks
|
||||
self.scheduler.must_stop_processing.reset();
|
||||
let progress = self
|
||||
.processing_tasks
|
||||
.write()
|
||||
.unwrap()
|
||||
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
|
||||
.start_processing(processing_batch.clone(), ids.clone());
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::BatchCreated);
|
||||
|
||||
// 2. Process the tasks
|
||||
let res = {
|
||||
let cloned_index_scheduler = self.private_clone();
|
||||
let processing_batch = &mut processing_batch;
|
||||
let progress = progress.clone();
|
||||
std::thread::scope(|s| {
|
||||
let p = progress.clone();
|
||||
let handle = std::thread::Builder::new()
|
||||
.name(String::from("batch-operation"))
|
||||
.spawn_scoped(s, move || {
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, p)
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
match handle.join() {
|
||||
Ok(ret) => {
|
||||
if ret.is_err() {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
Err(panic) => {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
Err(Error::ProcessBatchPanicked(msg.to_string()))
|
||||
}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
// Reset the currently updating index to relinquish the index handle
|
||||
self.index_mapper.set_currently_updating_index(None);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
|
||||
|
||||
progress.update_progress(BatchProgress::WritingTasksToDisk);
|
||||
processing_batch.finished();
|
||||
let mut stop_scheduler_forever = false;
|
||||
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
let mut canceled = RoaringBitmap::new();
|
||||
let mut process_batch_info = ProcessBatchInfo::default();
|
||||
|
||||
match res {
|
||||
Ok((tasks, info)) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
|
||||
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
|
||||
progress.update_progress(task_progress_obj);
|
||||
process_batch_info = info;
|
||||
let mut success = 0;
|
||||
let mut failure = 0;
|
||||
let mut canceled_by = None;
|
||||
|
||||
#[allow(unused_variables)]
|
||||
for (i, mut task) in tasks.into_iter().enumerate() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
processing_batch.update(&mut task);
|
||||
if task.status == Status::Canceled {
|
||||
canceled.insert(task.uid);
|
||||
canceled_by = task.canceled_by;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchSuccess {
|
||||
task_uid: i as u32,
|
||||
},
|
||||
)?;
|
||||
|
||||
match task.error {
|
||||
Some(_) => failure += 1,
|
||||
None => success += 1,
|
||||
}
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?;
|
||||
}
|
||||
tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks.");
|
||||
}
|
||||
// If we have an abortion error we must stop the tick here and re-schedule tasks.
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::InternalError(milli::InternalError::AbortedIndexation),
|
||||
..
|
||||
})
|
||||
| Err(Error::AbortedTask) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AbortedIndexation);
|
||||
wtxn.abort();
|
||||
|
||||
tracing::info!("A batch of tasks was aborted.");
|
||||
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
|
||||
// this is because we want to let the next tick call `create_next_batch` and keep
|
||||
// the `started_at` date times and `processings` of the current processing tasks.
|
||||
// This date time is used by the task cancelation to store the right `started_at`
|
||||
// date in the task on disk.
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
// If an index said it was full, we need to:
|
||||
// 1. identify which index is full
|
||||
// 2. close the associated environment
|
||||
// 3. resize it
|
||||
// 4. re-schedule tasks
|
||||
Err(Error::Milli {
|
||||
error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached),
|
||||
..
|
||||
}) if index_uid.is_some() => {
|
||||
// fixme: add index_uid to match to avoid the unwrap
|
||||
let index_uid = index_uid.unwrap();
|
||||
// fixme: handle error more gracefully? not sure when this could happen
|
||||
self.index_mapper.resize_index(&wtxn, &index_uid)?;
|
||||
wtxn.abort();
|
||||
|
||||
tracing::info!("The max database size was reached. Resizing the index.");
|
||||
|
||||
return Ok(TickOutcome::TickAgain(0));
|
||||
}
|
||||
// In case of a failure we must get back and patch all the tasks with the error.
|
||||
Err(err) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
|
||||
progress.update_progress(task_progress_obj);
|
||||
|
||||
if matches!(err, Error::DatabaseUpgrade(_)) {
|
||||
tracing::error!(
|
||||
"Upgrade task failed, tasks won't be processed until the following issue is fixed: {err}"
|
||||
);
|
||||
stop_scheduler_forever = true;
|
||||
}
|
||||
let error: ResponseError = err.into();
|
||||
for id in ids.iter() {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
let mut task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&wtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
task.status = Status::Failed;
|
||||
task.error = Some(error.clone());
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
processing_batch.update(&mut task);
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(
|
||||
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchFailure,
|
||||
)?;
|
||||
|
||||
tracing::error!("Batch failed {}", error);
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We must re-add the canceled task so they're part of the same batch.
|
||||
ids |= canceled;
|
||||
|
||||
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
|
||||
process_batch_info;
|
||||
|
||||
processing_batch.stats.progress_trace =
|
||||
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
|
||||
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
|
||||
let mut congestion_info = serde_json::Map::new();
|
||||
congestion_info.insert("attempts".into(), congestion.attempts.into());
|
||||
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
|
||||
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
|
||||
congestion_info
|
||||
});
|
||||
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
|
||||
.iter()
|
||||
.flat_map(|(dbname, pre_size)| {
|
||||
post_commit_dabases_sizes
|
||||
.get(dbname)
|
||||
.map(|post_size| {
|
||||
use byte_unit::{Byte, UnitType::Binary};
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
|
||||
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
|
||||
let diff_size = post_size.abs_diff(*pre_size) as u64;
|
||||
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
|
||||
let sign = match post_size.cmp(pre_size) {
|
||||
Equal => return None,
|
||||
Greater => "+",
|
||||
Less => "-",
|
||||
};
|
||||
|
||||
Some((
|
||||
dbname.to_case(Case::Camel),
|
||||
format!("{post:#.2} ({sign}{diff:#.2})").into(),
|
||||
))
|
||||
})
|
||||
.into_iter()
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(congestion) = congestion {
|
||||
tracing::debug!(
|
||||
"Channel congestion metrics - Attempts: {}, Blocked attempts: {} ({:.1}% congestion)",
|
||||
congestion.attempts,
|
||||
congestion.blocking_attempts,
|
||||
congestion.congestion_ratio(),
|
||||
);
|
||||
}
|
||||
|
||||
tracing::debug!("call trace: {:?}", progress.accumulated_durations());
|
||||
|
||||
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
|
||||
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
|
||||
|
||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||
|
||||
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
|
||||
// and then become « not found » for some time until the commit everything is written and the final commit is made.
|
||||
self.processing_tasks.write().unwrap().stop_processing();
|
||||
|
||||
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
|
||||
tracing::debug!("Deleting the update files");
|
||||
|
||||
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
|
||||
let idx = AtomicU32::new(0);
|
||||
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
|
||||
let rtxn = self.read_txn()?;
|
||||
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
|
||||
let task = self
|
||||
.queue
|
||||
.tasks
|
||||
.get_task(&rtxn, id)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
|
||||
.ok_or(Error::CorruptedTaskQueue)?;
|
||||
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
|
||||
tracing::error!(
|
||||
"Failure to delete the content files associated with task {}. Error: {e}",
|
||||
task.uid
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// We shouldn't crash the tick function if we can't send data to the webhook.
|
||||
let _ = self.notify_webhook(&ids);
|
||||
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
|
||||
|
||||
if stop_scheduler_forever {
|
||||
Ok(TickOutcome::StopProcessingForever)
|
||||
} else {
|
||||
Ok(TickOutcome::TickAgain(processed_tasks))
|
||||
}
|
||||
}
|
||||
}
|
||||
687
crates/index-scheduler/src/scheduler/process_batch.rs
Normal file
687
crates/index-scheduler/src/scheduler/process_batch.rs
Normal file
@@ -0,0 +1,687 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self, ChannelCongestion};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
|
||||
use milli::update::Settings as MilliSettings;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::create_batch::Batch;
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
|
||||
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||
UpdateIndexProgress,
|
||||
};
|
||||
use crate::utils::{
|
||||
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
|
||||
ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ProcessBatchInfo {
|
||||
/// The write channel congestion. None when unavailable: settings update.
|
||||
pub congestion: Option<ChannelCongestion>,
|
||||
/// The sizes of the different databases before starting the indexation.
|
||||
pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
/// The sizes of the different databases after commiting the indexation.
|
||||
pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Apply the operation associated with the given batch.
|
||||
///
|
||||
/// ## Return
|
||||
/// The list of tasks that were processed. The metadata of each task in the returned
|
||||
/// list is updated accordingly, with the exception of the its date fields
|
||||
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
|
||||
#[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
|
||||
pub(crate) fn process_batch(
|
||||
&self,
|
||||
batch: Batch,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
progress: Progress,
|
||||
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::PanicInsideProcessBatch)?;
|
||||
self.breakpoint(crate::test_utils::Breakpoint::InsideProcessBatch);
|
||||
}
|
||||
|
||||
match batch {
|
||||
Batch::TaskCancelation { mut task } => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
let matched_tasks =
|
||||
if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind {
|
||||
tasks
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let mut canceled_tasks = self.cancel_matched_tasks(
|
||||
&rtxn,
|
||||
task.uid,
|
||||
current_batch,
|
||||
matched_tasks,
|
||||
&progress,
|
||||
)?;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
match &mut task.details {
|
||||
Some(Details::TaskCancelation {
|
||||
matched_tasks: _,
|
||||
canceled_tasks: canceled_tasks_details,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*canceled_tasks_details = Some(canceled_tasks.len() as u64);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
canceled_tasks.push(task);
|
||||
|
||||
Ok((canceled_tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::TaskDeletions(mut tasks) => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
let mut matched_tasks = RoaringBitmap::new();
|
||||
|
||||
for task in tasks.iter() {
|
||||
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
|
||||
matched_tasks |= tasks;
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut deleted_tasks =
|
||||
self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Succeeded;
|
||||
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
|
||||
deleted_tasks -= tasks;
|
||||
|
||||
match &mut task.details {
|
||||
Some(Details::TaskDeletion {
|
||||
matched_tasks: _,
|
||||
deleted_tasks,
|
||||
original_filter: _,
|
||||
}) => {
|
||||
*deleted_tasks = Some(deleted_tasks_count);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::SnapshotCreation(tasks) => self
|
||||
.process_snapshot(progress, tasks)
|
||||
.map(|tasks| (tasks, ProcessBatchInfo::default())),
|
||||
Batch::Dump(task) => self
|
||||
.process_dump_creation(progress, task)
|
||||
.map(|tasks| (tasks, ProcessBatchInfo::default())),
|
||||
Batch::IndexOperation { op, must_create_index } => {
|
||||
let index_uid = op.index_uid().to_string();
|
||||
let index = if must_create_index {
|
||||
// create the index if it doesn't already exist
|
||||
let wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.create_index(wtxn, &index_uid, None)?
|
||||
} else {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, &index_uid)?
|
||||
};
|
||||
|
||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||
self.index_mapper
|
||||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
|
||||
let (tasks, congestion) =
|
||||
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
|
||||
|
||||
{
|
||||
progress.update_progress(FinalizingIndexStep::Committing);
|
||||
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
|
||||
let _entered = span.enter();
|
||||
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let mut post_commit_dabases_sizes = None;
|
||||
let res = || -> Result<()> {
|
||||
progress.update_progress(FinalizingIndexStep::ComputingStats);
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
let info = ProcessBatchInfo {
|
||||
congestion,
|
||||
// In case we fail to the get post-commit sizes we decide
|
||||
// that nothing changed and use the pre-commit sizes.
|
||||
post_commit_dabases_sizes: post_commit_dabases_sizes
|
||||
.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
|
||||
pre_commit_dabases_sizes,
|
||||
};
|
||||
|
||||
Ok((tasks, info))
|
||||
}
|
||||
Batch::IndexCreation { index_uid, primary_key, task } => {
|
||||
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
|
||||
|
||||
let wtxn = self.env.write_txn()?;
|
||||
if self.index_mapper.exists(&wtxn, &index_uid)? {
|
||||
return Err(Error::IndexAlreadyExists(index_uid));
|
||||
}
|
||||
self.index_mapper.create_index(wtxn, &index_uid, None)?;
|
||||
|
||||
self.process_batch(
|
||||
Batch::IndexUpdate { index_uid, primary_key, task },
|
||||
current_batch,
|
||||
progress,
|
||||
)
|
||||
}
|
||||
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
|
||||
progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let index = self.index_mapper.index(&rtxn, &index_uid)?;
|
||||
|
||||
if let Some(primary_key) = primary_key.clone() {
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let mut builder = MilliSettings::new(
|
||||
&mut index_wtxn,
|
||||
&index,
|
||||
self.index_mapper.indexer_config(),
|
||||
);
|
||||
builder.set_primary_key(primary_key);
|
||||
let must_stop_processing = self.scheduler.must_stop_processing.clone();
|
||||
builder
|
||||
.execute(
|
||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||
|| must_stop_processing.get(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// drop rtxn before starting a new wtxn on the same db
|
||||
rtxn.commit()?;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::IndexInfo { primary_key });
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let res = || -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
||||
match res {
|
||||
Ok(_) => (),
|
||||
Err(e) => tracing::error!(
|
||||
error = &e as &dyn std::error::Error,
|
||||
"Could not write the stats of the index"
|
||||
),
|
||||
}
|
||||
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
|
||||
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
|
||||
let wtxn = self.env.write_txn()?;
|
||||
|
||||
// it's possible that the index doesn't exist
|
||||
let number_of_documents = || -> Result<u64> {
|
||||
let index = self.index_mapper.index(&wtxn, &index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
index
|
||||
.number_of_documents(&index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))
|
||||
}()
|
||||
.unwrap_or_default();
|
||||
|
||||
// The write transaction is directly owned and committed inside.
|
||||
match self.index_mapper.delete_index(wtxn, &index_uid) {
|
||||
Ok(()) => (),
|
||||
Err(Error::IndexNotFound(_)) if index_has_been_created => (),
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
|
||||
// We set all the tasks details to the default value.
|
||||
for task in &mut tasks {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = match &task.kind {
|
||||
KindWithContent::IndexDeletion { .. } => {
|
||||
Some(Details::ClearAll { deleted_documents: Some(number_of_documents) })
|
||||
}
|
||||
otherwise => otherwise.default_finished_details(),
|
||||
};
|
||||
}
|
||||
|
||||
// Here we could also show that all the internal database sizes goes to 0
|
||||
// but it would mean opening the index and that's costly.
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexSwap { mut task } => {
|
||||
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
|
||||
swaps
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut not_found_indexes = BTreeSet::new();
|
||||
for IndexSwap { indexes: (lhs, rhs) } in swaps {
|
||||
for index in [lhs, rhs] {
|
||||
let index_exists = self.index_mapper.index_exists(&wtxn, index)?;
|
||||
if !index_exists {
|
||||
not_found_indexes.insert(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !not_found_indexes.is_empty() {
|
||||
if not_found_indexes.len() == 1 {
|
||||
return Err(Error::SwapIndexNotFound(
|
||||
not_found_indexes.into_iter().next().unwrap().clone(),
|
||||
));
|
||||
} else {
|
||||
return Err(Error::SwapIndexesNotFound(
|
||||
not_found_indexes.into_iter().cloned().collect(),
|
||||
));
|
||||
}
|
||||
}
|
||||
progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
|
||||
for (step, swap) in swaps.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<SwappingTheIndexes>::new(
|
||||
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
|
||||
step as u32,
|
||||
swaps.len() as u32,
|
||||
));
|
||||
self.apply_index_swap(
|
||||
&mut wtxn,
|
||||
&progress,
|
||||
task.uid,
|
||||
&swap.indexes.0,
|
||||
&swap.indexes.1,
|
||||
)?;
|
||||
}
|
||||
wtxn.commit()?;
|
||||
task.status = Status::Succeeded;
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::UpgradeDatabase { mut tasks } => {
|
||||
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
|
||||
unreachable!();
|
||||
};
|
||||
let ret = catch_unwind(AssertUnwindSafe(|| self.process_upgrade(from, progress)));
|
||||
match ret {
|
||||
Ok(Ok(())) => (),
|
||||
Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
}
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Succeeded;
|
||||
// Since this task can be retried we must reset its error status
|
||||
task.error = None;
|
||||
}
|
||||
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Swap the index `lhs` with the index `rhs`.
|
||||
fn apply_index_swap(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
progress: &Progress,
|
||||
task_id: u32,
|
||||
lhs: &str,
|
||||
rhs: &str,
|
||||
) -> Result<()> {
|
||||
progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
|
||||
// 1. Verify that both lhs and rhs are existing indexes
|
||||
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
||||
if !index_lhs_exists {
|
||||
return Err(Error::IndexNotFound(lhs.to_owned()));
|
||||
}
|
||||
let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?;
|
||||
if !index_rhs_exists {
|
||||
return Err(Error::IndexNotFound(rhs.to_owned()));
|
||||
}
|
||||
|
||||
// 2. Get the task set for index = name that appeared before the index swap task
|
||||
let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
|
||||
index_lhs_task_ids.remove_range(task_id..);
|
||||
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
|
||||
index_rhs_task_ids.remove_range(task_id..);
|
||||
|
||||
// 3. before_name -> new_name in the task's KindWithContent
|
||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
|
||||
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
|
||||
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
|
||||
for task_id in tasks_to_update {
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||
self.queue.tasks.all_tasks.put(wtxn, &task_id, &task)?;
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 4. remove the task from indexuid = before_name
|
||||
// 5. add the task to indexuid = after_name
|
||||
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
|
||||
self.queue.tasks.update_index(wtxn, lhs, |lhs_tasks| {
|
||||
*lhs_tasks -= &index_lhs_task_ids;
|
||||
*lhs_tasks |= &index_rhs_task_ids;
|
||||
})?;
|
||||
self.queue.tasks.update_index(wtxn, rhs, |rhs_tasks| {
|
||||
*rhs_tasks -= &index_rhs_task_ids;
|
||||
*rhs_tasks |= &index_lhs_task_ids;
|
||||
})?;
|
||||
|
||||
// 6. Swap in the index mapper
|
||||
self.index_mapper.swap(wtxn, lhs, rhs)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete each given task from all the databases (if it is deleteable).
|
||||
///
|
||||
/// Return the number of tasks that were actually deleted.
|
||||
fn delete_matched_tasks(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
progress: &Progress,
|
||||
) -> Result<RoaringBitmap> {
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
|
||||
|
||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||
let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?;
|
||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||
|
||||
let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?;
|
||||
let mut to_delete_tasks = all_task_ids & matched_tasks;
|
||||
to_delete_tasks -= &**processing_tasks;
|
||||
to_delete_tasks -= &enqueued_tasks;
|
||||
|
||||
// 2. We now have a list of tasks to delete, delete them
|
||||
let mut affected_indexes = HashSet::new();
|
||||
let mut affected_statuses = HashSet::new();
|
||||
let mut affected_kinds = HashSet::new();
|
||||
let mut affected_canceled_by = RoaringBitmap::new();
|
||||
// The tasks that have been removed *per batches*.
|
||||
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
|
||||
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
for task_id in to_delete_tasks.iter() {
|
||||
let task =
|
||||
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
|
||||
affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned()));
|
||||
affected_statuses.insert(task.status);
|
||||
affected_kinds.insert(task.kind.as_kind());
|
||||
// Note: don't delete the persisted task data since
|
||||
// we can only delete succeeded, failed, and canceled tasks.
|
||||
// In each of those cases, the persisted data is supposed to
|
||||
// have been deleted already.
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.tasks.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(canceled_by) = task.canceled_by {
|
||||
affected_canceled_by.insert(canceled_by);
|
||||
}
|
||||
if let Some(batch_uid) = task.batch_uid {
|
||||
affected_batches.entry(batch_uid).or_default().insert(task_id);
|
||||
}
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(
|
||||
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
|
||||
);
|
||||
progress.update_progress(task_progress);
|
||||
for index in affected_indexes.iter() {
|
||||
self.queue.tasks.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
for status in affected_statuses.iter() {
|
||||
self.queue.tasks.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
for kind in affected_kinds.iter() {
|
||||
self.queue.tasks.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
progress.update_progress(TaskDeletionProgress::DeletingTasks);
|
||||
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||
progress.update_progress(task_progress);
|
||||
for task in to_delete_tasks.iter() {
|
||||
self.queue.tasks.all_tasks.delete(wtxn, &task)?;
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
for canceled_by in affected_canceled_by {
|
||||
if let Some(mut tasks) = self.queue.tasks.canceled_by.get(wtxn, &canceled_by)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
if tasks.is_empty() {
|
||||
self.queue.tasks.canceled_by.delete(wtxn, &canceled_by)?;
|
||||
} else {
|
||||
self.queue.tasks.canceled_by.put(wtxn, &canceled_by, &tasks)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
progress.update_progress(TaskDeletionProgress::DeletingBatches);
|
||||
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
|
||||
progress.update_progress(batch_progress);
|
||||
for (batch_id, to_delete_tasks) in affected_batches {
|
||||
if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
// We must remove the batch entirely
|
||||
if tasks.is_empty() {
|
||||
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
|
||||
if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
earliest,
|
||||
batch_id,
|
||||
)?;
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
oldest,
|
||||
batch_id,
|
||||
)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
batch.started_at,
|
||||
batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch_id,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
|
||||
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
|
||||
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Anyway, we must remove the batch from all its reverse indexes.
|
||||
// The only way to do that is to check
|
||||
|
||||
for index in affected_indexes.iter() {
|
||||
let index_tasks = self.queue.tasks.index_tasks(wtxn, index)?;
|
||||
let remaining_index_tasks = index_tasks & &tasks;
|
||||
if remaining_index_tasks.is_empty() {
|
||||
self.queue.batches.update_index(wtxn, index, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
for status in affected_statuses.iter() {
|
||||
let status_tasks = self.queue.tasks.get_status(wtxn, *status)?;
|
||||
let remaining_status_tasks = status_tasks & &tasks;
|
||||
if remaining_status_tasks.is_empty() {
|
||||
self.queue.batches.update_status(wtxn, *status, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
for kind in affected_kinds.iter() {
|
||||
let kind_tasks = self.queue.tasks.get_kind(wtxn, *kind)?;
|
||||
let remaining_kind_tasks = kind_tasks & &tasks;
|
||||
if remaining_kind_tasks.is_empty() {
|
||||
self.queue.batches.update_kind(wtxn, *kind, |bitmap| {
|
||||
bitmap.remove(batch_id);
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(to_delete_tasks)
|
||||
}
|
||||
|
||||
/// Cancel each given task from all the databases (if it is cancelable).
|
||||
///
|
||||
/// Returns the list of tasks that matched the filter and must be written in the database.
|
||||
fn cancel_matched_tasks(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
cancel_task_id: TaskId,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
progress: &Progress,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(TaskCancelationProgress::RetrievingTasks);
|
||||
|
||||
// 1. Remove from this list the tasks that we are not allowed to cancel
|
||||
// Notice that only the _enqueued_ ones are cancelable and we should
|
||||
// have already aborted the indexation of the _processing_ ones
|
||||
let cancelable_tasks = self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
|
||||
let tasks_to_cancel = cancelable_tasks & matched_tasks;
|
||||
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
|
||||
// 2. We now have a list of tasks to cancel, cancel them
|
||||
let mut tasks = self.queue.tasks.get_existing_tasks(
|
||||
rtxn,
|
||||
tasks_to_cancel.iter().inspect(|_| {
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}),
|
||||
)?;
|
||||
|
||||
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
|
||||
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||
progress.update_progress(progress_obj);
|
||||
for task in tasks.iter_mut() {
|
||||
task.status = Status::Canceled;
|
||||
task.canceled_by = Some(cancel_task_id);
|
||||
task.details = task.details.as_ref().map(|d| d.to_failed());
|
||||
current_batch.processing(Some(task));
|
||||
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
}
|
||||
278
crates/index-scheduler/src/scheduler/process_dump_creation.rs
Normal file
278
crates/index-scheduler/src/scheduler/process_dump_creation.rs
Normal file
@@ -0,0 +1,278 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use dump::IndexMetadata;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self};
|
||||
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(super) fn process_dump_creation(
|
||||
&self,
|
||||
progress: Progress,
|
||||
mut task: Task,
|
||||
) -> Result<Vec<Task>> {
|
||||
progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
|
||||
let started_at = OffsetDateTime::now_utc();
|
||||
let (keys, instance_uid) =
|
||||
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
|
||||
(keys, instance_uid)
|
||||
} else {
|
||||
unreachable!();
|
||||
};
|
||||
let dump = dump::DumpWriter::new(*instance_uid)?;
|
||||
|
||||
// 1. dump the keys
|
||||
progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
|
||||
let mut dump_keys = dump.create_keys()?;
|
||||
for key in keys {
|
||||
dump_keys.push_key(key)?;
|
||||
}
|
||||
dump_keys.flush()?;
|
||||
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
||||
// 2. dump the tasks
|
||||
progress.update_progress(DumpCreationProgress::DumpTheTasks);
|
||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||
|
||||
let (atomic, update_task_progress) =
|
||||
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32);
|
||||
progress.update_progress(update_task_progress);
|
||||
|
||||
for ret in self.queue.tasks.all_tasks.iter(&rtxn)? {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut t) = ret?;
|
||||
let status = t.status;
|
||||
let content_file = t.content_uuid();
|
||||
|
||||
// In the case we're dumping ourselves we want to be marked as finished
|
||||
// to not loop over ourselves indefinitely.
|
||||
if t.uid == task.uid {
|
||||
let finished_at = OffsetDateTime::now_utc();
|
||||
|
||||
// We're going to fake the date because we don't know if everything is going to go well.
|
||||
// But we need to dump the task as finished and successful.
|
||||
// If something fail everything will be set appropriately in the end.
|
||||
t.status = Status::Succeeded;
|
||||
t.started_at = Some(started_at);
|
||||
t.finished_at = Some(finished_at);
|
||||
}
|
||||
|
||||
// Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted.
|
||||
// This prevent from referencing *future* batches not actually associated with the task.
|
||||
//
|
||||
// See <https://github.com/meilisearch/meilisearch/issues/5247> for details.
|
||||
t.batch_uid = None;
|
||||
|
||||
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
|
||||
|
||||
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
|
||||
if let Some(content_file) = content_file {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
if status == Status::Enqueued {
|
||||
let content_file = self.queue.file_store.get_update(content_file)?;
|
||||
|
||||
for document in
|
||||
serde_json::de::Deserializer::from_reader(content_file).into_iter()
|
||||
{
|
||||
let document = document.map_err(|e| {
|
||||
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
|
||||
})?;
|
||||
dump_content_file.push_document(&document)?;
|
||||
}
|
||||
|
||||
dump_content_file.flush()?;
|
||||
}
|
||||
}
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. dump the batches
|
||||
progress.update_progress(DumpCreationProgress::DumpTheBatches);
|
||||
let mut dump_batches = dump.create_batches_queue()?;
|
||||
|
||||
let (atomic_batch_progress, update_batch_progress) =
|
||||
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
|
||||
progress.update_progress(update_batch_progress);
|
||||
|
||||
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut b) = ret?;
|
||||
// In the case we're dumping ourselves we want to be marked as finished
|
||||
// to not loop over ourselves indefinitely.
|
||||
if b.uid == task.uid {
|
||||
let finished_at = OffsetDateTime::now_utc();
|
||||
|
||||
// We're going to fake the date because we don't know if everything is going to go well.
|
||||
// But we need to dump the task as finished and successful.
|
||||
// If something fail everything will be set appropriately in the end.
|
||||
let mut statuses = BTreeMap::new();
|
||||
statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
|
||||
b.stats.status = statuses;
|
||||
b.finished_at = Some(finished_at);
|
||||
}
|
||||
|
||||
dump_batches.push_batch(&b)?;
|
||||
atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
dump_batches.flush()?;
|
||||
|
||||
// 4. Dump the indexes
|
||||
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||
let mut count = 0;
|
||||
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
||||
progress.update_progress(VariableNameStep::<DumpCreationProgress>::new(
|
||||
uid.to_string(),
|
||||
count,
|
||||
nb_indexes,
|
||||
));
|
||||
count += 1;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index
|
||||
.created_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
updated_at: index
|
||||
.updated_at(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index
|
||||
.embedding_configs(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let nb_documents = index
|
||||
.number_of_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||
as u32;
|
||||
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
|
||||
progress.update_progress(update_document_progress);
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 4.1. Dump the documents
|
||||
for ret in documents {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
'inject_vectors: {
|
||||
let embeddings = index
|
||||
.embeddings(&rtxn, id)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
let user_err =
|
||||
milli::Error::UserError(milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(&rtxn, std::iter::once(id))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={id}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
});
|
||||
|
||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
index_dumper.push_document(&document)?;
|
||||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 4.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
meilisearch_types::settings::SecretPolicy::RevealSecrets,
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
index_dumper.settings(&settings)?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// 5. Dump experimental feature settings
|
||||
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
let network = self.network();
|
||||
dump.create_network(network)?;
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
)).unwrap();
|
||||
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
progress.update_progress(DumpCreationProgress::CompressTheDump);
|
||||
let path = self.scheduler.dumps_path.join(format!("{}.dump", dump_uid));
|
||||
let file = File::create(path)?;
|
||||
dump.persist_to(BufWriter::new(file))?;
|
||||
|
||||
// if we reached this step we can tell the scheduler we succeeded to dump ourselves.
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::Dump { dump_uid: Some(dump_uid) });
|
||||
Ok(vec![task])
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user