Compare commits

...

249 Commits

Author SHA1 Message Date
85d567e878 Sort the TOML dependencies 2022-10-20 18:03:35 +02:00
3f6bd7fb11 Introduce a rustfmt file 2022-10-20 18:00:07 +02:00
dd57e051d7 Reapply #2890 2022-10-20 17:44:15 +02:00
2d86304701 Reapply #2839 2022-10-20 17:38:57 +02:00
5dd9ba3079 Reapply #2819 2022-10-20 17:35:22 +02:00
2338eff20b Reapply #2830 2022-10-20 17:33:31 +02:00
d01d29764e Reapply #2773 2022-10-20 17:27:15 +02:00
5e8a34ea13 Reapply #2727 2022-10-20 17:24:15 +02:00
eae2c353eb Remove the meilisearch-lib crate once and for all 2022-10-20 17:21:37 +02:00
e35bd263df Fix final compilation 2022-10-20 17:08:52 +02:00
cf58a76faf Fix the new config file with the index scheduler 2022-10-20 16:49:19 +02:00
2a51161258 Fix allow_index_creation useless field 2022-10-20 16:39:25 +02:00
a257904cf4 Fix the insta tests 2022-10-20 16:39:25 +02:00
cb3ccee401 push the snapshot files 2022-10-20 16:39:25 +02:00
f1acafcf1c fix the index deletion when the index doesn’t exist but would be created by one of the autobatched tasks 2022-10-20 16:39:24 +02:00
f816dc5221 fix the last rule about merging the allow_index_creation 2022-10-20 16:39:24 +02:00
6966256a3b simplify the code A LOT and create fewer false positives 2022-10-20 16:39:24 +02:00
1fa6193f48 add a whole new batch of tests around the index already exists / allow_index_creation 2022-10-20 16:39:24 +02:00
a65317982f fix all the snapshot tests 2022-10-20 16:39:24 +02:00
3357c439e3 it probably works but it's also horrendous 2022-10-20 16:39:23 +02:00
22d514645e Don't return an error when swapping 0 indexes 2022-10-20 16:39:23 +02:00
8514c1c12b Apply review suggestions and stop using rtxn.commit 2022-10-20 16:39:23 +02:00
14a44776f6 Implement POST /indexes-swap 2022-10-20 16:39:23 +02:00
bdb3702510 Remove key from index_tasks database when the value is empty 2022-10-20 16:39:23 +02:00
835745ac22 Add some documentation to the index scheduler 2022-10-20 16:39:22 +02:00
2f54dade04 fix all the snapshot tests in the dump 2022-10-20 16:39:22 +02:00
50d48df636 Fix bug where assert used != instead of ==
And update snapshot tests.
2022-10-20 16:39:22 +02:00
ffd5cb5ad3 cargo fmt 2022-10-20 16:39:22 +02:00
5205c7dcc7 Fix date parsing for task queries
Use rfc3339 or YYYY-MM-DD.

Add a day to the parsed date when it is an excluded lower bound
and the YYYY-MM-DD was used.

Also the Query type does not need to be serialisable anymore
2022-10-20 16:39:22 +02:00
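
A minimal sketch of the rule this commit describes, assuming the `time` crate (with its `macros`, `formatting`, and `parsing` features); `parse_task_date` and its signature are illustrative, not the actual Meilisearch helper:

```
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime};

/// Accept RFC 3339 or YYYY-MM-DD. When only a day was given and the value
/// is an excluded lower bound (e.g. `afterEnqueuedAt`), add one day so the
/// whole named day ends up excluded.
fn parse_task_date(raw: &str, excluded_lower_bound: bool) -> Result<OffsetDateTime, time::error::Parse> {
    if let Ok(datetime) = OffsetDateTime::parse(raw, &Rfc3339) {
        return Ok(datetime);
    }
    let date = Date::parse(raw, format_description!("[year]-[month]-[day]"))?;
    let mut datetime = date.midnight().assume_utc();
    if excluded_lower_bound {
        datetime += Duration::days(1);
    }
    Ok(datetime)
}
```
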
af508b9a81 Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-10-20 16:39:22 +02:00
1eeef1c1c8 Implement task date filters
before/after enqueued/started/finished at
2022-10-20 16:39:21 +02:00
f3c2be1eb5 fix the auto-generated details 2022-10-20 16:37:29 +02:00
aa13fc448d remove the unused variants from the autobatcher 2022-10-20 16:37:29 +02:00
3702249b1a fix a bunch of snapshot tests 2022-10-20 16:37:29 +02:00
49b14064c2 fix the dumps tests since we added information to the DumpTask 2022-10-20 16:37:28 +02:00
188e95d00f Delete the tasks content file once the transaction has been successfully committed 2022-10-20 16:37:28 +02:00
ad05bd9156 Remove the useless r#union thing 2022-10-20 16:37:28 +02:00
ea0dcf2c7c Clean up the delete_persisted_task_data function 2022-10-20 16:37:28 +02:00
0bbda61169 Throw the error if we can't register the tasks in the store 2022-10-20 16:37:28 +02:00
9945f984db Introduce a ProcessingTasks constructor 2022-10-20 16:37:27 +02:00
fd73306793 Extract the must_stop flag out of the RwLock 2022-10-20 16:37:27 +02:00
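
A sketch of the shape of that change (assumption: the flag becomes an atomic living next to the lock-guarded state; the names and the payload are illustrative, not the actual index-scheduler types):

```
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, RwLock};

// Before: `must_stop` lived inside the RwLock-guarded state, so checking it
// from the processing loop meant taking the lock. After: it is an atomic
// beside the lock, readable without contention.
struct ProcessingTasks {
    processing: RwLock<Vec<u32>>, // illustrative payload
    must_stop: Arc<AtomicBool>,
}

impl ProcessingTasks {
    fn must_stop(&self) -> bool {
        self.must_stop.load(Ordering::Relaxed)
    }
    fn cancel(&self) {
        self.must_stop.store(true, Ordering::Relaxed);
    }
}

fn main() {
    let state = ProcessingTasks {
        processing: RwLock::new(vec![1, 2, 3]),
        must_stop: Arc::new(AtomicBool::new(false)),
    };
    assert!(!state.must_stop());
    state.cancel();
    assert!(state.must_stop());
}
```
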
07db4cfab0 Prefer using an u64 instead of a usize in some places 2022-10-20 16:37:27 +02:00
cc6e2fd1e8 Delete the persisted data when we cancel a task 2022-10-20 16:37:27 +02:00
4f1c0535b4 Use a tokio block_in_place method for potentially blocking tasks 2022-10-20 16:37:27 +02:00
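
For reference, `tokio::task::block_in_place` (the API this commit names) runs blocking work on the current worker of a multi-threaded runtime without stalling the other tasks; a minimal, self-contained illustration:

```
#[tokio::main(flavor = "multi_thread")]
async fn main() {
    let sum = tokio::task::block_in_place(|| {
        // Stand-in for genuinely blocking work such as heed/LMDB I/O.
        (0u64..1_000_000).sum::<u64>()
    });
    println!("{sum}");
}
```
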
afafe8d207 Put the original URL query in the tasks details 2022-10-20 16:37:26 +02:00
973e2f71eb Add the tasks cancel route to cancel tasks 2022-10-20 16:37:26 +02:00
01ed1fb128 Update the canceledBy and finishedAt fields 2022-10-20 16:37:26 +02:00
74657bba2c Introduce the core algorithm of task cancelation 2022-10-20 16:37:26 +02:00
6276b2a382 Add more enum-iterator related stuff 2022-10-20 16:37:26 +02:00
607a0600e5 Prefer using TaskDeletion in the dumps 2022-10-20 16:35:06 +02:00
bcb7d1744a Introduce the task cancelation task type 2022-10-20 16:35:06 +02:00
4a7b5c7836 Add the canceled task status 2022-10-20 16:35:05 +02:00
b8803781dc Introduce the ProcessingTasks struct 2022-10-20 16:35:05 +02:00
47168c613d Use the indexation-abortion milli's branch 2022-10-20 16:35:05 +02:00
e7047e8377 Remove the meilisearch-auth milli dependency 2022-10-20 16:34:33 +02:00
baeb168b6e Delete a task's persisted data when appropriate 2022-10-20 16:34:32 +02:00
7308ccd530 make the tests compile again 2022-10-20 16:34:32 +02:00
656f809b1d fix the analytics 2022-10-20 16:26:17 +02:00
65e69c06bc extract the create_app function for the tests 2022-10-20 16:26:17 +02:00
aff67f78c1 bump enum-iter and fix a bunch of error messages 2022-10-20 16:24:23 +02:00
4921e926e5 fix meilisearch-http 2022-10-20 16:14:37 +02:00
99f1d5df20 fix the tests 2022-10-20 16:14:37 +02:00
0bde0abd74 share the rtxn between the access to the tasks and to the indexes 2022-10-20 16:14:37 +02:00
dbcc3456c6 dump the content of the dump tasks instead of recreating it at import time with the wrong API keys 2022-10-20 16:14:36 +02:00
09a0569228 remove useless todo 2022-10-20 16:14:36 +02:00
860d343831 get rid of the useless Seek before creating a grenad reader 2022-10-20 16:14:36 +02:00
8e469d8d1d flush the dump-writer only once everything has been inserted 2022-10-20 16:14:36 +02:00
b87b071718 apply most style comments of the review 2022-10-20 16:14:36 +02:00
e220cc2183 Update dump/src/error.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-10-20 16:14:36 +02:00
458a531547 rebase on index-scheduler 2022-10-20 16:14:35 +02:00
6987bc185e fix a synchronization bug while importing tasks 2022-10-20 16:13:18 +02:00
a2384dc627 fix all the errors code and settings issues when importing a dump v2 2022-10-20 16:13:18 +02:00
bab0e050f1 fix all the error codes 2022-10-20 16:13:18 +02:00
748249578d commit after creating an index 2022-10-20 16:13:17 +02:00
54d4d364c2 fix the deletion of the data.ms in case of errors 2022-10-20 16:13:17 +02:00
9b96085327 the index-scheduler needs to wake up after importing a dump 2022-10-20 16:13:17 +02:00
4c1c108a19 remove the dbg 2022-10-20 16:13:17 +02:00
c82bd429db fix the content_file import 2022-10-20 16:13:17 +02:00
8c9b758fa8 stop dumping the current dumping task as enqueued so it's not looping forever 2022-10-20 16:13:17 +02:00
ede46b716c add a bufwriter on the documents 2022-10-20 16:13:16 +02:00
cac924b663 first mostly working version 2022-10-20 16:13:16 +02:00
6fae317277 update the API a little bit 2022-10-20 16:12:26 +02:00
1a3fea4d8c fix the tests 2022-10-20 16:12:26 +02:00
9323f9f1c4 write the dump export 2022-10-20 16:12:25 +02:00
7ce336306d Fix number of deleted tasks details after duplicate task deletion 2022-10-20 16:09:33 +02:00
d49d7e9c2d Add task deletion tests where the same task is deleted twice 2022-10-20 16:09:33 +02:00
dabc30d3d6 Return an error when calling DELETE /tasks with an empty query 2022-10-20 16:09:32 +02:00
9067148270 Apply suggested changes from PR review 2022-10-20 16:09:32 +02:00
4bd8607cf9 Avoid creating two read txn at the same time 2022-10-20 16:09:32 +02:00
4c6145f782 Update snapshot tests following git rebase that fixes a bug 2022-10-20 16:09:32 +02:00
012c3e986c Finish first draft of the DELETE /tasks route 2022-10-20 16:09:32 +02:00
ef3e9e87f5 Continue implementation of task deletion
1. Matched tasks are a roaring bitmap (sketched below)
2. Start implementation in meilisearch-http
3. Snapshots use meili-snap
4. Rename to TaskDeletion
2022-10-20 16:08:16 +02:00
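
A toy illustration of point 1 above, using the `roaring` crate (the task ids and filters are invented):

```
use roaring::RoaringBitmap;

fn main() {
    // Ids of tasks matching a status filter and an index filter.
    let by_status: RoaringBitmap = (0u32..100).collect();
    let by_index: RoaringBitmap = (50u32..150).collect();
    // The deletion query matches their intersection; roaring keeps such set
    // operations cheap even for millions of task ids.
    let matched = &by_status & &by_index;
    assert_eq!(matched.len(), 50);
}
```
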
7d4527728e Make sure that we do not batch tasks from different indexes 2022-10-20 16:00:58 +02:00
1c697edb95 Add a test to check different indexes autobatching 2022-10-20 16:00:58 +02:00
8433a432b1 Extract index creation rights and simplify the autobatcher rules 2022-10-20 16:00:58 +02:00
ea192d31f4 Fix invalid import of tasks types 2022-10-20 16:00:58 +02:00
dc1d739785 move the API key in meilisearch_types 2022-10-20 16:00:58 +02:00
c9050759f5 remove an unused file 2022-10-20 15:59:53 +02:00
c0a6f7a021 uncomment a task serialization test 2022-10-20 15:59:53 +02:00
e533e740d4 refactor the Task a little bit 2022-10-20 15:59:52 +02:00
e6c033bd6b start moving a lot of task types to meilisearch_types 2022-10-20 15:59:52 +02:00
974b40ba04 store md5 instead of the whole snapshots 2022-10-20 15:59:30 +02:00
d85451b8e5 Add meili-snap crate to make writing snapshot tests easier 2022-10-20 15:59:30 +02:00
8a32b766c6 fix all the import and comment most of the dump v6 2022-10-20 15:57:53 +02:00
d8bbe18d71 push the document_format and settings I forgot in the previous PR 2022-10-20 15:57:53 +02:00
f456fb5e0b get rid of the index crate + the document_types crate 2022-10-20 15:57:52 +02:00
f3ec39a769 Fix compiler errors related to the autobatching option of the index scheduler 2022-10-20 15:45:57 +02:00
8d0ebbc9fa Fix typo and remove useless code in tests 2022-10-20 15:45:57 +02:00
5914889a29 Apply suggestions from code review
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-10-20 15:45:57 +02:00
ccfe1babdc Add more task deletion tests 2022-10-20 15:45:57 +02:00
570ae05833 Use more complete snapshot tests for the index scheduler 2022-10-20 15:45:57 +02:00
fd73e65165 Add a DetailsView type and improve index scheduler snapshots
The DetailsView type is necessary because serde incorrectly
deserialises the `Details` type, so the database fails to correctly
decode Tasks
2022-10-20 15:45:57 +02:00
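
The pattern that message describes, sketched with hypothetical fields (the real `DetailsView` covers more task kinds): (de)serialize a flat mirror struct instead of the internal enum, and convert between the two.

```
use serde::{Deserialize, Serialize};

// Internal type: an enum, which serde would not round-trip reliably here.
enum Details {
    DocumentAddition { received_documents: u64, indexed_documents: Option<u64> },
    ClearAll { deleted_documents: Option<u64> },
}

// View type: one flat struct with optional fields, unambiguous on disk.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct DetailsView {
    #[serde(skip_serializing_if = "Option::is_none")]
    received_documents: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    indexed_documents: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    deleted_documents: Option<u64>,
}

impl From<Details> for DetailsView {
    fn from(details: Details) -> Self {
        match details {
            Details::DocumentAddition { received_documents, indexed_documents } => DetailsView {
                received_documents: Some(received_documents),
                indexed_documents,
                ..Default::default()
            },
            Details::ClearAll { deleted_documents } => {
                DetailsView { deleted_documents, ..Default::default() }
            }
        }
    }
}
```
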
05753c663f Implement TaskDeletion in the index scheduler 2022-10-20 15:44:58 +02:00
ee352b6c7c Allow a user to disable the auto batching system 2022-10-20 15:44:58 +02:00
21d6c03bb4 Add a test to check that it works without autobatching 2022-10-20 15:44:58 +02:00
12c374e475 Remove the IndexScheduler::notify method 2022-10-20 15:44:58 +02:00
fc148212aa Make sure that the index-scheduler tick loop is rerun after processing 2022-10-20 15:44:58 +02:00
67e8ee6ef7 Add a test that verifies that sending multiple tasks works 2022-10-20 15:44:57 +02:00
4777449cc3 remove unused files 2022-10-20 15:44:57 +02:00
baa67fade2 fix the tests 2022-10-20 15:44:57 +02:00
997e7450d4 write a bunch of tests that goes through the whole compat layers 2022-10-20 15:44:57 +02:00
bd509f3593 remove the warnings 2022-10-20 15:44:57 +02:00
704b9566e4 rewrite the update file API 2022-10-20 15:44:56 +02:00
d8ac2a0063 finish the dump reader API; the dump Writer API now needs to be updated 2022-10-20 15:44:56 +02:00
2db486ad90 start dumping the update files to a known format 2022-10-20 15:44:56 +02:00
170af0c9bd fix the compat between v3 and v4 2022-10-20 15:44:56 +02:00
392e2829f8 remove the unused snapshot files 2022-10-20 15:44:56 +02:00
50dfe06efe remove all warnings 2022-10-20 15:44:55 +02:00
1a20ef2a79 write the compat layer from v2 to v3 2022-10-20 15:44:55 +02:00
0c8e690675 remove old compat files 2022-10-20 15:44:55 +02:00
70e3308e8c write the dump v2 import 2022-10-20 15:44:55 +02:00
2a7a6f9a30 make the open function public 2022-10-20 15:44:55 +02:00
fe973b8a98 fix some warnings 2022-10-20 15:44:54 +02:00
993011a5b6 rebase on main 2022-10-20 15:44:54 +02:00
5b8041c4fe write and test the compat layer from v3 to v4 2022-10-20 15:44:54 +02:00
1d4f27d65f write and test the dump v3 import 2022-10-20 15:44:54 +02:00
826f7633de fix the test once again 2022-10-20 15:44:54 +02:00
34a3ba6aa2 finish the test for the compatibility between v4 and v5 2022-10-20 15:44:53 +02:00
116c424fba rewrite the compat API to something more generic 2022-10-20 15:44:53 +02:00
2456a632e8 rename a few things for consistency 2022-10-20 15:44:53 +02:00
39044558f0 implement the dump v4 import 2022-10-20 15:44:53 +02:00
13e8247f77 add the compat layer between v5 and v6 2022-10-20 15:44:53 +02:00
f633e109dc get rid of the trait in most places 2022-10-20 15:44:52 +02:00
4958bf3a55 start writing the compat layer between v5 and v6 2022-10-20 15:44:52 +02:00
998b3533f5 test the dump v5 2022-10-20 15:44:52 +02:00
9d6987a412 write the v5 dump import 2022-10-20 15:44:52 +02:00
22b2fa0576 introduce the index metadata 2022-10-20 15:44:52 +02:00
b5ebab5c66 fix the tests 2022-10-20 15:44:51 +02:00
e3bc87bf22 implement the dump reader v6 2022-10-20 15:44:51 +02:00
b89ac5d7ac start implementing a skeleton of the v1 dump reader 2022-10-20 15:44:51 +02:00
e486878463 move the DumpWriter and Error to their own module 2022-10-20 15:44:51 +02:00
568b743ac8 fix the dump export 2022-10-20 15:44:51 +02:00
d2485cfed4 write a dump exporter 2022-10-20 15:44:51 +02:00
a358eaa816 Create the index only if the task has the rights to do so 2022-10-20 15:44:35 +02:00
134634d99a Correctly batch tasks with different index creation rights 2022-10-20 15:44:35 +02:00
cea19f5af1 Use a ControlFlow in the autobatcher function 2022-10-20 15:44:34 +02:00
2a45d24143 IndexDeletion operation have ClearAll details 2022-10-20 15:44:34 +02:00
be192f281d Only mark the first clear document with the number of cleared documents 2022-10-20 15:44:34 +02:00
99d1e9a870 Let the tick function set the Failed status itself 2022-10-20 15:44:34 +02:00
945cb00fc3 Fill an IndexDeletion task with the number of documents removed 2022-10-20 15:44:34 +02:00
b7ad050737 Panic if we encounter a wrong KindWithContent type 2022-10-20 15:44:34 +02:00
f3f6bbf624 Update the tasks statuses 2022-10-20 15:44:33 +02:00
88a0ed9736 Implement the IndexDeletion batch operation 2022-10-20 15:44:33 +02:00
4a53cb1c50 Implement the IndexUpdate batch operation 2022-10-20 15:44:33 +02:00
617994a9d7 Implement the IndexCreate batch operation 2022-10-20 15:44:33 +02:00
4871509507 Make sure that meilisearch-http works without index wrapper 2022-10-20 15:44:33 +02:00
b7898cd4ab Implement ErrorCode on the heed Error 2022-10-20 15:44:33 +02:00
b83710ea98 Implement ErrorCode on the milli::Error type 2022-10-20 15:44:32 +02:00
478e3f0f6b Remove the Index wrapper and use milli::Index directly 2022-10-20 15:43:03 +02:00
8bffe41886 Remove the IndexRename operation 2022-10-20 15:43:02 +02:00
257651c615 Move the IndexScheduler to the root of the index-scheduler crate 2022-10-20 15:43:02 +02:00
78e730ca37 Add a TODO about the index creation 2022-10-20 15:43:02 +02:00
373995dca9 Make clippy happy 2022-10-20 15:43:02 +02:00
3fda1942c0 Implement a recursive indexation for the index-related operations 2022-10-20 15:43:02 +02:00
248805d2df Fix meilisearch-http to use the new DocumentImport batch operation 2022-10-20 15:43:02 +02:00
e7ca5efe1e Implement the SettingsAndDocumentImport batch operation 2022-10-20 15:43:01 +02:00
e24715abe5 Merge both DocumentAddition/Update into one DocumentImport variant 2022-10-20 15:43:01 +02:00
9e1d7fbfe5 Implement the DocumentClear batch operation 2022-10-20 15:43:01 +02:00
eb8dda1f81 Implement the DocumentClearAndSettings batch operation 2022-10-20 15:43:01 +02:00
ad0d285f12 Implement the Settings batch operation 2022-10-20 15:43:01 +02:00
e84119fc75 Implement the DocumentUpdate batch operation 2022-10-20 15:43:01 +02:00
78922a7c67 Implement the DocumentDeletion batch operation 2022-10-20 15:43:00 +02:00
5cd1fc0b7b remove an old non-working part of the batch execution 2022-10-20 15:42:45 +02:00
0d0b2a9ac1 bring back the IndexMeta and IndexStats in meilisearch-http 2022-10-20 15:42:45 +02:00
f4ecf75cda fix import bug 2022-10-20 15:42:45 +02:00
2064b65d5f remove the create_app macro 2022-10-20 15:42:45 +02:00
2d31cff082 get rid of meilisearch-lib 2022-10-20 15:42:44 +02:00
2de8f08517 fix a deadlock 2022-10-20 15:36:10 +02:00
972dd6bcef remove Clone from the IndexScheduler 2022-10-20 15:36:10 +02:00
7f1a85d443 remove a bunch of comments 2022-10-20 15:36:10 +02:00
e43d5e9abe move as many fields as possible out of the IndexScheduler 2022-10-20 15:36:09 +02:00
a43fbc9367 move the test function in the test module 2022-10-20 15:36:09 +02:00
03de8669a2 start implementing some logic to test the internal states of the scheduler 2022-10-20 15:36:09 +02:00
2d1cc1a12d fix the tests 2022-10-20 15:36:09 +02:00
f58d969899 slightly refactor the autobatching tests 2022-10-20 15:34:55 +02:00
42f5c1fc3f connect the new scheduler to meilisearch-http officially.
I can index documents and run searches
2022-10-20 15:34:55 +02:00
f84cbee170 implements the get_tasks 2022-10-20 15:34:55 +02:00
5b282acb7b fix all compilation errors 2022-10-20 15:34:55 +02:00
60ee1f5e64 wip integrating the scheduler in meilisearch-http 2022-10-20 15:34:55 +02:00
acc6d3a82b start integrating the index-scheduler in meilisearch-lib 2022-10-20 15:30:31 +02:00
01847a14bb add insta::json for later 2022-10-20 15:30:31 +02:00
16eab7b337 I can index documents without meilisearch 2022-10-20 15:30:20 +02:00
7bbd75e1fb wip 2022-10-20 15:29:48 +02:00
a4bf859779 create the end Batch type for all Index* operations 2022-10-20 15:29:48 +02:00
72496122c3 create the end Batch type for all Document* operation 2022-10-20 15:29:48 +02:00
d7146d7b21 write most of the run loop 2022-10-20 15:29:48 +02:00
e9d2689499 use faux in the file-store 2022-10-20 15:29:36 +02:00
0acc40b222 split the run function in two 2022-10-20 15:28:51 +02:00
9069439a14 fix the register test 2022-10-20 15:28:51 +02:00
e7c552a0d0 reduce the size of the snapshots 2022-10-20 15:28:50 +02:00
cfc01e4372 test the register tasks 2022-10-20 15:28:50 +02:00
f84ced7e38 start integrating the index-scheduler in the meilisearch codebase 2022-10-20 15:28:50 +02:00
9882b7fa57 greatly reduce the number of warnings 2022-10-20 15:26:57 +02:00
1a4461cd3a fix smol typo 2022-10-20 15:26:57 +02:00
ce0ebf9381 get rid of the horrendous spinlock in favor of synchronoise 2022-10-20 15:26:57 +02:00
3001ed3a5f implement the index swap in the index mapper 2022-10-20 15:26:46 +02:00
c326613e06 move the index mapping logic in another structure 2022-10-20 15:26:45 +02:00
48e10176b2 migrate the index handling code into a different file + implement the create index 2022-10-20 15:26:45 +02:00
f7357fc504 reintroduce the uuid mapping for the indexes 2022-10-20 15:26:45 +02:00
5b485e309c add a bunch of tests 2022-10-20 15:26:45 +02:00
3120057039 split the DocumentAdditionOrUpdate into two tasks: DocumentAddition and DocumentUpdate 2022-10-20 15:26:33 +02:00
00f13f45b6 starts importing the real tasks 2022-10-20 15:26:32 +02:00
bbb50d1b96 get rid of the auto-generated mains 2022-10-20 15:26:32 +02:00
cf8c4310ab extract the index abstraction out of the index-scheduler in its own module 2022-10-20 15:26:32 +02:00
5abb79f149 rename the update-file-store to file-store since it can store any kind of file 2022-10-20 15:26:22 +02:00
5dcd67c597 import the update_file_store in the index-scheduler 2022-10-20 15:26:22 +02:00
215d318b53 get rid of nelson 2022-10-20 15:26:22 +02:00
68e74e133f move the update file store to another crate with as little dependencies as possible 2022-10-20 15:26:06 +02:00
98bb742d77 finishes the global skeleton of the auto-batcher 2022-10-20 15:26:06 +02:00
1efccb1bc4 polish the global structure of the batch creation 2022-10-20 15:26:06 +02:00
78ca4bf608 move the autobatcher logic to another file 2022-10-20 15:26:06 +02:00
66397bd0de add the document format file 2022-10-20 15:26:06 +02:00
0790a3cbbf fix the create_new_batch method 2022-10-20 15:26:06 +02:00
074dee59ec fix the whole batchKind thingy 2022-10-20 15:26:05 +02:00
ef4594c078 implements most operations 2022-10-20 15:26:05 +02:00
9aa31cd391 fix a few typos 2022-10-20 15:26:05 +02:00
b1f0431ab4 implements the index deletion, creation and swap 2022-10-20 15:26:05 +02:00
fa27485070 makes the update getters smoother to use 2022-10-20 15:26:05 +02:00
7c56fcfef3 make the project compile again 2022-10-20 15:26:05 +02:00
d87b769469 fix the file store 2022-10-20 15:26:04 +02:00
faac665d58 make the file store entirely synchronous, including the file deletion 2022-10-20 15:26:04 +02:00
fda5ca60bd fix most of the index module 2022-10-20 15:26:04 +02:00
e7b2b9306a wip porting the index back in the scheduler 2022-10-20 15:25:55 +02:00
a2587149f4 add a little bit of documentation 2022-10-20 15:25:42 +02:00
f359fcc220 implements the filter query 2022-10-20 15:25:41 +02:00
c1eda13e64 add the task to the index db in the register task 2022-10-20 15:25:41 +02:00
2b77d54412 split the scheduler into multiple files 2022-10-20 15:25:41 +02:00
ff5d6d1703 implement the get_batch method 2022-10-20 15:25:41 +02:00
5a9ac4ceb3 START THE REWRITE OF THE INDEX SCHEDULER: index & register have been implemented 2022-10-20 15:25:41 +02:00
206 changed files with 17696 additions and 10541 deletions

.gitignore (7 changes)

@@ -7,3 +7,10 @@
/data.ms
/snapshots
/dumps
# Snapshots
## ... large
*.full.snap
## ... unreviewed
*.snap.new

.rustfmt.toml (new file, 5 changes)

@@ -0,0 +1,5 @@
unstable_features = true
use_small_heuristics = "max"
imports_granularity = "Module"
group_imports = "StdExternalCrate"

Cargo.lock (generated, 864 changes)

File diff suppressed because it is too large.

Cargo.toml

@@ -3,8 +3,11 @@ resolver = "2"
members = [
"meilisearch-http",
"meilisearch-types",
"meilisearch-lib",
"meilisearch-auth",
"meili-snap",
"index-scheduler",
"dump",
"file-store",
"permissive-json-pointer",
]

dump/Cargo.toml (new file, 29 changes)

@@ -0,0 +1,29 @@
[package]
name = "dump"
version = "0.29.0"
edition = "2021"
[dependencies]
anyhow = "1.0.65"
flate2 = "1.0.22"
http = "0.2.8"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.15.0"
regex = "1.6.0"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"
insta = { version = "1.19.1", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
meilisearch-types = { path = "../meilisearch-types" }

dump/README.md (new file, 17 changes)

@@ -0,0 +1,17 @@
```
dump
├── indexes
│ ├── cattos
│ │ ├── documents.jsonl
│ │ └── settings.json
│ └── doggos
│ ├── documents.jsonl
│ └── settings.json
├── instance-uid.uuid
├── keys.jsonl
├── metadata.json
└── tasks
├── update_files
│ └── [task_id].jsonl
└── queue.jsonl
```
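
A small sketch of consuming that layout by hand, assuming each line of `tasks/queue.jsonl` is one JSON-encoded task (the dump crate introduced below provides the real reader):

```
use std::fs::File;
use std::io::{BufRead, BufReader};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let queue = BufReader::new(File::open("dump/tasks/queue.jsonl")?);
    for line in queue.lines() {
        let task: serde_json::Value = serde_json::from_str(&line?)?;
        println!("task {} is {}", task["uid"], task["status"]);
    }
    Ok(())
}
```
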

dump/src/error.rs (new file, 36 changes)

@@ -0,0 +1,36 @@
use meilisearch_types::error::{Code, ErrorCode};
use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")]
DumpV1Unsupported,
#[error("Bad index name.")]
BadIndexName,
#[error("Malformed task.")]
MalformedTask,
#[error(transparent)]
Io(#[from] std::io::Error),
#[error(transparent)]
Serde(#[from] serde_json::Error),
#[error(transparent)]
Uuid(#[from] uuid::Error),
}
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
// Are these three really Internal errors?
// TODO look at that later.
Error::Io(_) => Code::Internal,
Error::Serde(_) => Code::Internal,
Error::Uuid(_) => Code::Internal,
// all these errors should never be raised when creating a dump, thus no error code should be associated.
Error::DumpV1Unsupported => Code::Internal,
Error::BadIndexName => Code::Internal,
Error::MalformedTask => Code::Internal,
}
}
}
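
As a usage note: the `#[from]` conversions above are what let callers bubble io and serde_json failures into `Error` with a plain `?`. An illustrative function (not part of the diff) using the type:

```
fn read_metadata(path: &std::path::Path) -> Result<serde_json::Value, Error> {
    let file = std::fs::File::open(path)?; // io::Error -> Error::Io
    Ok(serde_json::from_reader(file)?)     // serde_json::Error -> Error::Serde
}
```
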

dump/src/lib.rs (new file, 458 changes)

@@ -0,0 +1,458 @@
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task, TaskId};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
mod error;
mod reader;
mod writer;
pub use error::Error;
pub use reader::{DumpReader, UpdateFile};
pub use writer::DumpWriter;
const CURRENT_DUMP_VERSION: Version = Version::V6;
type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
pub dump_version: Version,
pub db_version: String,
#[serde(with = "time::serde::rfc3339")]
pub dump_date: OffsetDateTime,
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
pub uid: String,
pub primary_key: Option<String>,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
}
#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
pub enum Version {
V1,
V2,
V3,
V4,
V5,
V6,
}
#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskDump {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: KindDump,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<Details>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<ResponseError>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(
with = "time::serde::rfc3339::option",
skip_serializing_if = "Option::is_none",
default
)]
pub started_at: Option<OffsetDateTime>,
#[serde(
with = "time::serde::rfc3339::option",
skip_serializing_if = "Option::is_none",
default
)]
pub finished_at: Option<OffsetDateTime>,
}
// A `Kind` specific version made for the dump. If modified you may break the dump.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum KindDump {
DocumentImport {
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_count: u64,
allow_index_creation: bool,
},
DocumentDeletion {
documents_ids: Vec<String>,
},
DocumentClear,
Settings {
settings: meilisearch_types::settings::Settings<Unchecked>,
is_deletion: bool,
allow_index_creation: bool,
},
IndexDeletion,
IndexCreation {
primary_key: Option<String>,
},
IndexUpdate {
primary_key: Option<String>,
},
IndexSwap {
swaps: Vec<(String, String)>,
},
TaskCancelation {
query: String,
tasks: RoaringBitmap,
},
TasksDeletion {
query: String,
tasks: RoaringBitmap,
},
DumpExport {
dump_uid: String,
keys: Vec<Key>,
instance_uid: Option<InstanceUid>,
},
Snapshot,
}
impl From<Task> for TaskDump {
fn from(task: Task) -> Self {
TaskDump {
uid: task.uid,
index_uid: task.index_uid().map(|uid| uid.to_string()),
status: task.status,
kind: task.kind.into(),
canceled_by: task.canceled_by,
details: task.details,
error: task.error,
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
impl From<KindWithContent> for KindDump {
fn from(kind: KindWithContent) -> Self {
match kind {
KindWithContent::DocumentImport {
primary_key,
method,
documents_count,
allow_index_creation,
..
} => KindDump::DocumentImport {
primary_key,
method,
documents_count,
allow_index_creation,
},
KindWithContent::DocumentDeletion { documents_ids, .. } => {
KindDump::DocumentDeletion { documents_ids }
}
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
KindWithContent::Settings {
new_settings, is_deletion, allow_index_creation, ..
} => KindDump::Settings { settings: new_settings, is_deletion, allow_index_creation },
KindWithContent::IndexDeletion { .. } => KindDump::IndexDeletion,
KindWithContent::IndexCreation { primary_key, .. } => {
KindDump::IndexCreation { primary_key }
}
KindWithContent::IndexUpdate { primary_key, .. } => {
KindDump::IndexUpdate { primary_key }
}
KindWithContent::IndexSwap { swaps } => KindDump::IndexSwap { swaps },
KindWithContent::TaskCancelation { query, tasks } => {
KindDump::TaskCancelation { query, tasks }
}
KindWithContent::TaskDeletion { query, tasks } => {
KindDump::TasksDeletion { query, tasks }
}
KindWithContent::DumpExport { dump_uid, keys, instance_uid } => {
KindDump::DumpExport { dump_uid, keys, instance_uid }
}
KindWithContent::Snapshot => KindDump::Snapshot,
}
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::str::FromStr;
use big_s::S;
use maplit::btreeset;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
use uuid::Uuid;
use crate::reader::Document;
use crate::{DumpReader, DumpWriter, IndexMetadata, KindDump, TaskDump, Version};
pub fn create_test_instance_uid() -> Uuid {
Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap()
}
pub fn create_test_index_metadata() -> IndexMetadata {
IndexMetadata {
uid: S("doggo"),
primary_key: None,
created_at: datetime!(2022-11-20 12:00 UTC),
updated_at: datetime!(2022-11-21 00:00 UTC),
}
}
pub fn create_test_documents() -> Vec<Map<String, Value>> {
vec![
json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 })
.as_object()
.unwrap()
.clone(),
json!({ "id": 2, "race": "bernese mountain", "name": "tamo", "age": 6 })
.as_object()
.unwrap()
.clone(),
json!({ "id": 3, "race": "great pyrenees", "name": "patou", "age": 5 })
.as_object()
.unwrap()
.clone(),
]
}
pub fn create_test_settings() -> Settings<Checked> {
let settings = Settings {
displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
_kind: std::marker::PhantomData,
};
settings.check()
}
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
vec![
(
TaskDump {
uid: 0,
index_uid: Some(S("doggo")),
status: Status::Succeeded,
kind: KindDump::DocumentImport {
method: milli::update::IndexDocumentsMethod::UpdateDocuments,
allow_index_creation: true,
primary_key: Some(S("bone")),
documents_count: 12,
},
canceled_by: None,
details: Some(Details::DocumentAddition {
received_documents: 12,
indexed_documents: Some(10),
}),
error: None,
enqueued_at: datetime!(2022-11-11 0:00 UTC),
started_at: Some(datetime!(2022-11-20 0:00 UTC)),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
},
None,
),
(
TaskDump {
uid: 1,
index_uid: Some(S("doggo")),
status: Status::Enqueued,
kind: KindDump::DocumentImport {
method: milli::update::IndexDocumentsMethod::UpdateDocuments,
allow_index_creation: true,
primary_key: None,
documents_count: 2,
},
canceled_by: None,
details: Some(Details::DocumentAddition {
received_documents: 2,
indexed_documents: None,
}),
error: None,
enqueued_at: datetime!(2022-11-11 0:00 UTC),
started_at: None,
finished_at: None,
},
Some(vec![
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
json!({ "id": 5, "race": "patou" }).as_object().unwrap().clone(),
]),
),
(
TaskDump {
uid: 5,
index_uid: Some(S("catto")),
status: Status::Enqueued,
kind: KindDump::IndexDeletion,
canceled_by: None,
details: None,
error: None,
enqueued_at: datetime!(2022-11-15 0:00 UTC),
started_at: None,
finished_at: None,
},
None,
),
]
}
pub fn create_test_api_keys() -> Vec<Key> {
vec![
Key {
description: Some(S("The main key to manage all the doggos")),
name: Some(S("doggos_key")),
uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(),
actions: vec![Action::DocumentsAll],
indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())],
expires_at: Some(datetime!(4130-03-14 12:21 UTC)),
created_at: datetime!(1960-11-15 0:00 UTC),
updated_at: datetime!(2022-11-10 0:00 UTC),
},
Key {
description: Some(S("The master key for everything and even the doggos")),
name: Some(S("master_key")),
uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(),
actions: vec![Action::All],
indexes: vec![StarOr::Star],
expires_at: None,
created_at: datetime!(0000-01-01 00:01 UTC),
updated_at: datetime!(1964-05-04 17:25 UTC),
},
Key {
description: Some(S("The useless key to for nothing nor the doggos")),
name: Some(S("useless_key")),
uid: Uuid::from_str("fb80b58b-0a34-412f-8ba7-1ce868f8ac5c").unwrap(),
actions: vec![],
indexes: vec![],
expires_at: None,
created_at: datetime!(400-02-29 0:00 UTC),
updated_at: datetime!(1024-02-29 0:00 UTC),
},
]
}
pub fn create_test_dump() -> File {
let instance_uid = create_test_instance_uid();
let dump = DumpWriter::new(Some(instance_uid.clone())).unwrap();
// ========== Adding an index
let documents = create_test_documents();
let settings = create_test_settings();
let mut index = dump.create_index("doggos", &create_test_index_metadata()).unwrap();
for document in &documents {
index.push_document(document).unwrap();
}
index.flush().unwrap();
index.settings(&settings).unwrap();
// ========== pushing the task queue
let tasks = create_test_tasks();
let mut task_queue = dump.create_tasks_queue().unwrap();
for (task, update_file) in &tasks {
let mut update = task_queue.push_task(task).unwrap();
if let Some(update_file) = update_file {
for u in update_file {
update.push_document(u).unwrap();
}
}
}
task_queue.flush().unwrap();
// ========== pushing the api keys
let api_keys = create_test_api_keys();
let mut keys = dump.create_keys().unwrap();
for key in &api_keys {
keys.push_key(key).unwrap();
}
keys.flush().unwrap();
// create the dump
let mut file = tempfile::tempfile().unwrap();
dump.persist_to(&mut file).unwrap();
file.seek(SeekFrom::Start(0)).unwrap();
file
}
#[test]
fn test_creating_and_read_dump() {
let mut file = create_test_dump();
let mut dump = DumpReader::open(&mut file).unwrap();
// ==== checking the top level infos
assert_eq!(dump.version(), Version::V6);
assert!(dump.date().is_some());
assert_eq!(dump.instance_uid().unwrap().unwrap(), create_test_instance_uid());
// ==== checking the index
let mut indexes = dump.indexes().unwrap();
let mut index = indexes.next().unwrap().unwrap();
assert!(indexes.next().is_none()); // there was only one index in the dump
for (document, expected) in index.documents().unwrap().zip(create_test_documents()) {
assert_eq!(document.unwrap(), expected);
}
assert_eq!(index.settings().unwrap(), create_test_settings());
assert_eq!(index.metadata(), &create_test_index_metadata());
drop(index);
drop(indexes);
// ==== checking the task queue
for (task, expected) in dump.tasks().unwrap().zip(create_test_tasks()) {
let (task, content_file) = task.unwrap();
assert_eq!(task, expected.0);
if let Some(expected_update) = expected.1 {
assert!(
content_file.is_some(),
"A content file was expected for the task {}.",
expected.0.uid
);
let updates = content_file.unwrap().collect::<Result<Vec<_>, _>>().unwrap();
assert_eq!(updates, expected_update);
}
}
// ==== checking the keys
for (key, expected) in dump.keys().unwrap().zip(create_test_api_keys()) {
assert_eq!(key.unwrap(), expected);
}
}
}

dump/src/reader/compat/mod.rs (new file)

@@ -0,0 +1,4 @@
pub mod v2_to_v3;
pub mod v3_to_v4;
pub mod v4_to_v5;
pub mod v5_to_v6;
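
These modules chain pairwise, each one wrapping the previous reader. Inside the crate, the upgrade path reads roughly like the sketch below (constructors taken from the diffs that follow; the v4 to v5 to v6 steps are analogous, and error handling is minimal):

```
use crate::reader::v2::V2Reader;

fn upgrade_to_v4(dir: tempfile::TempDir) -> crate::Result<()> {
    let compat_v4 = V2Reader::open(dir)? // read a v2 dump from disk...
        .to_v3()                         // ...expose it through the v3 API...
        .to_v4();                        // ...then through the v4 API, and so on.
    let _version = compat_v4.version();
    Ok(())
}
```
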

dump/src/reader/compat/v2_to_v3.rs (new file)

@@ -0,0 +1,478 @@
use std::convert::TryInto;
use std::str::FromStr;
use time::OffsetDateTime;
use uuid::Uuid;
use super::v3_to_v4::CompatV3ToV4;
use crate::reader::{v2, v3, Document};
use crate::Result;
pub struct CompatV2ToV3 {
pub from: v2::V2Reader,
}
impl CompatV2ToV3 {
pub fn new(v2: v2::V2Reader) -> CompatV2ToV3 {
CompatV2ToV3 { from: v2 }
}
pub fn index_uuid(&self) -> Vec<v3::meta::IndexUuid> {
self.from
.index_uuid()
.into_iter()
.map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid })
.collect()
}
pub fn to_v4(self) -> CompatV3ToV4 {
CompatV3ToV4::Compat(self)
}
pub fn version(&self) -> crate::Version {
self.from.version()
}
pub fn date(&self) -> Option<time::OffsetDateTime> {
self.from.date()
}
pub fn instance_uid(&self) -> Result<Option<uuid::Uuid>> {
Ok(None)
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<CompatIndexV2ToV3>> + '_> {
Ok(self.from.indexes()?.map(|index_reader| -> Result<_> {
let compat = CompatIndexV2ToV3::new(index_reader?);
Ok(compat)
}))
}
pub fn tasks(
&mut self,
) -> Box<
dyn Iterator<Item = Result<(v3::Task, Option<Box<dyn Iterator<Item = Result<Document>>>>)>>
+ '_,
> {
let _indexes = self.from.index_uuid.clone();
Box::new(
self.from
.tasks()
.map(move |task| {
task.map(|(task, content_file)| {
let task = v3::Task { uuid: task.uuid, update: task.update.into() };
Some((
task,
content_file.map(|content_file| {
Box::new(content_file) as Box<dyn Iterator<Item = Result<Document>>>
}),
))
})
})
.filter_map(|res| res.transpose()),
)
}
}
pub struct CompatIndexV2ToV3 {
from: v2::V2IndexReader,
}
impl CompatIndexV2ToV3 {
pub fn new(v2: v2::V2IndexReader) -> CompatIndexV2ToV3 {
CompatIndexV2ToV3 { from: v2 }
}
pub fn metadata(&self) -> &crate::IndexMetadata {
self.from.metadata()
}
pub fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Document>> + '_>> {
self.from
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>)
}
pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
Ok(v3::Settings::<v3::Unchecked>::from(self.from.settings()?).check())
}
}
impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
fn from(update: v2::updates::UpdateStatus) -> Self {
match update {
v2::updates::UpdateStatus::Processing(processing) => {
match (processing.from.meta.clone(), processing.from.content).try_into() {
Ok(meta) => v3::updates::UpdateStatus::Processing(v3::updates::Processing {
from: v3::updates::Enqueued {
update_id: processing.from.update_id,
meta,
enqueued_at: processing.from.enqueued_at,
},
started_processing_at: processing.started_processing_at,
}),
Err(e) => {
log::warn!("Error with task {}: {}", processing.from.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
update_id: processing.from.update_id,
meta: update_from_unchecked_update_meta(processing.from.meta),
enqueued_at: processing.from.enqueued_at,
},
started_processing_at: processing.started_processing_at,
},
msg: e.to_string(),
code: v3::Code::MalformedDump,
failed_at: OffsetDateTime::now_utc(),
})
}
}
}
v2::updates::UpdateStatus::Enqueued(enqueued) => {
match (enqueued.meta.clone(), enqueued.content).try_into() {
Ok(meta) => v3::updates::UpdateStatus::Enqueued(v3::updates::Enqueued {
update_id: enqueued.update_id,
meta,
enqueued_at: enqueued.enqueued_at,
}),
Err(e) => {
log::warn!("Error with task {}: {}", enqueued.update_id, e);
log::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
update_id: enqueued.update_id,
meta: update_from_unchecked_update_meta(enqueued.meta),
enqueued_at: enqueued.enqueued_at,
},
started_processing_at: OffsetDateTime::now_utc(),
},
msg: e.to_string(),
code: v3::Code::MalformedDump,
failed_at: OffsetDateTime::now_utc(),
})
}
}
}
v2::updates::UpdateStatus::Processed(processed) => {
v3::updates::UpdateStatus::Processed(v3::updates::Processed {
success: processed.success.into(),
processed_at: processed.processed_at,
from: v3::updates::Processing {
from: v3::updates::Enqueued {
update_id: processed.from.from.update_id,
// since we're never going to read the content_file again it's ok to generate a fake one.
meta: update_from_unchecked_update_meta(processed.from.from.meta),
enqueued_at: processed.from.from.enqueued_at,
},
started_processing_at: processed.from.started_processing_at,
},
})
}
v2::updates::UpdateStatus::Aborted(aborted) => {
v3::updates::UpdateStatus::Aborted(v3::updates::Aborted {
from: v3::updates::Enqueued {
update_id: aborted.from.update_id,
// since we're never going to read the content_file again it's ok to generate a fake one.
meta: update_from_unchecked_update_meta(aborted.from.meta),
enqueued_at: aborted.from.enqueued_at,
},
aborted_at: aborted.aborted_at,
})
}
v2::updates::UpdateStatus::Failed(failed) => {
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
update_id: failed.from.from.update_id,
// since we're never going to read the content_file again it's ok to generate a fake one.
meta: update_from_unchecked_update_meta(failed.from.from.meta),
enqueued_at: failed.from.from.enqueued_at,
},
started_processing_at: failed.from.started_processing_at,
},
msg: failed.error.message,
code: failed.error.error_code.into(),
failed_at: failed.failed_at,
})
}
}
}
}
impl TryFrom<(v2::updates::UpdateMeta, Option<Uuid>)> for v3::updates::Update {
type Error = crate::Error;
fn try_from((update, uuid): (v2::updates::UpdateMeta, Option<Uuid>)) -> Result<Self> {
Ok(match update {
v2::updates::UpdateMeta::DocumentsAddition { method, format: _, primary_key }
if uuid.is_some() =>
{
v3::updates::Update::DocumentAddition {
primary_key,
method: match method {
v2::updates::IndexDocumentsMethod::ReplaceDocuments => {
v3::updates::IndexDocumentsMethod::ReplaceDocuments
}
v2::updates::IndexDocumentsMethod::UpdateDocuments => {
v3::updates::IndexDocumentsMethod::UpdateDocuments
}
},
content_uuid: uuid.unwrap(),
}
}
v2::updates::UpdateMeta::DocumentsAddition { .. } => {
return Err(crate::Error::MalformedTask)
}
v2::updates::UpdateMeta::ClearDocuments => v3::updates::Update::ClearDocuments,
v2::updates::UpdateMeta::DeleteDocuments { ids } => {
v3::updates::Update::DeleteDocuments(ids)
}
v2::updates::UpdateMeta::Settings(settings) => {
v3::updates::Update::Settings(settings.into())
}
})
}
}
pub fn update_from_unchecked_update_meta(update: v2::updates::UpdateMeta) -> v3::updates::Update {
match update {
v2::updates::UpdateMeta::DocumentsAddition { method, format: _, primary_key } => {
v3::updates::Update::DocumentAddition {
primary_key,
method: match method {
v2::updates::IndexDocumentsMethod::ReplaceDocuments => {
v3::updates::IndexDocumentsMethod::ReplaceDocuments
}
v2::updates::IndexDocumentsMethod::UpdateDocuments => {
v3::updates::IndexDocumentsMethod::UpdateDocuments
}
},
// we use this special uuid so we can recognize it if one day there is a bug related to this field.
content_uuid: Uuid::from_str("00112233-4455-6677-8899-aabbccddeeff").unwrap(),
}
}
v2::updates::UpdateMeta::ClearDocuments => v3::updates::Update::ClearDocuments,
v2::updates::UpdateMeta::DeleteDocuments { ids } => {
v3::updates::Update::DeleteDocuments(ids)
}
v2::updates::UpdateMeta::Settings(settings) => {
v3::updates::Update::Settings(settings.into())
}
}
}
impl From<v2::updates::UpdateResult> for v3::updates::UpdateResult {
fn from(result: v2::updates::UpdateResult) -> Self {
match result {
v2::updates::UpdateResult::DocumentsAddition(addition) => {
v3::updates::UpdateResult::DocumentsAddition(v3::updates::DocumentAdditionResult {
nb_documents: addition.nb_documents,
})
}
v2::updates::UpdateResult::DocumentDeletion { deleted } => {
v3::updates::UpdateResult::DocumentDeletion { deleted }
}
v2::updates::UpdateResult::Other => v3::updates::UpdateResult::Other,
}
}
}
impl From<String> for v3::Code {
fn from(code: String) -> Self {
match code.as_ref() {
"create_index" => v3::Code::CreateIndex,
"index_already_exists" => v3::Code::IndexAlreadyExists,
"index_not_found" => v3::Code::IndexNotFound,
"invalid_index_uid" => v3::Code::InvalidIndexUid,
"invalid_state" => v3::Code::InvalidState,
"missing_primary_key" => v3::Code::MissingPrimaryKey,
"primary_key_already_present" => v3::Code::PrimaryKeyAlreadyPresent,
"max_fields_limit_exceeded" => v3::Code::MaxFieldsLimitExceeded,
"missing_document_id" => v3::Code::MissingDocumentId,
"invalid_document_id" => v3::Code::InvalidDocumentId,
"filter" => v3::Code::Filter,
"sort" => v3::Code::Sort,
"bad_parameter" => v3::Code::BadParameter,
"bad_request" => v3::Code::BadRequest,
"database_size_limit_reached" => v3::Code::DatabaseSizeLimitReached,
"document_not_found" => v3::Code::DocumentNotFound,
"internal" => v3::Code::Internal,
"invalid_geo_field" => v3::Code::InvalidGeoField,
"invalid_ranking_rule" => v3::Code::InvalidRankingRule,
"invalid_store" => v3::Code::InvalidStore,
"invalid_token" => v3::Code::InvalidToken,
"missing_authorization_header" => v3::Code::MissingAuthorizationHeader,
"no_space_left_on_device" => v3::Code::NoSpaceLeftOnDevice,
"dump_not_found" => v3::Code::DumpNotFound,
"task_not_found" => v3::Code::TaskNotFound,
"payload_too_large" => v3::Code::PayloadTooLarge,
"retrieve_document" => v3::Code::RetrieveDocument,
"search_documents" => v3::Code::SearchDocuments,
"unsupported_media_type" => v3::Code::UnsupportedMediaType,
"dump_already_in_progress" => v3::Code::DumpAlreadyInProgress,
"dump_process_failed" => v3::Code::DumpProcessFailed,
"invalid_content_type" => v3::Code::InvalidContentType,
"missing_content_type" => v3::Code::MissingContentType,
"malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload,
other => {
log::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode
}
}
}
}
fn option_to_setting<T>(opt: Option<Option<T>>) -> v3::Setting<T> {
match opt {
Some(Some(t)) => v3::Setting::Set(t),
None => v3::Setting::NotSet,
Some(None) => v3::Setting::Reset,
}
}
impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
fn from(settings: v2::Settings<T>) -> Self {
v3::Settings {
displayed_attributes: option_to_setting(settings.displayed_attributes),
searchable_attributes: option_to_setting(settings.searchable_attributes),
filterable_attributes: option_to_setting(settings.filterable_attributes)
.map(|f| f.into_iter().collect()),
sortable_attributes: v3::Setting::NotSet,
ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| {
criteria.into_iter().map(|criterion| patch_ranking_rules(&criterion)).collect()
}),
stop_words: option_to_setting(settings.stop_words),
synonyms: option_to_setting(settings.synonyms),
distinct_attribute: option_to_setting(settings.distinct_attribute),
_kind: std::marker::PhantomData,
}
}
}
fn patch_ranking_rules(ranking_rule: &str) -> String {
match v2::settings::Criterion::from_str(ranking_rule) {
Ok(v2::settings::Criterion::Words) => String::from("words"),
Ok(v2::settings::Criterion::Typo) => String::from("typo"),
Ok(v2::settings::Criterion::Proximity) => String::from("proximity"),
Ok(v2::settings::Criterion::Attribute) => String::from("attribute"),
Ok(v2::settings::Criterion::Exactness) => String::from("exactness"),
Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"),
Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"),
// we want to forward the error to the current version of meilisearch
Err(_) => ranking_rule.to_string(),
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn compat_v2_v3() {
let dump = File::open("tests/assets/v2.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = v2::V2Reader::open(dir).unwrap().to_v3();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"9507711db47c7171c79bc6d57d0bed79");
assert_eq!(update_files.len(), 9);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
let update_file = update_files.remove(0).unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies2 = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"f43338ecceeddd1ce13ffd55438b2347");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"0d76c745cb334e8c20d6d6a14df733e1");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
// movies2
insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies_2",
"primaryKey": null,
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"09a2f7c571729f70f4cd93e24e8e3f28");
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"09a2f7c571729f70f4cd93e24e8e3f28");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}
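
One detail worth calling out from this file: `option_to_setting` decodes the double-`Option` encoding that v2 settings used for "absent / null / value". Restated standalone (the local `Setting` enum mirrors the v3 one):

```
#[derive(Debug, PartialEq)]
enum Setting<T> {
    Set(T),  // field present with a value
    Reset,   // field present but null: reset to the default
    NotSet,  // field absent: leave unchanged
}

fn option_to_setting<T>(opt: Option<Option<T>>) -> Setting<T> {
    match opt {
        Some(Some(t)) => Setting::Set(t),
        Some(None) => Setting::Reset,
        None => Setting::NotSet,
    }
}

fn main() {
    assert_eq!(option_to_setting(Some(Some(3))), Setting::Set(3));
    assert_eq!(option_to_setting::<u32>(Some(None)), Setting::Reset);
    assert_eq!(option_to_setting::<u32>(None), Setting::NotSet);
}
```
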

dump/src/reader/compat/v3_to_v4.rs (new file)

@@ -0,0 +1,448 @@
use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3};
use super::v4_to_v5::CompatV4ToV5;
use crate::reader::{v3, v4, UpdateFile};
use crate::Result;
pub enum CompatV3ToV4 {
V3(v3::V3Reader),
Compat(CompatV2ToV3),
}
impl CompatV3ToV4 {
pub fn new(v3: v3::V3Reader) -> CompatV3ToV4 {
CompatV3ToV4::V3(v3)
}
pub fn to_v5(self) -> CompatV4ToV5 {
CompatV4ToV5::Compat(self)
}
pub fn version(&self) -> crate::Version {
match self {
CompatV3ToV4::V3(v3) => v3.version(),
CompatV3ToV4::Compat(compat) => compat.version(),
}
}
pub fn date(&self) -> Option<time::OffsetDateTime> {
match self {
CompatV3ToV4::V3(v3) => v3.date(),
CompatV3ToV4::Compat(compat) => compat.date(),
}
}
pub fn instance_uid(&self) -> Result<Option<uuid::Uuid>> {
Ok(None)
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<CompatIndexV3ToV4>> + '_> {
Ok(match self {
CompatV3ToV4::V3(v3) => {
Box::new(v3.indexes()?.map(|index| index.map(CompatIndexV3ToV4::from)))
as Box<dyn Iterator<Item = Result<CompatIndexV3ToV4>> + '_>
}
CompatV3ToV4::Compat(compat) => {
Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV3ToV4::from)))
as Box<dyn Iterator<Item = Result<CompatIndexV3ToV4>> + '_>
}
})
}
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(v4::Task, Option<Box<UpdateFile>>)>> + '_> {
let indexes = match self {
CompatV3ToV4::V3(v3) => v3.index_uuid(),
CompatV3ToV4::Compat(compat) => compat.index_uuid(),
};
let tasks = match self {
CompatV3ToV4::V3(v3) => v3.tasks(),
CompatV3ToV4::Compat(compat) => compat.tasks(),
};
Box::new(
tasks
// we need to override the old task ids that were generated
// by index in favor of a global unique incremental ID.
.enumerate()
.map(move |(task_id, task)| {
task.map(|(task, content_file)| {
let index_uid = indexes
.iter()
.find(|index| index.uuid == task.uuid)
.map(|index| index.uid.clone());
let index_uid = match index_uid {
Some(uid) => uid.to_string(),
None => {
log::warn!(
"Error while importing the update {}.",
task.update.id()
);
log::warn!(
"The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string()
);
if task.update.is_finished() {
// we're messing with its history but not its data, that's ok-ish.
log::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown")
} else {
log::warn!("The task will be ignored.");
return None;
}
}
};
let task = v4::Task {
id: task_id as u32,
index_uid: v4::meta::IndexUid(index_uid),
content: match task.update.meta() {
v3::Kind::DeleteDocuments(documents) => {
v4::tasks::TaskContent::DocumentDeletion(
v4::tasks::DocumentDeletion::Ids(documents.clone()),
)
}
v3::Kind::DocumentAddition {
primary_key,
method,
content_uuid,
} => v4::tasks::TaskContent::DocumentAddition {
merge_strategy: match method {
v3::updates::IndexDocumentsMethod::ReplaceDocuments => {
v4::tasks::IndexDocumentsMethod::ReplaceDocuments
}
v3::updates::IndexDocumentsMethod::UpdateDocuments => {
v4::tasks::IndexDocumentsMethod::UpdateDocuments
}
},
primary_key: primary_key.clone(),
documents_count: 0, // we don't have this info
allow_index_creation: true, // there was no API-key in the v3
content_uuid: content_uuid.clone(),
},
v3::Kind::Settings(settings) => {
v4::tasks::TaskContent::SettingsUpdate {
settings: v4::Settings::from(settings.clone()),
is_deletion: false, // that didn't exist at this time
allow_index_creation: true, // there was no API-key in the v3
}
}
v3::Kind::ClearDocuments => {
v4::tasks::TaskContent::DocumentDeletion(
v4::tasks::DocumentDeletion::Clear,
)
}
},
events: match task.update {
v3::Status::Processing(processing) => {
vec![v4::tasks::TaskEvent::Created(processing.from.enqueued_at)]
}
v3::Status::Enqueued(enqueued) => {
vec![v4::tasks::TaskEvent::Created(enqueued.enqueued_at)]
}
v3::Status::Processed(processed) => {
vec![
v4::tasks::TaskEvent::Created(
processed.from.from.enqueued_at,
),
v4::tasks::TaskEvent::Processing(
processed.from.started_processing_at,
),
v4::tasks::TaskEvent::Succeded {
result: match processed.success {
v3::updates::UpdateResult::DocumentsAddition(
document_addition,
) => v4::tasks::TaskResult::DocumentAddition {
indexed_documents: document_addition
.nb_documents
as u64,
},
v3::updates::UpdateResult::DocumentDeletion {
deleted,
} => v4::tasks::TaskResult::DocumentDeletion {
deleted_documents: deleted,
},
v3::updates::UpdateResult::Other => {
v4::tasks::TaskResult::Other
}
},
timestamp: processed.processed_at,
},
]
}
v3::Status::Failed(failed) => vec![
v4::tasks::TaskEvent::Created(failed.from.from.enqueued_at),
v4::tasks::TaskEvent::Processing(
failed.from.started_processing_at,
),
v4::tasks::TaskEvent::Failed {
error: v4::ResponseError::from_msg(
failed.msg.to_string(),
failed.code.into(),
),
timestamp: failed.failed_at,
},
],
v3::Status::Aborted(aborted) => vec![
v4::tasks::TaskEvent::Created(aborted.from.enqueued_at),
v4::tasks::TaskEvent::Failed {
error: v4::ResponseError::from_msg(
"Task was aborted in a previous version of meilisearch."
.to_string(),
v4::errors::Code::UnretrievableErrorCode,
),
timestamp: aborted.aborted_at,
},
],
},
};
Some((task, content_file))
})
})
.filter_map(|res| res.transpose()),
)
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<v4::Key>> + '_> {
Box::new(std::iter::empty())
}
}
pub enum CompatIndexV3ToV4 {
V3(v3::V3IndexReader),
Compat(CompatIndexV2ToV3),
}
impl From<v3::V3IndexReader> for CompatIndexV3ToV4 {
fn from(index_reader: v3::V3IndexReader) -> Self {
Self::V3(index_reader)
}
}
impl From<CompatIndexV2ToV3> for CompatIndexV3ToV4 {
fn from(index_reader: CompatIndexV2ToV3) -> Self {
Self::Compat(index_reader)
}
}
impl CompatIndexV3ToV4 {
pub fn new(v3: v3::V3IndexReader) -> CompatIndexV3ToV4 {
CompatIndexV3ToV4::V3(v3)
}
pub fn metadata(&self) -> &crate::IndexMetadata {
match self {
CompatIndexV3ToV4::V3(v3) => v3.metadata(),
CompatIndexV3ToV4::Compat(compat) => compat.metadata(),
}
}
pub fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<v4::Document>> + '_>> {
match self {
CompatIndexV3ToV4::V3(v3) => v3
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<v4::Document>> + '_>),
CompatIndexV3ToV4::Compat(compat) => compat
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<v4::Document>> + '_>),
}
}
pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> {
Ok(match self {
CompatIndexV3ToV4::V3(v3) => {
v4::Settings::<v4::Unchecked>::from(v3.settings()?).check()
}
CompatIndexV3ToV4::Compat(compat) => {
v4::Settings::<v4::Unchecked>::from(compat.settings()?).check()
}
})
}
}
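// The three-state setting (Set / Reset / NotSet) is identical in v3 and v4,
// so the conversion is a direct variant-for-variant mapping.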
impl<T> From<v3::Setting<T>> for v4::Setting<T> {
fn from(setting: v3::Setting<T>) -> Self {
match setting {
v3::Setting::Set(t) => v4::Setting::Set(t),
v3::Setting::Reset => v4::Setting::Reset,
v3::Setting::NotSet => v4::Setting::NotSet,
}
}
}
impl From<v3::Code> for v4::Code {
fn from(code: v3::Code) -> Self {
match code {
v3::Code::CreateIndex => v4::Code::CreateIndex,
v3::Code::IndexAlreadyExists => v4::Code::IndexAlreadyExists,
v3::Code::IndexNotFound => v4::Code::IndexNotFound,
v3::Code::InvalidIndexUid => v4::Code::InvalidIndexUid,
v3::Code::InvalidState => v4::Code::InvalidState,
v3::Code::MissingPrimaryKey => v4::Code::MissingPrimaryKey,
v3::Code::PrimaryKeyAlreadyPresent => v4::Code::PrimaryKeyAlreadyPresent,
v3::Code::MaxFieldsLimitExceeded => v4::Code::MaxFieldsLimitExceeded,
v3::Code::MissingDocumentId => v4::Code::MissingDocumentId,
v3::Code::InvalidDocumentId => v4::Code::InvalidDocumentId,
v3::Code::Filter => v4::Code::Filter,
v3::Code::Sort => v4::Code::Sort,
v3::Code::BadParameter => v4::Code::BadParameter,
v3::Code::BadRequest => v4::Code::BadRequest,
v3::Code::DatabaseSizeLimitReached => v4::Code::DatabaseSizeLimitReached,
v3::Code::DocumentNotFound => v4::Code::DocumentNotFound,
v3::Code::Internal => v4::Code::Internal,
v3::Code::InvalidGeoField => v4::Code::InvalidGeoField,
v3::Code::InvalidRankingRule => v4::Code::InvalidRankingRule,
v3::Code::InvalidStore => v4::Code::InvalidStore,
v3::Code::InvalidToken => v4::Code::InvalidToken,
v3::Code::MissingAuthorizationHeader => v4::Code::MissingAuthorizationHeader,
v3::Code::NoSpaceLeftOnDevice => v4::Code::NoSpaceLeftOnDevice,
v3::Code::DumpNotFound => v4::Code::DumpNotFound,
v3::Code::TaskNotFound => v4::Code::TaskNotFound,
v3::Code::PayloadTooLarge => v4::Code::PayloadTooLarge,
v3::Code::RetrieveDocument => v4::Code::RetrieveDocument,
v3::Code::SearchDocuments => v4::Code::SearchDocuments,
v3::Code::UnsupportedMediaType => v4::Code::UnsupportedMediaType,
v3::Code::DumpAlreadyInProgress => v4::Code::DumpAlreadyInProgress,
v3::Code::DumpProcessFailed => v4::Code::DumpProcessFailed,
v3::Code::InvalidContentType => v4::Code::InvalidContentType,
v3::Code::MissingContentType => v4::Code::MissingContentType,
v3::Code::MalformedPayload => v4::Code::MalformedPayload,
v3::Code::MissingPayload => v4::Code::MissingPayload,
v3::Code::UnretrievableErrorCode => v4::Code::UnretrievableErrorCode,
v3::Code::MalformedDump => v4::Code::MalformedDump,
}
}
}
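// v3 has no typo-tolerance setting, so it is left NotSet; every other field
// maps directly onto its v4 counterpart.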
impl<T> From<v3::Settings<T>> for v4::Settings<v4::Unchecked> {
fn from(settings: v3::Settings<T>) -> Self {
v4::Settings {
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: settings.ranking_rules.into(),
stop_words: settings.stop_words.into(),
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
typo_tolerance: v4::Setting::NotSet,
_kind: std::marker::PhantomData,
}
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn compat_v3_v4() {
let dump = File::open("tests/assets/v3.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = v3::V3Reader::open(dir).unwrap().to_v4();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"79bc053583a1a7172bbaaafb1edaeb78");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
let update_file = update_files.remove(0).unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");
// keys
let keys = dump.keys().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies2 = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"ea46dd6b58c5e1d65c1c8159a32695ea");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4df4074ef6bfb71e8dc66d08ff8c9dfd");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
// movies2
insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies_2",
"primaryKey": null,
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"24eaf4046d9718dabff36f35103352d4");
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"24eaf4046d9718dabff36f35103352d4");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

dump/src/reader/compat/v4_to_v5.rs Normal file

@ -0,0 +1,466 @@
use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4};
use super::v5_to_v6::CompatV5ToV6;
use crate::reader::{v4, v5, Document};
use crate::Result;
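/// Wraps either a native v4 dump reader or a v3 dump already upgraded to v4,
/// and exposes both through the v5 types.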
pub enum CompatV4ToV5 {
V4(v4::V4Reader),
Compat(CompatV3ToV4),
}
impl CompatV4ToV5 {
pub fn new(v4: v4::V4Reader) -> CompatV4ToV5 {
CompatV4ToV5::V4(v4)
}
pub fn to_v6(self) -> CompatV5ToV6 {
CompatV5ToV6::Compat(self)
}
pub fn version(&self) -> crate::Version {
match self {
CompatV4ToV5::V4(v4) => v4.version(),
CompatV4ToV5::Compat(compat) => compat.version(),
}
}
pub fn date(&self) -> Option<time::OffsetDateTime> {
match self {
CompatV4ToV5::V4(v4) => v4.date(),
CompatV4ToV5::Compat(compat) => compat.date(),
}
}
pub fn instance_uid(&self) -> Result<Option<uuid::Uuid>> {
match self {
CompatV4ToV5::V4(v4) => v4.instance_uid(),
CompatV4ToV5::Compat(compat) => compat.instance_uid(),
}
}
pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<CompatIndexV4ToV5>> + '_>> {
Ok(match self {
CompatV4ToV5::V4(v4) => {
Box::new(v4.indexes()?.map(|index| index.map(CompatIndexV4ToV5::from)))
as Box<dyn Iterator<Item = Result<CompatIndexV4ToV5>> + '_>
}
CompatV4ToV5::Compat(compat) => {
Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV4ToV5::from)))
as Box<dyn Iterator<Item = Result<CompatIndexV4ToV5>> + '_>
}
})
}
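// v4 keeps the index uid at the top level of the task, while v5 moves it into
// each TaskContent variant, so the conversion below pushes `task.index_uid`
// down into the content.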
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(v5::Task, Option<Box<crate::reader::UpdateFile>>)>> + '_>
{
let tasks = match self {
CompatV4ToV5::V4(v4) => v4.tasks(),
CompatV4ToV5::Compat(compat) => compat.tasks(),
};
Box::new(tasks.map(|task| {
task.map(|(task, content_file)| {
let task = v5::Task {
id: task.id,
content: match task.content {
v4::tasks::TaskContent::DocumentAddition {
content_uuid,
merge_strategy,
primary_key,
documents_count,
allow_index_creation,
} => v5::tasks::TaskContent::DocumentAddition {
index_uid: v5::meta::IndexUid(task.index_uid.0),
content_uuid,
merge_strategy: match merge_strategy {
v4::tasks::IndexDocumentsMethod::ReplaceDocuments => {
v5::tasks::IndexDocumentsMethod::ReplaceDocuments
}
v4::tasks::IndexDocumentsMethod::UpdateDocuments => {
v5::tasks::IndexDocumentsMethod::UpdateDocuments
}
},
primary_key,
documents_count,
allow_index_creation,
},
v4::tasks::TaskContent::DocumentDeletion(deletion) => {
v5::tasks::TaskContent::DocumentDeletion {
index_uid: v5::meta::IndexUid(task.index_uid.0),
deletion: match deletion {
v4::tasks::DocumentDeletion::Clear => {
v5::tasks::DocumentDeletion::Clear
}
v4::tasks::DocumentDeletion::Ids(ids) => {
v5::tasks::DocumentDeletion::Ids(ids)
}
},
}
}
v4::tasks::TaskContent::SettingsUpdate {
settings,
is_deletion,
allow_index_creation,
} => v5::tasks::TaskContent::SettingsUpdate {
index_uid: v5::meta::IndexUid(task.index_uid.0),
settings: settings.into(),
is_deletion,
allow_index_creation,
},
v4::tasks::TaskContent::IndexDeletion => {
v5::tasks::TaskContent::IndexDeletion {
index_uid: v5::meta::IndexUid(task.index_uid.0),
}
}
v4::tasks::TaskContent::IndexCreation { primary_key } => {
v5::tasks::TaskContent::IndexCreation {
index_uid: v5::meta::IndexUid(task.index_uid.0),
primary_key,
}
}
v4::tasks::TaskContent::IndexUpdate { primary_key } => {
v5::tasks::TaskContent::IndexUpdate {
index_uid: v5::meta::IndexUid(task.index_uid.0),
primary_key,
}
}
},
events: task
.events
.into_iter()
.map(|event| match event {
v4::tasks::TaskEvent::Created(date) => {
v5::tasks::TaskEvent::Created(date)
}
v4::tasks::TaskEvent::Batched { timestamp, batch_id } => {
v5::tasks::TaskEvent::Batched { timestamp, batch_id }
}
v4::tasks::TaskEvent::Processing(date) => {
v5::tasks::TaskEvent::Processing(date)
}
v4::tasks::TaskEvent::Succeded { result, timestamp } => {
v5::tasks::TaskEvent::Succeeded {
result: match result {
v4::tasks::TaskResult::DocumentAddition {
indexed_documents,
} => v5::tasks::TaskResult::DocumentAddition {
indexed_documents,
},
v4::tasks::TaskResult::DocumentDeletion {
deleted_documents,
} => v5::tasks::TaskResult::DocumentDeletion {
deleted_documents,
},
v4::tasks::TaskResult::ClearAll { deleted_documents } => {
v5::tasks::TaskResult::ClearAll { deleted_documents }
}
v4::tasks::TaskResult::Other => {
v5::tasks::TaskResult::Other
}
},
timestamp,
}
}
v4::tasks::TaskEvent::Failed { error, timestamp } => {
v5::tasks::TaskEvent::Failed {
error: v5::ResponseError::from(error),
timestamp,
}
}
})
.collect(),
};
(task, content_file)
})
}))
}
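// v5 keys gained a name and a stable uid, neither of which exists in v4: the
// name is left empty and a fresh uuid is generated. v4 actions with no v5
// equivalent are dropped.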
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<v5::Key>> + '_> {
let keys = match self {
CompatV4ToV5::V4(v4) => v4.keys(),
CompatV4ToV5::Compat(compat) => compat.keys(),
};
Box::new(keys.map(|key| {
key.map(|key| v5::Key {
description: key.description,
name: None,
uid: v5::keys::KeyId::new_v4(),
actions: key.actions.into_iter().filter_map(|action| action.into()).collect(),
indexes: key
.indexes
.into_iter()
.map(|index| match index.as_str() {
"*" => v5::StarOr::Star,
_ => v5::StarOr::Other(v5::meta::IndexUid(index)),
})
.collect(),
expires_at: key.expires_at,
created_at: key.created_at,
updated_at: key.updated_at,
})
}))
}
}
pub enum CompatIndexV4ToV5 {
V4(v4::V4IndexReader),
Compat(CompatIndexV3ToV4),
}
impl From<v4::V4IndexReader> for CompatIndexV4ToV5 {
fn from(index_reader: v4::V4IndexReader) -> Self {
Self::V4(index_reader)
}
}
impl From<CompatIndexV3ToV4> for CompatIndexV4ToV5 {
fn from(index_reader: CompatIndexV3ToV4) -> Self {
Self::Compat(index_reader)
}
}
impl CompatIndexV4ToV5 {
pub fn metadata(&self) -> &crate::IndexMetadata {
match self {
CompatIndexV4ToV5::V4(v4) => v4.metadata(),
CompatIndexV4ToV5::Compat(compat) => compat.metadata(),
}
}
pub fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Document>> + '_>> {
match self {
CompatIndexV4ToV5::V4(v4) => v4
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>),
CompatIndexV4ToV5::Compat(compat) => compat
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>),
}
}
pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> {
match self {
CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()),
CompatIndexV4ToV5::Compat(compat) => Ok(v5::Settings::from(compat.settings()?).check()),
}
}
}
impl<T> From<v4::Setting<T>> for v5::Setting<T> {
fn from(setting: v4::Setting<T>) -> Self {
match setting {
v4::Setting::Set(t) => v5::Setting::Set(t),
v4::Setting::Reset => v5::Setting::Reset,
v4::Setting::NotSet => v5::Setting::NotSet,
}
}
}
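// v4 stores error codes as strings; each known code is mapped back onto the
// v5 enum, and anything unrecognized falls back to UnretrievableErrorCode
// with a warning.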
impl From<v4::ResponseError> for v5::ResponseError {
fn from(error: v4::ResponseError) -> Self {
let code = match error.error_code.as_ref() {
"index_creation_failed" => v5::Code::CreateIndex,
"index_already_exists" => v5::Code::IndexAlreadyExists,
"index_not_found" => v5::Code::IndexNotFound,
"invalid_index_uid" => v5::Code::InvalidIndexUid,
"invalid_min_word_length_for_typo" => v5::Code::InvalidMinWordLengthForTypo,
"invalid_state" => v5::Code::InvalidState,
"primary_key_inference_failed" => v5::Code::MissingPrimaryKey,
"index_primary_key_already_exists" => v5::Code::PrimaryKeyAlreadyPresent,
"max_fields_limit_exceeded" => v5::Code::MaxFieldsLimitExceeded,
"missing_document_id" => v5::Code::MissingDocumentId,
"invalid_document_id" => v5::Code::InvalidDocumentId,
"invalid_filter" => v5::Code::Filter,
"invalid_sort" => v5::Code::Sort,
"bad_parameter" => v5::Code::BadParameter,
"bad_request" => v5::Code::BadRequest,
"database_size_limit_reached" => v5::Code::DatabaseSizeLimitReached,
"document_not_found" => v5::Code::DocumentNotFound,
"internal" => v5::Code::Internal,
"invalid_geo_field" => v5::Code::InvalidGeoField,
"invalid_ranking_rule" => v5::Code::InvalidRankingRule,
"invalid_store_file" => v5::Code::InvalidStore,
"invalid_api_key" => v5::Code::InvalidToken,
"missing_authorization_header" => v5::Code::MissingAuthorizationHeader,
"no_space_left_on_device" => v5::Code::NoSpaceLeftOnDevice,
"dump_not_found" => v5::Code::DumpNotFound,
"task_not_found" => v5::Code::TaskNotFound,
"payload_too_large" => v5::Code::PayloadTooLarge,
"unretrievable_document" => v5::Code::RetrieveDocument,
"search_error" => v5::Code::SearchDocuments,
"unsupported_media_type" => v5::Code::UnsupportedMediaType,
"dump_already_processing" => v5::Code::DumpAlreadyInProgress,
"dump_process_failed" => v5::Code::DumpProcessFailed,
"invalid_content_type" => v5::Code::InvalidContentType,
"missing_content_type" => v5::Code::MissingContentType,
"malformed_payload" => v5::Code::MalformedPayload,
"missing_payload" => v5::Code::MissingPayload,
"api_key_not_found" => v5::Code::ApiKeyNotFound,
"missing_parameter" => v5::Code::MissingParameter,
"invalid_api_key_actions" => v5::Code::InvalidApiKeyActions,
"invalid_api_key_indexes" => v5::Code::InvalidApiKeyIndexes,
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => {
log::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode
}
};
v5::ResponseError::from_msg(error.message, code)
}
}
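// The faceting and pagination settings were introduced in v5 and have no v4
// counterpart, so they are left NotSet.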
impl<T> From<v4::Settings<T>> for v5::Settings<v5::Unchecked> {
fn from(settings: v4::Settings<T>) -> Self {
v5::Settings {
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: settings.ranking_rules.into(),
stop_words: settings.stop_words.into(),
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
typo_tolerance: match settings.typo_tolerance {
v4::Setting::Set(typo) => v5::Setting::Set(v5::TypoTolerance {
enabled: typo.enabled.into(),
min_word_size_for_typos: match typo.min_word_size_for_typos {
v4::Setting::Set(t) => v5::Setting::Set(v5::MinWordSizeForTypos {
one_typo: t.one_typo.into(),
two_typos: t.two_typos.into(),
}),
v4::Setting::Reset => v5::Setting::Reset,
v4::Setting::NotSet => v5::Setting::NotSet,
},
disable_on_words: typo.disable_on_words.into(),
disable_on_attributes: typo.disable_on_attributes.into(),
}),
v4::Setting::Reset => v5::Setting::Reset,
v4::Setting::NotSet => v5::Setting::NotSet,
},
faceting: v5::Setting::NotSet,
pagination: v5::Setting::NotSet,
_kind: std::marker::PhantomData,
}
}
}
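// Returns an Option because v4's DumpsGet action was removed in v5; keys that
// referenced it simply lose that action.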
impl From<v4::Action> for Option<v5::Action> {
fn from(key: v4::Action) -> Self {
match key {
v4::Action::All => Some(v5::Action::All),
v4::Action::Search => Some(v5::Action::Search),
v4::Action::DocumentsAdd => Some(v5::Action::DocumentsAdd),
v4::Action::DocumentsGet => Some(v5::Action::DocumentsGet),
v4::Action::DocumentsDelete => Some(v5::Action::DocumentsDelete),
v4::Action::IndexesAdd => Some(v5::Action::IndexesAdd),
v4::Action::IndexesGet => Some(v5::Action::IndexesGet),
v4::Action::IndexesUpdate => Some(v5::Action::IndexesUpdate),
v4::Action::IndexesDelete => Some(v5::Action::IndexesDelete),
v4::Action::TasksGet => Some(v5::Action::TasksGet),
v4::Action::SettingsGet => Some(v5::Action::SettingsGet),
v4::Action::SettingsUpdate => Some(v5::Action::SettingsUpdate),
v4::Action::StatsGet => Some(v5::Action::StatsGet),
v4::Action::DumpsCreate => Some(v5::Action::DumpsCreate),
v4::Action::DumpsGet => None,
v4::Action::Version => Some(v5::Action::Version),
}
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn compat_v4_v5() {
let dump = File::open("tests/assets/v4.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = v4::V4Reader::open(dir).unwrap().to_v5();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ed9a30cded4c046ef46f7cff7450347e");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"1384361d734fd77c23804c9696228660");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"ed1a6977a832b1ab49cd5068b77ce498");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"70681af1d52411218036fbd5a9b94ab5");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7019bb8f146004dcdd91fc3c3254b742");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

dump/src/reader/compat/v5_to_v6.rs Normal file

@ -0,0 +1,481 @@
use super::v4_to_v5::{CompatIndexV4ToV5, CompatV4ToV5};
use crate::reader::{v5, v6, Document, UpdateFile};
use crate::Result;
pub enum CompatV5ToV6 {
V5(v5::V5Reader),
Compat(CompatV4ToV5),
}
impl CompatV5ToV6 {
pub fn new_v5(v5: v5::V5Reader) -> CompatV5ToV6 {
CompatV5ToV6::V5(v5)
}
pub fn version(&self) -> crate::Version {
match self {
CompatV5ToV6::V5(v5) => v5.version(),
CompatV5ToV6::Compat(compat) => compat.version(),
}
}
pub fn date(&self) -> Option<time::OffsetDateTime> {
match self {
CompatV5ToV6::V5(v5) => v5.date(),
CompatV5ToV6::Compat(compat) => compat.date(),
}
}
pub fn instance_uid(&self) -> Result<Option<uuid::Uuid>> {
match self {
CompatV5ToV6::V5(v5) => v5.instance_uid(),
CompatV5ToV6::Compat(compat) => compat.instance_uid(),
}
}
pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<CompatIndexV5ToV6>> + '_>> {
let indexes = match self {
CompatV5ToV6::V5(v5) => {
Box::new(v5.indexes()?.map(|index| index.map(CompatIndexV5ToV6::from)))
as Box<dyn Iterator<Item = Result<CompatIndexV5ToV6>> + '_>
}
CompatV5ToV6::Compat(compat) => {
Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV5ToV6::from)))
as Box<dyn Iterator<Item = Result<CompatIndexV5ToV6>> + '_>
}
};
Ok(indexes)
}
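// The v6 task model is flat (status, details, timestamps) rather than an
// event list, so each v5 task is converted through its TaskView, which
// already aggregates the events. Keys and the instance uid are collected up
// front because v5 dump tasks must embed them in the v6 DumpExport kind.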
pub fn tasks(
&mut self,
) -> Result<Box<dyn Iterator<Item = Result<(v6::Task, Option<Box<UpdateFile>>)>> + '_>> {
let instance_uid = self.instance_uid().ok().flatten();
let keys = self.keys()?.collect::<Result<Vec<_>>>()?;
let tasks = match self {
CompatV5ToV6::V5(v5) => v5.tasks(),
CompatV5ToV6::Compat(compat) => compat.tasks(),
};
Ok(Box::new(tasks.map(move |task| {
task.and_then(|(task, content_file)| {
let task_view: v5::tasks::TaskView = task.clone().into();
let task = v6::Task {
uid: task_view.uid,
index_uid: task_view.index_uid,
status: match task_view.status {
v5::Status::Enqueued => v6::Status::Enqueued,
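// A task that was still processing when the dump was created is re-enqueued.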
v5::Status::Processing => v6::Status::Enqueued,
v5::Status::Succeeded => v6::Status::Succeeded,
v5::Status::Failed => v6::Status::Failed,
},
kind: match task.content.clone() {
v5::tasks::TaskContent::IndexCreation { primary_key, .. } => {
v6::Kind::IndexCreation { primary_key }
}
v5::tasks::TaskContent::IndexUpdate { primary_key, .. } => {
v6::Kind::IndexUpdate { primary_key }
}
v5::tasks::TaskContent::IndexDeletion { .. } => v6::Kind::IndexDeletion,
v5::tasks::TaskContent::DocumentAddition {
merge_strategy,
allow_index_creation,
primary_key,
documents_count,
..
} => v6::Kind::DocumentImport {
primary_key,
documents_count: documents_count as u64,
method: match merge_strategy {
v5::tasks::IndexDocumentsMethod::ReplaceDocuments => {
v6::milli::update::IndexDocumentsMethod::ReplaceDocuments
}
v5::tasks::IndexDocumentsMethod::UpdateDocuments => {
v6::milli::update::IndexDocumentsMethod::UpdateDocuments
}
},
allow_index_creation,
},
v5::tasks::TaskContent::DocumentDeletion { deletion, .. } => match deletion
{
v5::tasks::DocumentDeletion::Clear => v6::Kind::DocumentClear,
v5::tasks::DocumentDeletion::Ids(documents_ids) => {
v6::Kind::DocumentDeletion { documents_ids }
}
},
v5::tasks::TaskContent::SettingsUpdate {
allow_index_creation,
is_deletion,
settings,
..
} => v6::Kind::Settings {
is_deletion,
allow_index_creation,
settings: settings.into(),
},
v5::tasks::TaskContent::Dump { uid } => v6::Kind::DumpExport {
dump_uid: uid,
keys: keys.clone(),
instance_uid: instance_uid.clone(),
},
},
canceled_by: None,
details: task_view.details.map(|details| match details {
v5::Details::DocumentAddition { received_documents, indexed_documents } => {
v6::Details::DocumentAddition {
received_documents: received_documents as u64,
indexed_documents: indexed_documents.map(|i| i as u64),
}
}
v5::Details::Settings { settings } => {
v6::Details::Settings { settings: settings.into() }
}
v5::Details::IndexInfo { primary_key } => {
v6::Details::IndexInfo { primary_key }
}
v5::Details::DocumentDeletion {
received_document_ids,
deleted_documents,
} => v6::Details::DocumentDeletion {
received_document_ids,
deleted_documents,
},
v5::Details::ClearAll { deleted_documents } => {
v6::Details::ClearAll { deleted_documents }
}
v5::Details::Dump { dump_uid } => v6::Details::Dump { dump_uid },
}),
error: task_view.error.map(|e| e.into()),
enqueued_at: task_view.enqueued_at,
started_at: task_view.started_at,
finished_at: task_view.finished_at,
};
Ok((task, content_file))
})
})))
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
let keys = match self {
CompatV5ToV6::V5(v5) => v5.keys()?,
CompatV5ToV6::Compat(compat) => compat.keys(),
};
Ok(Box::new(keys.map(|key| {
key.map(|key| v6::Key {
description: key.description,
name: key.name,
uid: key.uid,
actions: key.actions.into_iter().map(|action| action.into()).collect(),
indexes: key
.indexes
.into_iter()
.map(|index| match index {
v5::StarOr::Star => v6::StarOr::Star,
v5::StarOr::Other(uid) => {
v6::StarOr::Other(v6::IndexUid::new_unchecked(uid.as_str()))
}
})
.collect(),
expires_at: key.expires_at,
created_at: key.created_at,
updated_at: key.updated_at,
})
})))
}
}
pub enum CompatIndexV5ToV6 {
V5(v5::V5IndexReader),
Compat(CompatIndexV4ToV5),
}
impl From<v5::V5IndexReader> for CompatIndexV5ToV6 {
fn from(index_reader: v5::V5IndexReader) -> Self {
Self::V5(index_reader)
}
}
impl From<CompatIndexV4ToV5> for CompatIndexV5ToV6 {
fn from(index_reader: CompatIndexV4ToV5) -> Self {
Self::Compat(index_reader)
}
}
impl CompatIndexV5ToV6 {
pub fn new_v5(v5: v5::V5IndexReader) -> CompatIndexV5ToV6 {
CompatIndexV5ToV6::V5(v5)
}
pub fn metadata(&self) -> &crate::IndexMetadata {
match self {
CompatIndexV5ToV6::V5(v5) => v5.metadata(),
CompatIndexV5ToV6::Compat(compat) => compat.metadata(),
}
}
pub fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Document>> + '_>> {
match self {
CompatIndexV5ToV6::V5(v5) => v5
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>),
CompatIndexV5ToV6::Compat(compat) => compat
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()),
CompatIndexV5ToV6::Compat(compat) => Ok(v6::Settings::from(compat.settings()?).check()),
}
}
}
impl<T> From<v5::Setting<T>> for v6::Setting<T> {
fn from(setting: v5::Setting<T>) -> Self {
match setting {
v5::Setting::Set(t) => v6::Setting::Set(t),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
}
}
}
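// Same string-keyed mapping as the v4 to v5 conversion, extended with the
// error codes introduced in v5 (API-key name and uid, immutable fields, ...).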
impl From<v5::ResponseError> for v6::ResponseError {
fn from(error: v5::ResponseError) -> Self {
let code = match error.error_code.as_ref() {
"index_creation_failed" => v6::Code::CreateIndex,
"index_already_exists" => v6::Code::IndexAlreadyExists,
"index_not_found" => v6::Code::IndexNotFound,
"invalid_index_uid" => v6::Code::InvalidIndexUid,
"invalid_min_word_length_for_typo" => v6::Code::InvalidMinWordLengthForTypo,
"invalid_state" => v6::Code::InvalidState,
"primary_key_inference_failed" => v6::Code::MissingPrimaryKey,
"index_primary_key_already_exists" => v6::Code::PrimaryKeyAlreadyPresent,
"max_fields_limit_exceeded" => v6::Code::MaxFieldsLimitExceeded,
"missing_document_id" => v6::Code::MissingDocumentId,
"invalid_document_id" => v6::Code::InvalidDocumentId,
"invalid_filter" => v6::Code::Filter,
"invalid_sort" => v6::Code::Sort,
"bad_parameter" => v6::Code::BadParameter,
"bad_request" => v6::Code::BadRequest,
"database_size_limit_reached" => v6::Code::DatabaseSizeLimitReached,
"document_not_found" => v6::Code::DocumentNotFound,
"internal" => v6::Code::Internal,
"invalid_geo_field" => v6::Code::InvalidGeoField,
"invalid_ranking_rule" => v6::Code::InvalidRankingRule,
"invalid_store_file" => v6::Code::InvalidStore,
"invalid_api_key" => v6::Code::InvalidToken,
"missing_authorization_header" => v6::Code::MissingAuthorizationHeader,
"no_space_left_on_device" => v6::Code::NoSpaceLeftOnDevice,
"dump_not_found" => v6::Code::DumpNotFound,
"task_not_found" => v6::Code::TaskNotFound,
"payload_too_large" => v6::Code::PayloadTooLarge,
"unretrievable_document" => v6::Code::RetrieveDocument,
"search_error" => v6::Code::SearchDocuments,
"unsupported_media_type" => v6::Code::UnsupportedMediaType,
"dump_already_processing" => v6::Code::DumpAlreadyInProgress,
"dump_process_failed" => v6::Code::DumpProcessFailed,
"invalid_content_type" => v6::Code::InvalidContentType,
"missing_content_type" => v6::Code::MissingContentType,
"malformed_payload" => v6::Code::MalformedPayload,
"missing_payload" => v6::Code::MissingPayload,
"api_key_not_found" => v6::Code::ApiKeyNotFound,
"missing_parameter" => v6::Code::MissingParameter,
"invalid_api_key_actions" => v6::Code::InvalidApiKeyActions,
"invalid_api_key_indexes" => v6::Code::InvalidApiKeyIndexes,
"invalid_api_key_expires_at" => v6::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v6::Code::InvalidApiKeyDescription,
"invalid_api_key_name" => v6::Code::InvalidApiKeyName,
"invalid_api_key_uid" => v6::Code::InvalidApiKeyUid,
"immutable_field" => v6::Code::ImmutableField,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => {
log::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode
}
};
v6::ResponseError::from_msg(error.message, code)
}
}
impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
fn from(settings: v5::Settings<T>) -> Self {
v6::Settings {
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: settings.ranking_rules.into(),
stop_words: settings.stop_words.into(),
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
typo_tolerance: match settings.typo_tolerance {
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
enabled: typo.enabled.into(),
min_word_size_for_typos: match typo.min_word_size_for_typos {
v5::Setting::Set(t) => v6::Setting::Set(v6::MinWordSizeForTypos {
one_typo: t.one_typo.into(),
two_typos: t.two_typos.into(),
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
disable_on_words: typo.disable_on_words.into(),
disable_on_attributes: typo.disable_on_attributes.into(),
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
faceting: match settings.faceting {
v5::Setting::Set(faceting) => v6::Setting::Set(v6::FacetingSettings {
max_values_per_facet: faceting.max_values_per_facet.into(),
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
pagination: match settings.pagination {
v5::Setting::Set(pagination) => v6::Setting::Set(v6::PaginationSettings {
max_total_hits: pagination.max_total_hits.into(),
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
_kind: std::marker::PhantomData,
}
}
}
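// Every v5 action still exists in v6, so unlike the v4 to v5 mapping this
// conversion is total.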
impl From<v5::Action> for v6::Action {
fn from(key: v5::Action) -> Self {
match key {
v5::Action::All => v6::Action::All,
v5::Action::Search => v6::Action::Search,
v5::Action::DocumentsAll => v6::Action::DocumentsAll,
v5::Action::DocumentsAdd => v6::Action::DocumentsAdd,
v5::Action::DocumentsGet => v6::Action::DocumentsGet,
v5::Action::DocumentsDelete => v6::Action::DocumentsDelete,
v5::Action::IndexesAll => v6::Action::IndexesAll,
v5::Action::IndexesAdd => v6::Action::IndexesAdd,
v5::Action::IndexesGet => v6::Action::IndexesGet,
v5::Action::IndexesUpdate => v6::Action::IndexesUpdate,
v5::Action::IndexesDelete => v6::Action::IndexesDelete,
v5::Action::TasksAll => v6::Action::TasksAll,
v5::Action::TasksGet => v6::Action::TasksGet,
v5::Action::SettingsAll => v6::Action::SettingsAll,
v5::Action::SettingsGet => v6::Action::SettingsGet,
v5::Action::SettingsUpdate => v6::Action::SettingsUpdate,
v5::Action::StatsAll => v6::Action::StatsAll,
v5::Action::StatsGet => v6::Action::StatsGet,
v5::Action::MetricsAll => v6::Action::MetricsAll,
v5::Action::MetricsGet => v6::Action::MetricsGet,
v5::Action::DumpsAll => v6::Action::DumpsAll,
v5::Action::DumpsCreate => v6::Action::DumpsCreate,
v5::Action::Version => v6::Action::Version,
v5::Action::KeysAdd => v6::Action::KeysAdd,
v5::Action::KeysGet => v6::Action::KeysGet,
v5::Action::KeysUpdate => v6::Action::KeysUpdate,
v5::Action::KeysDelete => v6::Action::KeysDelete,
}
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn compat_v5_v6() {
let dump = File::open("tests/assets/v5.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = v5::V5Reader::open(dir).unwrap().to_v6();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"b37c01556be2e5ded407a9319915b406");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition
assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"c9d2b467fe2fca0b35580d8a999808fb");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"9896a66a399c24a0f4f6a3c8563cd14a");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"d0dc7efd1360f95fce57d7931a70b7c9");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 200);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"59c8e30c2022897987ea7b4394167b06");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

dump/src/reader/mod.rs Normal file

@ -0,0 +1,529 @@
use std::fs::File;
use std::io::{BufReader, Read};
use flate2::bufread::GzDecoder;
use serde::Deserialize;
use tempfile::TempDir;
use self::compat::v4_to_v5::CompatV4ToV5;
use self::compat::v5_to_v6::{CompatIndexV5ToV6, CompatV5ToV6};
use self::v5::V5Reader;
use self::v6::{V6IndexReader, V6Reader};
use crate::{Error, Result, Version};
mod compat;
// pub(self) mod v1;
pub(self) mod v2;
pub(self) mod v3;
pub(self) mod v4;
pub(self) mod v5;
pub(self) mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
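/// A reader over any supported dump version: either a native v6 reader or a
/// chain of compatibility layers that upgrades an older dump to v6 on the fly.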
pub enum DumpReader {
Current(V6Reader),
Compat(CompatV5ToV6),
}
impl DumpReader {
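// Unpacks the gzipped tar archive into a temporary directory, reads the dump
// version out of metadata.json, and builds the matching reader, stacking the
// vN -> vN+1 compatibility layers as needed.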
pub fn open(dump: impl Read) -> Result<DumpReader> {
let path = TempDir::new()?;
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(path.path())?;
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct MetadataVersion {
pub dump_version: Version,
}
let mut meta_file = File::open(path.path().join("metadata.json"))?;
let MetadataVersion { dump_version } = serde_json::from_reader(&mut meta_file)?;
match dump_version {
// Version::V1 => Ok(Box::new(v1::Reader::open(path)?)),
Version::V1 => Err(Error::DumpV1Unsupported),
Version::V2 => Ok(v2::V2Reader::open(path)?.to_v3().to_v4().to_v5().to_v6().into()),
Version::V3 => Ok(v3::V3Reader::open(path)?.to_v4().to_v5().to_v6().into()),
Version::V4 => Ok(v4::V4Reader::open(path)?.to_v5().to_v6().into()),
Version::V5 => Ok(v5::V5Reader::open(path)?.to_v6().into()),
Version::V6 => Ok(v6::V6Reader::open(path)?.into()),
}
}
pub fn version(&self) -> crate::Version {
match self {
DumpReader::Current(current) => current.version(),
DumpReader::Compat(compat) => compat.version(),
}
}
pub fn date(&self) -> Option<time::OffsetDateTime> {
match self {
DumpReader::Current(current) => current.date(),
DumpReader::Compat(compat) => compat.date(),
}
}
pub fn instance_uid(&self) -> Result<Option<uuid::Uuid>> {
match self {
DumpReader::Current(current) => current.instance_uid(),
DumpReader::Compat(compat) => compat.instance_uid(),
}
}
pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<DumpIndexReader>> + '_>> {
match self {
DumpReader::Current(current) => {
let indexes = Box::new(current.indexes()?.map(|res| res.map(DumpIndexReader::from)))
as Box<dyn Iterator<Item = Result<DumpIndexReader>> + '_>;
Ok(indexes)
}
DumpReader::Compat(compat) => {
let indexes = Box::new(compat.indexes()?.map(|res| res.map(DumpIndexReader::from)))
as Box<dyn Iterator<Item = Result<DumpIndexReader>> + '_>;
Ok(indexes)
}
}
}
pub fn tasks(
&mut self,
) -> Result<Box<dyn Iterator<Item = Result<(v6::Task, Option<Box<UpdateFile>>)>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.tasks()),
DumpReader::Compat(compat) => compat.tasks(),
}
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.keys()),
DumpReader::Compat(compat) => compat.keys(),
}
}
}
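// A minimal sketch of the intended call pattern (the dump path below is
// hypothetical):
//
//     let file = std::fs::File::open("path/to/a.dump")?;
//     let mut dump = DumpReader::open(file)?;
//     for entry in dump.tasks()? {
//         let (task, _content_file) = entry?;
//         println!("{}", task.uid);
//     }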
impl From<V6Reader> for DumpReader {
fn from(value: V6Reader) -> Self {
DumpReader::Current(value)
}
}
impl From<CompatV5ToV6> for DumpReader {
fn from(value: CompatV5ToV6) -> Self {
DumpReader::Compat(value)
}
}
impl From<V5Reader> for DumpReader {
fn from(value: V5Reader) -> Self {
DumpReader::Compat(value.to_v6())
}
}
impl From<CompatV4ToV5> for DumpReader {
fn from(value: CompatV4ToV5) -> Self {
DumpReader::Compat(value.to_v6())
}
}
pub enum DumpIndexReader {
Current(v6::V6IndexReader),
Compat(CompatIndexV5ToV6),
}
impl DumpIndexReader {
pub fn new_v6(v6: v6::V6IndexReader) -> DumpIndexReader {
DumpIndexReader::Current(v6)
}
pub fn metadata(&self) -> &crate::IndexMetadata {
match self {
DumpIndexReader::Current(v6) => v6.metadata(),
DumpIndexReader::Compat(compat) => compat.metadata(),
}
}
pub fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Document>> + '_>> {
match self {
DumpIndexReader::Current(v6) => v6
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>),
DumpIndexReader::Compat(compat) => compat
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
DumpIndexReader::Current(v6) => v6.settings(),
DumpIndexReader::Compat(compat) => compat.settings(),
}
}
}
impl From<V6IndexReader> for DumpIndexReader {
fn from(value: V6IndexReader) -> Self {
DumpIndexReader::Current(value)
}
}
impl From<CompatIndexV5ToV6> for DumpIndexReader {
fn from(value: CompatIndexV5ToV6) -> Self {
DumpIndexReader::Compat(value)
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use super::*;
#[test]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"b37c01556be2e5ded407a9319915b406");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition
assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"c9d2b467fe2fca0b35580d8a999808fb");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"9896a66a399c24a0f4f6a3c8563cd14a");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"d0dc7efd1360f95fce57d7931a70b7c9");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 200);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"59c8e30c2022897987ea7b4394167b06");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn import_dump_v4() {
let dump = File::open("tests/assets/v4.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"acd74244b4e6578c353899e6db30b0b5");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"ed1a6977a832b1ab49cd5068b77ce498");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"70681af1d52411218036fbd5a9b94ab5");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7019bb8f146004dcdd91fc3c3254b742");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn import_dump_v3() {
let dump = File::open("tests/assets/v3.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"fa74f7c6ab3014e09bb813fdc551db8f");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies2 = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"1a5ed16d00e6163662d9d7ffe400c5d0");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"9a6b511669b8f53d193d2f0bd1671baa");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
// movies2
insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies_2",
"primaryKey": null,
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"4fdf905496d9a511800ff523728728ac");
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"4fdf905496d9a511800ff523728728ac");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn import_dump_v2() {
let dump = File::open("tests/assets/v2.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"19882e94dc085f1d60eb7df5005a3224");
assert_eq!(update_files.len(), 9);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies2 = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"a7d4fed93bfc91d0f1126d3371abf48e");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"e79c3cc4eef44bd22acfb60957b459d9");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
// movies2
insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies_2",
"primaryKey": null,
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"7917f954b6f345336073bb155540ad6d");
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7917f954b6f345336073bb155540ad6d");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

dump/src/reader/v1/mod.rs Normal file

@ -0,0 +1,173 @@
use std::{
convert::Infallible,
fs::{self, File},
io::{BufRead, BufReader},
path::Path,
};
use tempfile::TempDir;
use time::OffsetDateTime;
use self::update::UpdateStatus;
use super::{DumpReader, IndexReader};
use crate::{Error, Result, Version};
pub mod settings;
pub mod update;
pub mod v1;
pub struct V1Reader {
dump: TempDir,
metadata: v1::Metadata,
indexes: Vec<V1IndexReader>,
}
struct V1IndexReader {
name: String,
documents: BufReader<File>,
settings: BufReader<File>,
updates: BufReader<File>,
current_update: Option<UpdateStatus>,
}
impl V1IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let mut ret = V1IndexReader {
name,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
updates: BufReader::new(File::open(path.join("updates.jsonl"))?),
current_update: None,
};
ret.next_update()?;
Ok(ret)
}
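// Advances the per-index cursor by one update and returns the update that was
// current before the call.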
    pub fn next_update(&mut self) -> Result<Option<UpdateStatus>> {
        let current_update = if let Some(line) = (&mut self.updates).lines().next() {
            Some(serde_json::from_str(&line?)?)
        } else {
            None
        };
        Ok(std::mem::replace(&mut self.current_update, current_update))
    }
}
impl V1Reader {
pub fn open(dump: TempDir) -> Result<Self> {
        let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let mut indexes = Vec::new();
let entries = fs::read_dir(dump.path())?;
for entry in entries {
let entry = entry?;
if entry.file_type()?.is_dir() {
indexes.push(V1IndexReader::new(
entry
.file_name()
.to_str()
.ok_or(Error::BadIndexName)?
.to_string(),
&entry.path(),
)?);
}
}
Ok(V1Reader {
dump,
metadata,
indexes,
})
}
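    // Returns the globally oldest pending update: among the current update of
    // every index, take the one with the smallest `enqueued_at`, then advance
    // the owning index to its next update.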
    fn next_update(&mut self) -> Result<Option<UpdateStatus>> {
        let next_index = self
            .indexes
            .iter()
            .enumerate()
            .filter_map(|(idx, index)| {
                index.current_update.as_ref().map(|update| (idx, *update.enqueued_at()))
            })
            .min_by_key(|(_, enqueued_at)| *enqueued_at)
            .map(|(idx, _)| idx);
        match next_index {
            Some(idx) => self.indexes[idx].next_update(),
            None => Ok(None),
        }
    }
}
impl IndexReader for &V1IndexReader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = settings::Settings;
fn name(&self) -> &str {
todo!()
}
fn documents(&self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>>>> {
todo!()
}
fn settings(&self) -> Result<Self::Settings> {
todo!()
}
}
impl DumpReader for V1Reader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = settings::Settings;
type Task = update::UpdateStatus;
type UpdateFile = Infallible;
type Key = Infallible;
fn date(&self) -> Option<OffsetDateTime> {
None
}
fn version(&self) -> Version {
Version::V1
}
fn indexes(
&self,
) -> Result<
Box<
dyn Iterator<
Item = Result<
Box<
dyn super::IndexReader<
Document = Self::Document,
Settings = Self::Settings,
>,
>,
>,
>,
>,
> {
Ok(Box::new(self.indexes.iter().map(|index| {
let index = Box::new(index)
as Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>;
Ok(index)
})))
}
fn tasks(&self) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>>> {
Box::new(std::iter::from_fn(|| {
self.next_update()
.transpose()
.map(|result| result.map(|task| (task, None)))
}))
}
fn keys(&self) -> Box<dyn Iterator<Item = Result<Self::Key>>> {
Box::new(std::iter::empty())
}
}

63
dump/src/reader/v1/settings.rs Normal file

@ -0,0 +1,63 @@
use std::collections::{BTreeMap, BTreeSet};
use std::result::Result as StdResult;
use serde::{Deserialize, Deserializer, Serialize};
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Settings {
#[serde(default, deserialize_with = "deserialize_some")]
pub ranking_rules: Option<Option<Vec<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub distinct_attribute: Option<Option<String>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub displayed_attributes: Option<Option<BTreeSet<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub stop_words: Option<Option<BTreeSet<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub attributes_for_faceting: Option<Option<Vec<String>>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SettingsUpdate {
pub ranking_rules: UpdateState<Vec<RankingRule>>,
pub distinct_attribute: UpdateState<String>,
pub primary_key: UpdateState<String>,
pub searchable_attributes: UpdateState<Vec<String>>,
pub displayed_attributes: UpdateState<BTreeSet<String>>,
pub stop_words: UpdateState<BTreeSet<String>>,
pub synonyms: UpdateState<BTreeMap<String, Vec<String>>>,
pub attributes_for_faceting: UpdateState<Vec<String>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateState<T> {
Update(T),
Clear,
Nothing,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RankingRule {
Typo,
Words,
Proximity,
Attribute,
WordsPosition,
Exactness,
Asc(String),
Desc(String),
}
// Any value that is present is considered Some value, including null.
fn deserialize_some<'de, T, D>(deserializer: D) -> StdResult<Option<T>, D::Error>
where
T: Deserialize<'de>,
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(Some)
}
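// Illustrative sketch (not part of the original file): how `deserialize_some`
// combined with `Option<Option<T>>` distinguishes a missing field (`None`)
// from an explicit `null` (`Some(None)`) and from a real value (`Some(Some(v))`).
#[cfg(test)]
mod test_deserialize_some {
    use super::Settings;
    #[test]
    fn missing_null_and_set_fields_are_distinguished() {
        // field absent entirely -> None
        let settings: Settings = serde_json::from_str("{}").unwrap();
        assert_eq!(settings.distinct_attribute, None);
        // field explicitly null -> Some(None), i.e. "reset this setting"
        let settings: Settings = serde_json::from_str(r#"{ "distinctAttribute": null }"#).unwrap();
        assert_eq!(settings.distinct_attribute, Some(None));
        // field set to a value -> Some(Some(value))
        let settings: Settings = serde_json::from_str(r#"{ "distinctAttribute": "sku" }"#).unwrap();
        assert_eq!(settings.distinct_attribute, Some(Some("sku".to_string())));
    }
}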

120
dump/src/reader/v1/update.rs Normal file

@ -0,0 +1,120 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
use time::OffsetDateTime;
use super::settings::SettingsUpdate;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {
data: UpdateData,
#[serde(with = "time::serde::rfc3339")]
enqueued_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
ClearAll,
Customs(Vec<u8>),
// (primary key, documents)
DocumentsAddition {
primary_key: Option<String>,
documents: Vec<serde_json::Map<String, Value>>,
},
DocumentsPartial {
primary_key: Option<String>,
documents: Vec<serde_json::Map<String, Value>>,
},
DocumentsDeletion(Vec<String>),
Settings(Box<SettingsUpdate>),
}
impl UpdateData {
pub fn update_type(&self) -> UpdateType {
match self {
UpdateData::ClearAll => UpdateType::ClearAll,
UpdateData::Customs(_) => UpdateType::Customs,
UpdateData::DocumentsAddition { documents, .. } => UpdateType::DocumentsAddition {
number: documents.len(),
},
UpdateData::DocumentsPartial { documents, .. } => UpdateType::DocumentsPartial {
number: documents.len(),
},
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
number: deletion.len(),
},
UpdateData::Settings(update) => UpdateType::Settings {
settings: update.clone(),
},
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")]
pub enum UpdateType {
ClearAll,
Customs,
DocumentsAddition { number: usize },
DocumentsPartial { number: usize },
DocumentsDeletion { number: usize },
Settings { settings: Box<SettingsUpdate> },
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error_type: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error_code: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error_link: Option<String>,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatus {
Enqueued {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Failed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
Processed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
}
impl UpdateStatus {
pub fn enqueued_at(&self) -> &OffsetDateTime {
match self {
UpdateStatus::Enqueued { content } => &content.enqueued_at,
UpdateStatus::Failed { content } | UpdateStatus::Processed { content } => {
&content.enqueued_at
}
}
}
}
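// Illustrative sketch (not part of the original file): the `status` field of
// each line in `updates.jsonl` selects the `UpdateStatus` variant, and the
// variant's payload is flattened alongside it.
#[cfg(test)]
mod test_update_status {
    use super::UpdateStatus;
    #[test]
    fn the_status_tag_selects_the_variant() {
        let line = r#"{
            "status": "enqueued",
            "updateId": 12,
            "type": { "name": "ClearAll" },
            "enqueuedAt": "2022-10-09T20:27:59Z"
        }"#;
        let update: UpdateStatus = serde_json::from_str(line).unwrap();
        assert!(matches!(update, UpdateStatus::Enqueued { .. }));
    }
}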

22
dump/src/reader/v1/v1.rs Normal file

@ -0,0 +1,22 @@
use serde::Deserialize;
use time::OffsetDateTime;
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Index {
pub name: String,
pub uid: String,
#[serde(with = "time::serde::rfc3339")]
created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
updated_at: OffsetDateTime,
pub primary_key: Option<String>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
indexes: Vec<Index>,
db_version: String,
dump_version: crate::Version,
}

14
dump/src/reader/v2/errors.rs Normal file

@ -0,0 +1,14 @@
use http::StatusCode;
use serde::Deserialize;
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct ResponseError {
#[serde(skip)]
pub code: StatusCode,
pub message: String,
pub error_code: String,
pub error_type: String,
pub error_link: String,
}

18
dump/src/reader/v2/meta.rs Normal file

@ -0,0 +1,18 @@
use serde::Deserialize;
use uuid::Uuid;
use super::Settings;
#[derive(Deserialize, Debug, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexUuid {
pub uid: String,
pub uuid: Uuid,
}
#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DumpMeta {
pub settings: Settings<super::Unchecked>,
pub primary_key: Option<String>,
}

308
dump/src/reader/v2/mod.rs Normal file

@ -0,0 +1,308 @@
//! ```text
//! .
//! ├── indexes
//! │   ├── index-40d14c5f-37ae-4873-9d51-b69e014a0d30
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── index-88202369-4524-4410-9b3d-3e924c867fec
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── index-b7f2d03b-bf9b-40d9-a25b-94dc5ec60c32
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   └── index-dc9070b3-572d-4f30-ab45-d4903ab71708
//! │   ├── documents.jsonl
//! │   └── meta.json
//! ├── index_uuids
//! │   └── data.jsonl
//! ├── metadata.json
//! └── updates
//! ├── data.jsonl
//! └── update_files
//! └── update_202573df-718b-4d80-9a65-2ee397c23dc3
//! ```
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::Path;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use time::OffsetDateTime;
pub mod errors;
pub mod meta;
pub mod settings;
pub mod updates;
use self::meta::{DumpMeta, IndexUuid};
use super::compat::v2_to_v3::CompatV2ToV3;
use super::Document;
use crate::{IndexMetadata, Result, Version};
pub type Settings<T> = settings::Settings<T>;
pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = updates::UpdateEntry;
// everything related to the errors
pub type ResponseError = errors::ResponseError;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
db_version: String,
index_db_size: usize,
update_db_size: usize,
#[serde(with = "time::serde::rfc3339")]
dump_date: OffsetDateTime,
}
pub struct V2Reader {
dump: TempDir,
metadata: Metadata,
tasks: BufReader<File>,
pub index_uuid: Vec<IndexUuid>,
}
impl V2Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
let index_uuid = BufReader::new(index_uuid);
let index_uuid = index_uuid
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
.collect::<Result<Vec<_>>>()?;
Ok(V2Reader {
metadata,
            tasks: BufReader::new(File::open(dump.path().join("updates").join("data.jsonl"))?),
index_uuid,
dump,
})
}
pub fn to_v3(self) -> CompatV2ToV3 {
CompatV2ToV3::new(self)
}
pub fn index_uuid(&self) -> Vec<IndexUuid> {
self.index_uuid.clone()
}
pub fn version(&self) -> Version {
Version::V2
}
pub fn date(&self) -> Option<OffsetDateTime> {
Some(self.metadata.dump_date)
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
Ok(V2IndexReader::new(
index.uid.clone(),
                &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
)?)
}))
}
pub fn tasks(&mut self) -> Box<dyn Iterator<Item = Result<(Task, Option<UpdateFile>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;
if !task.is_finished() {
if let Some(uuid) = task.get_content_uuid() {
let update_file_path = self
.dump
.path()
.join("updates")
.join("update_files")
.join(format!("update_{}", uuid.to_string()));
Ok((task, Some(UpdateFile::new(&update_file_path)?)))
} else {
Ok((task, None))
}
} else {
Ok((task, None))
}
}))
}
}
pub struct V2IndexReader {
metadata: IndexMetadata,
settings: Settings<Checked>,
documents: BufReader<File>,
}
impl V2IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let metadata = IndexMetadata {
uid: name,
primary_key: meta.primary_key,
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V2IndexReader {
metadata,
settings: meta.settings.check(),
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
};
Ok(ret)
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}
}
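// A v2 update file stores the documents of an enqueued update as a single
// JSON array, so it is fully deserialized into memory before iteration.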
pub struct UpdateFile {
documents: Vec<Document>,
index: usize,
}
impl UpdateFile {
fn new(path: &Path) -> Result<Self> {
let reader = BufReader::new(File::open(path)?);
Ok(UpdateFile { documents: serde_json::from_reader(reader)?, index: 0 })
}
}
impl Iterator for UpdateFile {
type Item = Result<Document>;
fn next(&mut self) -> Option<Self::Item> {
self.index += 1;
self.documents.get(self.index - 1).cloned().map(Ok)
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn read_dump_v2() {
let dump = File::open("tests/assets/v2.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = V2Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ec5fc0a14bf735ad4e361d5aa8a89ac6");
assert_eq!(update_files.len(), 9);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
let update_file = update_files.remove(0).unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
        // the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies2 = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"b4814eab5e73e2dcfc90aad50aa583d1");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"59dd69f590635a58f3d99edc9e1fa21f");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
// movies2
insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies_2",
"primaryKey": null,
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"ac041085004c43373fe90dc48f5c23ab");
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"ac041085004c43373fe90dc48f5c23ab");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

176
dump/src/reader/v2/settings.rs Normal file

@ -0,0 +1,176 @@
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::marker::PhantomData;
use std::str::FromStr;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Deserializer};
#[cfg(test)]
fn serialize_with_wildcard<S>(
field: &Option<Option<Vec<String>>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let wildcard = vec!["*".to_string()];
s.serialize_some(&field.as_ref().map(|o| o.as_ref().unwrap_or(&wildcard)))
}
fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result<Option<T>, D::Error>
where
T: Deserialize<'de>,
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(Some)
}
#[derive(Clone, Default, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Checked;
#[derive(Clone, Default, Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Unchecked;
#[derive(Debug, Clone, Default, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
#[serde(
default,
deserialize_with = "deserialize_some",
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Option::is_none"
)]
pub displayed_attributes: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Option::is_none"
)]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub filterable_attributes: Option<Option<HashSet<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub ranking_rules: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub stop_words: Option<Option<BTreeSet<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub distinct_attribute: Option<Option<String>>,
#[serde(skip)]
pub _kind: PhantomData<T>,
}
impl Settings<Unchecked> {
pub fn check(mut self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes.take() {
Some(Some(fields)) => {
if fields.iter().any(|f| f == "*") {
Some(None)
} else {
Some(Some(fields))
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes.take() {
Some(Some(fields)) => {
if fields.iter().any(|f| f == "*") {
Some(None)
} else {
Some(Some(fields))
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
_kind: PhantomData,
}
}
}
static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by increasing number of typos.
Typo,
/// Sorted by increasing distance between matched query terms.
Proximity,
    /// Documents with query words contained in more important
    /// attributes are considered better.
Attribute,
/// Sorted by the similarity of the matched words with the query words.
Exactness,
/// Sorted by the increasing value of the field specified.
Asc(String),
/// Sorted by the decreasing value of the field specified.
Desc(String),
}
impl FromStr for Criterion {
type Err = ();
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
match txt {
"words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo),
"proximity" => Ok(Criterion::Proximity),
"attribute" => Ok(Criterion::Attribute),
"exactness" => Ok(Criterion::Exactness),
text => {
let caps = ASC_DESC_REGEX.captures(text).ok_or(())?;
let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str();
match order {
"asc" => Ok(Criterion::Asc(field_name.to_string())),
"desc" => Ok(Criterion::Desc(field_name.to_string())),
_text => Err(()),
}
}
}
}
}
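// Illustrative sketch (not part of the original file): parsing the textual
// criteria stored in a v2 dump, including the `asc(field)`/`desc(field)`
// forms matched by `ASC_DESC_REGEX`.
#[cfg(test)]
mod test_criterion_from_str {
    use super::Criterion;
    #[test]
    fn parses_named_and_ordered_criteria() {
        assert_eq!("typo".parse::<Criterion>(), Ok(Criterion::Typo));
        assert_eq!("asc(price)".parse::<Criterion>(), Ok(Criterion::Asc("price".to_string())));
        assert_eq!("desc(rank)".parse::<Criterion>(), Ok(Criterion::Desc("rank".to_string())));
        assert!("gibberish".parse::<Criterion>().is_err());
    }
}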

230
dump/src/reader/v2/updates.rs Normal file

@ -0,0 +1,230 @@
use serde::Deserialize;
use time::OffsetDateTime;
use uuid::Uuid;
use super::{ResponseError, Settings, Unchecked};
#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct UpdateEntry {
pub uuid: Uuid,
pub update: UpdateStatus,
}
impl UpdateEntry {
pub fn is_finished(&self) -> bool {
match self.update {
UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false,
UpdateStatus::Processed(_) | UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) => true,
}
}
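    // Each status wraps the status it came from (`Processed` -> `Processing`
    // -> `Enqueued`), so the content uuid always lives on the innermost
    // `Enqueued` record.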
pub fn get_content_uuid(&self) -> Option<&Uuid> {
match &self.update {
UpdateStatus::Enqueued(enqueued) => enqueued.content.as_ref(),
UpdateStatus::Processing(processing) => processing.from.content.as_ref(),
UpdateStatus::Processed(processed) => processed.from.from.content.as_ref(),
UpdateStatus::Aborted(aborted) => aborted.from.content.as_ref(),
UpdateStatus::Failed(failed) => failed.from.from.content.as_ref(),
}
}
}
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DocumentAdditionResult {
pub nb_documents: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[non_exhaustive]
pub enum IndexDocumentsMethod {
/// Replace the previous document with the new one,
/// removing all the already known attributes.
ReplaceDocuments,
/// Merge the previous version of the document with the new version,
/// replacing old attributes values with the new ones and add the new attributes.
UpdateDocuments,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[non_exhaustive]
pub enum UpdateFormat {
    /// The given update is a real **comma separated** CSV with headers on the first line.
Csv,
/// The given update is a JSON array with documents inside.
Json,
/// The given update is a JSON stream with a document on each line.
JsonStream,
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments {
ids: Vec<String>,
},
Settings(Settings<Unchecked>),
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: UpdateMeta,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
pub content: Option<Uuid>,
}
impl Enqueued {
pub fn meta(&self) -> &UpdateMeta {
&self.meta
}
pub fn id(&self) -> u64 {
self.update_id
}
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: UpdateResult,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
#[serde(flatten)]
pub from: Processing,
}
impl Processed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
self.from.meta()
}
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub started_processing_at: OffsetDateTime,
}
impl Processing {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
self.from.meta()
}
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub aborted_at: OffsetDateTime,
}
impl Aborted {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
self.from.meta()
}
}
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub error: ResponseError,
#[serde(with = "time::serde::rfc3339")]
pub failed_at: OffsetDateTime,
}
impl Failed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
self.from.meta()
}
}
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Aborted(Aborted),
Failed(Failed),
}
impl UpdateStatus {
pub fn id(&self) -> u64 {
match self {
UpdateStatus::Processing(u) => u.id(),
UpdateStatus::Enqueued(u) => u.id(),
UpdateStatus::Processed(u) => u.id(),
UpdateStatus::Aborted(u) => u.id(),
UpdateStatus::Failed(u) => u.id(),
}
}
pub fn meta(&self) -> &UpdateMeta {
match self {
UpdateStatus::Processing(u) => u.meta(),
UpdateStatus::Enqueued(u) => u.meta(),
UpdateStatus::Processed(u) => u.meta(),
UpdateStatus::Aborted(u) => u.meta(),
UpdateStatus::Failed(u) => u.meta(),
}
}
pub fn processed(&self) -> Option<&Processed> {
match self {
UpdateStatus::Processed(p) => Some(p),
_ => None,
}
}
}

51
dump/src/reader/v3/errors.rs Normal file

@ -0,0 +1,51 @@
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error
CreateIndex,
IndexAlreadyExists,
IndexNotFound,
InvalidIndexUid,
// invalid state error
InvalidState,
MissingPrimaryKey,
PrimaryKeyAlreadyPresent,
MaxFieldsLimitExceeded,
MissingDocumentId,
InvalidDocumentId,
Filter,
Sort,
BadParameter,
BadRequest,
DatabaseSizeLimitReached,
DocumentNotFound,
Internal,
InvalidGeoField,
InvalidRankingRule,
InvalidStore,
InvalidToken,
MissingAuthorizationHeader,
NoSpaceLeftOnDevice,
DumpNotFound,
TaskNotFound,
PayloadTooLarge,
RetrieveDocument,
SearchDocuments,
UnsupportedMediaType,
DumpAlreadyInProgress,
DumpProcessFailed,
InvalidContentType,
MissingContentType,
MalformedPayload,
MissingPayload,
MalformedDump,
UnretrievableErrorCode,
}

18
dump/src/reader/v3/meta.rs Normal file

@ -0,0 +1,18 @@
use serde::Deserialize;
use uuid::Uuid;
use super::Settings;
#[derive(Deserialize, Debug, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexUuid {
pub uid: String,
pub uuid: Uuid,
}
#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DumpMeta {
pub settings: Settings<super::Unchecked>,
pub primary_key: Option<String>,
}

324
dump/src/reader/v3/mod.rs Normal file

@ -0,0 +1,324 @@
//! ```text
//! .
//! ├── indexes
//! │   ├── 01d7dd17-8241-4f1f-a7d1-2d1cb255f5b0
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── 78be64a3-cae1-449e-b7ed-13e77c9a8a0c
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── ba553439-18fe-4733-ba53-44eed898280c
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   └── c408bc22-5859-49d1-8e9f-c88e2fa95cb0
//! │   ├── documents.jsonl
//! │   └── meta.json
//! ├── index_uuids
//! │   └── data.jsonl
//! ├── metadata.json
//! └── updates
//! ├── data.jsonl
//! └── updates_files
//! └── 66d3f12d-fcf3-4b53-88cb-407017373de7
//! ```
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::Path;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use time::OffsetDateTime;
pub mod errors;
pub mod meta;
pub mod settings;
pub mod updates;
use self::meta::{DumpMeta, IndexUuid};
use super::compat::v3_to_v4::CompatV3ToV4;
use super::Document;
use crate::{Error, IndexMetadata, Result, Version};
pub type Settings<T> = settings::Settings<T>;
pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = updates::UpdateEntry;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
pub type Status = updates::UpdateStatus;
pub type Kind = updates::Update;
// everything related to the settings
pub type Setting<T> = settings::Setting<T>;
// everything related to the errors
pub type Code = errors::Code;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
db_version: String,
index_db_size: usize,
update_db_size: usize,
#[serde(with = "time::serde::rfc3339")]
dump_date: OffsetDateTime,
}
pub struct V3Reader {
dump: TempDir,
metadata: Metadata,
tasks: BufReader<File>,
index_uuid: Vec<IndexUuid>,
}
impl V3Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
let index_uuid = BufReader::new(index_uuid);
let index_uuid = index_uuid
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
.collect::<Result<Vec<_>>>()?;
Ok(V3Reader {
metadata,
tasks: BufReader::new(File::open(dump.path().join("updates").join("data.jsonl"))?),
index_uuid,
dump,
})
}
pub fn index_uuid(&self) -> Vec<IndexUuid> {
self.index_uuid.clone()
}
pub fn to_v4(self) -> CompatV3ToV4 {
CompatV3ToV4::new(self)
}
pub fn version(&self) -> Version {
Version::V3
}
pub fn date(&self) -> Option<OffsetDateTime> {
Some(self.metadata.dump_date)
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V3IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
Ok(V3IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(index.uuid.to_string()),
)?)
}))
}
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;
if !task.is_finished() {
if let Some(uuid) = task.get_content_uuid() {
let update_file_path = self
.dump
.path()
.join("updates")
.join("updates_files")
.join(uuid.to_string());
Ok((
task,
Some(
Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>
),
))
} else {
Ok((task, None))
}
} else {
Ok((task, None))
}
}))
}
}
pub struct V3IndexReader {
metadata: IndexMetadata,
settings: Settings<Checked>,
documents: BufReader<File>,
}
impl V3IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let metadata = IndexMetadata {
uid: name,
primary_key: meta.primary_key,
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V3IndexReader {
metadata,
settings: meta.settings.check(),
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
};
Ok(ret)
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}
}
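// Unlike the v2 format, a v3 update file is newline-delimited JSON, so the
// documents are streamed line by line instead of being buffered in full.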
pub struct UpdateFile {
reader: BufReader<File>,
}
impl UpdateFile {
fn new(path: &Path) -> Result<Self> {
Ok(UpdateFile { reader: BufReader::new(File::open(path)?) })
}
}
impl Iterator for UpdateFile {
type Item = Result<Document>;
fn next(&mut self) -> Option<Self::Item> {
(&mut self.reader)
.lines()
.map(|line| {
line.map_err(Error::from)
.and_then(|line| serde_json::from_str(&line).map_err(Error::from))
})
.next()
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn read_dump_v3() {
let dump = File::open("tests/assets/v3.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = V3Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"63086d59c3f2074e4ab3fff7e8cc36c1");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
let update_file = update_files.remove(0).unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
        // the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies2 = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"7460d4b242b5c8b1bda223f63bbbf349");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"d83ab8e79bb44595667d6ce3e6629a4f");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
// movies2
insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies_2",
"primaryKey": null,
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"44d3b5a3b3aa6cd950373ff751d05bb7");
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"44d3b5a3b3aa6cd950373ff751d05bb7");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

233
dump/src/reader/v3/settings.rs Normal file

@ -0,0 +1,233 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use serde::{Deserialize, Deserializer};
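// Only compiled for tests: snapshot serialization renders `Set` values as-is,
// `Reset` as the `["*"]` wildcard, and `NotSet` as `null`.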
#[cfg(test)]
fn serialize_with_wildcard<S>(
field: &Setting<Vec<String>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
use serde::Serialize;
let wildcard = vec!["*".to_string()];
match field {
Setting::Set(value) => Some(value),
Setting::Reset => Some(&wildcard),
Setting::NotSet => None,
}
.serialize(s)
}
#[derive(Clone, Default, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Checked;
#[derive(Clone, Default, Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Unchecked;
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter
/// case, a call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
pub displayed_attributes: Setting<Vec<String>>,
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub ranking_rules: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub distinct_attribute: Setting<String>,
#[serde(skip)]
pub _kind: PhantomData<T>,
}
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
_kind: PhantomData,
}
}
pub fn into_unchecked(self) -> Settings<Unchecked> {
let Self {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
..
} = self;
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
_kind: PhantomData,
}
}
}
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
_kind: PhantomData,
}
}
}
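// Illustrative sketch (not part of the original file): `check` normalizes the
// `"*"` wildcard into `Setting::Reset` for the displayed and searchable
// attributes, and leaves every other field untouched.
#[cfg(test)]
mod test_check_wildcard {
    use super::{Setting, Settings, Unchecked};
    #[test]
    fn wildcard_becomes_reset() {
        let settings = Settings::<Unchecked> {
            displayed_attributes: Setting::Set(vec!["*".to_string()]),
            searchable_attributes: Setting::Set(vec!["name".to_string()]),
            ..Settings::default()
        };
        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Set(vec!["name".to_string()]));
    }
}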
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
pub level_group_size: Option<NonZeroUsize>,
pub min_level_size: Option<NonZeroUsize>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Setting<T> {
Set(T),
Reset,
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn map<U, F>(self, f: F) -> Setting<U>
where
F: FnOnce(T) -> U,
{
match self {
Setting::Set(t) => Setting::Set(f(t)),
Setting::Reset => Setting::Reset,
Setting::NotSet => Setting::NotSet,
}
}
pub fn set(self) -> Option<T> {
match self {
Self::Set(value) => Some(value),
_ => None,
}
}
pub const fn as_ref(&self) -> Setting<&T> {
match *self {
Self::Set(ref value) => Setting::Set(value),
Self::Reset => Setting::Reset,
Self::NotSet => Setting::NotSet,
}
}
pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet)
}
}
#[cfg(test)]
impl<T: serde::Serialize> serde::Serialize for Setting<T> {
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
Self::Set(value) => Some(value),
            // `NotSet` is usually not serialized at all, via the `skip_serializing_if` field attribute
Self::NotSet | Self::Reset => None,
}
.serialize(serializer)
}
}
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(|x| match x {
Some(x) => Self::Set(x),
            None => Self::Reset, // `Reset` is forced by sending a null value
})
}
}
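// Illustrative sketch (not part of the original file): how the custom
// `Deserialize` impl distinguishes the three states. A missing field falls
// back to `#[serde(default)]` (`NotSet`), an explicit `null` becomes `Reset`,
// and any other value becomes `Set`.
#[cfg(test)]
mod test_setting_deserialize {
    use super::{Setting, Settings, Unchecked};
    #[test]
    fn null_resets_and_missing_is_not_set() {
        let settings: Settings<Unchecked> =
            serde_json::from_str(r#"{ "distinctAttribute": null }"#).unwrap();
        assert_eq!(settings.distinct_attribute, Setting::Reset);
        assert!(settings.ranking_rules.is_not_set());
        let settings: Settings<Unchecked> =
            serde_json::from_str(r#"{ "distinctAttribute": "uid" }"#).unwrap();
        assert_eq!(settings.distinct_attribute, Setting::Set("uid".to_string()));
    }
}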

227
dump/src/reader/v3/updates.rs Normal file

@ -0,0 +1,227 @@
use std::fmt::Display;
use serde::Deserialize;
use time::OffsetDateTime;
use uuid::Uuid;
use super::{Code, Settings, Unchecked};
#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct UpdateEntry {
pub uuid: Uuid,
pub update: UpdateStatus,
}
impl UpdateEntry {
pub fn is_finished(&self) -> bool {
match self.update {
UpdateStatus::Processed(_) | UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) => true,
UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false,
}
}
pub fn get_content_uuid(&self) -> Option<&Uuid> {
match self.update.meta() {
Update::DocumentAddition { content_uuid, .. } => Some(content_uuid),
Update::DeleteDocuments(_) | Update::Settings(_) | Update::ClearDocuments => None,
}
}
}
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Aborted(Aborted),
Failed(Failed),
}
impl UpdateStatus {
pub fn id(&self) -> u64 {
match self {
UpdateStatus::Processing(u) => u.id(),
UpdateStatus::Enqueued(u) => u.id(),
UpdateStatus::Processed(u) => u.id(),
UpdateStatus::Aborted(u) => u.id(),
UpdateStatus::Failed(u) => u.id(),
}
}
pub fn meta(&self) -> &Update {
match self {
UpdateStatus::Processing(u) => u.meta(),
UpdateStatus::Enqueued(u) => u.meta(),
UpdateStatus::Processed(u) => u.meta(),
UpdateStatus::Aborted(u) => u.meta(),
UpdateStatus::Failed(u) => u.meta(),
}
}
pub fn is_finished(&self) -> bool {
match self {
UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false,
UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) | UpdateStatus::Processed(_) => true,
}
}
pub fn processed(&self) -> Option<&Processed> {
match self {
UpdateStatus::Processed(p) => Some(p),
_ => None,
}
}
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: Update,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
}
impl Enqueued {
pub fn meta(&self) -> &Update {
&self.meta
}
pub fn id(&self) -> u64 {
self.update_id
}
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: UpdateResult,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
#[serde(flatten)]
pub from: Processing,
}
impl Processed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub started_processing_at: OffsetDateTime,
}
impl Processing {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub aborted_at: OffsetDateTime,
}
impl Aborted {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub msg: String,
pub code: Code,
#[serde(with = "time::serde::rfc3339")]
pub failed_at: OffsetDateTime,
}
impl Display for Failed {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.msg.fmt(f)
}
}
impl Failed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Update {
DeleteDocuments(Vec<String>),
DocumentAddition {
primary_key: Option<String>,
method: IndexDocumentsMethod,
content_uuid: Uuid,
},
Settings(Settings<Unchecked>),
ClearDocuments,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[non_exhaustive]
pub enum IndexDocumentsMethod {
/// Replace the previous document with the new one,
/// removing all the already known attributes.
ReplaceDocuments,
/// Merge the previous version of the document with the new version,
/// replacing old attributes values with the new ones and add the new attributes.
UpdateDocuments,
}
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[derive(Debug, Deserialize, Clone)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DocumentAdditionResult {
pub nb_documents: usize,
}

311
dump/src/reader/v4/errors.rs Normal file

@ -0,0 +1,311 @@
use std::fmt;
use http::StatusCode;
use serde::{Deserialize, Serialize};
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))]
pub struct ResponseError {
#[serde(skip)]
#[cfg_attr(feature = "test-traits", proptest(strategy = "strategy::status_code_strategy()"))]
pub code: StatusCode,
pub message: String,
#[serde(rename = "code")]
pub error_code: String,
#[serde(rename = "type")]
pub error_type: String,
#[serde(rename = "link")]
pub error_link: String,
}
impl ResponseError {
pub fn from_msg(message: String, code: Code) -> Self {
Self {
code: code.http(),
message,
error_code: code.err_code().error_name.to_string(),
error_type: code.type_(),
error_link: code.url(),
}
}
}
impl fmt::Display for ResponseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.message.fmt(f)
}
}
impl std::error::Error for ResponseError {}
impl<T> From<T> for ResponseError
where
T: ErrorCode,
{
fn from(other: T) -> Self {
Self {
code: other.http_status(),
message: other.to_string(),
error_code: other.error_name(),
error_type: other.error_type(),
error_link: other.error_url(),
}
}
}
pub trait ErrorCode: std::error::Error {
    fn error_code(&self) -> Code;
    /// Returns the HTTP status code associated with the error.
    fn http_status(&self) -> StatusCode {
        self.error_code().http()
    }
    /// Returns the doc url associated with the error.
    fn error_url(&self) -> String {
        self.error_code().url()
    }
    /// Returns the error name, used as the error code.
    fn error_name(&self) -> String {
        self.error_code().name()
    }
    /// Returns the error type.
    fn error_type(&self) -> String {
        self.error_code().type_()
    }
}
#[allow(clippy::enum_variant_names)]
enum ErrorType {
InternalError,
InvalidRequestError,
AuthenticationError,
}
impl fmt::Display for ErrorType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use ErrorType::*;
match self {
InternalError => write!(f, "internal"),
InvalidRequestError => write!(f, "invalid_request"),
AuthenticationError => write!(f, "auth"),
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error
CreateIndex,
IndexAlreadyExists,
IndexNotFound,
InvalidIndexUid,
InvalidMinWordLengthForTypo,
// invalid state error
InvalidState,
MissingPrimaryKey,
PrimaryKeyAlreadyPresent,
MaxFieldsLimitExceeded,
MissingDocumentId,
InvalidDocumentId,
Filter,
Sort,
BadParameter,
BadRequest,
DatabaseSizeLimitReached,
DocumentNotFound,
Internal,
InvalidGeoField,
InvalidRankingRule,
InvalidStore,
InvalidToken,
MissingAuthorizationHeader,
NoSpaceLeftOnDevice,
DumpNotFound,
TaskNotFound,
PayloadTooLarge,
RetrieveDocument,
SearchDocuments,
UnsupportedMediaType,
DumpAlreadyInProgress,
DumpProcessFailed,
InvalidContentType,
MissingContentType,
MalformedPayload,
MissingPayload,
ApiKeyNotFound,
MissingParameter,
InvalidApiKeyActions,
InvalidApiKeyIndexes,
InvalidApiKeyExpiresAt,
InvalidApiKeyDescription,
UnretrievableErrorCode,
MalformedDump,
}
impl Code {
    /// Associates a `Code` variant with its actual `ErrCode`.
fn err_code(&self) -> ErrCode {
use Code::*;
match self {
// index related errors
            // `CreateIndex` is thrown on an internal error while creating an index.
CreateIndex => {
ErrCode::internal("index_creation_failed", StatusCode::INTERNAL_SERVER_ERROR)
}
IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::CONFLICT),
            // thrown when requesting a nonexistent index
IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND),
InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST),
// invalid state error
InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR),
// thrown when no primary key has been set
MissingPrimaryKey => {
ErrCode::invalid("primary_key_inference_failed", StatusCode::BAD_REQUEST)
}
// error thrown when trying to set an already existing primary key
PrimaryKeyAlreadyPresent => {
ErrCode::invalid("index_primary_key_already_exists", StatusCode::BAD_REQUEST)
}
// invalid ranking rule
InvalidRankingRule => ErrCode::invalid("invalid_ranking_rule", StatusCode::BAD_REQUEST),
// invalid database
InvalidStore => {
ErrCode::internal("invalid_store_file", StatusCode::INTERNAL_SERVER_ERROR)
}
// invalid document
MaxFieldsLimitExceeded => {
ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST)
}
MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST),
InvalidDocumentId => ErrCode::invalid("invalid_document_id", StatusCode::BAD_REQUEST),
// error related to filters
Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST),
// error related to sorts
Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST),
BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST),
BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),
DatabaseSizeLimitReached => {
ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR)
}
DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND),
Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR),
InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST),
InvalidToken => ErrCode::authentication("invalid_api_key", StatusCode::FORBIDDEN),
MissingAuthorizationHeader => {
ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED)
}
TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND),
DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND),
NoSpaceLeftOnDevice => {
ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR)
}
PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
RetrieveDocument => {
ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST)
}
SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST),
UnsupportedMediaType => {
ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
// error related to dump
DumpAlreadyInProgress => {
ErrCode::invalid("dump_already_processing", StatusCode::CONFLICT)
}
DumpProcessFailed => {
ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR)
}
MissingContentType => {
ErrCode::invalid("missing_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
MalformedPayload => ErrCode::invalid("malformed_payload", StatusCode::BAD_REQUEST),
InvalidContentType => {
ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST),
// error related to keys
ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND),
MissingParameter => ErrCode::invalid("missing_parameter", StatusCode::BAD_REQUEST),
InvalidApiKeyActions => {
ErrCode::invalid("invalid_api_key_actions", StatusCode::BAD_REQUEST)
}
InvalidApiKeyIndexes => {
ErrCode::invalid("invalid_api_key_indexes", StatusCode::BAD_REQUEST)
}
InvalidApiKeyExpiresAt => {
ErrCode::invalid("invalid_api_key_expires_at", StatusCode::BAD_REQUEST)
}
InvalidApiKeyDescription => {
ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST)
}
InvalidMinWordLengthForTypo => {
ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST)
}
UnretrievableErrorCode => {
ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST)
}
MalformedDump => ErrCode::invalid("malformed_dump", StatusCode::BAD_REQUEST),
}
}
    /// Returns the HTTP status code associated with the `Code`.
    fn http(&self) -> StatusCode {
        self.err_code().status_code
    }
    /// Returns the error name, used as the error code.
    fn name(&self) -> String {
        self.err_code().error_name.to_string()
    }
    /// Returns the error type.
    fn type_(&self) -> String {
        self.err_code().error_type.to_string()
    }
    /// Returns the doc url associated with the error.
    fn url(&self) -> String {
        format!("https://docs.meilisearch.com/errors#{}", self.name())
    }
}
/// Internal structure providing a convenient way to create error codes
struct ErrCode {
status_code: StatusCode,
error_type: ErrorType,
error_name: &'static str,
}
impl ErrCode {
fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode {
ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError }
}
fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode {
ErrCode { status_code, error_name, error_type: ErrorType::InternalError }
}
fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode {
ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError }
}
}
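// Illustrative sketch (not part of the original file): how a single `Code`
// variant maps to the pieces exposed on the wire.
#[cfg(test)]
mod test_code_mapping {
    use super::Code;
    #[test]
    fn index_not_found_maps_to_invalid_request() {
        let code = Code::IndexNotFound;
        assert_eq!(code.http(), http::StatusCode::NOT_FOUND);
        assert_eq!(code.name(), "index_not_found");
        assert_eq!(code.type_(), "invalid_request");
        assert_eq!(code.url(), "https://docs.meilisearch.com/errors#index_not_found");
    }
}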

77
dump/src/reader/v4/keys.rs Normal file

@ -0,0 +1,77 @@
use serde::Deserialize;
use time::OffsetDateTime;
pub const KEY_ID_LENGTH: usize = 8;
pub type KeyId = [u8; KEY_ID_LENGTH];
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Key {
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
pub id: KeyId,
pub actions: Vec<Action>,
pub indexes: Vec<String>,
#[serde(with = "time::serde::rfc3339::option")]
pub expires_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
}
#[derive(Copy, Clone, Deserialize, Debug, Eq, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[repr(u8)]
pub enum Action {
#[serde(rename = "*")]
All = 0,
#[serde(rename = "search")]
Search = actions::SEARCH,
#[serde(rename = "documents.add")]
DocumentsAdd = actions::DOCUMENTS_ADD,
#[serde(rename = "documents.get")]
DocumentsGet = actions::DOCUMENTS_GET,
#[serde(rename = "documents.delete")]
DocumentsDelete = actions::DOCUMENTS_DELETE,
#[serde(rename = "indexes.create")]
IndexesAdd = actions::INDEXES_CREATE,
#[serde(rename = "indexes.get")]
IndexesGet = actions::INDEXES_GET,
#[serde(rename = "indexes.update")]
IndexesUpdate = actions::INDEXES_UPDATE,
#[serde(rename = "indexes.delete")]
IndexesDelete = actions::INDEXES_DELETE,
#[serde(rename = "tasks.get")]
TasksGet = actions::TASKS_GET,
#[serde(rename = "settings.get")]
SettingsGet = actions::SETTINGS_GET,
#[serde(rename = "settings.update")]
SettingsUpdate = actions::SETTINGS_UPDATE,
#[serde(rename = "stats.get")]
StatsGet = actions::STATS_GET,
#[serde(rename = "dumps.create")]
DumpsCreate = actions::DUMPS_CREATE,
#[serde(rename = "dumps.get")]
DumpsGet = actions::DUMPS_GET,
#[serde(rename = "version")]
Version = actions::VERSION,
}
pub mod actions {
pub const SEARCH: u8 = 1;
pub const DOCUMENTS_ADD: u8 = 2;
pub const DOCUMENTS_GET: u8 = 3;
pub const DOCUMENTS_DELETE: u8 = 4;
pub const INDEXES_CREATE: u8 = 5;
pub const INDEXES_GET: u8 = 6;
pub const INDEXES_UPDATE: u8 = 7;
pub const INDEXES_DELETE: u8 = 8;
pub const TASKS_GET: u8 = 9;
pub const SETTINGS_GET: u8 = 10;
pub const SETTINGS_UPDATE: u8 = 11;
pub const STATS_GET: u8 = 12;
pub const DUMPS_CREATE: u8 = 13;
pub const DUMPS_GET: u8 = 14;
pub const VERSION: u8 = 15;
}
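// A minimal sketch, assuming `serde_json` (used elsewhere in this crate), of
// how the `repr(u8)` discriminants line up with the `actions` constants and
// how the serde renames drive deserialization.
#[cfg(test)]
#[test]
fn action_discriminants_sketch() {
    // Each variant's discriminant is the matching constant from `actions`.
    assert_eq!(Action::Search as u8, actions::SEARCH);
    assert_eq!(Action::DumpsGet as u8, actions::DUMPS_GET);
    // On the wire, variants use their serde rename, e.g. "documents.add".
    let action: Action = serde_json::from_str(r#""documents.add""#).unwrap();
    assert_eq!(action, Action::DocumentsAdd);
}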

139
dump/src/reader/v4/meta.rs Normal file
View File

@ -0,0 +1,139 @@
use std::fmt::{self, Display, Formatter};
use std::marker::PhantomData;
use std::str::FromStr;
use serde::de::Visitor;
use serde::{Deserialize, Deserializer};
use uuid::Uuid;
use super::settings::{Settings, Unchecked};
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexUuid {
pub uid: String,
pub index_meta: IndexMeta,
}
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexMeta {
pub uuid: Uuid,
pub creation_task_id: usize,
}
// There is one of these in each index, under `meta.json`.
#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DumpMeta {
pub settings: Settings<Unchecked>,
pub primary_key: Option<String>,
}
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexUid(pub String);
impl TryFrom<String> for IndexUid {
type Error = IndexUidFormatError;
fn try_from(uid: String) -> Result<Self, Self::Error> {
if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
|| uid.is_empty()
|| uid.len() > 400
{
Err(IndexUidFormatError { invalid_uid: uid })
} else {
Ok(IndexUid(uid))
}
}
}
impl FromStr for IndexUid {
type Err = IndexUidFormatError;
fn from_str(uid: &str) -> Result<IndexUid, IndexUidFormatError> {
uid.to_string().try_into()
}
}
impl From<IndexUid> for String {
fn from(uid: IndexUid) -> Self {
uid.into_inner()
}
}
#[derive(Debug)]
pub struct IndexUidFormatError {
pub invalid_uid: String,
}
impl Display for IndexUidFormatError {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
f,
"invalid index uid `{}`, the uid must be an integer \
or a string containing only alphanumeric characters \
a-z A-Z 0-9, hyphens - and underscores _.",
self.invalid_uid,
)
}
}
impl std::error::Error for IndexUidFormatError {}
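// A minimal sketch of the validation above: ASCII alphanumerics, `-` and `_`
// are accepted; anything else (or an empty or overlong uid) is rejected and
// the faulty uid is handed back inside the error.
#[cfg(test)]
#[test]
fn index_uid_validation_sketch() {
    assert!(IndexUid::try_from("movies_2022".to_string()).is_ok());
    let err = IndexUid::try_from("my index".to_string()).unwrap_err();
    assert_eq!(err.invalid_uid, "my index");
}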
/// A type that tries to match either a star (*) or
/// anything else that implements `FromStr`.
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum StarOr<T> {
Star,
Other(T),
}
impl<'de, T, E> Deserialize<'de> for StarOr<T>
where
T: FromStr<Err = E>,
E: Display,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
/// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag.
/// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to
/// deserialize everything as a `StarOr::Other`, including "*".
/// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is
/// not supported on untagged enums.
struct StarOrVisitor<T>(PhantomData<T>);
impl<'de, T, FE> Visitor<'de> for StarOrVisitor<T>
where
T: FromStr<Err = FE>,
FE: Display,
{
type Value = StarOr<T>;
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
formatter.write_str("a string")
}
fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
where
SE: serde::de::Error,
{
match v {
"*" => Ok(StarOr::Star),
v => {
let other = FromStr::from_str(v).map_err(|e: T::Err| {
SE::custom(format!("Invalid `other` value: {}", e))
})?;
Ok(StarOr::Other(other))
}
}
}
}
deserializer.deserialize_str(StarOrVisitor(PhantomData))
}
}
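// A minimal sketch of the visitor's behaviour, assuming `serde_json` input as
// used by the dump reader: a bare "*" becomes `Star`, anything else goes
// through `T::from_str`.
#[cfg(test)]
#[test]
fn star_or_sketch() {
    let star: StarOr<IndexUid> = serde_json::from_str(r#""*""#).unwrap();
    assert!(matches!(star, StarOr::Star));
    let other: StarOr<IndexUid> = serde_json::from_str(r#""movies""#).unwrap();
    assert!(matches!(other, StarOr::Other(IndexUid(ref uid)) if uid == "movies"));
}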

302
dump/src/reader/v4/mod.rs Normal file
View File

@ -0,0 +1,302 @@
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::Path;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
pub mod errors;
pub mod keys;
pub mod meta;
pub mod settings;
pub mod tasks;
use self::meta::{DumpMeta, IndexUuid};
use super::compat::v4_to_v5::CompatV4ToV5;
use crate::{Error, IndexMetadata, Result, Version};
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type Settings<T> = settings::Settings<T>;
pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = tasks::Task;
pub type Key = keys::Key;
// everything related to the settings
pub type Setting<T> = settings::Setting<T>;
// everything related to the api keys
pub type Action = keys::Action;
// everything related to the errors
pub type ResponseError = errors::ResponseError;
pub type Code = errors::Code;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
db_version: String,
index_db_size: usize,
update_db_size: usize,
#[serde(with = "time::serde::rfc3339")]
dump_date: OffsetDateTime,
}
pub struct V4Reader {
dump: TempDir,
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
index_uuid: Vec<IndexUuid>,
}
impl V4Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
let index_uuid = BufReader::new(index_uuid);
let index_uuid = index_uuid
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
.collect::<Result<Vec<_>>>()?;
Ok(V4Reader {
metadata,
tasks: BufReader::new(
File::open(dump.path().join("updates").join("data.jsonl")).unwrap(),
),
keys: BufReader::new(File::open(dump.path().join("keys"))?),
index_uuid,
dump,
})
}
pub fn to_v5(self) -> CompatV4ToV5 {
CompatV4ToV5::new(self)
}
pub fn version(&self) -> Version {
Version::V4
}
pub fn date(&self) -> Option<OffsetDateTime> {
Some(self.metadata.dump_date)
}
pub fn instance_uid(&self) -> Result<Option<Uuid>> {
let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
Ok(Some(Uuid::parse_str(&uuid)?))
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V4IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
Ok(V4IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()),
)?)
}))
}
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;
if !task.is_finished() {
if let Some(uuid) = task.get_content_uuid() {
let update_file_path = self
.dump
.path()
.join("updates")
.join("updates_files")
.join(uuid.to_string());
Ok((
task,
Some(
Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>
),
))
} else {
Ok((task, None))
}
} else {
Ok((task, None))
}
}))
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
Box::new(
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
)
}
}
pub struct V4IndexReader {
metadata: IndexMetadata,
settings: Settings<Checked>,
documents: BufReader<File>,
}
impl V4IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let metadata = IndexMetadata {
uid: name,
primary_key: meta.primary_key,
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V4IndexReader {
metadata,
settings: meta.settings.check(),
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
};
Ok(ret)
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}
}
pub struct UpdateFile {
reader: BufReader<File>,
}
impl UpdateFile {
fn new(path: &Path) -> Result<Self> {
Ok(UpdateFile { reader: BufReader::new(File::open(path)?) })
}
}
impl Iterator for UpdateFile {
type Item = Result<Document>;
fn next(&mut self) -> Option<Self::Item> {
(&mut self.reader)
.lines()
.map(|line| {
line.map_err(Error::from)
.and_then(|line| serde_json::from_str(&line).map_err(Error::from))
})
.next()
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn read_dump_v4() {
let dump = File::open("tests/assets/v4.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = V4Reader::open(dir).unwrap();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"f4efacbea0c1a4400873f4b2ee33f975");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
let update_file = update_files.remove(0).unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");
// keys
let keys = dump.keys().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"9240300dca8f962cdf58359ef4c76f09");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"ace6546a6eb856ecb770b2409975c01d");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4dfa34fa34f2c03259482e1e4555faa8");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"1aa241a5e3afd8c85a4e7b9db42362d7");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

View File

@ -0,0 +1,261 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use serde::{Deserialize, Deserializer};
#[cfg(test)]
fn serialize_with_wildcard<S>(
field: &Setting<Vec<String>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
use serde::Serialize;
let wildcard = vec!["*".to_string()];
match field {
Setting::Set(value) => Some(value),
Setting::Reset => Some(&wildcard),
Setting::NotSet => None,
}
.serialize(s)
}
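// A minimal sketch, assuming `serde_json`, of what this serializer produces:
// `Reset` is written out as the `["*"]` wildcard, `Set` keeps its value, and
// `NotSet` is normally skipped through `skip_serializing_if` on the field.
#[cfg(test)]
#[test]
fn serialize_with_wildcard_sketch() {
    #[derive(serde::Serialize)]
    struct Probe {
        #[serde(serialize_with = "serialize_with_wildcard")]
        attrs: Setting<Vec<String>>,
    }
    let json = serde_json::to_string(&Probe { attrs: Setting::Reset }).unwrap();
    assert_eq!(json, r#"{"attrs":["*"]}"#);
}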
#[derive(Clone, Default, Debug, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Checked;
#[derive(Clone, Default, Debug, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Unchecked;
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct MinWordSizeTyposSetting {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub one_typo: Setting<u8>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub two_typos: Setting<u8>,
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct TypoSettings {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub enabled: Setting<bool>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they still need to be validated. In the latter
/// case, a call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
pub displayed_attributes: Setting<Vec<String>>,
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub ranking_rules: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(skip)]
pub _kind: PhantomData<T>,
}
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
typo_tolerance: Setting::Reset,
_kind: PhantomData,
}
}
pub fn into_unchecked(self) -> Settings<Unchecked> {
let Self {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
..
} = self;
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
_kind: PhantomData,
}
}
}
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
typo_tolerance: self.typo_tolerance,
_kind: PhantomData,
}
}
}
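// A minimal sketch of what `check` normalizes: a `"*"` wildcard in the
// displayed or searchable attributes is turned into `Reset`; everything else
// passes through unchanged.
#[cfg(test)]
#[test]
fn check_wildcard_sketch() {
    let unchecked = Settings::<Unchecked> {
        displayed_attributes: Setting::Set(vec!["*".to_string()]),
        ..Default::default()
    };
    let checked = unchecked.check();
    assert!(matches!(checked.displayed_attributes, Setting::Reset));
}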
#[derive(Debug, Clone, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
pub level_group_size: Option<NonZeroUsize>,
pub min_level_size: Option<NonZeroUsize>,
}
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum Setting<T> {
Set(T),
Reset,
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn set(self) -> Option<T> {
match self {
Self::Set(value) => Some(value),
_ => None,
}
}
pub const fn as_ref(&self) -> Setting<&T> {
match *self {
Self::Set(ref value) => Setting::Set(value),
Self::Reset => Setting::Reset,
Self::NotSet => Setting::NotSet,
}
}
pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet)
}
/// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
pub fn or_reset(self, val: T) -> Self {
match self {
Self::Reset => Self::Set(val),
otherwise => otherwise,
}
}
}
#[cfg(test)]
impl<T: serde::Serialize> serde::Serialize for Setting<T> {
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
Self::Set(value) => Some(value),
// Usually, `NotSet` isn't serialized at all thanks to the `skip_serializing_if` field attribute
Self::NotSet | Self::Reset => None,
}
.serialize(serializer)
}
}
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(|x| match x {
Some(x) => Self::Set(x),
None => Self::Reset, // Reset is forced by sending null value
})
}
}
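// A minimal sketch of the three-way mapping, assuming `serde_json`: a missing
// key stays `NotSet` (through `#[serde(default)]`), an explicit `null` means
// `Reset`, and a value means `Set`.
#[cfg(test)]
#[test]
fn setting_deserialize_sketch() {
    #[derive(serde::Deserialize)]
    struct Probe {
        #[serde(default)]
        value: Setting<String>,
    }
    let p: Probe = serde_json::from_str("{}").unwrap();
    assert!(matches!(p.value, Setting::NotSet));
    let p: Probe = serde_json::from_str(r#"{"value":null}"#).unwrap();
    assert!(matches!(p.value, Setting::Reset));
    let p: Probe = serde_json::from_str(r#"{"value":"title"}"#).unwrap();
    assert!(matches!(p.value, Setting::Set(ref v) if v == "title"));
}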

135
dump/src/reader/v4/tasks.rs Normal file
View File

@ -0,0 +1,135 @@
use serde::Deserialize;
use time::OffsetDateTime;
use uuid::Uuid;
use super::errors::ResponseError;
use super::meta::IndexUid;
use super::settings::{Settings, Unchecked};
pub type TaskId = u32;
pub type BatchId = u32;
#[derive(Clone, Debug, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Task {
pub id: TaskId,
pub index_uid: IndexUid,
pub content: TaskContent,
pub events: Vec<TaskEvent>,
}
#[derive(Clone, Debug, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[allow(clippy::large_enum_variant)]
pub enum TaskContent {
DocumentAddition {
content_uuid: Uuid,
merge_strategy: IndexDocumentsMethod,
primary_key: Option<String>,
documents_count: usize,
allow_index_creation: bool,
},
DocumentDeletion(DocumentDeletion),
SettingsUpdate {
settings: Settings<Unchecked>,
/// Indicates whether the task was a deletion
is_deletion: bool,
allow_index_creation: bool,
},
IndexDeletion,
IndexCreation {
primary_key: Option<String>,
},
IndexUpdate {
primary_key: Option<String>,
},
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum IndexDocumentsMethod {
/// Replace the previous document with the new one,
/// removing all the already known attributes.
ReplaceDocuments,
/// Merge the previous version of the document with the new version,
/// replacing old attribute values with the new ones and adding the new attributes.
UpdateDocuments,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum DocumentDeletion {
Clear,
Ids(Vec<String>),
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum TaskEvent {
Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
Batched {
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
batch_id: BatchId,
},
Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
// The misspelling is deliberate: serde matches this name against the tag
// stored in v4 dumps, so renaming the variant would break deserialization.
Succeded {
result: TaskResult,
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
},
Failed {
error: ResponseError,
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
},
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum TaskResult {
DocumentAddition { indexed_documents: u64 },
DocumentDeletion { deleted_documents: u64 },
ClearAll { deleted_documents: u64 },
Other,
}
impl Task {
/// Return true when a task is finished.
/// A task is finished when its last state is either `Succeded` or `Failed`.
pub fn is_finished(&self) -> bool {
self.events.last().map_or(false, |event| {
matches!(event, TaskEvent::Succeded { .. } | TaskEvent::Failed { .. })
})
}
/// Return the content_uuid of the `Task` if there is one.
pub fn get_content_uuid(&self) -> Option<Uuid> {
match self {
Task { content: TaskContent::DocumentAddition { content_uuid, .. }, .. } => {
Some(*content_uuid)
}
_ => None,
}
}
}
impl IndexUid {
pub fn into_inner(self) -> String {
self.0
}
/// Return a reference over the inner str.
pub fn as_str(&self) -> &str {
&self.0
}
}
impl std::ops::Deref for IndexUid {
type Target = str;
fn deref(&self) -> &Self::Target {
&self.0
}
}

View File

@ -0,0 +1,272 @@
use std::fmt;
use http::StatusCode;
use serde::Deserialize;
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct ResponseError {
#[serde(skip)]
code: StatusCode,
pub message: String,
#[serde(rename = "code")]
pub error_code: String,
#[serde(rename = "type")]
pub error_type: String,
#[serde(rename = "link")]
pub error_link: String,
}
impl ResponseError {
pub fn from_msg(message: String, code: Code) -> Self {
Self {
code: code.http(),
message,
error_code: code.err_code().error_name.to_string(),
error_type: code.type_(),
error_link: code.url(),
}
}
}
#[derive(Deserialize, Debug, Clone, Copy)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Code {
// index related error
CreateIndex,
IndexAlreadyExists,
IndexNotFound,
InvalidIndexUid,
InvalidMinWordLengthForTypo,
// invalid state error
InvalidState,
MissingPrimaryKey,
PrimaryKeyAlreadyPresent,
MaxFieldsLimitExceeded,
MissingDocumentId,
InvalidDocumentId,
Filter,
Sort,
BadParameter,
BadRequest,
DatabaseSizeLimitReached,
DocumentNotFound,
Internal,
InvalidGeoField,
InvalidRankingRule,
InvalidStore,
InvalidToken,
MissingAuthorizationHeader,
NoSpaceLeftOnDevice,
DumpNotFound,
TaskNotFound,
PayloadTooLarge,
RetrieveDocument,
SearchDocuments,
UnsupportedMediaType,
DumpAlreadyInProgress,
DumpProcessFailed,
InvalidContentType,
MissingContentType,
MalformedPayload,
MissingPayload,
ApiKeyNotFound,
MissingParameter,
InvalidApiKeyActions,
InvalidApiKeyIndexes,
InvalidApiKeyExpiresAt,
InvalidApiKeyDescription,
InvalidApiKeyName,
InvalidApiKeyUid,
ImmutableField,
ApiKeyAlreadyExists,
UnretrievableErrorCode,
}
impl Code {
/// associate a `Code` variant with the actual `ErrCode`
fn err_code(&self) -> ErrCode {
use Code::*;
match self {
// index related errors
// `CreateIndex` is thrown on an internal error while creating an index.
CreateIndex => {
ErrCode::internal("index_creation_failed", StatusCode::INTERNAL_SERVER_ERROR)
}
IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::CONFLICT),
// thrown when requesting a non-existent index
IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND),
InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST),
// invalid state error
InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR),
// thrown when no primary key has been set
MissingPrimaryKey => {
ErrCode::invalid("primary_key_inference_failed", StatusCode::BAD_REQUEST)
}
// error thrown when trying to set an already existing primary key
PrimaryKeyAlreadyPresent => {
ErrCode::invalid("index_primary_key_already_exists", StatusCode::BAD_REQUEST)
}
// invalid ranking rule
InvalidRankingRule => ErrCode::invalid("invalid_ranking_rule", StatusCode::BAD_REQUEST),
// invalid database
InvalidStore => {
ErrCode::internal("invalid_store_file", StatusCode::INTERNAL_SERVER_ERROR)
}
// invalid document
MaxFieldsLimitExceeded => {
ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST)
}
MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST),
InvalidDocumentId => ErrCode::invalid("invalid_document_id", StatusCode::BAD_REQUEST),
// error related to filters
Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST),
// error related to sorts
Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST),
BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST),
BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),
DatabaseSizeLimitReached => {
ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR)
}
DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND),
Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR),
InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST),
InvalidToken => ErrCode::authentication("invalid_api_key", StatusCode::FORBIDDEN),
MissingAuthorizationHeader => {
ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED)
}
TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND),
DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND),
NoSpaceLeftOnDevice => {
ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR)
}
PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
RetrieveDocument => {
ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST)
}
SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST),
UnsupportedMediaType => {
ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
// error related to dump
DumpAlreadyInProgress => {
ErrCode::invalid("dump_already_processing", StatusCode::CONFLICT)
}
DumpProcessFailed => {
ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR)
}
MissingContentType => {
ErrCode::invalid("missing_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
MalformedPayload => ErrCode::invalid("malformed_payload", StatusCode::BAD_REQUEST),
InvalidContentType => {
ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST),
// error related to keys
ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND),
MissingParameter => ErrCode::invalid("missing_parameter", StatusCode::BAD_REQUEST),
InvalidApiKeyActions => {
ErrCode::invalid("invalid_api_key_actions", StatusCode::BAD_REQUEST)
}
InvalidApiKeyIndexes => {
ErrCode::invalid("invalid_api_key_indexes", StatusCode::BAD_REQUEST)
}
InvalidApiKeyExpiresAt => {
ErrCode::invalid("invalid_api_key_expires_at", StatusCode::BAD_REQUEST)
}
InvalidApiKeyDescription => {
ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST)
}
InvalidApiKeyName => ErrCode::invalid("invalid_api_key_name", StatusCode::BAD_REQUEST),
InvalidApiKeyUid => ErrCode::invalid("invalid_api_key_uid", StatusCode::BAD_REQUEST),
ApiKeyAlreadyExists => ErrCode::invalid("api_key_already_exists", StatusCode::CONFLICT),
ImmutableField => ErrCode::invalid("immutable_field", StatusCode::BAD_REQUEST),
InvalidMinWordLengthForTypo => {
ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST)
}
UnretrievableErrorCode => {
ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST)
}
}
}
/// return the HTTP status code associated with the `Code`
fn http(&self) -> StatusCode {
self.err_code().status_code
}
/// return error name, used as error code
fn name(&self) -> String {
self.err_code().error_name.to_string()
}
/// return the error type
fn type_(&self) -> String {
self.err_code().error_type.to_string()
}
/// return the doc url associated with the error
fn url(&self) -> String {
format!("https://docs.meilisearch.com/errors#{}", self.name())
}
}
/// Internal structure providing a convenient way to create error codes
struct ErrCode {
status_code: StatusCode,
error_type: ErrorType,
error_name: &'static str,
}
impl ErrCode {
fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode {
ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError }
}
fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode {
ErrCode { status_code, error_name, error_type: ErrorType::InternalError }
}
fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode {
ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError }
}
}
#[allow(clippy::enum_variant_names)]
enum ErrorType {
InternalError,
InvalidRequestError,
AuthenticationError,
}
impl fmt::Display for ErrorType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use ErrorType::*;
match self {
InternalError => write!(f, "internal"),
InvalidRequestError => write!(f, "invalid_request"),
AuthenticationError => write!(f, "auth"),
}
}
}
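// A minimal sketch of how a `ResponseError` is assembled from a `Code`: the
// error name doubles as the public error code and as the docs anchor.
#[cfg(test)]
#[test]
fn response_error_sketch() {
    let err = ResponseError::from_msg("index `movies` not found".to_string(), Code::IndexNotFound);
    assert_eq!(err.error_code, "index_not_found");
    assert_eq!(err.error_type, "invalid_request");
    assert_eq!(err.error_link, "https://docs.meilisearch.com/errors#index_not_found");
}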

View File

@ -0,0 +1,83 @@
use serde::Deserialize;
use time::OffsetDateTime;
use uuid::Uuid;
use super::meta::{IndexUid, StarOr};
pub type KeyId = Uuid;
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Key {
pub description: Option<String>,
pub name: Option<String>,
pub uid: KeyId,
pub actions: Vec<Action>,
pub indexes: Vec<StarOr<IndexUid>>,
#[serde(with = "time::serde::rfc3339::option")]
pub expires_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
}
#[derive(Copy, Clone, Deserialize, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(test, derive(serde::Serialize))]
#[repr(u8)]
pub enum Action {
#[serde(rename = "*")]
All = 0,
#[serde(rename = "search")]
Search,
#[serde(rename = "documents.*")]
DocumentsAll,
#[serde(rename = "documents.add")]
DocumentsAdd,
#[serde(rename = "documents.get")]
DocumentsGet,
#[serde(rename = "documents.delete")]
DocumentsDelete,
#[serde(rename = "indexes.*")]
IndexesAll,
#[serde(rename = "indexes.create")]
IndexesAdd,
#[serde(rename = "indexes.get")]
IndexesGet,
#[serde(rename = "indexes.update")]
IndexesUpdate,
#[serde(rename = "indexes.delete")]
IndexesDelete,
#[serde(rename = "tasks.*")]
TasksAll,
#[serde(rename = "tasks.get")]
TasksGet,
#[serde(rename = "settings.*")]
SettingsAll,
#[serde(rename = "settings.get")]
SettingsGet,
#[serde(rename = "settings.update")]
SettingsUpdate,
#[serde(rename = "stats.*")]
StatsAll,
#[serde(rename = "stats.get")]
StatsGet,
#[serde(rename = "metrics.*")]
MetricsAll,
#[serde(rename = "metrics.get")]
MetricsGet,
#[serde(rename = "dumps.*")]
DumpsAll,
#[serde(rename = "dumps.create")]
DumpsCreate,
#[serde(rename = "version")]
Version,
#[serde(rename = "keys.create")]
KeysAdd,
#[serde(rename = "keys.get")]
KeysGet,
#[serde(rename = "keys.update")]
KeysUpdate,
#[serde(rename = "keys.delete")]
KeysDelete,
}

139
dump/src/reader/v5/meta.rs Normal file
View File

@ -0,0 +1,139 @@
use std::fmt::{self, Display, Formatter};
use std::marker::PhantomData;
use std::str::FromStr;
use serde::de::Visitor;
use serde::{Deserialize, Deserializer};
use uuid::Uuid;
use super::settings::{Settings, Unchecked};
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexUuid {
pub uid: String,
pub index_meta: IndexMeta,
}
#[derive(Deserialize, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexMeta {
pub uuid: Uuid,
pub creation_task_id: usize,
}
// There is one of these in each index, under `meta.json`.
#[derive(Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct DumpMeta {
pub settings: Settings<Unchecked>,
pub primary_key: Option<String>,
}
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct IndexUid(pub String);
impl TryFrom<String> for IndexUid {
type Error = IndexUidFormatError;
fn try_from(uid: String) -> Result<Self, Self::Error> {
if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
|| uid.is_empty()
|| uid.len() > 400
{
Err(IndexUidFormatError { invalid_uid: uid })
} else {
Ok(IndexUid(uid))
}
}
}
impl FromStr for IndexUid {
type Err = IndexUidFormatError;
fn from_str(uid: &str) -> Result<IndexUid, IndexUidFormatError> {
uid.to_string().try_into()
}
}
impl From<IndexUid> for String {
fn from(uid: IndexUid) -> Self {
uid.into_inner()
}
}
#[derive(Debug)]
pub struct IndexUidFormatError {
pub invalid_uid: String,
}
impl Display for IndexUidFormatError {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
f,
"invalid index uid `{}`, the uid must be an integer \
or a string containing only alphanumeric characters \
a-z A-Z 0-9, hyphens - and underscores _.",
self.invalid_uid,
)
}
}
impl std::error::Error for IndexUidFormatError {}
/// A type that tries to match either a star (*) or
/// anything else that implements `FromStr`.
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum StarOr<T> {
Star,
Other(T),
}
impl<'de, T, E> Deserialize<'de> for StarOr<T>
where
T: FromStr<Err = E>,
E: Display,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
/// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag.
/// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to
/// deserialize everything as a `StarOr::Other`, including "*".
/// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is
/// not supported on untagged enums.
struct StarOrVisitor<T>(PhantomData<T>);
impl<'de, T, FE> Visitor<'de> for StarOrVisitor<T>
where
T: FromStr<Err = FE>,
FE: Display,
{
type Value = StarOr<T>;
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
formatter.write_str("a string")
}
fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
where
SE: serde::de::Error,
{
match v {
"*" => Ok(StarOr::Star),
v => {
let other = FromStr::from_str(v).map_err(|e: T::Err| {
SE::custom(format!("Invalid `other` value: {}", e))
})?;
Ok(StarOr::Other(other))
}
}
}
}
deserializer.deserialize_str(StarOrVisitor(PhantomData))
}
}

345
dump/src/reader/v5/mod.rs Normal file
View File

@ -0,0 +1,345 @@
//! Here is what a dump v5 looks like.
//!
//! ```text
//! .
//! ├── indexes
//! │   ├── 22c269d8-fbbd-4416-bd46-7c7c02849325
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa
//! │   ├── documents.jsonl
//! │   └── meta.json
//! ├── index_uuids
//! │   └── data.jsonl
//! ├── instance-uid
//! ├── keys
//! ├── metadata.json
//! └── updates
//! ├── data.jsonl
//! └── updates_files
//! └── c83a004a-da98-4b94-b245-3256266c7281
//! ```
//!
//! Here is what `index_uuids/data.jsonl` looks like:
//!
//! ```json
//! {"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}}
//! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}}
//! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}}
//! ```
//!
use std::fs::{self, File};
use std::io::{BufRead, BufReader, Seek, SeekFrom};
use std::path::Path;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use super::compat::v5_to_v6::CompatV5ToV6;
use super::Document;
use crate::{Error, IndexMetadata, Result, Version};
pub mod errors;
pub mod keys;
pub mod meta;
pub mod settings;
pub mod tasks;
pub type Settings<T> = settings::Settings<T>;
pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
pub type Task = tasks::Task;
pub type Key = keys::Key;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
pub type Status = tasks::TaskStatus;
pub type Details = tasks::TaskDetails;
// everything related to the settings
pub type Setting<T> = settings::Setting<T>;
pub type TypoTolerance = settings::TypoSettings;
pub type MinWordSizeForTypos = settings::MinWordSizeTyposSetting;
// everything related to the api keys
pub type Action = keys::Action;
pub type StarOr<T> = meta::StarOr<T>;
// everything related to the errors
pub type ResponseError = errors::ResponseError;
pub type Code = errors::Code;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
db_version: String,
index_db_size: usize,
update_db_size: usize,
#[serde(with = "time::serde::rfc3339")]
dump_date: OffsetDateTime,
}
pub struct V5Reader {
dump: TempDir,
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
index_uuid: Vec<meta::IndexUuid>,
}
impl V5Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
let index_uuid = BufReader::new(index_uuid);
let index_uuid = index_uuid
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
.collect::<Result<Vec<_>>>()?;
Ok(V5Reader {
metadata,
tasks: BufReader::new(
File::open(dump.path().join("updates").join("data.jsonl")).unwrap(),
),
keys: BufReader::new(File::open(dump.path().join("keys"))?),
index_uuid,
dump,
})
}
pub fn to_v6(self) -> CompatV5ToV6 {
CompatV5ToV6::new_v5(self)
}
pub fn version(&self) -> Version {
Version::V5
}
pub fn date(&self) -> Option<OffsetDateTime> {
Some(self.metadata.dump_date)
}
pub fn instance_uid(&self) -> Result<Option<Uuid>> {
let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
Ok(Some(Uuid::parse_str(&uuid)?))
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V5IndexReader>> + '_> {
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
Ok(V5IndexReader::new(
index.uid.clone(),
&self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()),
)?)
}))
}
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;
if !task.is_finished() {
if let Some(uuid) = task.get_content_uuid() {
let update_file_path = self
.dump
.path()
.join("updates")
.join("updates_files")
.join(uuid.to_string());
Ok((
task,
Some(
Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>
),
))
} else {
Ok((task, None))
}
} else {
Ok((task, None))
}
}))
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<Key>> + '_>> {
// Rewind the file so the keys can be iterated over more than once.
self.keys.seek(SeekFrom::Start(0))?;
Ok(Box::new(
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
))
}
}
pub struct V5IndexReader {
metadata: IndexMetadata,
settings: Settings<Checked>,
documents: BufReader<File>,
}
impl V5IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: meta::DumpMeta = serde_json::from_reader(meta)?;
let metadata = IndexMetadata {
uid: name,
primary_key: meta.primary_key,
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V5IndexReader {
metadata,
settings: meta.settings.check(),
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
};
Ok(ret)
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}
}
pub struct UpdateFile {
reader: BufReader<File>,
}
impl UpdateFile {
fn new(path: &Path) -> Result<Self> {
Ok(UpdateFile { reader: BufReader::new(File::open(path)?) })
}
}
impl Iterator for UpdateFile {
type Item = Result<Document>;
fn next(&mut self) -> Option<Self::Item> {
(&mut self.reader)
.lines()
.map(|line| {
line.map_err(Error::from)
.and_then(|line| serde_json::from_str(&line).map_err(Error::from))
})
.next()
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use tempfile::TempDir;
use super::*;
#[test]
fn read_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = V5Reader::open(dir).unwrap();
// top-level info
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e159863f0442b2e987ce37fbd57af76b");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition
assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed
let update_file = update_files.remove(1).unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d");
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"091ddad754f3cc7cf1d03a477855e819");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"9896a66a399c24a0f4f6a3c8563cd14a");
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"d0dc7efd1360f95fce57d7931a70b7c9");
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 200);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"59c8e30c2022897987ea7b4394167b06");
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

View File

@ -0,0 +1,239 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use serde::{Deserialize, Deserializer, Serialize};
#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Unchecked;
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they still need to be validated. In the latter
/// case, a call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
#[serde(default)]
pub displayed_attributes: Setting<Vec<String>>,
#[serde(default)]
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default)]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default)]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default)]
pub ranking_rules: Setting<Vec<String>>,
#[serde(default)]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default)]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default)]
pub distinct_attribute: Setting<String>,
#[serde(default)]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(default)]
pub faceting: Setting<FacetingSettings>,
#[serde(default)]
pub pagination: Setting<PaginationSettings>,
#[serde(skip)]
pub _kind: PhantomData<T>,
}
#[derive(Debug, Clone, PartialEq, Copy)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum Setting<T> {
Set(T),
Reset,
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn set(self) -> Option<T> {
match self {
Self::Set(value) => Some(value),
_ => None,
}
}
pub const fn as_ref(&self) -> Setting<&T> {
match *self {
Self::Set(ref value) => Setting::Set(value),
Self::Reset => Setting::Reset,
Self::NotSet => Setting::NotSet,
}
}
pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet)
}
/// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
pub fn or_reset(self, val: T) -> Self {
match self {
Self::Reset => Self::Set(val),
otherwise => otherwise,
}
}
}
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(|x| match x {
Some(x) => Self::Set(x),
None => Self::Reset, // Reset is forced by sending null value
})
}
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct MinWordSizeTyposSetting {
#[serde(default)]
pub one_typo: Setting<u8>,
#[serde(default)]
pub two_typos: Setting<u8>,
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct TypoSettings {
#[serde(default)]
pub enabled: Setting<bool>,
#[serde(default)]
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
#[serde(default)]
pub disable_on_words: Setting<BTreeSet<String>>,
#[serde(default)]
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct FacetingSettings {
#[serde(default)]
pub max_values_per_facet: Setting<usize>,
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct PaginationSettings {
#[serde(default)]
pub max_total_hits: Setting<usize>,
}
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
pagination: Setting::Reset,
_kind: PhantomData,
}
}
pub fn into_unchecked(self) -> Settings<Unchecked> {
let Self {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
faceting,
pagination,
..
} = self;
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
faceting,
pagination,
_kind: PhantomData,
}
}
}
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
_kind: PhantomData,
}
}
}

413
dump/src/reader/v5/tasks.rs Normal file
View File

@ -0,0 +1,413 @@
use serde::Deserialize;
use time::{Duration, OffsetDateTime};
use uuid::Uuid;
use super::errors::ResponseError;
use super::meta::IndexUid;
use super::settings::{Settings, Unchecked};
pub type TaskId = u32;
pub type BatchId = u32;
#[derive(Clone, Debug, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Task {
pub id: TaskId,
/// The payload of the task. The uid of the index the task targets lives
/// inside each `TaskContent` variant; only `Dump` tasks target no index
/// (see `Task::index_uid`).
pub content: TaskContent,
pub events: Vec<TaskEvent>,
}
#[derive(Clone, Debug, Deserialize, PartialEq)]
#[cfg_attr(test, derive(serde::Serialize))]
#[allow(clippy::large_enum_variant)]
pub enum TaskContent {
DocumentAddition {
index_uid: IndexUid,
content_uuid: Uuid,
merge_strategy: IndexDocumentsMethod,
primary_key: Option<String>,
documents_count: usize,
allow_index_creation: bool,
},
DocumentDeletion {
index_uid: IndexUid,
deletion: DocumentDeletion,
},
SettingsUpdate {
index_uid: IndexUid,
settings: Settings<Unchecked>,
/// Indicates whether the task was a deletion
is_deletion: bool,
allow_index_creation: bool,
},
IndexDeletion {
index_uid: IndexUid,
},
IndexCreation {
index_uid: IndexUid,
primary_key: Option<String>,
},
IndexUpdate {
index_uid: IndexUid,
primary_key: Option<String>,
},
Dump {
uid: String,
},
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum IndexDocumentsMethod {
/// Replace the previous document with the new one,
/// removing all the already known attributes.
ReplaceDocuments,
/// Merge the previous version of the document with the new version,
/// replacing old attribute values with the new ones and adding the new attributes.
UpdateDocuments,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum DocumentDeletion {
Clear,
Ids(Vec<String>),
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum TaskEvent {
Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
Batched {
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
batch_id: BatchId,
},
Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
Succeeded {
result: TaskResult,
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
},
Failed {
error: ResponseError,
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
},
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[cfg_attr(test, derive(serde::Serialize))]
pub enum TaskResult {
DocumentAddition { indexed_documents: u64 },
DocumentDeletion { deleted_documents: u64 },
ClearAll { deleted_documents: u64 },
Other,
}
impl Task {
/// Return true when a task is finished.
/// A task is finished when its last state is either `Succeeded` or `Failed`.
pub fn is_finished(&self) -> bool {
self.events.last().map_or(false, |event| {
matches!(event, TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. })
})
}
/// Return the content_uuid of the `Task` if there is one.
pub fn get_content_uuid(&self) -> Option<Uuid> {
match self {
Task { content: TaskContent::DocumentAddition { content_uuid, .. }, .. } => {
Some(*content_uuid)
}
_ => None,
}
}
pub fn index_uid(&self) -> Option<&str> {
match &self.content {
TaskContent::DocumentAddition { index_uid, .. }
| TaskContent::DocumentDeletion { index_uid, .. }
| TaskContent::SettingsUpdate { index_uid, .. }
| TaskContent::IndexDeletion { index_uid }
| TaskContent::IndexCreation { index_uid, .. }
| TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()),
TaskContent::Dump { .. } => None,
}
}
}
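// A minimal sketch of the two helpers above: every `TaskContent` variant
// except `Dump` carries the uid of the index it targets, and a task with no
// terminal event is not finished.
#[cfg(test)]
#[test]
fn task_helpers_sketch() {
    let task = Task {
        id: 0,
        content: TaskContent::IndexDeletion { index_uid: IndexUid("movies".to_string()) },
        events: vec![],
    };
    assert_eq!(task.index_uid(), Some("movies"));
    assert!(!task.is_finished());
}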
impl IndexUid {
pub fn into_inner(self) -> String {
self.0
}
/// Return a reference over the inner str.
pub fn as_str(&self) -> &str {
&self.0
}
}
impl std::ops::Deref for IndexUid {
type Target = str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
#[cfg_attr(test, serde(rename_all = "camelCase"))]
pub struct TaskView {
pub uid: TaskId,
pub index_uid: Option<String>,
pub status: TaskStatus,
#[cfg_attr(test, serde(rename = "type"))]
pub task_type: TaskType,
#[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))]
pub details: Option<TaskDetails>,
#[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))]
pub error: Option<ResponseError>,
#[cfg_attr(test, serde(serialize_with = "serialize_duration"))]
pub duration: Option<Duration>,
#[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::serialize"))]
pub enqueued_at: OffsetDateTime,
#[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::option::serialize"))]
pub started_at: Option<OffsetDateTime>,
#[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::option::serialize"))]
pub finished_at: Option<OffsetDateTime>,
}
impl From<Task> for TaskView {
fn from(task: Task) -> Self {
let index_uid = task.index_uid().map(String::from);
let Task { id, content, events } = task;
let (task_type, mut details) = match content {
TaskContent::DocumentAddition { documents_count, .. } => {
let details = TaskDetails::DocumentAddition {
received_documents: documents_count,
indexed_documents: None,
};
(TaskType::DocumentAdditionOrUpdate, Some(details))
}
TaskContent::DocumentDeletion { deletion: DocumentDeletion::Ids(ids), .. } => (
TaskType::DocumentDeletion,
Some(TaskDetails::DocumentDeletion {
received_document_ids: ids.len(),
deleted_documents: None,
}),
),
TaskContent::DocumentDeletion { deletion: DocumentDeletion::Clear, .. } => (
TaskType::DocumentDeletion,
Some(TaskDetails::ClearAll { deleted_documents: None }),
),
TaskContent::IndexDeletion { .. } => {
(TaskType::IndexDeletion, Some(TaskDetails::ClearAll { deleted_documents: None }))
}
TaskContent::SettingsUpdate { settings, .. } => {
(TaskType::SettingsUpdate, Some(TaskDetails::Settings { settings }))
}
TaskContent::IndexCreation { primary_key, .. } => {
(TaskType::IndexCreation, Some(TaskDetails::IndexInfo { primary_key }))
}
TaskContent::IndexUpdate { primary_key, .. } => {
(TaskType::IndexUpdate, Some(TaskDetails::IndexInfo { primary_key }))
}
TaskContent::Dump { uid } => {
(TaskType::DumpCreation, Some(TaskDetails::Dump { dump_uid: uid }))
}
};
// A task always has at least one event: `Created`.
let (status, error, finished_at) = match events.last().unwrap() {
TaskEvent::Created(_) => (TaskStatus::Enqueued, None, None),
TaskEvent::Batched { .. } => (TaskStatus::Enqueued, None, None),
TaskEvent::Processing(_) => (TaskStatus::Processing, None, None),
TaskEvent::Succeeded { timestamp, result } => {
match (result, &mut details) {
(
TaskResult::DocumentAddition { indexed_documents: num, .. },
Some(TaskDetails::DocumentAddition { ref mut indexed_documents, .. }),
) => {
indexed_documents.replace(*num);
}
(
TaskResult::DocumentDeletion { deleted_documents: docs, .. },
Some(TaskDetails::DocumentDeletion { ref mut deleted_documents, .. }),
) => {
deleted_documents.replace(*docs);
}
(
TaskResult::ClearAll { deleted_documents: docs },
Some(TaskDetails::ClearAll { ref mut deleted_documents }),
) => {
deleted_documents.replace(*docs);
}
_ => (),
}
(TaskStatus::Succeeded, None, Some(*timestamp))
}
TaskEvent::Failed { timestamp, error } => {
match details {
Some(TaskDetails::DocumentDeletion { ref mut deleted_documents, .. }) => {
deleted_documents.replace(0);
}
Some(TaskDetails::ClearAll { ref mut deleted_documents, .. }) => {
deleted_documents.replace(0);
}
Some(TaskDetails::DocumentAddition { ref mut indexed_documents, .. }) => {
indexed_documents.replace(0);
}
_ => (),
}
(TaskStatus::Failed, Some(error.clone()), Some(*timestamp))
}
};
let enqueued_at = match events.first() {
Some(TaskEvent::Created(ts)) => *ts,
_ => unreachable!("A task must always have a creation event."),
};
let started_at = events.iter().find_map(|e| match e {
TaskEvent::Processing(ts) => Some(*ts),
_ => None,
});
let duration = finished_at.zip(started_at).map(|(tf, ts)| tf - ts);
Self {
uid: id,
index_uid,
status,
task_type,
details,
error,
duration,
enqueued_at,
started_at,
finished_at,
}
}
}
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub enum TaskType {
IndexCreation,
IndexUpdate,
IndexDeletion,
DocumentAdditionOrUpdate,
DocumentDeletion,
SettingsUpdate,
DumpCreation,
}
impl From<TaskContent> for TaskType {
fn from(other: TaskContent) -> Self {
match other {
TaskContent::IndexCreation { .. } => TaskType::IndexCreation,
TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate,
TaskContent::IndexDeletion { .. } => TaskType::IndexDeletion,
TaskContent::DocumentAddition { .. } => TaskType::DocumentAdditionOrUpdate,
TaskContent::DocumentDeletion { .. } => TaskType::DocumentDeletion,
TaskContent::SettingsUpdate { .. } => TaskType::SettingsUpdate,
TaskContent::Dump { .. } => TaskType::DumpCreation,
}
}
}
#[derive(Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
#[serde(rename_all = "camelCase")]
pub enum TaskStatus {
Enqueued,
Processing,
Succeeded,
Failed,
}
#[derive(Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
#[cfg_attr(test, serde(untagged))]
#[allow(clippy::large_enum_variant)]
pub enum TaskDetails {
#[cfg_attr(test, serde(rename_all = "camelCase"))]
DocumentAddition { received_documents: usize, indexed_documents: Option<u64> },
#[cfg_attr(test, serde(rename_all = "camelCase"))]
Settings {
#[cfg_attr(test, serde(flatten))]
settings: Settings<Unchecked>,
},
#[cfg_attr(test, serde(rename_all = "camelCase"))]
IndexInfo { primary_key: Option<String> },
#[cfg_attr(test, serde(rename_all = "camelCase"))]
DocumentDeletion { received_document_ids: usize, deleted_documents: Option<u64> },
#[cfg_attr(test, serde(rename_all = "camelCase"))]
ClearAll { deleted_documents: Option<u64> },
#[cfg_attr(test, serde(rename_all = "camelCase"))]
Dump { dump_uid: String },
}
/// Serialize a `time::Duration` as a best-effort ISO 8601 duration while waiting for
/// https://github.com/time-rs/time/issues/378.
/// This code is a port of the old `time` code that was removed in 0.2.
#[cfg(test)]
fn serialize_duration<S: serde::Serializer>(
duration: &Option<Duration>,
serializer: S,
) -> Result<S::Ok, S::Error> {
use std::fmt::Write;
match duration {
Some(duration) => {
// technically speaking, negative duration is not valid ISO 8601
if duration.is_negative() {
return serializer.serialize_none();
}
const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds();
let secs = duration.whole_seconds();
let days = secs / SECS_PER_DAY;
let secs = secs - days * SECS_PER_DAY;
let hasdate = days != 0;
let nanos = duration.subsec_nanoseconds();
let hastime = (secs != 0 || nanos != 0) || !hasdate;
// none of the following unwraps can fail
let mut res = String::new();
write!(&mut res, "P").unwrap();
if hasdate {
write!(&mut res, "{}D", days).unwrap();
}
const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds();
const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds();
if hastime {
if nanos == 0 {
write!(&mut res, "T{}S", secs).unwrap();
} else if nanos % NANOS_PER_MILLI == 0 {
write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap();
} else if nanos % NANOS_PER_MICRO == 0 {
write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap();
} else {
write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap();
}
}
serializer.serialize_str(&res)
}
None => serializer.serialize_none(),
}
}
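For reference, a minimal sketch (not part of this changeset) of the strings this serializer emits; the `Wrapped` newtype and the test below are hypothetical:

#[cfg(test)]
mod duration_format_sketch {
    use time::Duration;

    use super::serialize_duration;

    #[derive(serde::Serialize)]
    struct Wrapped(#[serde(serialize_with = "serialize_duration")] Option<Duration>);

    #[test]
    fn iso8601_examples() {
        let to_json = |d| serde_json::to_string(&Wrapped(d)).unwrap();
        assert_eq!(to_json(Some(Duration::seconds(3))), r#""PT3S""#);
        assert_eq!(to_json(Some(Duration::DAY + Duration::milliseconds(500))), r#""P1DT0.500S""#);
        // negative durations are not valid ISO 8601, so they serialize to null
        assert_eq!(to_json(Some(Duration::seconds(-1))), "null");
        assert_eq!(to_json(None), "null");
    }
}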

189
dump/src/reader/v6/mod.rs Normal file

@ -0,0 +1,189 @@
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::str::FromStr;
pub use meilisearch_types::milli;
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use super::Document;
use crate::{Error, IndexMetadata, Result, Version};
pub type Metadata = crate::Metadata;
pub type Settings<T> = meilisearch_types::settings::Settings<T>;
pub type Checked = meilisearch_types::settings::Checked;
pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Key = meilisearch_types::keys::Key;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
pub type Status = meilisearch_types::tasks::Status;
pub type Kind = crate::KindDump;
pub type Details = meilisearch_types::tasks::Details;
// everything related to the settings
pub type Setting<T> = meilisearch_types::milli::update::Setting<T>;
pub type TypoTolerance = meilisearch_types::settings::TypoSettings;
pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting;
pub type FacetingSettings = meilisearch_types::settings::FacetingSettings;
pub type PaginationSettings = meilisearch_types::settings::PaginationSettings;
// everything related to the api keys
pub type Action = meilisearch_types::keys::Action;
pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>;
pub type IndexUid = meilisearch_types::index_uid::IndexUid;
// everything related to the errors
pub type ResponseError = meilisearch_types::error::ResponseError;
pub type Code = meilisearch_types::error::Code;
pub struct V6Reader {
dump: TempDir,
instance_uid: Uuid,
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
}
impl V6Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
let instance_uid = Uuid::from_str(&instance_uid)?;
Ok(V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
dump,
})
}
pub fn version(&self) -> Version {
Version::V6
}
pub fn date(&self) -> Option<OffsetDateTime> {
Some(self.metadata.dump_date)
}
pub fn instance_uid(&self) -> Result<Option<Uuid>> {
Ok(Some(self.instance_uid))
}
pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<V6IndexReader>> + '_>> {
let entries = fs::read_dir(self.dump.path().join("indexes"))?;
Ok(Box::new(
entries
.map(|entry| -> Result<Option<_>> {
let entry = entry?;
if entry.file_type()?.is_dir() {
let index = V6IndexReader::new(
entry.file_name().to_str().ok_or(Error::BadIndexName)?.to_string(),
&entry.path(),
)?;
Ok(Some(index))
} else {
Ok(None)
}
})
.filter_map(|entry| entry.transpose()),
))
}
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;
let update_file_path = self
.dump
.path()
.join("tasks")
.join("update_files")
.join(format!("{}.jsonl", task.uid));
if update_file_path.exists() {
Ok((
task,
Some(Box::new(UpdateFile::new(&update_file_path)?)
as Box<super::UpdateFile>),
))
} else {
Ok((task, None))
}
}))
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
Box::new(
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
)
}
}
pub struct UpdateFile {
reader: BufReader<File>,
}
impl UpdateFile {
fn new(path: &Path) -> Result<Self> {
Ok(UpdateFile { reader: BufReader::new(File::open(path)?) })
}
}
impl Iterator for UpdateFile {
type Item = Result<Document>;
fn next(&mut self) -> Option<Self::Item> {
(&mut self.reader)
.lines()
.map(|line| {
line.map_err(Error::from)
.and_then(|line| serde_json::from_str(&line).map_err(Error::from))
})
.next()
}
}
pub struct V6IndexReader {
metadata: IndexMetadata,
documents: BufReader<File>,
settings: BufReader<File>,
}
impl V6IndexReader {
pub fn new(_name: String, path: &Path) -> Result<Self> {
let metadata = File::open(path.join("metadata.json"))?;
let ret = V6IndexReader {
metadata: serde_json::from_reader(metadata)?,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
};
Ok(ret)
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
Ok(settings.check())
}
}
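A hedged usage sketch (not part of this changeset) of the reader API, assuming `dump` is a `TempDir` that already contains an unpacked v6 dump:

fn summarize(dump: tempfile::TempDir) -> Result<()> {
    let mut reader = V6Reader::open(dump)?;
    println!("instance uid: {:?}", reader.instance_uid()?);
    for index in reader.indexes()? {
        let mut index = index?;
        let document_count = index.documents()?.count();
        println!("index contains {document_count} documents");
        let _settings = index.settings()?;
    }
    for entry in reader.tasks() {
        let (task, update_file) = entry?;
        println!("task {} has an update file: {}", task.uid, update_file.is_some());
    }
    Ok(())
}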

348
dump/src/writer.rs Normal file

@ -0,0 +1,348 @@
use std::fs::{self, File};
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::reader::Document;
use crate::{IndexMetadata, Metadata, Result, TaskDump, CURRENT_DUMP_VERSION};
pub struct DumpWriter {
dir: TempDir,
}
impl DumpWriter {
pub fn new(instance_uuid: Option<Uuid>) -> Result<DumpWriter> {
let dir = TempDir::new()?;
if let Some(instance_uuid) = instance_uuid {
fs::write(
dir.path().join("instance_uid.uuid"),
&instance_uuid.as_hyphenated().to_string(),
)?;
}
let metadata = Metadata {
dump_version: CURRENT_DUMP_VERSION,
db_version: env!("CARGO_PKG_VERSION").to_string(),
dump_date: OffsetDateTime::now_utc(),
};
fs::write(dir.path().join("metadata.json"), serde_json::to_string(&metadata)?)?;
std::fs::create_dir(dir.path().join("indexes"))?;
Ok(DumpWriter { dir })
}
pub fn create_index(&self, index_name: &str, metadata: &IndexMetadata) -> Result<IndexWriter> {
IndexWriter::new(self.dir.path().join("indexes").join(index_name), metadata)
}
pub fn create_keys(&self) -> Result<KeyWriter> {
KeyWriter::new(self.dir.path().to_path_buf())
}
pub fn create_tasks_queue(&self) -> Result<TaskWriter> {
TaskWriter::new(self.dir.path().join("tasks"))
}
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
let mut tar_encoder = tar::Builder::new(gz_encoder);
tar_encoder.append_dir_all(".", self.dir.path())?;
let gz_encoder = tar_encoder.into_inner()?;
gz_encoder.finish()?;
writer.flush()?;
Ok(())
}
}
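// Hedged sketch: nothing reaches the destination until `persist_to` runs; any
// `io::Write` sink works, e.g. an in-memory buffer (local names below):
//
//     let dump = DumpWriter::new(None)?;
//     // ... create_index / create_keys / create_tasks_queue as needed ...
//     let mut buffer = Vec::new();
//     dump.persist_to(&mut buffer)?; // `buffer` now holds the tar.gz bytes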
pub struct KeyWriter {
keys: BufWriter<File>,
}
impl KeyWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
let keys = File::create(path.join("keys.jsonl"))?;
Ok(KeyWriter { keys: BufWriter::new(keys) })
}
pub fn push_key(&mut self, key: &Key) -> Result<()> {
self.keys.write_all(&serde_json::to_vec(key)?)?;
self.keys.write_all(b"\n")?;
Ok(())
}
pub fn flush(mut self) -> Result<()> {
self.keys.flush()?;
Ok(())
}
}
pub struct TaskWriter {
queue: BufWriter<File>,
update_files: PathBuf,
}
impl TaskWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
std::fs::create_dir(&path)?;
let queue = File::create(path.join("queue.jsonl"))?;
let update_files = path.join("update_files");
std::fs::create_dir(&update_files)?;
Ok(TaskWriter { queue: BufWriter::new(queue), update_files })
}
/// Pushes a task into the dump.
/// If the task has an associated `update_file`, its `task_id` is used as the file name.
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
self.queue.write_all(&serde_json::to_vec(task)?)?;
self.queue.write_all(b"\n")?;
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
}
pub fn flush(mut self) -> Result<()> {
self.queue.flush()?;
Ok(())
}
}
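// Hedged sketch: each pushed task gets a matching update file named after its
// uid, and documents are streamed into it (`task` and `documents` are assumed):
//
//     let mut update_file = task_writer.push_task(&task)?;
//     for document in &documents {
//         update_file.push_document(document)?;
//     }
//     update_file.flush()?;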
pub struct UpdateFile {
path: PathBuf,
writer: Option<BufWriter<File>>,
}
impl UpdateFile {
pub(crate) fn new(path: PathBuf) -> UpdateFile {
UpdateFile { path, writer: None }
}
pub fn push_document(&mut self, document: &Document) -> Result<()> {
if let Some(writer) = self.writer.as_mut() {
writer.write_all(&serde_json::to_vec(document)?)?;
writer.write_all(b"\n")?;
} else {
let file = File::create(&self.path)?;
self.writer = Some(BufWriter::new(file));
self.push_document(document)?;
}
Ok(())
}
pub fn flush(self) -> Result<()> {
if let Some(mut writer) = self.writer {
writer.flush()?;
}
Ok(())
}
}
pub struct IndexWriter {
documents: BufWriter<File>,
settings: File,
}
impl IndexWriter {
pub(self) fn new(path: PathBuf, metadata: &IndexMetadata) -> Result<Self> {
std::fs::create_dir(&path)?;
let metadata_file = File::create(path.join("metadata.json"))?;
serde_json::to_writer(metadata_file, metadata)?;
let documents = File::create(path.join("documents.jsonl"))?;
let settings = File::create(path.join("settings.json"))?;
Ok(IndexWriter { documents: BufWriter::new(documents), settings })
}
pub fn push_document(&mut self, document: &Map<String, Value>) -> Result<()> {
serde_json::to_writer(&mut self.documents, document)?;
self.documents.write_all(b"\n")?;
Ok(())
}
pub fn flush(&mut self) -> Result<()> {
self.documents.flush()?;
Ok(())
}
pub fn settings(mut self, settings: &Settings<Checked>) -> Result<()> {
self.settings.write_all(&serde_json::to_vec(&settings)?)?;
Ok(())
}
}
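// Hedged sketch: documents are appended as JSONL while `settings` consumes the
// writer, so it comes last (`metadata`, `document`, and `settings` are assumed):
//
//     let mut index = dump.create_index("movies", &metadata)?;
//     index.push_document(&document)?;
//     index.flush()?;
//     index.settings(&settings)?;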
#[cfg(test)]
pub(crate) mod test {
use std::fmt::Write;
use std::io::BufReader;
use std::path::Path;
use std::str::FromStr;
use flate2::bufread::GzDecoder;
use meilisearch_types::settings::Unchecked;
use super::*;
use crate::reader::Document;
use crate::test::{
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
create_test_settings, create_test_tasks,
};
fn create_directory_hierarchy(dir: &Path) -> String {
let mut ret = String::new();
writeln!(ret, ".").unwrap();
ret.push_str(&_create_directory_hierarchy(dir, 0));
ret
}
fn _create_directory_hierarchy(dir: &Path, depth: usize) -> String {
let mut ret = String::new();
// the entries are not guaranteed to be returned in the same order, thus we need to sort them.
let mut entries =
fs::read_dir(dir).unwrap().collect::<std::result::Result<Vec<_>, _>>().unwrap();
// I want the directories first and then sort by name.
entries.sort_by(|a, b| {
let (aft, bft) = (a.file_type().unwrap(), b.file_type().unwrap());
if aft.is_dir() && bft.is_dir() {
a.file_name().cmp(&b.file_name())
} else if aft.is_file() {
std::cmp::Ordering::Greater
} else if bft.is_file() {
std::cmp::Ordering::Less
} else {
a.file_name().cmp(&b.file_name())
}
});
for (idx, entry) in entries.iter().enumerate() {
let mut ident = String::new();
for _ in 0..depth {
ident.push_str(&"│");
ident.push_str(&" ".repeat(4));
}
if idx == entries.len() - 1 {
ident.push_str(&"â””");
} else {
ident.push_str(&"├");
}
ident.push_str(&"-".repeat(4));
let name = entry.file_name().into_string().unwrap();
let file_type = entry.file_type().unwrap();
let is_dir = file_type.is_dir().then_some("/").unwrap_or("");
assert!(!file_type.is_symlink());
writeln!(ret, "{ident} {name}{is_dir}").unwrap();
if file_type.is_dir() {
ret.push_str(&_create_directory_hierarchy(&entry.path(), depth + 1));
}
}
ret
}
#[test]
fn test_creating_dump() {
let file = create_test_dump();
let mut file = BufReader::new(file);
// ============ ensuring we wrote everything in the correct place.
let dump = tempfile::tempdir().unwrap();
let gz = GzDecoder::new(&mut file);
let mut tar = tar::Archive::new(gz);
tar.unpack(dump.path()).unwrap();
let dump_path = dump.path();
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###"
.
├---- indexes/
│ └---- doggos/
│ │ ├---- settings.json
│ │ ├---- metadata.json
│ │ └---- documents.jsonl
├---- tasks/
│ ├---- update_files/
│ │ └---- 1.jsonl
│ └---- queue.jsonl
├---- keys.jsonl
├---- metadata.json
â””---- instance_uid.uuid
"###);
// ==== checking the top level infos
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
let metadata: Metadata = serde_json::from_str(&metadata).unwrap();
insta::assert_json_snapshot!(metadata, { ".dumpDate" => "[date]" }, @r###"
{
"dumpVersion": "V6",
"dbVersion": "0.29.0",
"dumpDate": "[date]"
}
"###);
let instance_uid = fs::read_to_string(dump_path.join("instance_uid.uuid")).unwrap();
assert_eq!(Uuid::from_str(&instance_uid).unwrap(), create_test_instance_uid());
// ==== checking the index
let docs = fs::read_to_string(dump_path.join("indexes/doggos/documents.jsonl")).unwrap();
for (document, expected) in docs.lines().zip(create_test_documents()) {
assert_eq!(serde_json::from_str::<Map<String, Value>>(document).unwrap(), expected);
}
let test_settings =
fs::read_to_string(dump_path.join("indexes/doggos/settings.json")).unwrap();
assert_eq!(
serde_json::from_str::<Settings<Unchecked>>(&test_settings).unwrap(),
create_test_settings().into_unchecked()
);
let metadata = fs::read_to_string(dump_path.join("indexes/doggos/metadata.json")).unwrap();
let metadata: IndexMetadata = serde_json::from_str(&metadata).unwrap();
insta::assert_json_snapshot!(metadata, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }, @r###"
{
"uid": "doggo",
"primaryKey": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
// ==== checking the task queue
let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap();
for (task, expected) in tasks_queue.lines().zip(create_test_tasks()) {
assert_eq!(serde_json::from_str::<TaskDump>(task).unwrap(), expected.0);
if let Some(expected_update) = expected.1 {
let path = dump_path.join(format!("tasks/update_files/{}.jsonl", expected.0.uid));
println!("trying to open {}", path.display());
let update = fs::read_to_string(path).unwrap();
let documents: Vec<Document> =
update.lines().map(|line| serde_json::from_str(line).unwrap()).collect();
assert_eq!(documents, expected_update);
}
}
// ==== checking the keys
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
for (key, expected) in keys.lines().zip(create_test_api_keys()) {
assert_eq!(serde_json::from_str::<Key>(key).unwrap(), expected);
}
}
}

BIN
dump/tests/assets/v2.dump Normal file

Binary file not shown.

BIN
dump/tests/assets/v3.dump Normal file

Binary file not shown.

BIN
dump/tests/assets/v4.dump Normal file

Binary file not shown.

BIN
dump/tests/assets/v5.dump Normal file

Binary file not shown.

12
file-store/Cargo.toml Normal file

@ -0,0 +1,12 @@
[package]
name = "file-store"
version = "0.1.0"
edition = "2021"
[dependencies]
tempfile = "3.3.0"
thiserror = "1.0.30"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
[dev-dependencies]
faux = "0.1.8"

127
file-store/src/lib.rs Normal file

@ -0,0 +1,127 @@
use std::collections::BTreeSet;
use std::fs::File as StdFile;
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use tempfile::NamedTempFile;
use uuid::Uuid;
const UPDATE_FILES_PATH: &str = "updates/updates_files";
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error(transparent)]
IoError(#[from] std::io::Error),
#[error(transparent)]
PersistError(#[from] tempfile::PersistError),
}
pub type Result<T> = std::result::Result<T, Error>;
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[cfg_attr(test, faux::create)]
#[derive(Clone, Debug)]
pub struct FileStore {
path: PathBuf,
}
#[cfg(not(test))]
impl FileStore {
pub fn new(path: impl AsRef<Path>) -> Result<FileStore> {
let path = path.as_ref().to_path_buf();
std::fs::create_dir_all(&path)?;
Ok(FileStore { path })
}
}
#[cfg_attr(test, faux::methods)]
impl FileStore {
/// Creates a new temporary update file.
/// A call to `persist` is needed to persist the file in the database.
pub fn new_update(&self) -> Result<(Uuid, File)> {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string());
let update_file = File { file, path };
Ok((uuid, update_file))
}
/// Creates a new temporary update file with the given Uuid.
/// A call to `persist` is needed to persist the file in the database.
pub fn new_update_with_uuid(&self, uuid: u128) -> Result<(Uuid, File)> {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::from_u128(uuid);
let path = self.path.join(uuid.to_string());
let update_file = File { file, path };
Ok((uuid, update_file))
}
/// Returns the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
let path = self.path.join(uuid.to_string());
let file = StdFile::open(path)?;
Ok(file)
}
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
let src = self.path.join(uuid.to_string());
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&dst)?;
dst.push(uuid.to_string());
std::fs::copy(src, dst)?;
Ok(())
}
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
Ok(self.get_update(uuid)?.metadata()?.len())
}
pub fn delete(&self, uuid: Uuid) -> Result<()> {
let path = self.path.join(uuid.to_string());
std::fs::remove_file(path)?;
Ok(())
}
/// List the Uuids of the files in the FileStore
///
/// This function is meant to be used by tests only.
#[doc(hidden)]
pub fn __all_uuids(&self) -> BTreeSet<Uuid> {
let mut uuids = BTreeSet::new();
for entry in self.path.read_dir().unwrap() {
let entry = entry.unwrap();
let uuid = Uuid::from_str(entry.file_name().to_str().unwrap()).unwrap();
uuids.insert(uuid);
}
uuids
}
}
pub struct File {
path: PathBuf,
file: NamedTempFile,
}
impl File {
pub fn persist(self) -> Result<()> {
self.file.persist(&self.path)?;
Ok(())
}
}
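A hedged usage sketch (not part of this changeset): create an update file, write a payload through the `NamedTempFile` deref, then persist it under its uuid. The `db_path` argument is illustrative:

use std::io::Write;
use std::path::Path;

fn store_payload(db_path: &Path, payload: &[u8]) -> Result<uuid::Uuid> {
    let store = FileStore::new(db_path.join("update_files"))?;
    let (uuid, mut file) = store.new_update()?;
    file.write_all(payload)?; // goes through `Deref<Target = NamedTempFile>`
    file.persist()?; // moves the temp file to `<dir>/<uuid>`
    Ok(uuid)
}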

30
index-scheduler/Cargo.toml Normal file

@ -0,0 +1,30 @@
[package]
name = "index-scheduler"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0.64"
bincode = "1.3.3"
csv = "1.1.6"
derive_builder = "0.11.2"
dump = { path = "../dump" }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }
log = "0.4.14"
meilisearch-types = { path = "../meilisearch-types" }
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
crossbeam = "0.8.2"
insta = { version = "1.19.1", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

724
index-scheduler/src/autobatcher.rs Normal file

@ -0,0 +1,724 @@
/*!
The autobatcher is responsible for combining the next enqueued
tasks affecting a single index into a [batch](crate::batch::Batch).
The main function of the autobatcher is [`autobatch`].
*/
use std::ops::ControlFlow::{self, Break, Continue};
use meilisearch_types::milli::update::IndexDocumentsMethod::{
self, ReplaceDocuments, UpdateDocuments,
};
use meilisearch_types::tasks::TaskId;
use crate::KindWithContent;
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
/// for the purpose of simplifying the implementation of the autobatcher.
///
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`].
enum AutobatchKind {
DocumentImport { method: IndexDocumentsMethod, allow_index_creation: bool },
DocumentDeletion,
DocumentClear,
Settings { allow_index_creation: bool },
IndexCreation,
IndexDeletion,
IndexUpdate,
IndexSwap,
}
impl AutobatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
AutobatchKind::DocumentImport { allow_index_creation, .. }
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
}
}
impl From<KindWithContent> for AutobatchKind {
fn from(kind: KindWithContent) -> Self {
match kind {
KindWithContent::DocumentImport { method, allow_index_creation, .. } => {
AutobatchKind::DocumentImport { method, allow_index_creation }
}
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::Settings { allow_index_creation, is_deletion, .. } => {
AutobatchKind::Settings {
allow_index_creation: allow_index_creation && !is_deletion,
}
}
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpExport { .. }
| KindWithContent::Snapshot => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
}
}
}
}
#[derive(Debug)]
pub enum BatchKind {
DocumentClear {
ids: Vec<TaskId>,
},
DocumentImport {
method: IndexDocumentsMethod,
allow_index_creation: bool,
import_ids: Vec<TaskId>,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
},
ClearAndSettings {
other: Vec<TaskId>,
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
SettingsAndDocumentImport {
settings_ids: Vec<TaskId>,
method: IndexDocumentsMethod,
allow_index_creation: bool,
import_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
IndexDeletion {
ids: Vec<TaskId>,
},
IndexCreation {
id: TaskId,
},
IndexUpdate {
id: TaskId,
},
IndexSwap {
id: TaskId,
},
}
impl BatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
BatchKind::DocumentImport { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentImport { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
}
}
impl BatchKind {
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tells you whether an index has been created by the batched task.
/// To ease the writing of the code, `true` can be returned when you don't need to create an index,
/// but `false` can't be returned if you need to create one.
// TODO use an AutoBatchKind as input
pub fn new(
task_id: TaskId,
kind: KindWithContent,
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
use AutobatchKind as K;
match AutobatchKind::from(kind) {
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
K::DocumentImport { method, allow_index_creation } => (
Continue(BatchKind::DocumentImport {
method,
allow_index_creation,
import_ids: vec![task_id],
}),
allow_index_creation,
),
K::DocumentDeletion => {
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
}
K::Settings { allow_index_creation } => (
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
allow_index_creation,
),
}
}
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tells you whether an index has been created by the batched task.
/// To ease the writing of the code, `true` can be returned when you don't need to create an index,
/// but `false` can't be returned if you need to create one.
#[rustfmt::skip]
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool) -> ControlFlow<BatchKind, BatchKind> {
use AutobatchKind as K;
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exist.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
},
// The index deletion can batch with everything but must stop after
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids }
| BatchKind::DocumentImport { method: _, allow_index_creation: _, import_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
) => {
ids.push(id);
Break(BatchKind::IndexDeletion { ids })
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
| BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, settings_ids: mut other },
K::IndexDeletion,
) => {
ids.push(id);
ids.append(&mut other);
Break(BatchKind::IndexDeletion { ids })
}
(
BatchKind::DocumentClear { mut ids },
K::DocumentClear | K::DocumentDeletion,
) => {
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
}
(
this @ BatchKind::DocumentClear { .. },
K::DocumentImport { .. } | K::Settings { .. },
) => Break(this),
(
BatchKind::DocumentImport { method: _, allow_index_creation: _, import_ids: mut ids },
K::DocumentClear,
) => {
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
}
// we can autobatch the same kind of document additions / updates
(
BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, mut import_ids },
K::DocumentImport { method: ReplaceDocuments, .. },
) => {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
method: ReplaceDocuments,
allow_index_creation,
import_ids,
})
}
(
BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, mut import_ids },
K::DocumentImport { method: UpdateDocuments, .. },
) => {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
method: UpdateDocuments,
allow_index_creation,
import_ids,
})
}
// but we can't autobatch documents if it's not the same kind
// this match branch MUST be AFTER the previous one
(
this @ BatchKind::DocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::DocumentImport { method, allow_index_creation, import_ids },
K::Settings { .. },
) => Continue(BatchKind::SettingsAndDocumentImport {
settings_ids: vec![id],
method,
allow_index_creation,
import_ids,
}),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
(this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids })
}
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
(
BatchKind::Settings { settings_ids, allow_index_creation },
K::DocumentClear,
) => Continue(BatchKind::ClearAndSettings {
settings_ids,
allow_index_creation,
other: vec![id],
}),
(
this @ BatchKind::Settings { .. },
K::DocumentImport { .. } | K::DocumentDeletion,
) => Break(this),
(
BatchKind::Settings { mut settings_ids, allow_index_creation },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::Settings {
allow_index_creation,
settings_ids,
})
}
(
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
K::DocumentClear,
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
(
BatchKind::ClearAndSettings {
mut other,
settings_ids,
allow_index_creation,
},
K::DocumentDeletion,
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation },
K::DocumentClear,
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
settings_ids,
other,
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation },
K::DocumentImport { method: ReplaceDocuments, .. },
) => {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method: ReplaceDocuments,
allow_index_creation,
import_ids,
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, mut import_ids },
K::DocumentImport { method: UpdateDocuments, .. },
) => {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method: UpdateDocuments,
allow_index_creation,
import_ids,
})
}
// But we can't batch a settings and a doc op with another doc op
// this MUST be AFTER the two previous branch
(
this @ BatchKind::SettingsAndDocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation, import_ids },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method,
allow_index_creation,
import_ids,
})
}
(
BatchKind::IndexCreation { .. }
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. },
_,
) => {
unreachable!()
}
}
}
}
/// Create a batch from an ordered list of tasks.
///
/// ## Preconditions
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
/// 3. The tasks must all be related to the same index
///
/// ## Return
/// `None` if the list of tasks is empty. Otherwise, a [`BatchKind`] that covers a prefix
/// of the given tasks, along with a boolean indicating whether the batch must create the index.
pub fn autobatch(
enqueued: Vec<(TaskId, KindWithContent)>,
index_already_exists: bool,
) -> Option<(BatchKind, bool)> {
let mut enqueued = enqueued.into_iter();
let (id, kind) = enqueued.next()?;
// `index_exist` keeps track of whether the index should exist at this point, after the tasks we have already batched.
let mut index_exist = index_already_exists;
let (mut acc, must_create_index) = match BatchKind::new(id, kind) {
(Continue(acc), create) => (acc, create),
(Break(acc), create) => return Some((acc, create)),
};
// if an index has been created in the previous step we can consider it as existing.
index_exist |= must_create_index;
for (id, kind) in enqueued {
acc = match acc.accumulate(id, kind.into(), index_exist) {
Continue(acc) => acc,
Break(acc) => return Some((acc, must_create_index)),
};
}
Some((acc, must_create_index))
}
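// Hedged walkthrough: with an existing index and
//   enqueued = [(0, DocumentImport { .. }), (1, Settings { .. }), (2, DocumentDeletion { .. })]
// task 0 seeds a `DocumentImport` batch, task 1 upgrades it to
// `SettingsAndDocumentImport { settings_ids: [1], import_ids: [0], .. }`, and
// task 2 hits a `Break`, so it is left for a later batch.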
#[cfg(test)]
mod tests {
use uuid::Uuid;
use super::*;
use crate::debug_snapshot;
fn autobatch_from(
index_already_exists: bool,
input: impl IntoIterator<Item = KindWithContent>,
) -> Option<(BatchKind, bool)> {
autobatch(
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind.into())).collect(),
index_already_exists,
)
}
fn doc_imp(method: IndexDocumentsMethod, allow_index_creation: bool) -> KindWithContent {
KindWithContent::DocumentImport {
index_uid: String::from("doggo"),
primary_key: None,
method,
content_file: Uuid::new_v4(),
documents_count: 0,
allow_index_creation,
}
}
fn doc_del() -> KindWithContent {
KindWithContent::DocumentDeletion {
index_uid: String::from("doggo"),
documents_ids: Vec::new(),
}
}
fn doc_clr() -> KindWithContent {
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
}
fn settings(allow_index_creation: bool) -> KindWithContent {
KindWithContent::Settings {
index_uid: String::from("doggo"),
new_settings: Default::default(),
is_deletion: false,
allow_index_creation,
}
}
fn idx_create() -> KindWithContent {
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
}
fn idx_update() -> KindWithContent {
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
}
fn idx_del() -> KindWithContent {
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
}
fn idx_swap() -> KindWithContent {
KindWithContent::IndexSwap { swaps: vec![(String::from("doggo"), String::from("catto"))] }
}
#[test]
fn autobatch_simple_operation_together() {
// we can autobatch one or multiple `ReplaceDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp( ReplaceDocuments, false ), doc_imp(ReplaceDocuments, false )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))");
// if it doesn't exist.
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
// we can autobatch one or multiple `UpdateDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))");
// if it doesn't exist.
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple Settings together
debug_snapshot!(autobatch_from(true, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
}
#[test]
fn simple_document_operation_dont_autobatch_with_other() {
// additions, updates and deletions can't be batched together
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_del(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_del(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
}
#[test]
fn document_addition_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
// multiple settings and doc addition
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_imp(UpdateDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 2] }, true))");
// We ensure this kind of batch doesn't batch with forbidden operations
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_imp(UpdateDocuments, true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_imp(ReplaceDocuments, true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))");
}
#[test]
fn clear_and_additions() {
// these two don't need to be batched together
debug_snapshot!(autobatch_from(true, [doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentClear { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentClear { ids: [0] }, false))");
// Basic use case
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
// This batch kind doesn't mix with other document addition
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
// But you can batch multiple clear together
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
}
#[test]
fn clear_and_additions_and_settings() {
// A clear doesn't need to autobatch with the settings that happen AFTER it: there are no documents left
debug_snapshot!(autobatch_from(true, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))");
}
#[test]
fn anything_and_index_deletion() {
// The `IndexDeletion` doesn't batch with anything that happens AFTER it.
debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(UpdateDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(ReplaceDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(UpdateDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(UpdateDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(ReplaceDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(UpdateDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
// The index deletion can accept almost any type of `BatchKind` and transform it into an `IndexDeletion`.
// First, the basic cases
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
// Then the mixed cases.
// The index already exists, so whatever rights the tasks have, it shouldn't change the result.
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// When the index doesn't exist yet, it's more complicated.
// Either the first task we encounter creates it, in which case we can build one big batch with everything.
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// The rights of the following tasks aren't really important.
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))");
// Or, the second case: the first task doesn't create the index, so we want to batch it only with tasks that can't create an index.
// That can be a second task that doesn't have the right to create an index, or anything else that can't create one: an index deletion, a document deletion, a document clear, etc.
// All these tasks are going to throw an `Index doesn't exist` error once the batch is processed.
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch with what
// follows because we first need to process the erroneous batch.
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))");
}
#[test]
fn allowed_and_disallowed_index_creation() {
// A `DocumentImport` that is allowed to create the index can't be mixed with one that is disallowed, except if the index already exists.
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))");
}
}
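
Taken together, the rules these tests pin down reduce to a small predicate on the `allow_index_creation` flags. Below is a hedged, self-contained restatement of that rule; `can_batch` and `Import` are illustrative stand-ins, not the autobatcher's real API:

#[derive(Clone, Copy)]
struct Import {
    allow_index_creation: bool,
}

/// May `next` join a batch started by `first`, given whether the index exists?
fn can_batch(index_exists: bool, first: Import, next: Import) -> bool {
    if index_exists {
        // The flag can never matter: the index is already there.
        true
    } else {
        // The first task decides. If it may create the index, anything can
        // follow; if it may not, a later task that could create the index
        // has to wait until the failing batch has been processed.
        first.allow_index_creation || !next.allow_index_creation
    }
}

fn main() {
    let can = Import { allow_index_creation: true };
    let cannot = Import { allow_index_creation: false };
    // Mirrors `autobatch_from(false, [doc_imp(_, false), doc_imp(_, true)])`:
    // with no index and a first task lacking creation rights, the second
    // import must start its own batch.
    assert!(!can_batch(false, cannot, can));
    assert!(can_batch(true, cannot, can));
    assert!(can_batch(false, cannot, cannot));
}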

index-scheduler/src/batch.rs (new file, 1087 lines)
File diff suppressed because it is too large

@@ -0,0 +1,66 @@
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::{heed, milli};
use thiserror::Error;
use crate::TaskId;
#[derive(Error, Debug)]
pub enum Error {
#[error("Index `{0}` not found")]
IndexNotFound(String),
#[error("Index `{0}` already exists")]
IndexAlreadyExists(String),
#[error("Corrupted task queue.")]
CorruptedTaskQueue,
#[error("Corrupted dump.")]
CorruptedDump,
#[error("Task `{0}` not found")]
TaskNotFound(TaskId),
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uid`, `indexUid`, `status`, `type`")]
TaskDeletionWithEmptyQuery,
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uid`, `indexUid`, `status`, `type`")]
TaskCancelationWithEmptyQuery,
// maybe the next two errors are going to move to the frontend
#[error("`{0}` is not a status. Available status are")]
InvalidStatus(String),
#[error("`{0}` is not a type. Available types are")]
InvalidKind(String),
#[error(transparent)]
Dump(#[from] dump::Error),
#[error(transparent)]
Heed(#[from] heed::Error),
#[error(transparent)]
Milli(#[from] milli::Error),
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
IoError(#[from] std::io::Error),
#[error(transparent)]
Anyhow(#[from] anyhow::Error),
}
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskDeletionWithEmptyQuery => Code::TaskDeletionWithEmptyQuery,
Error::TaskCancelationWithEmptyQuery => Code::TaskCancelationWithEmptyQuery,
Error::InvalidStatus(_) => Code::BadRequest,
Error::InvalidKind(_) => Code::BadRequest,
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
// TODO: TAMO: are all these errors really internal?
Error::Heed(_) => Code::Internal,
Error::FileStore(_) => Code::Internal,
Error::IoError(_) => Code::Internal,
Error::Anyhow(_) => Code::Internal,
Error::CorruptedTaskQueue => Code::Internal,
Error::CorruptedDump => Code::Internal,
}
}
}
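
The split above is the pattern worth remembering: leaf variants own a message and a `Code`, while `transparent` variants forward both to the inner error. Here is a dependency-free sketch of the same shape; `Code`, `Error`, and `error_code` below are simplified stand-ins for illustration, not the meilisearch-types items:

use std::fmt;

#[derive(Debug, Clone, Copy, PartialEq)]
enum Code {
    IndexNotFound,
    BadRequest,
    Internal,
}

#[derive(Debug)]
enum Error {
    IndexNotFound(String),
    InvalidStatus(String),
    Io(std::io::Error),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::IndexNotFound(name) => write!(f, "Index `{name}` not found"),
            Error::InvalidStatus(s) => write!(f, "`{s}` is not a status"),
            // Behaves like thiserror's #[error(transparent)]: the wrapper
            // displays exactly as the wrapped error does.
            Error::Io(e) => e.fmt(f),
        }
    }
}

impl Error {
    fn error_code(&self) -> Code {
        match self {
            Error::IndexNotFound(_) => Code::IndexNotFound,
            Error::InvalidStatus(_) => Code::BadRequest,
            // I/O failures are treated as internal, as in the file above.
            Error::Io(_) => Code::Internal,
        }
    }
}

fn main() {
    let err = Error::IndexNotFound("doggos".into());
    assert_eq!(err.error_code(), Code::IndexNotFound);
    println!("{err}");
}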

@@ -0,0 +1,213 @@
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeBincode, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
use uuid::Uuid;
use self::IndexStatus::{Available, BeingDeleted};
use crate::{Error, Result};
const INDEX_MAPPING: &str = "index-mapping";
/// Structure managing meilisearch's indexes.
///
/// It is responsible for:
/// 1. Creating new indexes
/// 2. Opening indexes and storing references to these opened indexes
/// 3. Accessing indexes through their uuid
/// 4. Mapping a user-defined name to each index uuid.
#[derive(Clone)]
pub struct IndexMapper {
/// Keep track of the opened indexes. Used mainly by the index resolver.
index_map: Arc<RwLock<HashMap<Uuid, IndexStatus>>>,
// TODO create a UUID Codec that uses the 16 bytes representation
/// Map an index name to an index uuid currently available on disk.
index_mapping: Database<Str, SerdeBincode<Uuid>>,
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
index_size: usize,
pub indexer_config: Arc<IndexerConfig>,
}
/// Whether the index is available for use or is forbidden from being inserted back into the index map
#[derive(Clone)]
pub enum IndexStatus {
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
/// You can use the index without worrying about anything.
Available(Index),
}
impl IndexMapper {
pub fn new(
env: &Env,
base_path: PathBuf,
index_size: usize,
indexer_config: IndexerConfig,
) -> Result<Self> {
Ok(Self {
index_map: Arc::default(),
index_mapping: env.create_database(Some(INDEX_MAPPING))?,
base_path,
index_size,
indexer_config: Arc::new(indexer_config),
})
}
/// Get or create the index.
pub fn create_index(&self, wtxn: &mut RwTxn, name: &str) -> Result<Index> {
match self.index(wtxn, name) {
Ok(index) => Ok(index),
Err(Error::IndexNotFound(_)) => {
let uuid = Uuid::new_v4();
self.index_mapping.put(wtxn, name, &uuid)?;
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let mut options = EnvOpenOptions::new();
options.map_size(self.index_size);
options.max_readers(1024);
Ok(Index::new(options, &index_path)?)
}
error => error,
}
}
/// Removes the index from the mapping table and the in-memory index map
/// but keeps the associated tasks.
pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> {
let uuid = self
.index_mapping
.get(&wtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// Once we retrieved the UUID of the index we remove it from the mapping table.
assert!(self.index_mapping.delete(&mut wtxn, name)?);
wtxn.commit()?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
let closing_event = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => Some(index.prepare_for_closing()),
_ => None,
};
drop(lock);
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_name = name.to_string();
thread::spawn(move || {
// We first wait to be sure that the previously opened index is effectively closed.
// This can take a lot of time, which is why we do it in a separate thread.
if let Some(closing_event) = closing_event {
closing_event.wait();
}
// Then we remove the content from disk.
if let Err(e) = fs::remove_dir_all(&index_path) {
error!(
"An error happened when deleting the index {} ({}): {}",
index_name, uuid, e
);
}
// Finally we remove the entry from the index map.
assert!(matches!(index_map.write().unwrap().remove(&uuid), Some(BeingDeleted)));
});
Ok(())
}
pub fn exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// we clone here to drop the lock before entering the match
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = match index {
Some(Available(index)) => index,
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (e.g. if two searches happen
// at the same time), thus before opening it we check a second time
// whether it's already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let mut options = EnvOpenOptions::new();
options.map_size(self.index_size);
let index = Index::new(options, &index_path)?;
entry.insert(Available(index.clone()));
index
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => index.clone(),
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
}
};
Ok(index)
}
/// Return all indexes, may open them if they weren't already opened.
pub fn indexes(&self, rtxn: &RoTxn) -> Result<Vec<(String, Index)>> {
self.index_mapping
.iter(rtxn)?
.map(|ret| {
ret.map_err(Error::from).and_then(|(name, _)| {
self.index(rtxn, name).map(|index| (name.to_string(), index))
})
})
.collect()
}
/// Swap two index names.
pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> {
let lhs_uuid = self
.index_mapping
.get(wtxn, lhs)?
.ok_or_else(|| Error::IndexNotFound(lhs.to_string()))?;
let rhs_uuid = self
.index_mapping
.get(wtxn, rhs)?
.ok_or_else(|| Error::IndexNotFound(rhs.to_string()))?;
self.index_mapping.put(wtxn, lhs, &rhs_uuid)?;
self.index_mapping.put(wtxn, rhs, &lhs_uuid)?;
Ok(())
}
pub fn index_exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
pub fn indexer_config(&self) -> &IndexerConfig {
&self.indexer_config
}
}
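
The subtle part of `index` above is the read-then-write double check, which `delete_index` pairs with a close-then-remove in a background thread. The following is a minimal, hedged sketch of just the double-check idiom; a plain `String` stands in for an open `Index`, and nothing here is the crate's real API:

use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

type Map = Arc<RwLock<HashMap<u32, String>>>;

fn get_or_open(map: &Map, uuid: u32) -> String {
    // Fast path: clone under the read lock so it is released immediately.
    if let Some(found) = map.read().unwrap().get(&uuid).cloned() {
        return found;
    }
    // Slow path: take the write lock and check again via `entry`, because a
    // concurrent caller may have inserted the value between the two locks.
    match map.write().unwrap().entry(uuid) {
        Entry::Occupied(e) => e.get().clone(),
        Entry::Vacant(e) => e.insert(format!("index-{uuid}")).clone(),
    }
}

fn main() {
    let map: Map = Arc::default();
    assert_eq!(get_or_open(&map, 7), "index-7");
    // The second call takes the fast path.
    assert_eq!(get_or_open(&map, 7), "index-7");
}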

index-scheduler/src/lib.rs (new file, 1291 lines)
File diff suppressed because it is too large

@@ -0,0 +1,226 @@
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Task};
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
use crate::{IndexScheduler, Kind, Status, BEI128};
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
autobatching_enabled,
must_stop_processing: _,
processing_tasks,
file_store,
env,
all_tasks,
status,
kind,
index_tasks,
enqueued_at,
started_at,
finished_at,
index_mapper,
wake_up: _,
dumps_path: _,
test_breakpoint_sdr: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();
let mut snap = String::new();
let processing_tasks = processing_tasks.read().unwrap().processing.clone();
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
snap.push_str("### Processing Tasks:\n");
snap.push_str(&snapshot_bitmap(&processing_tasks));
snap.push_str("\n----------------------------------------------------------------------\n");
snap.push_str("### All Tasks:\n");
snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Status:\n");
snap.push_str(&snapshot_status(&rtxn, *status));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Kind:\n");
snap.push_str(&snapshot_kind(&rtxn, *kind));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Index Tasks:\n");
snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Index Mapper:\n");
snap.push_str(&snapshot_index_mapper(&rtxn, index_mapper));
snap.push_str("\n----------------------------------------------------------------------\n");
snap.push_str("### Enqueued At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Started At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *started_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Finished At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *finished_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### File Store:\n");
snap.push_str(&snapshot_file_store(file_store));
snap.push_str("\n----------------------------------------------------------------------\n");
snap
}
fn snapshot_file_store(file_store: &file_store::FileStore) -> String {
let mut snap = String::new();
for uuid in file_store.__all_uuids() {
snap.push_str(&format!("{uuid}\n"));
}
snap
}
fn snapshot_bitmap(r: &RoaringBitmap) -> String {
let mut snap = String::new();
snap.push('[');
for x in r {
snap.push_str(&format!("{x},"));
}
snap.push(']');
snap
}
fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
let mut snap = String::new();
let mut iter = db.iter(rtxn).unwrap();
while let Some(next) = iter.next() {
let (task_id, task) = next.unwrap();
snap.push_str(&format!("{task_id} {}\n", snapshot_task(&task)));
}
snap
}
fn snapshot_date_db(
rtxn: &RoTxn,
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
) -> String {
let mut snap = String::new();
let mut iter = db.iter(rtxn).unwrap();
while let Some(next) = iter.next() {
let (_timestamp, task_ids) = next.unwrap();
snap.push_str(&format!("[timestamp] {}\n", snapshot_bitmap(&task_ids)));
}
snap
}
fn snapshot_task(task: &Task) -> String {
let mut snap = String::new();
let Task {
uid,
enqueued_at: _,
started_at: _,
finished_at: _,
error,
canceled_by: _,
details,
status,
kind,
} = task;
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("status: {status}, "));
if let Some(error) = error {
snap.push_str(&format!("error: {error:?}, "));
}
if let Some(details) = details {
snap.push_str(&format!("details: {}, ", &snaphsot_details(details)));
}
snap.push_str(&format!("kind: {kind:?}"));
snap.push('}');
snap
}
fn snapshot_details(d: &Details) -> String {
match d {
Details::DocumentAddition {
received_documents,
indexed_documents,
} => {
format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}")
}
Details::Settings { settings } => {
format!("{{ settings: {settings:?} }}")
}
Details::IndexInfo { primary_key } => {
format!("{{ primary_key: {primary_key:?} }}")
}
Details::DocumentDeletion {
received_document_ids,
deleted_documents,
} => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"),
Details::ClearAll { deleted_documents } => {
format!("{{ deleted_documents: {deleted_documents:?} }}")
},
Details::TaskCancelation {
matched_tasks,
canceled_tasks,
original_query,
} => {
format!("{{ matched_tasks: {matched_tasks:?}, canceled_tasks: {canceled_tasks:?}, original_query: {original_query:?} }}")
}
Details::TaskDeletion {
matched_tasks,
deleted_tasks,
original_query,
} => {
format!("{{ matched_tasks: {matched_tasks:?}, deleted_tasks: {deleted_tasks:?}, original_query: {original_query:?} }}")
},
Details::Dump { dump_uid } => {
format!("{{ dump_uid: {dump_uid:?} }}")
},
Details::IndexSwap { swaps } => {
format!("{{ indexes: {swaps:?} }}")
}
}
}
fn snapshot_status(rtxn: &RoTxn, db: Database<SerdeBincode<Status>, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let mut iter = db.iter(rtxn).unwrap();
while let Some(next) = iter.next() {
let (status, task_ids) = next.unwrap();
snap.push_str(&format!("{status} {}\n", snapshot_bitmap(&task_ids)));
}
snap
}
fn snapshot_kind(rtxn: &RoTxn, db: Database<SerdeBincode<Kind>, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let mut iter = db.iter(rtxn).unwrap();
while let Some(next) = iter.next() {
let (kind, task_ids) = next.unwrap();
let kind = serde_json::to_string(&kind).unwrap();
snap.push_str(&format!("{kind} {}\n", snapshot_bitmap(&task_ids)));
}
snap
}
fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let mut iter = db.iter(rtxn).unwrap();
while let Some(next) = iter.next() {
let (index, task_ids) = next.unwrap();
snap.push_str(&format!("{index} {}\n", snapshot_bitmap(&task_ids)));
}
snap
}
fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
let names = mapper.indexes(rtxn).unwrap().into_iter().map(|(n, _)| n).collect::<Vec<_>>();
format!("{names:?}")
}
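
For orientation, the .snap files that follow are what insta pins this module's output against. A standalone sketch of that workflow, with a hand-built string instead of a real scheduler (assumes `insta` as a dev-dependency):

#[cfg(test)]
mod tests {
    // Stand-in for snapshot_index_scheduler: renders some state as text.
    fn render_state() -> String {
        let mut snap = String::new();
        snap.push_str("### Processing Tasks:\n[]\n");
        snap.push_str("### Status:\nenqueued [0,]\n");
        snap
    }

    #[test]
    fn state_is_stable() {
        // The first run records a .snap file like the ones below; later
        // runs fail whenever the rendered text drifts from it.
        insta::assert_snapshot!(render_state());
    }
}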

@@ -0,0 +1,34 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

@@ -0,0 +1,34 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

@@ -0,0 +1,36 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentImport { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,]
"indexCreation" [0,]
"indexDeletion" [2,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

@@ -0,0 +1,44 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentImport { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,]
"indexCreation" [0,]
"indexDeletion" [2,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [2,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [2,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,37 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,]
"indexDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

@@ -0,0 +1,39 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentImport { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,]
"indexDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,39 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_b", primary_key: Some("id") }}
2 {uid: 2, status: enqueued, kind: IndexDeletion { index_uid: "index_a" }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,]
"indexDeletion" [2,]
----------------------------------------------------------------------
### Index Tasks:
index_a [0,2,]
index_b [1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,41 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 50, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 50, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 5000, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,3,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,2,3,]
"indexCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,1,2,]
doggo [3,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,56 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
4 {uid: 4, status: succeeded, details: { indexes: [("a", "b"), ("c", "d")] }, kind: IndexSwap { swaps: [("a", "b"), ("c", "d")] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,3,4,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,2,3,]
"indexSwap" [4,]
----------------------------------------------------------------------
### Index Tasks:
a [1,4,]
b [0,4,]
c [3,4,]
d [2,4,]
----------------------------------------------------------------------
### Index Mapper:
["a", "b", "c", "d"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,51 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,3,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,2,3,]
----------------------------------------------------------------------
### Index Tasks:
a [0,]
b [1,]
c [2,]
d [3,]
----------------------------------------------------------------------
### Index Mapper:
["a", "b", "c", "d"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,60 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
4 {uid: 4, status: succeeded, details: { indexes: [("a", "b"), ("c", "d")] }, kind: IndexSwap { swaps: [("c", "b"), ("a", "d")] }}
5 {uid: 5, status: succeeded, details: { indexes: [("a", "c")] }, kind: IndexSwap { swaps: [("a", "c")] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,3,4,5,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,2,3,]
"indexSwap" [4,5,]
----------------------------------------------------------------------
### Index Tasks:
a [3,4,5,]
b [0,4,]
c [1,4,5,]
d [2,4,]
----------------------------------------------------------------------
### Index Mapper:
["a", "b", "c", "d"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

@@ -0,0 +1,38 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,1,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,]
doggo [1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

@@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,1,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,]
doggo [1,]
----------------------------------------------------------------------
### Index Mapper:
["catto"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

@@ -0,0 +1,44 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [2,3,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,]
"taskDeletion" [2,3,]
----------------------------------------------------------------------
### Index Tasks:
doggo [1,]
----------------------------------------------------------------------
### Index Mapper:
["catto"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

@@ -0,0 +1,38 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,1,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,]
doggo [1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

@@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentImport" [0,1,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,]
doggo [1,]
----------------------------------------------------------------------
### Index Mapper:
["catto"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

@@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [2,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,]
"taskDeletion" [2,]
----------------------------------------------------------------------
### Index Tasks:
doggo [1,]
----------------------------------------------------------------------
### Index Mapper:
["catto"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [2,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [2,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

@@ -0,0 +1,39 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 5000, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,2,]
"indexCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,1,]
doggo [2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------


@ -0,0 +1,45 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 5000, allow_index_creation: true }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,]
succeeded [3,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,2,]
"indexCreation" [0,]
"taskDeletion" [3,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,1,]
doggo [2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------


@ -0,0 +1,42 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 5000, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,3,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,2,]
"indexCreation" [0,]
"taskDeletion" [3,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,1,]
doggo [2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------


@ -0,0 +1,42 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[3,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentImport { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 5000, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,3,]
----------------------------------------------------------------------
### Kind:
"documentImport" [1,2,]
"indexCreation" [0,]
"taskDeletion" [3,]
----------------------------------------------------------------------
### Index Tasks:
catto [0,1,]
doggo [2,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------


@ -0,0 +1,267 @@
//! Utility functions on the DBs. Mainly getter and setters.
use std::ops::Bound;
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};
impl IndexScheduler {
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
    enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
}
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
}
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
Ok(self.last_task_id(rtxn)?.unwrap_or_default())
}
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
}
/// Convert an iterator of task ids into a `Vec` of tasks. The tasks MUST exist
/// or a `CorruptedTaskQueue` error is returned.
pub(crate) fn get_existing_tasks(
&self,
rtxn: &RoTxn,
tasks: impl IntoIterator<Item = TaskId>,
) -> Result<Vec<Task>> {
tasks
.into_iter()
.map(|task_id| {
self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
})
.collect::<Result<_>>()
}
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
debug_assert_eq!(old_task.uid, task.uid);
if old_task == *task {
return Ok(());
}
if old_task.status != task.status {
self.update_status(wtxn, old_task.status, |bitmap| {
bitmap.remove(task.uid);
})?;
self.update_status(wtxn, task.status, |bitmap| {
bitmap.insert(task.uid);
})?;
}
if old_task.kind.as_kind() != task.kind.as_kind() {
self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| {
bitmap.remove(task.uid);
})?;
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
bitmap.insert(task.uid);
})?;
}
assert_eq!(
old_task.enqueued_at, task.enqueued_at,
"Cannot update a task's enqueued_at time"
);
if old_task.started_at != task.started_at {
assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time");
if let Some(started_at) = task.started_at {
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
}
}
if old_task.finished_at != task.finished_at {
assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time");
if let Some(finished_at) = task.finished_at {
insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
}
}
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
Ok(())
}
/// Returns the whole set of tasks that belong to this index.
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
}
pub(crate) fn update_index(
&self,
wtxn: &mut RwTxn,
index: &str,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.index_tasks(wtxn, index)?;
f(&mut tasks);
if tasks.is_empty() {
self.index_tasks.delete(wtxn, index)?;
} else {
self.index_tasks.put(wtxn, index, &tasks)?;
}
Ok(())
}
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
}
pub(crate) fn put_status(
&self,
wtxn: &mut RwTxn,
status: Status,
bitmap: &RoaringBitmap,
) -> Result<()> {
Ok(self.status.put(wtxn, &status, bitmap)?)
}
pub(crate) fn update_status(
&self,
wtxn: &mut RwTxn,
status: Status,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.get_status(wtxn, status)?;
f(&mut tasks);
self.put_status(wtxn, status, &tasks)?;
Ok(())
}
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
}
pub(crate) fn put_kind(
&self,
wtxn: &mut RwTxn,
kind: Kind,
bitmap: &RoaringBitmap,
) -> Result<()> {
Ok(self.kind.put(wtxn, &kind, bitmap)?)
}
pub(crate) fn update_kind(
&self,
wtxn: &mut RwTxn,
kind: Kind,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.get_kind(wtxn, kind)?;
f(&mut tasks);
self.put_kind(wtxn, kind, &tasks)?;
Ok(())
}
}
pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = BEI128::new(time.unix_timestamp_nanos());
    let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
    task_ids.insert(task_id);
    // Write back the merged set rather than a fresh bitmap so that other
    // tasks sharing this timestamp are preserved.
    database.put(wtxn, &timestamp, &task_ids)?;
Ok(())
}
pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = BEI128::new(time.unix_timestamp_nanos());
    if let Some(mut existing) = database.get(wtxn, &timestamp)? {
        existing.remove(task_id);
        if existing.is_empty() {
            database.delete(wtxn, &timestamp)?;
        } else {
            // Write back the set with only this task removed; putting a fresh
            // bitmap here would drop the other ids sharing this timestamp.
            database.put(wtxn, &timestamp, &existing)?;
        }
    }
}
Ok(())
}
pub(crate) fn keep_tasks_within_datetimes(
rtxn: &RoTxn,
tasks: &mut RoaringBitmap,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>,
) -> Result<()> {
let (start, end) = match (&after, &before) {
(None, None) => return Ok(()),
(None, Some(before)) => (Bound::Unbounded, Bound::Excluded(*before)),
(Some(after), None) => (Bound::Excluded(*after), Bound::Unbounded),
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
};
let mut collected_task_ids = RoaringBitmap::new();
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
    let iter = database.range(rtxn, &(start, end))?;
for r in iter {
let (_timestamp, task_ids) = r?;
collected_task_ids |= task_ids;
}
*tasks &= collected_task_ids;
Ok(())
}
// TODO: remove when Bound::map ( https://github.com/rust-lang/rust/issues/86026 ) is available on stable
fn map_bound<T, U>(bound: Bound<T>, map: impl FnOnce(T) -> U) -> Bound<U> {
match bound {
Bound::Included(x) => Bound::Included(map(x)),
Bound::Excluded(x) => Bound::Excluded(map(x)),
Bound::Unbounded => Bound::Unbounded,
}
}
pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
use KindWithContent as K;
let mut index_uids = vec![];
match &mut task.kind {
K::DocumentImport { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
K::DocumentClear { index_uid } => index_uids.push(index_uid),
K::Settings { index_uid, .. } => index_uids.push(index_uid),
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
K::IndexCreation { index_uid, .. } => index_uids.push(index_uid),
K::IndexUpdate { index_uid, .. } => index_uids.push(index_uid),
K::IndexSwap { swaps } => {
for (lhs, rhs) in swaps.iter_mut() {
if lhs == &swap.0 || lhs == &swap.1 {
index_uids.push(lhs);
}
if rhs == &swap.0 || rhs == &swap.1 {
index_uids.push(rhs);
}
}
}
K::TaskCancelation { .. } | K::TaskDeletion { .. } | K::DumpExport { .. } | K::Snapshot => {
()
}
};
for index_uid in index_uids {
if index_uid == &swap.0 {
*index_uid = swap.1.to_owned();
} else if index_uid == &swap.1 {
*index_uid = swap.0.to_owned();
}
}
}
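
To make the datetime-index semantics concrete: `insert_task_datetime` and `remove_task_datetime` maintain an inverted index from a timestamp (big-endian `i128` nanoseconds) to the set of task ids carrying that timestamp, and `keep_tasks_within_datetimes` intersects a candidate set with every id found in the requested range, where both the `after` and `before` bounds are exclusive. Below is a minimal, std-only sketch of that range behaviour; the `BTreeMap` stands in for the heed database, and the timestamps and ids are made up:

```rust
use std::collections::BTreeMap;
use std::ops::Bound;

fn main() {
    // Stand-in for the heed database: timestamp (ns) -> task ids.
    let mut index: BTreeMap<i128, Vec<u32>> = BTreeMap::new();
    index.insert(100, vec![0]);
    index.insert(200, vec![1, 2]);
    index.insert(300, vec![3]);

    // after = 100, before = 300: both bounds are excluded, exactly like the
    // `Bound::Excluded` pair built in `keep_tasks_within_datetimes`.
    let range = (Bound::Excluded(100), Bound::Excluded(300));
    let collected: Vec<u32> = index.range(range).flat_map(|(_, ids)| ids.clone()).collect();
    assert_eq!(collected, vec![1, 2]);
}
```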

meili-snap/Cargo.toml

@ -0,0 +1,9 @@
[package]
name = "meili-snap"
version = "0.1.0"
edition = "2021"
[dependencies]
insta = { version = "1.19.1", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.15"

meili-snap/src/lib.rs

@ -0,0 +1,223 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
pub use insta;
use once_cell::sync::Lazy;
static SNAPSHOT_NAMES: Lazy<Mutex<HashMap<PathBuf, usize>>> = Lazy::new(Mutex::default);
/// Return the md5 hash of the given string
pub fn hash_snapshot(snap: &str) -> String {
let hash = md5::compute(snap.as_bytes());
    format!("{hash:x}")
}
#[track_caller]
pub fn default_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, Cow<'_, str>) {
let mut settings = insta::Settings::clone_current();
settings.set_prepend_module_to_snapshot(false);
let path = Path::new(std::panic::Location::caller().file());
let filename = path.file_name().unwrap().to_str().unwrap();
settings.set_omit_expression(true);
let test_name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_owned();
let path = Path::new("snapshots").join(filename).join(&test_name).to_owned();
settings.set_snapshot_path(path.clone());
let snap_name = if let Some(name) = name {
Cow::Borrowed(name)
} else {
let mut snapshot_names = SNAPSHOT_NAMES.lock().unwrap();
let counter = snapshot_names.entry(path).or_default();
*counter += 1;
Cow::Owned(format!("{counter}"))
};
(settings, snap_name)
}
/**
Create a hashed snapshot test.
## Arguments:
1. The content of the snapshot. It is an expression whose result implements the `fmt::Display` trait.
2. `name: <name>`: the identifier for the snapshot test (optional)
3. `@""` to write the hash of the snapshot inline
## Behaviour
The content of the snapshot will be saved both in full and as a hash. The full snapshot will
be saved with the name `<name>.full.snap` but will not be saved to the git repository. The hashed
snapshot will be saved inline. If `<name>` is not specified, then a global counter is used to give an
identifier to the snapshot.
Running `cargo test` will check whether the old snapshot is identical to the
current one. If they are equal, the test passes. Otherwise, the test fails.
Use the command line `cargo insta` to approve or reject new snapshots.
## Example
```ignore
// The full snapshot is saved under 1.full.snap and contains `10`
snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820");
// The full snapshot is saved under snap_name.full.snap and contains `hello world`
snapshot_hash!("hello world", name: "snap_name", @"5f93f983524def3dca464469d2cf9f3e");
```
*/
#[macro_export]
macro_rules! snapshot_hash {
($value:expr, @$inline:literal) => {
let (settings, snap_name) = $crate::default_snapshot_settings_for_test(None);
settings.bind(|| {
let snap = format!("{}", $value);
let hash_snap = $crate::hash_snapshot(&snap);
meili_snap::insta::assert_snapshot!(hash_snap, @$inline);
meili_snap::insta::assert_snapshot!(format!("{}.full", snap_name), snap);
});
};
($value:expr, name: $name:expr, @$inline:literal) => {
let snap_name = format!("{}", $name);
let (settings, snap_name) = $crate::default_snapshot_settings_for_test(Some(&snap_name));
settings.bind(|| {
let snap = format!("{}", $value);
let hash_snap = $crate::hash_snapshot(&snap);
meili_snap::insta::assert_snapshot!(hash_snap, @$inline);
meili_snap::insta::assert_snapshot!(format!("{}.full", snap_name), snap);
});
};
}
/**
Create a snapshot test.
## Arguments:
1. The content of the snapshot. It is an expression whose result implements the `fmt::Display` trait.
2. Optionally one of:
1. `name: <name>`: the identifier for the snapshot test
2. `@""` to write the hash of the snapshot inline
## Behaviour
The content of the snapshot will be saved in full with the given name
or using a global counter to give it an identifier.
Running `cargo test` will check whether the old snapshot is identical to the
current one. If they are equal, the test passes. Otherwise, the test fails.
Use the command line `cargo insta` to approve or reject new snapshots.
## Example
```ignore
// The full snapshot is saved under 1.snap and contains `10`
snapshot!(10);
// The full snapshot is saved under snap_name.snap and contains `10`
snapshot!("hello world", name: "snap_name");
// The full snapshot is saved inline
snapshot!(format!("{:?}", vec![1, 2]), @"[1, 2]");
```
*/
#[macro_export]
macro_rules! snapshot {
($value:expr, name: $name:expr) => {
let snap_name = format!("{}", $name);
let (settings, snap_name) = $crate::default_snapshot_settings_for_test(Some(&snap_name));
settings.bind(|| {
let snap = format!("{}", $value);
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
});
};
($value:expr, @$inline:literal) => {
// Note that the name given as argument does not matter since it is only an inline snapshot
// We don't pass None because otherwise `meili-snap` will try to assign it a unique identifier
let (settings, _) = $crate::default_snapshot_settings_for_test(Some("_dummy_argument"));
settings.bind(|| {
let snap = format!("{}", $value);
meili_snap::insta::assert_snapshot!(snap, @$inline);
});
};
($value:expr) => {
let (settings, snap_name) = $crate::default_snapshot_settings_for_test(None);
settings.bind(|| {
let snap = format!("{}", $value);
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
});
};
}
#[cfg(test)]
mod tests {
use crate as meili_snap;
#[test]
fn snap() {
snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820");
snapshot_hash!(20, @"98f13708210194c475687be6106a3b84");
snapshot_hash!(30, @"34173cb38f07f89ddbebc2ac9128303f");
snapshot!(40, @"40");
snapshot!(50, @"50");
snapshot!(60, @"60");
snapshot!(70);
snapshot!(80);
snapshot!(90);
snapshot!(100, name: "snap_name_1");
snapshot_hash!(110, name: "snap_name_2", @"5f93f983524def3dca464469d2cf9f3e");
snapshot!(120);
snapshot!(format!("{:?}", vec![1, 2]), @"[1, 2]");
}
// Currently the name of this module is not part of the snapshot path
// It does not bother me, but maybe it is worth changing later on.
mod snap {
use crate as meili_snap;
#[test]
fn some_test() {
snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820");
snapshot_hash!(20, @"98f13708210194c475687be6106a3b84");
snapshot_hash!(30, @"34173cb38f07f89ddbebc2ac9128303f");
snapshot!(40, @"40");
snapshot!(50, @"50");
snapshot!(60, @"60");
snapshot!(70);
snapshot!(80);
snapshot!(90);
snapshot!(100, name: "snap_name_1");
snapshot_hash!(110, name: "snap_name_2", @"5f93f983524def3dca464469d2cf9f3e");
snapshot!(120);
snapshot_hash!("", name: "", @"d41d8cd98f00b204e9800998ecf8427e");
}
}
}
/// Create a string from the value by serializing it as Json, optionally
/// redacting some parts of it.
///
/// The second argument to the macro can be an object expression for redaction.
/// It's in the form { selector => replacement }. For more information about redactions
/// refer to the redactions feature in the `insta` guide.
#[macro_export]
macro_rules! json_string {
($value:expr, {$($k:expr => $v:expr),*$(,)?}) => {
{
let (_, snap) = meili_snap::insta::_prepare_snapshot_for_redaction!($value, {$($k => $v),*}, Json, File);
snap
}
};
($value:expr) => {{
let value = meili_snap::insta::_macro_support::serialize_value(
&$value,
meili_snap::insta::_macro_support::SerializationFormat::Json,
meili_snap::insta::_macro_support::SnapshotLocation::File
);
value
}};
}
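
As a hedged usage sketch of `json_string!` together with the macros above (the task payload, its field names, and the snapshot name are made up for illustration, and `serde_json` is assumed to be available next to `meili-snap`):

```rust
use meili_snap::{json_string, snapshot};

#[test]
fn redacted_task_snapshot() {
    // A made-up JSON payload with one volatile field.
    let task = serde_json::json!({ "uid": 0, "enqueuedAt": "2022-10-20T16:39:00Z" });
    // Redact the timestamp so the stored snapshot stays deterministic.
    let redacted = json_string!(task, { ".enqueuedAt" => "[timestamp]" });
    snapshot!(redacted, name: "redacted_task");
}
```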


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
70


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
80


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
90


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
120


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
100


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
70


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
80


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
90


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
120


@ -0,0 +1,4 @@
---
source: meili-snap/src/lib.rs
---
100


@ -4,11 +4,11 @@ version = "0.29.1"
edition = "2021"
[dependencies]
enum-iterator = "1.1.2"
enum-iterator = "1.1.3"
hmac = "0.12.1"
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
rand = "0.8.5"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.145", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
sha2 = "0.10.6"


@ -1,135 +0,0 @@
use serde::{Deserialize, Serialize};
use std::hash::Hash;
#[derive(
enum_iterator::Sequence, Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash,
)]
#[repr(u8)]
pub enum Action {
#[serde(rename = "*")]
All = 0,
#[serde(rename = "search")]
Search,
#[serde(rename = "documents.*")]
DocumentsAll,
#[serde(rename = "documents.add")]
DocumentsAdd,
#[serde(rename = "documents.get")]
DocumentsGet,
#[serde(rename = "documents.delete")]
DocumentsDelete,
#[serde(rename = "indexes.*")]
IndexesAll,
#[serde(rename = "indexes.create")]
IndexesAdd,
#[serde(rename = "indexes.get")]
IndexesGet,
#[serde(rename = "indexes.update")]
IndexesUpdate,
#[serde(rename = "indexes.delete")]
IndexesDelete,
#[serde(rename = "tasks.*")]
TasksAll,
#[serde(rename = "tasks.get")]
TasksGet,
#[serde(rename = "settings.*")]
SettingsAll,
#[serde(rename = "settings.get")]
SettingsGet,
#[serde(rename = "settings.update")]
SettingsUpdate,
#[serde(rename = "stats.*")]
StatsAll,
#[serde(rename = "stats.get")]
StatsGet,
#[serde(rename = "metrics.*")]
MetricsAll,
#[serde(rename = "metrics.get")]
MetricsGet,
#[serde(rename = "dumps.*")]
DumpsAll,
#[serde(rename = "dumps.create")]
DumpsCreate,
#[serde(rename = "version")]
Version,
#[serde(rename = "keys.create")]
KeysAdd,
#[serde(rename = "keys.get")]
KeysGet,
#[serde(rename = "keys.update")]
KeysUpdate,
#[serde(rename = "keys.delete")]
KeysDelete,
}
impl Action {
pub const fn from_repr(repr: u8) -> Option<Self> {
use actions::*;
match repr {
ALL => Some(Self::All),
SEARCH => Some(Self::Search),
DOCUMENTS_ALL => Some(Self::DocumentsAll),
DOCUMENTS_ADD => Some(Self::DocumentsAdd),
DOCUMENTS_GET => Some(Self::DocumentsGet),
DOCUMENTS_DELETE => Some(Self::DocumentsDelete),
INDEXES_ALL => Some(Self::IndexesAll),
INDEXES_CREATE => Some(Self::IndexesAdd),
INDEXES_GET => Some(Self::IndexesGet),
INDEXES_UPDATE => Some(Self::IndexesUpdate),
INDEXES_DELETE => Some(Self::IndexesDelete),
TASKS_ALL => Some(Self::TasksAll),
TASKS_GET => Some(Self::TasksGet),
SETTINGS_ALL => Some(Self::SettingsAll),
SETTINGS_GET => Some(Self::SettingsGet),
SETTINGS_UPDATE => Some(Self::SettingsUpdate),
STATS_ALL => Some(Self::StatsAll),
STATS_GET => Some(Self::StatsGet),
METRICS_ALL => Some(Self::MetricsAll),
METRICS_GET => Some(Self::MetricsGet),
DUMPS_ALL => Some(Self::DumpsAll),
DUMPS_CREATE => Some(Self::DumpsCreate),
VERSION => Some(Self::Version),
KEYS_CREATE => Some(Self::KeysAdd),
KEYS_GET => Some(Self::KeysGet),
KEYS_UPDATE => Some(Self::KeysUpdate),
KEYS_DELETE => Some(Self::KeysDelete),
_otherwise => None,
}
}
pub const fn repr(&self) -> u8 {
*self as u8
}
}
pub mod actions {
use super::Action::*;
pub(crate) const ALL: u8 = All.repr();
pub const SEARCH: u8 = Search.repr();
pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr();
pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
pub const DOCUMENTS_GET: u8 = DocumentsGet.repr();
pub const DOCUMENTS_DELETE: u8 = DocumentsDelete.repr();
pub const INDEXES_ALL: u8 = IndexesAll.repr();
pub const INDEXES_CREATE: u8 = IndexesAdd.repr();
pub const INDEXES_GET: u8 = IndexesGet.repr();
pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr();
pub const INDEXES_DELETE: u8 = IndexesDelete.repr();
pub const TASKS_ALL: u8 = TasksAll.repr();
pub const TASKS_GET: u8 = TasksGet.repr();
pub const SETTINGS_ALL: u8 = SettingsAll.repr();
pub const SETTINGS_GET: u8 = SettingsGet.repr();
pub const SETTINGS_UPDATE: u8 = SettingsUpdate.repr();
pub const STATS_ALL: u8 = StatsAll.repr();
pub const STATS_GET: u8 = StatsGet.repr();
pub const METRICS_ALL: u8 = MetricsAll.repr();
pub const METRICS_GET: u8 = MetricsGet.repr();
pub const DUMPS_ALL: u8 = DumpsAll.repr();
pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
pub const VERSION: u8 = Version.repr();
pub const KEYS_CREATE: u8 = KeysAdd.repr();
pub const KEYS_GET: u8 = KeysGet.repr();
pub const KEYS_UPDATE: u8 = KeysUpdate.repr();
pub const KEYS_DELETE: u8 = KeysDelete.repr();
}


@ -1,10 +1,9 @@
use serde_json::Deserializer;
use std::fs::File;
use std::io::BufReader;
use std::io::Write;
use std::io::{BufReader, Write};
use std::path::Path;
use serde_json::Deserializer;
use crate::{AuthController, HeedAuthStore, Result};
const KEYS_PATH: &str = "keys";


@ -1,41 +1,24 @@
use std::error::Error;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::internal_error;
use serde_json::Value;
use meilisearch_types::{internal_error, keys};
pub type Result<T> = std::result::Result<T, AuthControllerError>;
#[derive(Debug, thiserror::Error)]
pub enum AuthControllerError {
#[error("`{0}` field is mandatory.")]
MissingParameter(&'static str),
#[error("`actions` field value `{0}` is invalid. It should be an array of string representing action names.")]
InvalidApiKeyActions(Value),
#[error("`indexes` field value `{0}` is invalid. It should be an array of string representing index names.")]
InvalidApiKeyIndexes(Value),
#[error("`expiresAt` field value `{0}` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")]
InvalidApiKeyExpiresAt(Value),
#[error("`description` field value `{0}` is invalid. It should be a string or specified as a null value.")]
InvalidApiKeyDescription(Value),
#[error(
"`name` field value `{0}` is invalid. It should be a string or specified as a null value."
)]
InvalidApiKeyName(Value),
#[error("`uid` field value `{0}` is invalid. It should be a valid UUID v4 string or omitted.")]
InvalidApiKeyUid(Value),
#[error("API key `{0}` not found.")]
ApiKeyNotFound(String),
#[error("`uid` field value `{0}` is already an existing API key.")]
ApiKeyAlreadyExists(String),
#[error("The `{0}` field cannot be modified for the given resource.")]
ImmutableField(String),
#[error(transparent)]
ApiKey(#[from] keys::Error),
#[error("Internal error: {0}")]
Internal(Box<dyn Error + Send + Sync + 'static>),
}
internal_error!(
AuthControllerError: milli::heed::Error,
AuthControllerError: meilisearch_types::milli::heed::Error,
std::io::Error,
serde_json::Error,
std::str::Utf8Error
@ -44,16 +27,9 @@ internal_error!(
impl ErrorCode for AuthControllerError {
fn error_code(&self) -> Code {
match self {
Self::MissingParameter(_) => Code::MissingParameter,
Self::InvalidApiKeyActions(_) => Code::InvalidApiKeyActions,
Self::InvalidApiKeyIndexes(_) => Code::InvalidApiKeyIndexes,
Self::InvalidApiKeyExpiresAt(_) => Code::InvalidApiKeyExpiresAt,
Self::InvalidApiKeyDescription(_) => Code::InvalidApiKeyDescription,
Self::InvalidApiKeyName(_) => Code::InvalidApiKeyName,
Self::ApiKey(e) => e.error_code(),
Self::ApiKeyNotFound(_) => Code::ApiKeyNotFound,
Self::InvalidApiKeyUid(_) => Code::InvalidApiKeyUid,
Self::ApiKeyAlreadyExists(_) => Code::ApiKeyAlreadyExists,
Self::ImmutableField(_) => Code::ImmutableField,
Self::Internal(_) => Code::Internal,
}
}


@ -1,201 +0,0 @@
use crate::action::Action;
use crate::error::{AuthControllerError, Result};
use crate::store::KeyId;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::star_or::StarOr;
use serde::{Deserialize, Serialize};
use serde_json::{from_value, Value};
use time::format_description::well_known::Rfc3339;
use time::macros::{format_description, time};
use time::{Date, OffsetDateTime, PrimitiveDateTime};
use uuid::Uuid;
#[derive(Debug, Deserialize, Serialize)]
pub struct Key {
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub name: Option<String>,
pub uid: KeyId,
pub actions: Vec<Action>,
pub indexes: Vec<StarOr<IndexUid>>,
#[serde(with = "time::serde::rfc3339::option")]
pub expires_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
}
impl Key {
pub fn create_from_value(value: Value) -> Result<Self> {
let name = match value.get("name") {
None | Some(Value::Null) => None,
Some(des) => from_value(des.clone())
.map(Some)
.map_err(|_| AuthControllerError::InvalidApiKeyName(des.clone()))?,
};
let description = match value.get("description") {
None | Some(Value::Null) => None,
Some(des) => from_value(des.clone())
.map(Some)
.map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()))?,
};
let uid = value.get("uid").map_or_else(
|| Ok(Uuid::new_v4()),
|uid| {
from_value(uid.clone())
.map_err(|_| AuthControllerError::InvalidApiKeyUid(uid.clone()))
},
)?;
let actions = value
.get("actions")
.map(|act| {
from_value(act.clone())
.map_err(|_| AuthControllerError::InvalidApiKeyActions(act.clone()))
})
.ok_or(AuthControllerError::MissingParameter("actions"))??;
let indexes = value
.get("indexes")
.map(|ind| {
from_value(ind.clone())
.map_err(|_| AuthControllerError::InvalidApiKeyIndexes(ind.clone()))
})
.ok_or(AuthControllerError::MissingParameter("indexes"))??;
let expires_at = value
.get("expiresAt")
.map(parse_expiration_date)
.ok_or(AuthControllerError::MissingParameter("expiresAt"))??;
let created_at = OffsetDateTime::now_utc();
let updated_at = created_at;
Ok(Self {
name,
description,
uid,
actions,
indexes,
expires_at,
created_at,
updated_at,
})
}
pub fn update_from_value(&mut self, value: Value) -> Result<()> {
if let Some(des) = value.get("description") {
let des = from_value(des.clone())
.map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()));
self.description = des?;
}
if let Some(des) = value.get("name") {
let des = from_value(des.clone())
.map_err(|_| AuthControllerError::InvalidApiKeyName(des.clone()));
self.name = des?;
}
if value.get("uid").is_some() {
return Err(AuthControllerError::ImmutableField("uid".to_string()));
}
if value.get("actions").is_some() {
return Err(AuthControllerError::ImmutableField("actions".to_string()));
}
if value.get("indexes").is_some() {
return Err(AuthControllerError::ImmutableField("indexes".to_string()));
}
if value.get("expiresAt").is_some() {
return Err(AuthControllerError::ImmutableField("expiresAt".to_string()));
}
if value.get("createdAt").is_some() {
return Err(AuthControllerError::ImmutableField("createdAt".to_string()));
}
if value.get("updatedAt").is_some() {
return Err(AuthControllerError::ImmutableField("updatedAt".to_string()));
}
self.updated_at = OffsetDateTime::now_utc();
Ok(())
}
pub(crate) fn default_admin() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
Self {
name: Some("Default Admin API Key".to_string()),
description: Some("Use it for anything that is not a search operation. Caution! Do not expose it on a public frontend".to_string()),
uid,
actions: vec![Action::All],
indexes: vec![StarOr::Star],
expires_at: None,
created_at: now,
updated_at: now,
}
}
pub(crate) fn default_search() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
Self {
name: Some("Default Search API Key".to_string()),
description: Some("Use it to search from the frontend".to_string()),
uid,
actions: vec![Action::Search],
indexes: vec![StarOr::Star],
expires_at: None,
created_at: now,
updated_at: now,
}
}
}
fn parse_expiration_date(value: &Value) -> Result<Option<OffsetDateTime>> {
match value {
Value::String(string) => OffsetDateTime::parse(string, &Rfc3339)
.or_else(|_| {
PrimitiveDateTime::parse(
string,
format_description!(
"[year repr:full base:calendar]-[month repr:numerical]-[day]T[hour]:[minute]:[second]"
),
).map(|datetime| datetime.assume_utc())
})
.or_else(|_| {
PrimitiveDateTime::parse(
string,
format_description!(
"[year repr:full base:calendar]-[month repr:numerical]-[day] [hour]:[minute]:[second]"
),
).map(|datetime| datetime.assume_utc())
})
.or_else(|_| {
Date::parse(string, format_description!(
"[year repr:full base:calendar]-[month repr:numerical]-[day]"
)).map(|date| PrimitiveDateTime::new(date, time!(00:00)).assume_utc())
})
.map_err(|_| AuthControllerError::InvalidApiKeyExpiresAt(value.clone()))
// check if the key is already expired.
.and_then(|d| {
if d > OffsetDateTime::now_utc() {
Ok(d)
} else {
Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone()))
}
})
.map(Option::Some),
Value::Null => Ok(None),
_otherwise => Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone())),
}
}


@ -1,7 +1,5 @@
mod action;
mod dump;
pub mod error;
mod key;
mod store;
use std::collections::{HashMap, HashSet};
@ -9,19 +7,16 @@ use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use error::{AuthControllerError, Result};
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::star_or::StarOr;
use serde::{Deserialize, Serialize};
use serde_json::Value;
pub use store::open_auth_store_env;
use store::{generate_key_as_hexa, HeedAuthStore};
use time::OffsetDateTime;
use uuid::Uuid;
pub use action::{actions, Action};
use error::{AuthControllerError, Result};
pub use key::Key;
use meilisearch_types::star_or::StarOr;
use store::generate_key_as_hexa;
pub use store::open_auth_store_env;
use store::HeedAuthStore;
#[derive(Clone)]
pub struct AuthController {
store: Arc<HeedAuthStore>,
@ -36,18 +31,13 @@ impl AuthController {
generate_default_keys(&store)?;
}
Ok(Self {
store: Arc::new(store),
master_key: master_key.clone(),
})
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
}
pub fn create_key(&self, value: Value) -> Result<Key> {
let key = Key::create_from_value(value)?;
match self.store.get_api_key(key.uid)? {
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(
key.uid.to_string(),
)),
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(key.uid.to_string())),
None => self.store.put_api_key(key),
}
}
@ -66,9 +56,9 @@ impl AuthController {
pub fn get_optional_uid_from_encoded_key(&self, encoded_key: &[u8]) -> Result<Option<Uuid>> {
match &self.master_key {
Some(master_key) => self
.store
.get_uid_from_encoded_key(encoded_key, master_key.as_bytes()),
Some(master_key) => {
self.store.get_uid_from_encoded_key(encoded_key, master_key.as_bytes())
}
None => Ok(None),
}
}
@ -134,9 +124,7 @@ impl AuthController {
/// Generate a valid key from a key id using the current master key.
/// Returns None if no master key has been set.
pub fn generate_key(&self, uid: Uuid) -> Option<String> {
self.master_key
.as_ref()
.map(|master_key| generate_key_as_hexa(uid, master_key.as_bytes()))
self.master_key.as_ref().map(|master_key| generate_key_as_hexa(uid, master_key.as_bytes()))
}
/// Check if the provided key is authorized to make a specific action
@ -154,8 +142,7 @@ impl AuthController {
.or(match index {
// else check if the key has access to the requested index.
Some(index) => {
self.store
.get_expiration_date(uid, action, Some(index.as_bytes()))?
self.store.get_expiration_date(uid, action, Some(index.as_bytes()))?
}
// or to any index if no index has been requested.
None => self.store.prefix_first_expiration_date(uid, action)?,
@ -168,6 +155,17 @@ impl AuthController {
None => Ok(false),
}
}
/// Delete all the keys in the DB.
pub fn raw_delete_all_keys(&mut self) -> Result<()> {
self.store.delete_all_keys()
}
/// Insert a key directly into the DB.
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
}
}
pub struct AuthFilter {
@ -177,10 +175,7 @@ pub struct AuthFilter {
impl Default for AuthFilter {
fn default() -> Self {
Self {
search_rules: SearchRules::default(),
allow_index_creation: true,
}
Self { search_rules: SearchRules::default(), allow_index_creation: true }
}
}
@ -215,10 +210,9 @@ impl SearchRules {
None
}
}
Self::Map(map) => map
.get(index)
.or_else(|| map.get("*"))
.map(|isr| isr.clone().unwrap_or_default()),
Self::Map(map) => {
map.get(index).or_else(|| map.get("*")).map(|isr| isr.clone().unwrap_or_default())
}
}
}
}
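
The reformatted `Map` arm keeps the original lookup order: an exact entry for the requested index wins, otherwise the `"*"` entry applies, and if neither exists the key grants nothing. A minimal std-only sketch of that fallback (the map contents are illustrative):

```rust
use std::collections::HashMap;

// Stand-in for the `SearchRules::Map` lookup: exact index first, then "*".
fn rule_for<'a>(map: &'a HashMap<String, &'a str>, index: &str) -> Option<&'a str> {
    map.get(index).or_else(|| map.get("*")).copied()
}

fn main() {
    let mut map = HashMap::new();
    map.insert("catto".to_string(), "rule scoped to catto");
    map.insert("*".to_string(), "wildcard rule");
    assert_eq!(rule_for(&map, "catto"), Some("rule scoped to catto"));
    assert_eq!(rule_for(&map, "doggo"), Some("wildcard rule"));
}
```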


@ -1,8 +1,7 @@
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::HashSet;
use std::convert::TryFrom;
use std::convert::TryInto;
use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all;
use std::ops::Deref;
use std::path::Path;
@ -10,9 +9,11 @@ use std::str;
use std::sync::Arc;
use hmac::{Hmac, Mac};
use meilisearch_types::keys::KeyId;
use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use meilisearch_types::star_or::StarOr;
use milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256;
use time::OffsetDateTime;
use uuid::fmt::Hyphenated;
@ -26,8 +27,6 @@ const AUTH_DB_PATH: &str = "auth";
const KEY_DB_NAME: &str = "api-keys";
const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration";
pub type KeyId = Uuid;
#[derive(Clone)]
pub struct HeedAuthStore {
env: Arc<Env>,
@ -59,12 +58,7 @@ impl HeedAuthStore {
let keys = env.create_database(Some(KEY_DB_NAME))?;
let action_keyid_index_expiration =
env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
Ok(Self {
env,
keys,
action_keyid_index_expiration,
should_close_on_drop: true,
})
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
}
pub fn set_drop_on_close(&mut self, v: bool) {
@ -94,12 +88,8 @@ impl HeedAuthStore {
Action::All => actions.extend(enum_iterator::all::<Action>()),
Action::DocumentsAll => {
actions.extend(
[
Action::DocumentsGet,
Action::DocumentsDelete,
Action::DocumentsAdd,
]
.iter(),
[Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]
.iter(),
);
}
Action::IndexesAll => {
@ -197,6 +187,13 @@ impl HeedAuthStore {
Ok(existing)
}
pub fn delete_all_keys(&self) -> Result<()> {
let mut wtxn = self.env.write_txn()?;
self.keys.clear(&mut wtxn)?;
wtxn.commit()?;
Ok(())
}
pub fn list_api_keys(&self) -> Result<Vec<Key>> {
let mut list = Vec::new();
let rtxn = self.env.read_txn()?;


@ -10,21 +10,10 @@ version = "0.29.1"
name = "meilisearch"
path = "src/main.rs"
[build-dependencies]
anyhow = { version = "1.0.65", optional = true }
cargo_toml = { version = "0.12.4", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.10.0", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.3.0", optional = true }
vergen = { version = "7.4.2", default-features = false, features = ["git"] }
zip = { version = "0.6.2", optional = true }
[dependencies]
actix-cors = "0.6.3"
actix-http = { version = "3.2.2", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
actix-web = { version = "4.2.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
actix-http = "3.2.2"
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.65", features = ["backtrace"] }
async-stream = "0.3.3"
@ -34,28 +23,33 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", "
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
dump = { path = "../dump" }
either = "1.8.0"
env_logger = "0.9.1"
file-store = { path = "../file-store" }
flate2 = "1.0.24"
fst = "0.4.7"
futures = "0.3.24"
futures-util = "0.3.24"
http = "0.2.8"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "1.9.1", features = ["serde-1"] }
itertools = "0.10.5"
jsonwebtoken = "8.1.1"
lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"
once_cell = "1.15.0"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.2", features = ["process"], optional = true }
rand = "0.8.5"
rayon = "1.5.3"
regex = "1.6.0"
@ -80,8 +74,7 @@ tokio-stream = "0.1.10"
toml = "0.5.9"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
walkdir = "2.3.2"
prometheus = { version = "0.13.2", features = ["process"], optional = true }
lazy_static = "1.4.0"
yaup = "0.2.0"
[dev-dependencies]
actix-rt = "2.7.0"
@ -89,12 +82,24 @@ assert-json-diff = "2.0.2"
brotli = "3.3.4"
manifest-dir-macros = "0.1.16"
maplit = "1.0.2"
meili-snap = {path = "../meili-snap"}
temp-env = "0.3.1"
urlencoding = "2.1.2"
yaup = "0.2.1"
temp-env = "0.3.1"
[build-dependencies]
anyhow = { version = "1.0.65", optional = true }
cargo_toml = { version = "0.12.4", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.10.0", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.3.0", optional = true }
vergen = { version = "7.4.2", default-features = false, features = ["git"] }
zip = { version = "0.6.2", optional = true }
[features]
default = ["analytics", "meilisearch-lib/default", "mini-dashboard"]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
metrics = ["prometheus"]
analytics = ["segment"]
mini-dashboard = [
@ -108,10 +113,10 @@ mini-dashboard = [
"tempfile",
"zip",
]
chinese = ["meilisearch-lib/chinese"]
hebrew = ["meilisearch-lib/hebrew"]
japanese = ["meilisearch-lib/japanese"]
thai = ["meilisearch-lib/thai"]
chinese = ["meilisearch-types/chinese"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.2/build.zip"


@ -72,11 +72,8 @@ mod mini_dashboard {
resource_dir(&dashboard_dir).build()?;
// Write the sha1 for the dashboard back to file.
let mut file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(sha1_path)?;
let mut file =
OpenOptions::new().write(true).create(true).truncate(true).open(sha1_path)?;
file.write_all(sha1.as_bytes())?;
file.flush()?;

Some files were not shown because too many files have changed in this diff.