diff --git a/.gitignore b/.gitignore index 8aa76ff15..6fc47753d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,10 @@ /data.ms /snapshots /dumps + + +# Snapshots +## ... large +*.full.snap +## ... unreviewed +*.snap.new diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 000000000..250124b77 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,5 @@ +unstable_features = true + +use_small_heuristics = "max" +imports_granularity = "Module" +group_imports = "StdExternalCrate" diff --git a/Cargo.lock b/Cargo.lock index 69b0af37d..2f43356f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,7 +59,7 @@ dependencies = [ "http", "httparse", "httpdate", - "itoa 1.0.3", + "itoa 1.0.4", "language-tags", "local-channel", "mime", @@ -78,7 +78,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6" dependencies = [ "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "actix-utils" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e491cbaac2e7fc788dfff99ff48ef317e23b3cf63dbaf7aaab6418f40f92aa94" +checksum = "88a1dcdff1466e3c2488e1cb5c36a71822750ad43839937f85d2f4d9f8b705d8" dependencies = [ "local-waker", "pin-project-lite", @@ -188,7 +188,7 @@ dependencies = [ "futures-core", "futures-util", "http", - "itoa 1.0.3", + "itoa 1.0.4", "language-tags", "log", "mime", @@ -211,9 +211,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa9362663c8643d67b2d5eafba49e4cb2c8a053a29ed00a0bea121f17c76b13" dependencies = [ "actix-router", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.65" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602" +checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" dependencies = [ "backtrace", ] @@ -330,20 +330,20 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] name = "async-trait" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f" +checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -355,12 +355,6 @@ dependencies = [ "critical-section", ] -[[package]] -name = "atomic_refcell" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b5e5f48b927f04e952dedc932f31995a65a0bf65ec971c74436e51bf6e970d" - [[package]] name = "atty" version = "0.2.14" @@ -410,15 +404,15 @@ checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603" [[package]] name = "base64" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +checksum = 
"9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64ct" -version = "1.0.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a32fd6af2b5827bce66c29053ba0e7c42b9dcab01835835058558c10851a46b" +checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf" [[package]] name = "big_s" @@ -539,9 +533,9 @@ checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" [[package]] name = "bumpalo" -version = "3.11.0" +version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" [[package]] name = "byte-unit" @@ -574,9 +568,9 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9e1f5fa78f69496407a27ae9ed989e3c3b072310286f5ef385525e4cbc24a9" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -685,9 +679,9 @@ dependencies = [ [[package]] name = "character_converter" -version = "2.1.3" +version = "2.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75387a5aa327fed13de2adb87ec4bcb351943bfb30af7004405a39da430c390" +checksum = "14eb54f15451a7095181d32b3ac148ba3684ab8dc261a74208b2063c9293bb1c" dependencies = [ "bincode", "fst", @@ -705,9 +699,9 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.22" +version = "3.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750" +checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" dependencies = [ "atty", "bitflags", @@ -722,13 +716,13 @@ dependencies = [ [[package]] name = "clap" -version = "4.0.9" +version = "4.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30607dd93c420c6f1f80b544be522a0238a7db35e6a12968d28910983fee0df0" +checksum = "335867764ed2de42325fafe6d18b8af74ba97ee0c590fa016f157535b42ab04b" dependencies = [ "atty", "bitflags", - "clap_derive 4.0.9", + "clap_derive 4.0.18", "clap_lex 0.3.0", "once_cell", "strsim", @@ -743,22 +737,22 @@ checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ "heck", "proc-macro-error", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] name = "clap_derive" -version = "4.0.9" +version = "4.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a307492e1a34939f79d3b6b9650bd2b971513cd775436bf2b78defeb5af00b" +checksum = "16a1b0f6422af32d5da0c58e2703320f379216ee70198241c84173a8c5ac28f3" dependencies = [ "heck", "proc-macro-error", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -785,9 +779,22 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", +] + +[[package]] +name = "console" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c050367d967ced717c04b65d8c619d863ef9292ce0c5760028655a2fb298718c" +dependencies = [ + "encode_unicode", + "lazy_static", 
+ "libc", + "terminal_size", + "winapi", ] [[package]] @@ -876,6 +883,20 @@ dependencies = [ "riscv", ] +[[package]] +name = "crossbeam" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + [[package]] name = "crossbeam-channel" version = "0.5.6" @@ -962,14 +983,69 @@ dependencies = [ ] [[package]] -name = "derivative" -version = "2.2.0" +name = "darling" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +checksum = "4529658bdda7fd6769b8614be250cdcfc3aeb0ee72fe66f9e41e5e5eb73eac02" dependencies = [ - "proc-macro2 1.0.46", + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "649c91bc01e8b1eac09fb91e8dbc7d517684ca6be8ebc75bb9cafc894f9fdb6f" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "strsim", + "syn 1.0.103", +] + +[[package]] +name = "darling_macro" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc69c5bfcbd2fc09a0f38451d2daf0e372e367986a83906d1b0dbc88134fb5" +dependencies = [ + "darling_core", + "quote 1.0.21", + "syn 1.0.103", +] + +[[package]] +name = "derive_builder" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07adf7be193b71cc36b193d0f5fe60b918a3a9db4dad0449f57bcfd519704a3" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f91d4cfa921f1c05904dc3c57b4a32c38aed3340cce209f3a6fd1478babafc4" +dependencies = [ + "darling", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", +] + +[[package]] +name = "derive_builder_macro" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f0314b72bed045f3a68671b3c86328386762c93f82d98c65c3cb5e5f573dd68" +dependencies = [ + "derive_builder_core", + "syn 1.0.103", ] [[package]] @@ -979,10 +1055,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ "convert_case", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", "rustc_version 0.4.0", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -991,12 +1067,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08ff6a4480d42625e59bc4e8b5dc3723279fd24d83afe8aa20df217276261cd6" -[[package]] -name = "difflib" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" - [[package]] name = "digest" version = "0.10.5" @@ -1030,10 +1100,29 @@ dependencies = [ ] [[package]] -name = "downcast" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +name = "dump" +version = "0.29.0" +dependencies = [ + "anyhow", + "big_s", + "flate2", + "http", + "log", + "maplit", + "meili-snap", + "meilisearch-auth", + 
"meilisearch-types", + "once_cell", + "regex", + "roaring", + "serde", + "serde_json", + "tar", + "tempfile", + "thiserror", + "time", + "uuid 1.2.1", +] [[package]] name = "either" @@ -1054,6 +1143,12 @@ dependencies = [ "void", ] +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding" version = "0.2.33" @@ -1142,9 +1237,9 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "828de45d0ca18782232dfb8f3ea9cc428e8ced380eb26a520baaacfc70de39ce" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -1160,6 +1255,27 @@ dependencies = [ "termcolor", ] +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "fastrand" version = "1.8.0" @@ -1170,21 +1286,54 @@ dependencies = [ ] [[package]] -name = "filetime" -version = "0.2.17" +name = "faux" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" +checksum = "7c3b5e56a69ca67c241191cd9d484e14fb0fe89f5e539c2e8448eafd1f65c1f0" +dependencies = [ + "faux_macros", + "paste", +] + +[[package]] +name = "faux_macros" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35c9bb4a2c13ffb3a93a39902aaf4e7190a1706a4779b6db0449aee433d26c4a" +dependencies = [ + "darling", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", + "uuid 0.8.2", +] + +[[package]] +name = "file-store" +version = "0.1.0" +dependencies = [ + "faux", + "tempfile", + "thiserror", + "uuid 1.2.1", +] + +[[package]] +name = "filetime" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9663d381d07ae25dc88dbdf27df458faa83a9b25336bcac83d5e452b5fc9d3" dependencies = [ "cfg-if", "libc", "redox_syscall", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] name = "filter-parser" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +version = "0.35.0" +source = "git+https://github.com/meilisearch/milli.git#2e539249cb16f5e88be9f21ab712f8b4266cad36" dependencies = [ "nom", "nom_locate", @@ -1202,21 +1351,12 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +version = "0.35.0" +source = "git+https://github.com/meilisearch/milli.git#2e539249cb16f5e88be9f21ab712f8b4266cad36" dependencies = [ "serde_json", ] -[[package]] -name = "float-cmp" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" -dependencies = [ - "num-traits", -] - [[package]] name = "fnv" version = "1.0.7" @@ -1232,18 +1372,6 @@ dependencies = [ 
"percent-encoding", ] -[[package]] -name = "fragile" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85dcb89d2b10c5f6133de2efd8c11959ce9dbb46a2f7a4cab208c4eeda6ce1ab" - -[[package]] -name = "fs_extra" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" - [[package]] name = "fst" version = "0.4.7" @@ -1252,9 +1380,9 @@ checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" [[package]] name = "futures" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f21eda599937fba36daeb58a22e8f5cee2d14c4a17b5b7739c7c8e5e3b8230c" +checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0" dependencies = [ "futures-channel", "futures-core", @@ -1267,9 +1395,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bdd20c28fadd505d0fd6712cdfcb0d4b5648baf45faef7f852afb2399bb050" +checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" dependencies = [ "futures-core", "futures-sink", @@ -1277,15 +1405,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e5aa3de05362c3fb88de6531e6296e85cde7739cccad4b9dfeeb7f6ebce56bf" +checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" [[package]] name = "futures-executor" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff63c23854bee61b6e9cd331d523909f238fc7636290b96826e9cfa5faa00ab" +checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2" dependencies = [ "futures-core", "futures-task", @@ -1294,38 +1422,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbf4d2a7a308fd4578637c0b17c7e1c7ba127b8f6ba00b29f717e9655d85eb68" +checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb" [[package]] name = "futures-macro" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd15d1c7456c04dbdf7e88bcd69760d74f3a798d6444e16974b505b0e62f17" +checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] name = "futures-sink" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b20ba5a92e727ba30e72834706623d94ac93a725410b6a6b6fbc1b07f7ba56" +checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9" [[package]] name = "futures-task" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6508c467c73851293f390476d4491cf4d227dbabcd4170f3bb6044959b294f1" +checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" [[package]] name = "futures-util" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fb6cb1be61cc1d2e43b262516aafcf63b241cffdb1d3fa115f91d9c7b09c90" +checksum = 
"197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" dependencies = [ "futures-channel", "futures-core", @@ -1366,9 +1494,9 @@ checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" [[package]] name = "getrandom" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", @@ -1382,9 +1510,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" dependencies = [ "proc-macro-error", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -1425,9 +1553,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca32592cf21ac7ccab1825cd87f6c9b3d9022c44d086172ed0966bec8af30be" +checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" dependencies = [ "bytes", "fnv", @@ -1552,7 +1680,7 @@ checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" dependencies = [ "bytes", "fnv", - "itoa 1.0.3", + "itoa 1.0.4", ] [[package]] @@ -1599,7 +1727,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa 1.0.3", + "itoa 1.0.4", "pin-project-lite", "socket2", "tokio", @@ -1621,6 +1749,12 @@ dependencies = [ "tokio-rustls", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.3.0" @@ -1631,6 +1765,34 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "index-scheduler" +version = "0.1.0" +dependencies = [ + "anyhow", + "big_s", + "bincode", + "crossbeam", + "csv", + "derive_builder", + "dump", + "enum-iterator", + "file-store", + "insta", + "log", + "meili-snap", + "meilisearch-types", + "nelson", + "roaring", + "serde", + "serde_json", + "synchronoise", + "tempfile", + "thiserror", + "time", + "uuid 1.2.1", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -1642,6 +1804,22 @@ dependencies = [ "serde", ] +[[package]] +name = "insta" +version = "1.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581d4e3314cae4536e5d22ffd23189d4a374696c5ef733eadafae0ed273fd303" +dependencies = [ + "console", + "lazy_static", + "linked-hash-map", + "pest", + "pest_derive", + "serde", + "similar", + "yaml-rust", +] + [[package]] name = "instant" version = "0.1.12" @@ -1651,6 +1829,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "io-lifetimes" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e481ccbe3dea62107216d0d1138bb8ad8e5e5c43009a098bd1990272c497b0" + [[package]] name = "ipnet" version = "2.5.0" @@ -1674,9 +1858,9 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" [[package]] name = "jieba-rs" @@ -1713,8 +1897,8 @@ dependencies = [ [[package]] 
name = "json-depth-checker" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +version = "0.35.0" +source = "git+https://github.com/meilisearch/milli.git#2e539249cb16f5e88be9f21ab712f8b4266cad36" dependencies = [ "serde_json", ] @@ -1756,9 +1940,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.134" +version = "0.2.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb" +checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" [[package]] name = "libgit2-sys" @@ -1780,9 +1964,9 @@ checksum = "292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565" [[package]] name = "libmimalloc-sys" -version = "0.1.25" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c" +checksum = "8fc093ab289b0bfda3aa1bdfab9c9542be29c7ef385cfcbe77f8c9813588eb48" dependencies = [ "cc", ] @@ -1830,7 +2014,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", + "clap 3.2.23", "csv", "encoding", "env_logger", @@ -1905,7 +2089,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", + "clap 3.2.23", "encoding", "env_logger", "glob", @@ -1925,7 +2109,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", + "clap 3.2.23", "csv", "encoding", "env_logger", @@ -1945,7 +2129,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", + "clap 3.2.23", "csv", "encoding", "env_logger", @@ -1956,6 +2140,18 @@ dependencies = [ "yada", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + +[[package]] +name = "linux-raw-sys" +version = "0.0.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" + [[package]] name = "lmdb-rkv-sys" version = "0.15.0" @@ -2020,9 +2216,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10a9062912d7952c5588cc474795e0b9ee008e7e6781127945b85413d4b99d81" dependencies = [ "log", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -2042,9 +2238,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f08150cf2bab1fc47c2196f4f41173a27fcd0f684165e5458c0046b53a472e2f" dependencies = [ "once_cell", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -2053,6 +2249,21 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + +[[package]] +name = "meili-snap" +version = "0.1.0" +dependencies = [ + "insta", + "md5", + "once_cell", +] + [[package]] name = "meilisearch-auth" version = "0.29.1" @@ -2060,14 +2271,14 @@ dependencies = [ "enum-iterator", "hmac", "meilisearch-types", - "milli", "rand", + "roaring", "serde", "serde_json", "sha2", "thiserror", "time", - "uuid", + "uuid 1.2.1", ] [[package]] @@ -2088,16 +2299,19 
@@ dependencies = [ "byte-unit", "bytes", "cargo_toml", - "clap 4.0.9", + "clap 4.0.18", "crossbeam-channel", + "dump", "either", "env_logger", + "file-store", "flate2", "fst", "futures", "futures-util", "hex", "http", + "index-scheduler", "indexmap", "itertools", "jsonwebtoken", @@ -2105,8 +2319,8 @@ dependencies = [ "log", "manifest-dir-macros", "maplit", + "meili-snap", "meilisearch-auth", - "meilisearch-lib", "meilisearch-types", "mimalloc", "mime", @@ -2114,6 +2328,7 @@ dependencies = [ "obkv", "once_cell", "parking_lot", + "permissive-json-pointer", "pin-project-lite", "platform-dirs", "prometheus", @@ -2142,85 +2357,37 @@ dependencies = [ "tokio-stream", "toml", "urlencoding", - "uuid", + "uuid 1.2.1", "vergen", "walkdir", "yaup", "zip", ] -[[package]] -name = "meilisearch-lib" -version = "0.29.1" -dependencies = [ - "actix-rt", - "actix-web", - "anyhow", - "async-stream", - "async-trait", - "atomic_refcell", - "byte-unit", - "bytes", - "clap 4.0.9", - "crossbeam-channel", - "csv", - "derivative", - "either", - "flate2", - "fs_extra", - "fst", - "futures", - "futures-util", - "http", - "indexmap", - "itertools", - "lazy_static", - "log", - "meilisearch-auth", - "meilisearch-types", - "milli", - "mime", - "mockall", - "nelson", - "num_cpus", - "obkv", - "once_cell", - "page_size", - "parking_lot", - "paste", - "permissive-json-pointer", - "proptest", - "proptest-derive", - "rand", - "rayon", - "regex", - "reqwest", - "roaring", - "rustls", - "serde", - "serde_json", - "siphasher", - "slice-group-by", - "sysinfo", - "tar", - "tempfile", - "thiserror", - "time", - "tokio", - "uuid", - "walkdir", - "whoami", -] - [[package]] name = "meilisearch-types" version = "0.29.1" dependencies = [ "actix-web", + "anyhow", + "csv", + "either", + "enum-iterator", + "flate2", + "fst", + "insta", + "meili-snap", + "milli", "proptest", "proptest-derive", + "roaring", "serde", "serde_json", + "tar", + "thiserror", + "time", + "tokio", + "uuid 1.2.1", ] [[package]] @@ -2249,8 +2416,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.34.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.34.0#2bf867982ab548a6d749c7534f69b44d3552ef70" +version = "0.35.0" +source = "git+https://github.com/meilisearch/milli.git#2e539249cb16f5e88be9f21ab712f8b4266cad36" dependencies = [ "bimap", "bincode", @@ -2289,14 +2456,14 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid", + "uuid 1.2.1", ] [[package]] name = "mimalloc" -version = "0.1.29" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698" +checksum = "76ce6a4b40d3bff9eb3ce9881ca0737a85072f9f975886082640cd46a75cdb35" dependencies = [ "libmimalloc-sys", ] @@ -2334,41 +2501,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", "wasi", - "windows-sys", -] - -[[package]] -name = "mockall" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2be9a9090bc1cac2930688fa9478092a64c6a92ddc6ae0692d46b37d9cab709" -dependencies = [ - "cfg-if", - "downcast", - "fragile", - "lazy_static", - "mockall_derive", - "predicates", - "predicates-tree", -] - -[[package]] -name = "mockall_derive" -version = "0.11.2" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d702a0530a0141cf4ed147cf5ec7be6f2c187d4e37fcbefc39cf34116bfe8f" -dependencies = [ - "cfg-if", - "proc-macro2 1.0.46", - "quote 1.0.21", - "syn 1.0.101", + "windows-sys 0.42.0", ] [[package]] @@ -2412,12 +2552,6 @@ dependencies = [ "nom", ] -[[package]] -name = "normalize-line-endings" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" - [[package]] name = "ntapi" version = "0.4.0" @@ -2541,22 +2675,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] name = "password-hash" -version = "0.3.2" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d791538a6dcc1e7cb7fe6f6b58aca40e7f79403c45b2bc274008b5e647af1d8" +checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" dependencies = [ "base64ct", "rand_core", @@ -2586,9 +2720,9 @@ checksum = "498a099351efa4becc6a19c72aa9270598e8fd274ca47052e37455241c88b696" [[package]] name = "pbkdf2" -version = "0.10.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271779f35b581956db91a3e55737327a03aa051e90b1c47aeb189508533adfd7" +checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" dependencies = [ "digest", "hmac", @@ -2619,6 +2753,50 @@ dependencies = [ "serde_json", ] +[[package]] +name = "pest" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc7bc69c062e492337d74d59b120c274fd3d261b6bf6d3207d499b4b379c41a" +dependencies = [ + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b75706b9642ebcb34dab3bc7750f811609a0eb1dd8b88c2d15bf628c1c65b2" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f9272122f5979a6511a749af9db9bfc810393f63119970d7085fed1c4ea0db" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", +] + +[[package]] +name = "pest_meta" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8717927f9b79515e565a64fe46c38b8cd0427e64c40680b14a7365ab09ac8d" +dependencies = [ + "once_cell", + "pest", + "sha1", +] + [[package]] name = "phf" version = "0.11.1" @@ -2671,9 +2849,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "platform-dirs" @@ -2690,36 +2868,6 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" -[[package]] -name = "predicates" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5aab5be6e4732b473071984b3164dbbfb7a3674d30ea5ff44410b6bcd960c3c" -dependencies = [ - "difflib", - "float-cmp", - "itertools", - "normalize-line-endings", - "predicates-core", - "regex", -] - -[[package]] -name = "predicates-core" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da1c2388b1513e1b605fcec39a95e0a9e8ef088f71443ef37099fa9ae6673fcb" - -[[package]] -name = "predicates-tree" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d86de6de25020a36c6d3643a86d9a6a9f552107c0559c60ea03551b5e16c032" -dependencies = [ - "predicates-core", - "termtree", -] - [[package]] name = "proc-macro-error" version = "1.0.4" @@ -2727,9 +2875,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", "version_check", ] @@ -2739,7 +2887,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", "version_check", ] @@ -2755,31 +2903,31 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.46" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" dependencies = [ "unicode-ident", ] [[package]] name = "procfs" -version = "0.12.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0941606b9934e2d98a3677759a971756eb821f75764d0e0d26946d08e74d9104" +checksum = "2dfb6451c91904606a1abe93e83a8ec851f45827fa84273f256ade45dc095818" dependencies = [ "bitflags", "byteorder", "hex", "lazy_static", - "libc", + "rustix", ] [[package]] name = "prometheus" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c8babc29389186697fe5a2a4859d697825496b83db5d0b65271cdc0488e88c" +checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c" dependencies = [ "cfg-if", "fnv", @@ -2856,7 +3004,7 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", ] [[package]] @@ -3064,6 +3212,7 @@ dependencies = [ "bytemuck", "byteorder", "retain_mut", + "serde", ] [[package]] @@ -3103,10 +3252,24 @@ dependencies = [ ] [[package]] -name = "rustls" -version = "0.20.6" +name = "rustix" +version = "0.35.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033" +checksum = "985947f9b6423159c4726323f373be0a21bdb514c5af06a849cb3d2dce2d01e8" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.36.1", +] + +[[package]] +name = "rustls" +version = "0.20.7" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c" dependencies = [ "log", "ring", @@ -3209,9 +3372,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.145" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" dependencies = [ "serde_derive", ] @@ -3227,23 +3390,23 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.145" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] name = "serde_json" -version = "1.0.85" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45" dependencies = [ "indexmap", - "itoa 1.0.3", + "itoa 1.0.4", "ryu", "serde", ] @@ -3255,7 +3418,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa 1.0.3", + "itoa 1.0.4", "ryu", "serde", ] @@ -3302,6 +3465,12 @@ dependencies = [ "libc", ] +[[package]] +name = "similar" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ac7f900db32bf3fd12e0117dd3dc4da74bc52ebaac97f39668446d89694803" + [[package]] name = "simple_asn1" version = "0.6.2" @@ -3435,11 +3604,11 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2" +checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", "unicode-ident", ] @@ -3459,17 +3628,17 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", "unicode-xid 0.2.4", ] [[package]] name = "sysinfo" -version = "0.26.4" +version = "0.26.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7890fff842b8db56f2033ebee8f6efe1921475c3830c115995552914fb967580" +checksum = "c6d0dedf2e65d25b365c588382be9dc3a3ee4b0ed792366cf722d174c359d948" dependencies = [ "cfg-if", "core-foundation-sys", @@ -3524,16 +3693,20 @@ dependencies = [ ] [[package]] -name = "termtree" -version = "0.2.4" +name = "terminal_size" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] [[package]] name = "textwrap" -version = "0.15.1" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" @@ -3550,29 +3723,39 @@ version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] name = "time" -version = "0.3.15" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d634a985c4d4238ec39cacaed2e7ae552fbd3c476b552c1deac3021b7d7eaf0c" +checksum = "0fab5c8b9980850e06d92ddbe3ab839c062c801f3927c0fb8abd6fc8e918fbca" dependencies = [ - "itoa 1.0.3", + "itoa 1.0.4", "libc", "num_threads", "serde", + "time-core", "time-macros", ] [[package]] -name = "time-macros" -version = "0.2.4" +name = "time-core" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bb801831d812c562ae7d2bfb531f26e66e4e1f6b17307ba4149c5064710e5b" +dependencies = [ + "time-core", +] [[package]] name = "tinyvec" @@ -3615,9 +3798,9 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", ] [[package]] @@ -3633,9 +3816,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6edf2d6bc038a43d31353570e27270603f4648d18f5ed10c0e179abe43255af" +checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" dependencies = [ "futures-core", "pin-project-lite", @@ -3673,9 +3856,9 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.36" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fce9567bd60a67d08a16488756721ba392f24f29006402881e43b19aac64307" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ "cfg-if", "log", @@ -3685,9 +3868,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.29" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeea4303076558a00714b823f9ad67d58a3bbda1df83d8827d21193156e22f7" +checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" dependencies = [ "once_cell", ] @@ -3704,6 +3887,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +[[package]] +name = "ucd-trie" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" + [[package]] name = "unicase" version = "2.6.0" @@ -3721,9 +3910,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" [[package]] name = "unicode-ident" -version = "1.0.4" 
+version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" [[package]] name = "unicode-normalization" @@ -3783,9 +3972,18 @@ checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" [[package]] name = "uuid" -version = "1.1.2" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd6469f4314d5f1ffec476e05f17cc9a78bc7a27a6a857842170bdf8d6f98d2f" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "uuid" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ "getrandom", "serde", @@ -3895,9 +4093,9 @@ dependencies = [ "bumpalo", "log", "once_cell", - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", "wasm-bindgen-shared", ] @@ -3929,9 +4127,9 @@ version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ - "proc-macro2 1.0.46", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.101", + "syn 1.0.103", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3980,17 +4178,6 @@ dependencies = [ "hashbrown 0.7.2", ] -[[package]] -name = "whoami" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6631b6a2fd59b1841b622e8f1a7ad241ef0a46f2d580464ce8140ac94cbd571" -dependencies = [ - "bumpalo", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "winapi" version = "0.3.9" @@ -4028,43 +4215,100 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" + [[package]] name = "winreg" version = "0.10.1" @@ -4089,6 +4333,15 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d12cb7a57bbf2ab670ed9545bae3648048547f9039279a89ce000208e585c1" +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yaup" version = "0.2.1" @@ -4115,16 +4368,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ - "proc-macro2 1.0.46", - "syn 1.0.101", + "proc-macro2 1.0.47", + "syn 1.0.103", "synstructure", ] [[package]] name = "zip" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf225bcf73bb52cbb496e70475c7bd7a3f769df699c0020f6c7bd9a96dcf0b8d" +checksum = "537ce7411d25e54e8ae21a7ce0b15840e7bfcff15b51d697ec3266cc76bdf080" dependencies = [ "aes", "byteorder", @@ -4142,18 +4395,18 @@ dependencies = [ [[package]] name = "zstd" -version = "0.10.2+zstd.1.5.2" +version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4a6bd64f22b5e3e94b4e238669ff9f10815c27a5180108b849d24174a83847" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "4.1.6+zstd.1.5.2" +version = "5.0.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b61c51bb270702d6167b8ce67340d2754b088d0c091b06e593aa772c3ee9bb" +checksum = 
"1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" dependencies = [ "libc", "zstd-sys", @@ -4161,9 +4414,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.6.3+zstd.1.5.2" +version = "2.0.1+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" +checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", "libc", diff --git a/Cargo.toml b/Cargo.toml index 678d1b78b..2b756f87c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,8 +3,11 @@ resolver = "2" members = [ "meilisearch-http", "meilisearch-types", - "meilisearch-lib", "meilisearch-auth", + "meili-snap", + "index-scheduler", + "dump", + "file-store", "permissive-json-pointer", ] diff --git a/dump/Cargo.toml b/dump/Cargo.toml new file mode 100644 index 000000000..c7bf76879 --- /dev/null +++ b/dump/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "dump" +version = "0.29.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.65" +flate2 = "1.0.22" +http = "0.2.8" +log = "0.4.17" +meilisearch-auth = { path = "../meilisearch-auth" } +meilisearch-types = { path = "../meilisearch-types" } +once_cell = "1.15.0" +regex = "1.6.0" +roaring = { version = "0.10.0", features = ["serde"] } +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.85", features = ["preserve_order"] } +tar = "0.4.38" +tempfile = "3.3.0" +thiserror = "1.0.30" +time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +big_s = "1.0.2" +maplit = "1.0.2" +meili-snap = { path = "../meili-snap" } +meilisearch-types = { path = "../meilisearch-types" } diff --git a/dump/README.md b/dump/README.md new file mode 100644 index 000000000..3537f188e --- /dev/null +++ b/dump/README.md @@ -0,0 +1,17 @@ +``` +dump +├── indexes +│ ├── cattos +│ │ ├── documents.jsonl +│ │ └── settings.json +│ └── doggos +│ ├── documents.jsonl +│ └── settings.json +├── instance-uid.uuid +├── keys.jsonl +├── metadata.json +└── tasks + ├── update_files + │ └── [task_id].jsonl + └── queue.jsonl +``` \ No newline at end of file diff --git a/dump/src/error.rs b/dump/src/error.rs new file mode 100644 index 000000000..a11aae9cf --- /dev/null +++ b/dump/src/error.rs @@ -0,0 +1,36 @@ +use meilisearch_types::error::{Code, ErrorCode}; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")] + DumpV1Unsupported, + #[error("Bad index name.")] + BadIndexName, + #[error("Malformed task.")] + MalformedTask, + + #[error(transparent)] + Io(#[from] std::io::Error), + #[error(transparent)] + Serde(#[from] serde_json::Error), + #[error(transparent)] + Uuid(#[from] uuid::Error), +} + +impl ErrorCode for Error { + fn error_code(&self) -> Code { + match self { + // Are these three really Internal errors? + // TODO look at that later. + Error::Io(_) => Code::Internal, + Error::Serde(_) => Code::Internal, + Error::Uuid(_) => Code::Internal, + + // all these errors should never be raised when creating a dump, thus no error code should be associated. 
+            Error::DumpV1Unsupported => Code::Internal,
+            Error::BadIndexName => Code::Internal,
+            Error::MalformedTask => Code::Internal,
+        }
+    }
+}
diff --git a/dump/src/lib.rs b/dump/src/lib.rs
new file mode 100644
index 000000000..25e8d473b
--- /dev/null
+++ b/dump/src/lib.rs
@@ -0,0 +1,465 @@
+#![allow(clippy::type_complexity)]
+#![allow(clippy::wrong_self_convention)]
+
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::keys::Key;
+use meilisearch_types::milli::update::IndexDocumentsMethod;
+use meilisearch_types::settings::Unchecked;
+use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
+use meilisearch_types::InstanceUid;
+use roaring::RoaringBitmap;
+use serde::{Deserialize, Serialize};
+use time::OffsetDateTime;
+
+mod error;
+mod reader;
+mod writer;
+
+pub use error::Error;
+pub use reader::{DumpReader, UpdateFile};
+pub use writer::DumpWriter;
+
+const CURRENT_DUMP_VERSION: Version = Version::V6;
+
+type Result<T> = std::result::Result<T, Error>;
+
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Metadata {
+    pub dump_version: Version,
+    pub db_version: String,
+    #[serde(with = "time::serde::rfc3339")]
+    pub dump_date: OffsetDateTime,
+}
+
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct IndexMetadata {
+    pub uid: String,
+    pub primary_key: Option<String>,
+    #[serde(with = "time::serde::rfc3339")]
+    pub created_at: OffsetDateTime,
+    #[serde(with = "time::serde::rfc3339")]
+    pub updated_at: OffsetDateTime,
+}
+
+#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
+pub enum Version {
+    V1,
+    V2,
+    V3,
+    V4,
+    V5,
+    V6,
+}
+
+#[derive(Debug, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskDump {
+    pub uid: TaskId,
+    #[serde(default)]
+    pub index_uid: Option<String>,
+    pub status: Status,
+    #[serde(rename = "type")]
+    pub kind: KindDump,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub canceled_by: Option<TaskId>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub details: Option<Details>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<ResponseError>,
+
+    #[serde(with = "time::serde::rfc3339")]
+    pub enqueued_at: OffsetDateTime,
+    #[serde(
+        with = "time::serde::rfc3339::option",
+        skip_serializing_if = "Option::is_none",
+        default
+    )]
+    pub started_at: Option<OffsetDateTime>,
+    #[serde(
+        with = "time::serde::rfc3339::option",
+        skip_serializing_if = "Option::is_none",
+        default
+    )]
+    pub finished_at: Option<OffsetDateTime>,
+}
+
+// A `Kind` specific version made for the dump. If modified you may break the dump.
+#[derive(Debug, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub enum KindDump {
+    DocumentImport {
+        primary_key: Option<String>,
+        method: IndexDocumentsMethod,
+        documents_count: u64,
+        allow_index_creation: bool,
+    },
+    DocumentDeletion {
+        documents_ids: Vec<String>,
+    },
+    DocumentClear,
+    Settings {
+        settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
+        is_deletion: bool,
+        allow_index_creation: bool,
+    },
+    IndexDeletion,
+    IndexCreation {
+        primary_key: Option<String>,
+    },
+    IndexUpdate {
+        primary_key: Option<String>,
+    },
+    IndexSwap {
+        swaps: Vec<IndexSwap>,
+    },
+    TaskCancelation {
+        query: String,
+        tasks: RoaringBitmap,
+    },
+    TasksDeletion {
+        query: String,
+        tasks: RoaringBitmap,
+    },
+    DumpCreation {
+        dump_uid: String,
+        keys: Vec<Key>,
+        instance_uid: Option<InstanceUid>,
+    },
+    SnapshotCreation,
+}
+
+impl From<Task> for TaskDump {
+    fn from(task: Task) -> Self {
+        TaskDump {
+            uid: task.uid,
+            index_uid: task.index_uid().map(|uid| uid.to_string()),
+            status: task.status,
+            kind: task.kind.into(),
+            canceled_by: task.canceled_by,
+            details: task.details,
+            error: task.error,
+            enqueued_at: task.enqueued_at,
+            started_at: task.started_at,
+            finished_at: task.finished_at,
+        }
+    }
+}
+
+impl From<KindWithContent> for KindDump {
+    fn from(kind: KindWithContent) -> Self {
+        match kind {
+            KindWithContent::DocumentAdditionOrUpdate {
+                primary_key,
+                method,
+                documents_count,
+                allow_index_creation,
+                ..
+            } => KindDump::DocumentImport {
+                primary_key,
+                method,
+                documents_count,
+                allow_index_creation,
+            },
+            KindWithContent::DocumentDeletion { documents_ids, .. } => {
+                KindDump::DocumentDeletion { documents_ids }
+            }
+            KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
+            KindWithContent::SettingsUpdate {
+                new_settings,
+                is_deletion,
+                allow_index_creation,
+                ..
+            } => KindDump::Settings { settings: new_settings, is_deletion, allow_index_creation },
+            KindWithContent::IndexDeletion { .. } => KindDump::IndexDeletion,
+            KindWithContent::IndexCreation { primary_key, .. } => {
+                KindDump::IndexCreation { primary_key }
+            }
+            KindWithContent::IndexUpdate { primary_key, .. } => {
} => { + KindDump::IndexUpdate { primary_key } + } + KindWithContent::IndexSwap { swaps } => KindDump::IndexSwap { swaps }, + KindWithContent::TaskCancelation { query, tasks } => { + KindDump::TaskCancelation { query, tasks } + } + KindWithContent::TaskDeletion { query, tasks } => { + KindDump::TasksDeletion { query, tasks } + } + KindWithContent::DumpCreation { dump_uid, keys, instance_uid } => { + KindDump::DumpCreation { dump_uid, keys, instance_uid } + } + KindWithContent::SnapshotCreation => KindDump::SnapshotCreation, + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::{Seek, SeekFrom}; + use std::str::FromStr; + + use big_s::S; + use maplit::btreeset; + use meilisearch_types::index_uid::IndexUid; + use meilisearch_types::keys::{Action, Key}; + use meilisearch_types::milli::update::Setting; + use meilisearch_types::milli::{self}; + use meilisearch_types::settings::{Checked, Settings}; + use meilisearch_types::star_or::StarOr; + use meilisearch_types::tasks::{Details, Status}; + use serde_json::{json, Map, Value}; + use time::macros::datetime; + use uuid::Uuid; + + use crate::reader::Document; + use crate::{DumpReader, DumpWriter, IndexMetadata, KindDump, TaskDump, Version}; + + pub fn create_test_instance_uid() -> Uuid { + Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap() + } + + pub fn create_test_index_metadata() -> IndexMetadata { + IndexMetadata { + uid: S("doggo"), + primary_key: None, + created_at: datetime!(2022-11-20 12:00 UTC), + updated_at: datetime!(2022-11-21 00:00 UTC), + } + } + + pub fn create_test_documents() -> Vec> { + vec![ + json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 }) + .as_object() + .unwrap() + .clone(), + json!({ "id": 2, "race": "bernese mountain", "name": "tamo", "age": 6 }) + .as_object() + .unwrap() + .clone(), + json!({ "id": 3, "race": "great pyrenees", "name": "patou", "age": 5 }) + .as_object() + .unwrap() + .clone(), + ] + } + + pub fn create_test_settings() -> Settings { + let settings = Settings { + displayed_attributes: Setting::Set(vec![S("race"), S("name")]), + searchable_attributes: Setting::Set(vec![S("name"), S("race")]), + filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }), + sortable_attributes: Setting::Set(btreeset! 
{ S("age") }), + ranking_rules: Setting::NotSet, + stop_words: Setting::NotSet, + synonyms: Setting::NotSet, + distinct_attribute: Setting::NotSet, + typo_tolerance: Setting::NotSet, + faceting: Setting::NotSet, + pagination: Setting::NotSet, + _kind: std::marker::PhantomData, + }; + settings.check() + } + + pub fn create_test_tasks() -> Vec<(TaskDump, Option>)> { + vec![ + ( + TaskDump { + uid: 0, + index_uid: Some(S("doggo")), + status: Status::Succeeded, + kind: KindDump::DocumentImport { + method: milli::update::IndexDocumentsMethod::UpdateDocuments, + allow_index_creation: true, + primary_key: Some(S("bone")), + documents_count: 12, + }, + canceled_by: None, + details: Some(Details::DocumentAdditionOrUpdate { + received_documents: 12, + indexed_documents: Some(10), + }), + error: None, + enqueued_at: datetime!(2022-11-11 0:00 UTC), + started_at: Some(datetime!(2022-11-20 0:00 UTC)), + finished_at: Some(datetime!(2022-11-21 0:00 UTC)), + }, + None, + ), + ( + TaskDump { + uid: 1, + index_uid: Some(S("doggo")), + status: Status::Enqueued, + kind: KindDump::DocumentImport { + method: milli::update::IndexDocumentsMethod::UpdateDocuments, + allow_index_creation: true, + primary_key: None, + documents_count: 2, + }, + canceled_by: None, + details: Some(Details::DocumentAdditionOrUpdate { + received_documents: 2, + indexed_documents: None, + }), + error: None, + enqueued_at: datetime!(2022-11-11 0:00 UTC), + started_at: None, + finished_at: None, + }, + Some(vec![ + json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(), + json!({ "id": 5, "race": "patou" }).as_object().unwrap().clone(), + ]), + ), + ( + TaskDump { + uid: 5, + index_uid: Some(S("catto")), + status: Status::Enqueued, + kind: KindDump::IndexDeletion, + canceled_by: None, + details: None, + error: None, + enqueued_at: datetime!(2022-11-15 0:00 UTC), + started_at: None, + finished_at: None, + }, + None, + ), + ] + } + + pub fn create_test_api_keys() -> Vec { + vec![ + Key { + description: Some(S("The main key to manage all the doggos")), + name: Some(S("doggos_key")), + uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(), + actions: vec![Action::DocumentsAll], + indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())], + expires_at: Some(datetime!(4130-03-14 12:21 UTC)), + created_at: datetime!(1960-11-15 0:00 UTC), + updated_at: datetime!(2022-11-10 0:00 UTC), + }, + Key { + description: Some(S("The master key for everything and even the doggos")), + name: Some(S("master_key")), + uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(), + actions: vec![Action::All], + indexes: vec![StarOr::Star], + expires_at: None, + created_at: datetime!(0000-01-01 00:01 UTC), + updated_at: datetime!(1964-05-04 17:25 UTC), + }, + Key { + description: Some(S("The useless key to for nothing nor the doggos")), + name: Some(S("useless_key")), + uid: Uuid::from_str("fb80b58b-0a34-412f-8ba7-1ce868f8ac5c").unwrap(), + actions: vec![], + indexes: vec![], + expires_at: None, + created_at: datetime!(400-02-29 0:00 UTC), + updated_at: datetime!(1024-02-29 0:00 UTC), + }, + ] + } + + pub fn create_test_dump() -> File { + let instance_uid = create_test_instance_uid(); + let dump = DumpWriter::new(Some(instance_uid)).unwrap(); + + // ========== Adding an index + let documents = create_test_documents(); + let settings = create_test_settings(); + + let mut index = dump.create_index("doggos", &create_test_index_metadata()).unwrap(); + for document in &documents { + index.push_document(document).unwrap(); + 
} + index.flush().unwrap(); + index.settings(&settings).unwrap(); + + // ========== pushing the task queue + let tasks = create_test_tasks(); + + let mut task_queue = dump.create_tasks_queue().unwrap(); + for (task, update_file) in &tasks { + let mut update = task_queue.push_task(task).unwrap(); + if let Some(update_file) = update_file { + for u in update_file { + update.push_document(u).unwrap(); + } + } + } + task_queue.flush().unwrap(); + + // ========== pushing the api keys + let api_keys = create_test_api_keys(); + + let mut keys = dump.create_keys().unwrap(); + for key in &api_keys { + keys.push_key(key).unwrap(); + } + keys.flush().unwrap(); + + // create the dump + let mut file = tempfile::tempfile().unwrap(); + dump.persist_to(&mut file).unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + + file + } + + #[test] + #[ignore] + fn test_creating_and_read_dump() { + let mut file = create_test_dump(); + let mut dump = DumpReader::open(&mut file).unwrap(); + + // ==== checking the top level infos + assert_eq!(dump.version(), Version::V6); + assert!(dump.date().is_some()); + assert_eq!(dump.instance_uid().unwrap().unwrap(), create_test_instance_uid()); + + // ==== checking the index + let mut indexes = dump.indexes().unwrap(); + let mut index = indexes.next().unwrap().unwrap(); + assert!(indexes.next().is_none()); // there was only one index in the dump + + for (document, expected) in index.documents().unwrap().zip(create_test_documents()) { + assert_eq!(document.unwrap(), expected); + } + assert_eq!(index.settings().unwrap(), create_test_settings()); + assert_eq!(index.metadata(), &create_test_index_metadata()); + + drop(index); + drop(indexes); + + // ==== checking the task queue + for (task, expected) in dump.tasks().unwrap().zip(create_test_tasks()) { + let (task, content_file) = task.unwrap(); + assert_eq!(task, expected.0); + + if let Some(expected_update) = expected.1 { + assert!( + content_file.is_some(), + "A content file was expected for the task {}.", + expected.0.uid + ); + let updates = content_file.unwrap().collect::, _>>().unwrap(); + assert_eq!(updates, expected_update); + } + } + + // ==== checking the keys + for (key, expected) in dump.keys().unwrap().zip(create_test_api_keys()) { + assert_eq!(key.unwrap(), expected); + } + } +} diff --git a/dump/src/reader/compat/mod.rs b/dump/src/reader/compat/mod.rs new file mode 100644 index 000000000..29836aa61 --- /dev/null +++ b/dump/src/reader/compat/mod.rs @@ -0,0 +1,4 @@ +pub mod v2_to_v3; +pub mod v3_to_v4; +pub mod v4_to_v5; +pub mod v5_to_v6; diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs new file mode 100644 index 000000000..709670265 --- /dev/null +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -0,0 +1,480 @@ +use std::convert::TryInto; +use std::str::FromStr; + +use time::OffsetDateTime; +use uuid::Uuid; + +use super::v3_to_v4::CompatV3ToV4; +use crate::reader::{v2, v3, Document}; +use crate::Result; + +pub struct CompatV2ToV3 { + pub from: v2::V2Reader, +} + +impl CompatV2ToV3 { + pub fn new(v2: v2::V2Reader) -> CompatV2ToV3 { + CompatV2ToV3 { from: v2 } + } + + pub fn index_uuid(&self) -> Vec { + self.from + .index_uuid() + .into_iter() + .map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid }) + .collect() + } + + pub fn to_v4(self) -> CompatV3ToV4 { + CompatV3ToV4::Compat(self) + } + + pub fn version(&self) -> crate::Version { + self.from.version() + } + + pub fn date(&self) -> Option { + self.from.date() + } + + pub fn instance_uid(&self) -> Result> { + Ok(None) + } + 
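The compat modules introduced in this patch are meant to be chained: each layer only lifts a dump one version forward, so an older dump is upgraded by composing the `to_vN()` calls. Below is a minimal sketch of that composition; it is not part of this patch, and the helper name, the `tempfile::TempDir` argument, and the `println!` are illustrative assumptions only.

fn read_v2_dump_as_v6(dir: tempfile::TempDir) -> crate::Result<()> {
    // each call only wraps the previous reader; the conversion happens lazily while iterating
    let dump = v2::V2Reader::open(dir)?
        .to_v3()  // CompatV2ToV3
        .to_v4()  // CompatV3ToV4
        .to_v5()  // CompatV4ToV5
        .to_v6(); // CompatV5ToV6

    // the fully wrapped reader now exposes the v6-flavoured API
    for index in dump.indexes()? {
        let index = index?;
        println!("found index `{}`", index.metadata().uid);
    }
    Ok(())
}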
+ pub fn indexes(&self) -> Result> + '_> { + Ok(self.from.indexes()?.map(|index_reader| -> Result<_> { + let compat = CompatIndexV2ToV3::new(index_reader?); + Ok(compat) + })) + } + + pub fn tasks( + &mut self, + ) -> Box< + dyn Iterator>>>)>> + + '_, + > { + let _indexes = self.from.index_uuid.clone(); + + Box::new( + self.from + .tasks() + .map(move |task| { + task.map(|(task, content_file)| { + let task = v3::Task { uuid: task.uuid, update: task.update.into() }; + + Some(( + task, + content_file.map(|content_file| { + Box::new(content_file) as Box>> + }), + )) + }) + }) + .filter_map(|res| res.transpose()), + ) + } +} + +pub struct CompatIndexV2ToV3 { + from: v2::V2IndexReader, +} + +impl CompatIndexV2ToV3 { + pub fn new(v2: v2::V2IndexReader) -> CompatIndexV2ToV3 { + CompatIndexV2ToV3 { from: v2 } + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + self.from.metadata() + } + + pub fn documents(&mut self) -> Result> + '_>> { + self.from + .documents() + .map(|iter| Box::new(iter) as Box> + '_>) + } + + pub fn settings(&mut self) -> Result> { + Ok(v3::Settings::::from(self.from.settings()?).check()) + } +} + +impl From for v3::updates::UpdateStatus { + fn from(update: v2::updates::UpdateStatus) -> Self { + match update { + v2::updates::UpdateStatus::Processing(processing) => { + match (processing.from.meta.clone(), processing.from.content).try_into() { + Ok(meta) => v3::updates::UpdateStatus::Processing(v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: processing.from.update_id, + meta, + enqueued_at: processing.from.enqueued_at, + }, + started_processing_at: processing.started_processing_at, + }), + Err(e) => { + log::warn!("Error with task {}: {}", processing.from.update_id, e); + log::warn!("Task will be marked as `Failed`."); + v3::updates::UpdateStatus::Failed(v3::updates::Failed { + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: processing.from.update_id, + meta: update_from_unchecked_update_meta(processing.from.meta), + enqueued_at: processing.from.enqueued_at, + }, + started_processing_at: processing.started_processing_at, + }, + msg: e.to_string(), + code: v3::Code::MalformedDump, + failed_at: OffsetDateTime::now_utc(), + }) + } + } + } + v2::updates::UpdateStatus::Enqueued(enqueued) => { + match (enqueued.meta.clone(), enqueued.content).try_into() { + Ok(meta) => v3::updates::UpdateStatus::Enqueued(v3::updates::Enqueued { + update_id: enqueued.update_id, + meta, + enqueued_at: enqueued.enqueued_at, + }), + Err(e) => { + log::warn!("Error with task {}: {}", enqueued.update_id, e); + log::warn!("Task will be marked as `Failed`."); + v3::updates::UpdateStatus::Failed(v3::updates::Failed { + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: enqueued.update_id, + meta: update_from_unchecked_update_meta(enqueued.meta), + enqueued_at: enqueued.enqueued_at, + }, + started_processing_at: OffsetDateTime::now_utc(), + }, + msg: e.to_string(), + code: v3::Code::MalformedDump, + failed_at: OffsetDateTime::now_utc(), + }) + } + } + } + v2::updates::UpdateStatus::Processed(processed) => { + v3::updates::UpdateStatus::Processed(v3::updates::Processed { + success: processed.success.into(), + processed_at: processed.processed_at, + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: processed.from.from.update_id, + // since we're never going to read the content_file again it's ok to generate a fake one. 
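+                            // `update_from_unchecked_update_meta` rebuilds the update from its meta alone,
+                            // substituting a placeholder content uuid for document additions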
+ meta: update_from_unchecked_update_meta(processed.from.from.meta), + enqueued_at: processed.from.from.enqueued_at, + }, + started_processing_at: processed.from.started_processing_at, + }, + }) + } + v2::updates::UpdateStatus::Aborted(aborted) => { + v3::updates::UpdateStatus::Aborted(v3::updates::Aborted { + from: v3::updates::Enqueued { + update_id: aborted.from.update_id, + // since we're never going to read the content_file again it's ok to generate a fake one. + meta: update_from_unchecked_update_meta(aborted.from.meta), + enqueued_at: aborted.from.enqueued_at, + }, + aborted_at: aborted.aborted_at, + }) + } + v2::updates::UpdateStatus::Failed(failed) => { + v3::updates::UpdateStatus::Failed(v3::updates::Failed { + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: failed.from.from.update_id, + // since we're never going to read the content_file again it's ok to generate a fake one. + meta: update_from_unchecked_update_meta(failed.from.from.meta), + enqueued_at: failed.from.from.enqueued_at, + }, + started_processing_at: failed.from.started_processing_at, + }, + msg: failed.error.message, + code: failed.error.error_code.into(), + failed_at: failed.failed_at, + }) + } + } + } +} + +impl TryFrom<(v2::updates::UpdateMeta, Option)> for v3::updates::Update { + type Error = crate::Error; + + fn try_from((update, uuid): (v2::updates::UpdateMeta, Option)) -> Result { + Ok(match update { + v2::updates::UpdateMeta::DocumentsAddition { method, format: _, primary_key } + if uuid.is_some() => + { + v3::updates::Update::DocumentAddition { + primary_key, + method: match method { + v2::updates::IndexDocumentsMethod::ReplaceDocuments => { + v3::updates::IndexDocumentsMethod::ReplaceDocuments + } + v2::updates::IndexDocumentsMethod::UpdateDocuments => { + v3::updates::IndexDocumentsMethod::UpdateDocuments + } + }, + content_uuid: uuid.unwrap(), + } + } + v2::updates::UpdateMeta::DocumentsAddition { .. } => { + return Err(crate::Error::MalformedTask) + } + v2::updates::UpdateMeta::ClearDocuments => v3::updates::Update::ClearDocuments, + v2::updates::UpdateMeta::DeleteDocuments { ids } => { + v3::updates::Update::DeleteDocuments(ids) + } + v2::updates::UpdateMeta::Settings(settings) => { + v3::updates::Update::Settings(settings.into()) + } + }) + } +} + +pub fn update_from_unchecked_update_meta(update: v2::updates::UpdateMeta) -> v3::updates::Update { + match update { + v2::updates::UpdateMeta::DocumentsAddition { method, format: _, primary_key } => { + v3::updates::Update::DocumentAddition { + primary_key, + method: match method { + v2::updates::IndexDocumentsMethod::ReplaceDocuments => { + v3::updates::IndexDocumentsMethod::ReplaceDocuments + } + v2::updates::IndexDocumentsMethod::UpdateDocuments => { + v3::updates::IndexDocumentsMethod::UpdateDocuments + } + }, + // we use this special uuid so we can recognize it if one day there is a bug related to this field. 
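+                    // (the same helper also backs the `Failed` fallbacks above, so this uuid may show up for tasks whose meta could not be converted)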
+ content_uuid: Uuid::from_str("00112233-4455-6677-8899-aabbccddeeff").unwrap(), + } + } + v2::updates::UpdateMeta::ClearDocuments => v3::updates::Update::ClearDocuments, + v2::updates::UpdateMeta::DeleteDocuments { ids } => { + v3::updates::Update::DeleteDocuments(ids) + } + v2::updates::UpdateMeta::Settings(settings) => { + v3::updates::Update::Settings(settings.into()) + } + } +} + +impl From for v3::updates::UpdateResult { + fn from(result: v2::updates::UpdateResult) -> Self { + match result { + v2::updates::UpdateResult::DocumentsAddition(addition) => { + v3::updates::UpdateResult::DocumentsAddition(v3::updates::DocumentAdditionResult { + nb_documents: addition.nb_documents, + }) + } + v2::updates::UpdateResult::DocumentDeletion { deleted } => { + v3::updates::UpdateResult::DocumentDeletion { deleted } + } + v2::updates::UpdateResult::Other => v3::updates::UpdateResult::Other, + } + } +} + +impl From for v3::Code { + fn from(code: String) -> Self { + match code.as_ref() { + "create_index" => v3::Code::CreateIndex, + "index_already_exists" => v3::Code::IndexAlreadyExists, + "index_not_found" => v3::Code::IndexNotFound, + "invalid_index_uid" => v3::Code::InvalidIndexUid, + "invalid_state" => v3::Code::InvalidState, + "missing_primary_key" => v3::Code::MissingPrimaryKey, + "primary_key_already_present" => v3::Code::PrimaryKeyAlreadyPresent, + "max_fields_limit_exceeded" => v3::Code::MaxFieldsLimitExceeded, + "missing_document_id" => v3::Code::MissingDocumentId, + "invalid_document_id" => v3::Code::InvalidDocumentId, + "filter" => v3::Code::Filter, + "sort" => v3::Code::Sort, + "bad_parameter" => v3::Code::BadParameter, + "bad_request" => v3::Code::BadRequest, + "database_size_limit_reached" => v3::Code::DatabaseSizeLimitReached, + "document_not_found" => v3::Code::DocumentNotFound, + "internal" => v3::Code::Internal, + "invalid_geo_field" => v3::Code::InvalidGeoField, + "invalid_ranking_rule" => v3::Code::InvalidRankingRule, + "invalid_store" => v3::Code::InvalidStore, + "invalid_token" => v3::Code::InvalidToken, + "missing_authorization_header" => v3::Code::MissingAuthorizationHeader, + "no_space_left_on_device" => v3::Code::NoSpaceLeftOnDevice, + "dump_not_found" => v3::Code::DumpNotFound, + "task_not_found" => v3::Code::TaskNotFound, + "payload_too_large" => v3::Code::PayloadTooLarge, + "retrieve_document" => v3::Code::RetrieveDocument, + "search_documents" => v3::Code::SearchDocuments, + "unsupported_media_type" => v3::Code::UnsupportedMediaType, + "dump_already_in_progress" => v3::Code::DumpAlreadyInProgress, + "dump_process_failed" => v3::Code::DumpProcessFailed, + "invalid_content_type" => v3::Code::InvalidContentType, + "missing_content_type" => v3::Code::MissingContentType, + "malformed_payload" => v3::Code::MalformedPayload, + "missing_payload" => v3::Code::MissingPayload, + other => { + log::warn!("Unknown error code {}", other); + v3::Code::UnretrievableErrorCode + } + } + } +} + +fn option_to_setting(opt: Option>) -> v3::Setting { + match opt { + Some(Some(t)) => v3::Setting::Set(t), + None => v3::Setting::NotSet, + Some(None) => v3::Setting::Reset, + } +} + +impl From> for v3::Settings { + fn from(settings: v2::Settings) -> Self { + v3::Settings { + displayed_attributes: option_to_setting(settings.displayed_attributes), + searchable_attributes: option_to_setting(settings.searchable_attributes), + filterable_attributes: option_to_setting(settings.filterable_attributes) + .map(|f| f.into_iter().collect()), + sortable_attributes: v3::Setting::NotSet, + ranking_rules: 
option_to_setting(settings.ranking_rules).map(|criteria| { + criteria.into_iter().map(|criterion| patch_ranking_rules(&criterion)).collect() + }), + stop_words: option_to_setting(settings.stop_words), + synonyms: option_to_setting(settings.synonyms), + distinct_attribute: option_to_setting(settings.distinct_attribute), + _kind: std::marker::PhantomData, + } + } +} + +fn patch_ranking_rules(ranking_rule: &str) -> String { + match v2::settings::Criterion::from_str(ranking_rule) { + Ok(v2::settings::Criterion::Words) => String::from("words"), + Ok(v2::settings::Criterion::Typo) => String::from("typo"), + Ok(v2::settings::Criterion::Proximity) => String::from("proximity"), + Ok(v2::settings::Criterion::Attribute) => String::from("attribute"), + Ok(v2::settings::Criterion::Exactness) => String::from("exactness"), + Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"), + Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"), + // we want to forward the error to the current version of meilisearch + Err(_) => ranking_rule.to_string(), + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v2_v3() { + let dump = File::open("tests/assets/v2.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v2::V2Reader::open(dir).unwrap().to_v3(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"9507711db47c7171c79bc6d57d0bed79"); + assert_eq!(update_files.len(), 9); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"54b3d7a0d96de35427d867fa17164a99"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + 
"###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"ae7c5ade2243a553152dab2f354e9095"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"1be82b894556d23953af557b6a328a58"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"1be82b894556d23953af557b6a328a58"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/compat/v3_to_v4.rs b/dump/src/reader/compat/v3_to_v4.rs new file mode 100644 index 000000000..46fe0c9b2 --- /dev/null +++ b/dump/src/reader/compat/v3_to_v4.rs @@ -0,0 +1,450 @@ +use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3}; +use super::v4_to_v5::CompatV4ToV5; +use crate::reader::{v3, v4, UpdateFile}; +use crate::Result; + +pub enum CompatV3ToV4 { + V3(v3::V3Reader), + Compat(CompatV2ToV3), +} + +impl CompatV3ToV4 { + pub fn new(v3: v3::V3Reader) -> CompatV3ToV4 { + CompatV3ToV4::V3(v3) + } + + pub fn to_v5(self) -> CompatV4ToV5 { + CompatV4ToV5::Compat(self) + } + + pub fn version(&self) -> crate::Version { + match self { + CompatV3ToV4::V3(v3) => v3.version(), + CompatV3ToV4::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + CompatV3ToV4::V3(v3) => v3.date(), + CompatV3ToV4::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + Ok(None) + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(match self { + CompatV3ToV4::V3(v3) => { + Box::new(v3.indexes()?.map(|index| index.map(CompatIndexV3ToV4::from))) + as Box> + '_> + } + + CompatV3ToV4::Compat(compat) => { + Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV3ToV4::from))) + as Box> + '_> + } + }) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + let indexes = match self { + CompatV3ToV4::V3(v3) => v3.index_uuid(), + CompatV3ToV4::Compat(compat) => compat.index_uuid(), + }; + let tasks = match self { + CompatV3ToV4::V3(v3) => v3.tasks(), + CompatV3ToV4::Compat(compat) => compat.tasks(), + }; + + Box::new( + tasks + // we need to override the old task ids that were generated + // by index in favor of a global unique incremental ID. 
+ .enumerate() + .map(move |(task_id, task)| { + task.map(|(task, content_file)| { + let index_uid = indexes + .iter() + .find(|index| index.uuid == task.uuid) + .map(|index| index.uid.clone()); + + let index_uid = match index_uid { + Some(uid) => uid, + None => { + log::warn!( + "Error while importing the update {}.", + task.update.id() + ); + log::warn!( + "The index associated to the uuid `{}` could not be retrieved.", + task.uuid.to_string() + ); + if task.update.is_finished() { + // we're fucking with his history but not his data, that's ok-ish. + log::warn!("The index-uuid will be set as `unknown`."); + String::from("unknown") + } else { + log::warn!("The task will be ignored."); + return None; + } + } + }; + + let task = v4::Task { + id: task_id as u32, + index_uid: v4::meta::IndexUid(index_uid), + content: match task.update.meta() { + v3::Kind::DeleteDocuments(documents) => { + v4::tasks::TaskContent::DocumentDeletion( + v4::tasks::DocumentDeletion::Ids(documents.clone()), + ) + } + v3::Kind::DocumentAddition { + primary_key, + method, + content_uuid, + } => v4::tasks::TaskContent::DocumentAddition { + merge_strategy: match method { + v3::updates::IndexDocumentsMethod::ReplaceDocuments => { + v4::tasks::IndexDocumentsMethod::ReplaceDocuments + } + v3::updates::IndexDocumentsMethod::UpdateDocuments => { + v4::tasks::IndexDocumentsMethod::UpdateDocuments + } + }, + primary_key: primary_key.clone(), + documents_count: 0, // we don't have this info + allow_index_creation: true, // there was no API-key in the v3 + content_uuid: *content_uuid, + }, + v3::Kind::Settings(settings) => { + v4::tasks::TaskContent::SettingsUpdate { + settings: v4::Settings::from(settings.clone()), + is_deletion: false, // that didn't exist at this time + allow_index_creation: true, // there was no API-key in the v3 + } + } + v3::Kind::ClearDocuments => { + v4::tasks::TaskContent::DocumentDeletion( + v4::tasks::DocumentDeletion::Clear, + ) + } + }, + events: match task.update { + v3::Status::Processing(processing) => { + vec![v4::tasks::TaskEvent::Created(processing.from.enqueued_at)] + } + v3::Status::Enqueued(enqueued) => { + vec![v4::tasks::TaskEvent::Created(enqueued.enqueued_at)] + } + v3::Status::Processed(processed) => { + vec![ + v4::tasks::TaskEvent::Created( + processed.from.from.enqueued_at, + ), + v4::tasks::TaskEvent::Processing( + processed.from.started_processing_at, + ), + v4::tasks::TaskEvent::Succeded { + result: match processed.success { + v3::updates::UpdateResult::DocumentsAddition( + document_addition, + ) => v4::tasks::TaskResult::DocumentAddition { + indexed_documents: document_addition + .nb_documents + as u64, + }, + v3::updates::UpdateResult::DocumentDeletion { + deleted, + } => v4::tasks::TaskResult::DocumentDeletion { + deleted_documents: deleted, + }, + v3::updates::UpdateResult::Other => { + v4::tasks::TaskResult::Other + } + }, + timestamp: processed.processed_at, + }, + ] + } + v3::Status::Failed(failed) => vec![ + v4::tasks::TaskEvent::Created(failed.from.from.enqueued_at), + v4::tasks::TaskEvent::Processing( + failed.from.started_processing_at, + ), + v4::tasks::TaskEvent::Failed { + error: v4::ResponseError::from_msg( + failed.msg.to_string(), + failed.code.into(), + ), + timestamp: failed.failed_at, + }, + ], + v3::Status::Aborted(aborted) => vec![ + v4::tasks::TaskEvent::Created(aborted.from.enqueued_at), + v4::tasks::TaskEvent::Failed { + error: v4::ResponseError::from_msg( + "Task was aborted in a previous version of meilisearch." 
+ .to_string(), + v4::errors::Code::UnretrievableErrorCode, + ), + timestamp: aborted.aborted_at, + }, + ], + }, + }; + + Some((task, content_file)) + }) + }) + .filter_map(|res| res.transpose()), + ) + } + + pub fn keys(&mut self) -> Box> + '_> { + Box::new(std::iter::empty()) + } +} + +pub enum CompatIndexV3ToV4 { + V3(v3::V3IndexReader), + Compat(CompatIndexV2ToV3), +} + +impl From for CompatIndexV3ToV4 { + fn from(index_reader: v3::V3IndexReader) -> Self { + Self::V3(index_reader) + } +} + +impl From for CompatIndexV3ToV4 { + fn from(index_reader: CompatIndexV2ToV3) -> Self { + Self::Compat(index_reader) + } +} + +impl CompatIndexV3ToV4 { + pub fn new(v3: v3::V3IndexReader) -> CompatIndexV3ToV4 { + CompatIndexV3ToV4::V3(v3) + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + CompatIndexV3ToV4::V3(v3) => v3.metadata(), + CompatIndexV3ToV4::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + CompatIndexV3ToV4::V3(v3) => v3 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + + CompatIndexV3ToV4::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + Ok(match self { + CompatIndexV3ToV4::V3(v3) => { + v4::Settings::::from(v3.settings()?).check() + } + CompatIndexV3ToV4::Compat(compat) => { + v4::Settings::::from(compat.settings()?).check() + } + }) + } +} + +impl From> for v4::Setting { + fn from(setting: v3::Setting) -> Self { + match setting { + v3::Setting::Set(t) => v4::Setting::Set(t), + v3::Setting::Reset => v4::Setting::Reset, + v3::Setting::NotSet => v4::Setting::NotSet, + } + } +} + +impl From for v4::Code { + fn from(code: v3::Code) -> Self { + match code { + v3::Code::CreateIndex => v4::Code::CreateIndex, + v3::Code::IndexAlreadyExists => v4::Code::IndexAlreadyExists, + v3::Code::IndexNotFound => v4::Code::IndexNotFound, + v3::Code::InvalidIndexUid => v4::Code::InvalidIndexUid, + v3::Code::InvalidState => v4::Code::InvalidState, + v3::Code::MissingPrimaryKey => v4::Code::MissingPrimaryKey, + v3::Code::PrimaryKeyAlreadyPresent => v4::Code::PrimaryKeyAlreadyPresent, + v3::Code::MaxFieldsLimitExceeded => v4::Code::MaxFieldsLimitExceeded, + v3::Code::MissingDocumentId => v4::Code::MissingDocumentId, + v3::Code::InvalidDocumentId => v4::Code::InvalidDocumentId, + v3::Code::Filter => v4::Code::Filter, + v3::Code::Sort => v4::Code::Sort, + v3::Code::BadParameter => v4::Code::BadParameter, + v3::Code::BadRequest => v4::Code::BadRequest, + v3::Code::DatabaseSizeLimitReached => v4::Code::DatabaseSizeLimitReached, + v3::Code::DocumentNotFound => v4::Code::DocumentNotFound, + v3::Code::Internal => v4::Code::Internal, + v3::Code::InvalidGeoField => v4::Code::InvalidGeoField, + v3::Code::InvalidRankingRule => v4::Code::InvalidRankingRule, + v3::Code::InvalidStore => v4::Code::InvalidStore, + v3::Code::InvalidToken => v4::Code::InvalidToken, + v3::Code::MissingAuthorizationHeader => v4::Code::MissingAuthorizationHeader, + v3::Code::NoSpaceLeftOnDevice => v4::Code::NoSpaceLeftOnDevice, + v3::Code::DumpNotFound => v4::Code::DumpNotFound, + v3::Code::TaskNotFound => v4::Code::TaskNotFound, + v3::Code::PayloadTooLarge => v4::Code::PayloadTooLarge, + v3::Code::RetrieveDocument => v4::Code::RetrieveDocument, + v3::Code::SearchDocuments => v4::Code::SearchDocuments, + v3::Code::UnsupportedMediaType => v4::Code::UnsupportedMediaType, + v3::Code::DumpAlreadyInProgress => v4::Code::DumpAlreadyInProgress, + 
v3::Code::DumpProcessFailed => v4::Code::DumpProcessFailed, + v3::Code::InvalidContentType => v4::Code::InvalidContentType, + v3::Code::MissingContentType => v4::Code::MissingContentType, + v3::Code::MalformedPayload => v4::Code::MalformedPayload, + v3::Code::MissingPayload => v4::Code::MissingPayload, + v3::Code::UnretrievableErrorCode => v4::Code::UnretrievableErrorCode, + v3::Code::MalformedDump => v4::Code::MalformedDump, + } + } +} + +impl From> for v4::Settings { + fn from(settings: v3::Settings) -> Self { + v4::Settings { + displayed_attributes: settings.displayed_attributes.into(), + searchable_attributes: settings.searchable_attributes.into(), + filterable_attributes: settings.filterable_attributes.into(), + sortable_attributes: settings.sortable_attributes.into(), + ranking_rules: settings.ranking_rules.into(), + stop_words: settings.stop_words.into(), + synonyms: settings.synonyms.into(), + distinct_attribute: settings.distinct_attribute.into(), + typo_tolerance: v4::Setting::NotSet, + _kind: std::marker::PhantomData, + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v3_v4() { + let dump = File::open("tests/assets/v3.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v3::V3Reader::open(dir).unwrap().to_v4(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"79bc053583a1a7172bbaaafb1edaeb78"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // keys + let keys = dump.keys().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"d3402aff19b90acea9e9a07c466690aa"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + 
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"687aaab250f01b55d57bc69aa313b581"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"cd9fedbd7e3492831a94da62c90013ea"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"cd9fedbd7e3492831a94da62c90013ea"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/compat/v4_to_v5.rs b/dump/src/reader/compat/v4_to_v5.rs new file mode 100644 index 000000000..7f985186f --- /dev/null +++ b/dump/src/reader/compat/v4_to_v5.rs @@ -0,0 +1,468 @@ +use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4}; +use super::v5_to_v6::CompatV5ToV6; +use crate::reader::{v4, v5, Document}; +use crate::Result; + +pub enum CompatV4ToV5 { + V4(v4::V4Reader), + Compat(CompatV3ToV4), +} + +impl CompatV4ToV5 { + pub fn new(v4: v4::V4Reader) -> CompatV4ToV5 { + CompatV4ToV5::V4(v4) + } + + pub fn to_v6(self) -> CompatV5ToV6 { + CompatV5ToV6::Compat(self) + } + + pub fn version(&self) -> crate::Version { + match self { + CompatV4ToV5::V4(v4) => v4.version(), + CompatV4ToV5::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + CompatV4ToV5::V4(v4) => v4.date(), + CompatV4ToV5::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + match self { + CompatV4ToV5::V4(v4) => v4.instance_uid(), + CompatV4ToV5::Compat(compat) => compat.instance_uid(), + } + } + + pub fn indexes(&self) -> Result> + '_>> { + Ok(match self { + CompatV4ToV5::V4(v4) => { + Box::new(v4.indexes()?.map(|index| index.map(CompatIndexV4ToV5::from))) + as Box> + '_> + } + + CompatV4ToV5::Compat(compat) => { + Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV4ToV5::from))) + as Box> + '_> + } + }) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> + { + let tasks = match self { + CompatV4ToV5::V4(v4) => v4.tasks(), + CompatV4ToV5::Compat(compat) => compat.tasks(), + }; + Box::new(tasks.map(|task| { + task.map(|(task, content_file)| { + let task = v5::Task { + id: task.id, + content: match task.content { + v4::tasks::TaskContent::DocumentAddition { + content_uuid, + merge_strategy, + primary_key, + documents_count, + allow_index_creation, + } => v5::tasks::TaskContent::DocumentAddition { + index_uid: 
v5::meta::IndexUid(task.index_uid.0), + content_uuid, + merge_strategy: match merge_strategy { + v4::tasks::IndexDocumentsMethod::ReplaceDocuments => { + v5::tasks::IndexDocumentsMethod::ReplaceDocuments + } + v4::tasks::IndexDocumentsMethod::UpdateDocuments => { + v5::tasks::IndexDocumentsMethod::UpdateDocuments + } + }, + primary_key, + documents_count, + allow_index_creation, + }, + v4::tasks::TaskContent::DocumentDeletion(deletion) => { + v5::tasks::TaskContent::DocumentDeletion { + index_uid: v5::meta::IndexUid(task.index_uid.0), + deletion: match deletion { + v4::tasks::DocumentDeletion::Clear => { + v5::tasks::DocumentDeletion::Clear + } + v4::tasks::DocumentDeletion::Ids(ids) => { + v5::tasks::DocumentDeletion::Ids(ids) + } + }, + } + } + v4::tasks::TaskContent::SettingsUpdate { + settings, + is_deletion, + allow_index_creation, + } => v5::tasks::TaskContent::SettingsUpdate { + index_uid: v5::meta::IndexUid(task.index_uid.0), + settings: settings.into(), + is_deletion, + allow_index_creation, + }, + v4::tasks::TaskContent::IndexDeletion => { + v5::tasks::TaskContent::IndexDeletion { + index_uid: v5::meta::IndexUid(task.index_uid.0), + } + } + v4::tasks::TaskContent::IndexCreation { primary_key } => { + v5::tasks::TaskContent::IndexCreation { + index_uid: v5::meta::IndexUid(task.index_uid.0), + primary_key, + } + } + v4::tasks::TaskContent::IndexUpdate { primary_key } => { + v5::tasks::TaskContent::IndexUpdate { + index_uid: v5::meta::IndexUid(task.index_uid.0), + primary_key, + } + } + }, + events: task + .events + .into_iter() + .map(|event| match event { + v4::tasks::TaskEvent::Created(date) => { + v5::tasks::TaskEvent::Created(date) + } + v4::tasks::TaskEvent::Batched { timestamp, batch_id } => { + v5::tasks::TaskEvent::Batched { timestamp, batch_id } + } + v4::tasks::TaskEvent::Processing(date) => { + v5::tasks::TaskEvent::Processing(date) + } + v4::tasks::TaskEvent::Succeded { result, timestamp } => { + v5::tasks::TaskEvent::Succeeded { + result: match result { + v4::tasks::TaskResult::DocumentAddition { + indexed_documents, + } => v5::tasks::TaskResult::DocumentAddition { + indexed_documents, + }, + v4::tasks::TaskResult::DocumentDeletion { + deleted_documents, + } => v5::tasks::TaskResult::DocumentDeletion { + deleted_documents, + }, + v4::tasks::TaskResult::ClearAll { deleted_documents } => { + v5::tasks::TaskResult::ClearAll { deleted_documents } + } + v4::tasks::TaskResult::Other => { + v5::tasks::TaskResult::Other + } + }, + timestamp, + } + } + v4::tasks::TaskEvent::Failed { error, timestamp } => { + v5::tasks::TaskEvent::Failed { + error: v5::ResponseError::from(error), + timestamp, + } + } + }) + .collect(), + }; + + (task, content_file) + }) + })) + } + + pub fn keys(&mut self) -> Box> + '_> { + let keys = match self { + CompatV4ToV5::V4(v4) => v4.keys(), + CompatV4ToV5::Compat(compat) => compat.keys(), + }; + Box::new(keys.map(|key| { + key.map(|key| v5::Key { + description: key.description, + name: None, + uid: v5::keys::KeyId::new_v4(), + actions: key.actions.into_iter().filter_map(|action| action.into()).collect(), + indexes: key + .indexes + .into_iter() + .map(|index| match index.as_str() { + "*" => v5::StarOr::Star, + _ => v5::StarOr::Other(v5::meta::IndexUid(index)), + }) + .collect(), + expires_at: key.expires_at, + created_at: key.created_at, + updated_at: key.updated_at, + }) + })) + } +} + +pub enum CompatIndexV4ToV5 { + V4(v4::V4IndexReader), + Compat(CompatIndexV3ToV4), +} + +impl From for CompatIndexV4ToV5 { + fn from(index_reader: v4::V4IndexReader) 
-> Self { + Self::V4(index_reader) + } +} + +impl From for CompatIndexV4ToV5 { + fn from(index_reader: CompatIndexV3ToV4) -> Self { + Self::Compat(index_reader) + } +} + +impl CompatIndexV4ToV5 { + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + CompatIndexV4ToV5::V4(v4) => v4.metadata(), + CompatIndexV4ToV5::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + CompatIndexV4ToV5::V4(v4) => v4 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + CompatIndexV4ToV5::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + match self { + CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()), + CompatIndexV4ToV5::Compat(compat) => Ok(v5::Settings::from(compat.settings()?).check()), + } + } +} + +impl From> for v5::Setting { + fn from(setting: v4::Setting) -> Self { + match setting { + v4::Setting::Set(t) => v5::Setting::Set(t), + v4::Setting::Reset => v5::Setting::Reset, + v4::Setting::NotSet => v5::Setting::NotSet, + } + } +} + +impl From for v5::ResponseError { + fn from(error: v4::ResponseError) -> Self { + let code = match error.error_code.as_ref() { + "index_creation_failed" => v5::Code::CreateIndex, + "index_already_exists" => v5::Code::IndexAlreadyExists, + "index_not_found" => v5::Code::IndexNotFound, + "invalid_index_uid" => v5::Code::InvalidIndexUid, + "invalid_min_word_length_for_typo" => v5::Code::InvalidMinWordLengthForTypo, + "invalid_state" => v5::Code::InvalidState, + "primary_key_inference_failed" => v5::Code::MissingPrimaryKey, + "index_primary_key_already_exists" => v5::Code::PrimaryKeyAlreadyPresent, + "max_fields_limit_exceeded" => v5::Code::MaxFieldsLimitExceeded, + "missing_document_id" => v5::Code::MissingDocumentId, + "invalid_document_id" => v5::Code::InvalidDocumentId, + "invalid_filter" => v5::Code::Filter, + "invalid_sort" => v5::Code::Sort, + "bad_parameter" => v5::Code::BadParameter, + "bad_request" => v5::Code::BadRequest, + "database_size_limit_reached" => v5::Code::DatabaseSizeLimitReached, + "document_not_found" => v5::Code::DocumentNotFound, + "internal" => v5::Code::Internal, + "invalid_geo_field" => v5::Code::InvalidGeoField, + "invalid_ranking_rule" => v5::Code::InvalidRankingRule, + "invalid_store_file" => v5::Code::InvalidStore, + "invalid_api_key" => v5::Code::InvalidToken, + "missing_authorization_header" => v5::Code::MissingAuthorizationHeader, + "no_space_left_on_device" => v5::Code::NoSpaceLeftOnDevice, + "dump_not_found" => v5::Code::DumpNotFound, + "task_not_found" => v5::Code::TaskNotFound, + "payload_too_large" => v5::Code::PayloadTooLarge, + "unretrievable_document" => v5::Code::RetrieveDocument, + "search_error" => v5::Code::SearchDocuments, + "unsupported_media_type" => v5::Code::UnsupportedMediaType, + "dump_already_processing" => v5::Code::DumpAlreadyInProgress, + "dump_process_failed" => v5::Code::DumpProcessFailed, + "invalid_content_type" => v5::Code::InvalidContentType, + "missing_content_type" => v5::Code::MissingContentType, + "malformed_payload" => v5::Code::MalformedPayload, + "missing_payload" => v5::Code::MissingPayload, + "api_key_not_found" => v5::Code::ApiKeyNotFound, + "missing_parameter" => v5::Code::MissingParameter, + "invalid_api_key_actions" => v5::Code::InvalidApiKeyActions, + "invalid_api_key_indexes" => v5::Code::InvalidApiKeyIndexes, + "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt, + 
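+            // any error string we do not recognize is logged and mapped to `UnretrievableErrorCode` below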
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription, + other => { + log::warn!("Unknown error code {}", other); + v5::Code::UnretrievableErrorCode + } + }; + v5::ResponseError::from_msg(error.message, code) + } +} + +impl From> for v5::Settings { + fn from(settings: v4::Settings) -> Self { + v5::Settings { + displayed_attributes: settings.displayed_attributes.into(), + searchable_attributes: settings.searchable_attributes.into(), + filterable_attributes: settings.filterable_attributes.into(), + sortable_attributes: settings.sortable_attributes.into(), + ranking_rules: settings.ranking_rules.into(), + stop_words: settings.stop_words.into(), + synonyms: settings.synonyms.into(), + distinct_attribute: settings.distinct_attribute.into(), + typo_tolerance: match settings.typo_tolerance { + v4::Setting::Set(typo) => v5::Setting::Set(v5::TypoTolerance { + enabled: typo.enabled.into(), + min_word_size_for_typos: match typo.min_word_size_for_typos { + v4::Setting::Set(t) => v5::Setting::Set(v5::MinWordSizeForTypos { + one_typo: t.one_typo.into(), + two_typos: t.two_typos.into(), + }), + v4::Setting::Reset => v5::Setting::Reset, + v4::Setting::NotSet => v5::Setting::NotSet, + }, + disable_on_words: typo.disable_on_words.into(), + disable_on_attributes: typo.disable_on_attributes.into(), + }), + v4::Setting::Reset => v5::Setting::Reset, + v4::Setting::NotSet => v5::Setting::NotSet, + }, + faceting: v5::Setting::NotSet, + pagination: v5::Setting::NotSet, + _kind: std::marker::PhantomData, + } + } +} + +impl From for Option { + fn from(key: v4::Action) -> Self { + match key { + v4::Action::All => Some(v5::Action::All), + v4::Action::Search => Some(v5::Action::Search), + v4::Action::DocumentsAdd => Some(v5::Action::DocumentsAdd), + v4::Action::DocumentsGet => Some(v5::Action::DocumentsGet), + v4::Action::DocumentsDelete => Some(v5::Action::DocumentsDelete), + v4::Action::IndexesAdd => Some(v5::Action::IndexesAdd), + v4::Action::IndexesGet => Some(v5::Action::IndexesGet), + v4::Action::IndexesUpdate => Some(v5::Action::IndexesUpdate), + v4::Action::IndexesDelete => Some(v5::Action::IndexesDelete), + v4::Action::TasksGet => Some(v5::Action::TasksGet), + v4::Action::SettingsGet => Some(v5::Action::SettingsGet), + v4::Action::SettingsUpdate => Some(v5::Action::SettingsUpdate), + v4::Action::StatsGet => Some(v5::Action::StatsGet), + v4::Action::DumpsCreate => Some(v5::Action::DumpsCreate), + v4::Action::DumpsGet => None, + v4::Action::Version => Some(v5::Action::Version), + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v4_v5() { + let dump = File::open("tests/assets/v4.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v4::V4Reader::open(dir).unwrap().to_v5(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ed9a30cded4c046ef46f7cff7450347e"); 
+ assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"1384361d734fd77c23804c9696228660"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"26947283836ee4cdf0974f82efcc5332"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"156871410d17e23803d0c90ddc6a66cb"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"69c9916142612cf4a2da9b9ed9455e9e"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs new file mode 100644 index 000000000..c73fe23d0 --- /dev/null +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -0,0 +1,487 @@ +use super::v4_to_v5::{CompatIndexV4ToV5, CompatV4ToV5}; +use crate::reader::{v5, v6, Document, UpdateFile}; +use crate::Result; + +pub enum CompatV5ToV6 { + V5(v5::V5Reader), + Compat(CompatV4ToV5), +} + +impl CompatV5ToV6 { + pub fn new_v5(v5: v5::V5Reader) -> CompatV5ToV6 { + CompatV5ToV6::V5(v5) + } + + pub fn version(&self) -> crate::Version { + match self { + CompatV5ToV6::V5(v5) => v5.version(), + CompatV5ToV6::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + CompatV5ToV6::V5(v5) => v5.date(), + CompatV5ToV6::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + match self { + CompatV5ToV6::V5(v5) => v5.instance_uid(), + CompatV5ToV6::Compat(compat) => compat.instance_uid(), + } + } + + pub fn indexes(&self) -> Result> + '_>> { + let indexes = match self { + CompatV5ToV6::V5(v5) => { + Box::new(v5.indexes()?.map(|index| index.map(CompatIndexV5ToV6::from))) + as 
Box> + '_> + } + + CompatV5ToV6::Compat(compat) => { + Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV5ToV6::from))) + as Box> + '_> + } + }; + Ok(indexes) + } + + pub fn tasks( + &mut self, + ) -> Result>)>> + '_>> { + let instance_uid = self.instance_uid().ok().flatten(); + let keys = self.keys()?.collect::>>()?; + + let tasks = match self { + CompatV5ToV6::V5(v5) => v5.tasks(), + CompatV5ToV6::Compat(compat) => compat.tasks(), + }; + Ok(Box::new(tasks.map(move |task| { + task.map(|(task, content_file)| { + let mut task_view: v5::tasks::TaskView = task.clone().into(); + + if task_view.status == v5::Status::Processing { + task_view.started_at = None; + } + + let task = v6::Task { + uid: task_view.uid, + index_uid: task_view.index_uid, + status: match task_view.status { + v5::Status::Enqueued => v6::Status::Enqueued, + v5::Status::Processing => v6::Status::Enqueued, + v5::Status::Succeeded => v6::Status::Succeeded, + v5::Status::Failed => v6::Status::Failed, + }, + kind: match task.content { + v5::tasks::TaskContent::IndexCreation { primary_key, .. } => { + v6::Kind::IndexCreation { primary_key } + } + v5::tasks::TaskContent::IndexUpdate { primary_key, .. } => { + v6::Kind::IndexUpdate { primary_key } + } + v5::tasks::TaskContent::IndexDeletion { .. } => v6::Kind::IndexDeletion, + v5::tasks::TaskContent::DocumentAddition { + merge_strategy, + allow_index_creation, + primary_key, + documents_count, + .. + } => v6::Kind::DocumentImport { + primary_key, + documents_count: documents_count as u64, + method: match merge_strategy { + v5::tasks::IndexDocumentsMethod::ReplaceDocuments => { + v6::milli::update::IndexDocumentsMethod::ReplaceDocuments + } + v5::tasks::IndexDocumentsMethod::UpdateDocuments => { + v6::milli::update::IndexDocumentsMethod::UpdateDocuments + } + }, + allow_index_creation, + }, + v5::tasks::TaskContent::DocumentDeletion { deletion, .. } => match deletion + { + v5::tasks::DocumentDeletion::Clear => v6::Kind::DocumentClear, + v5::tasks::DocumentDeletion::Ids(documents_ids) => { + v6::Kind::DocumentDeletion { documents_ids } + } + }, + v5::tasks::TaskContent::SettingsUpdate { + allow_index_creation, + is_deletion, + settings, + .. 
+ } => v6::Kind::Settings { + is_deletion, + allow_index_creation, + settings: Box::new(settings.into()), + }, + v5::tasks::TaskContent::Dump { uid } => v6::Kind::DumpCreation { + dump_uid: uid, + keys: keys.clone(), + instance_uid, + }, + }, + canceled_by: None, + details: task_view.details.map(|details| match details { + v5::Details::DocumentAddition { received_documents, indexed_documents } => { + v6::Details::DocumentAdditionOrUpdate { + received_documents: received_documents as u64, + indexed_documents, + } + } + v5::Details::Settings { settings } => { + v6::Details::SettingsUpdate { settings: Box::new(settings.into()) } + } + v5::Details::IndexInfo { primary_key } => { + v6::Details::IndexInfo { primary_key } + } + v5::Details::DocumentDeletion { + received_document_ids, + deleted_documents, + } => v6::Details::DocumentDeletion { + matched_documents: received_document_ids, + deleted_documents, + }, + v5::Details::ClearAll { deleted_documents } => { + v6::Details::ClearAll { deleted_documents } + } + v5::Details::Dump { dump_uid } => v6::Details::Dump { dump_uid }, + }), + error: task_view.error.map(|e| e.into()), + enqueued_at: task_view.enqueued_at, + started_at: task_view.started_at, + finished_at: task_view.finished_at, + }; + + (task, content_file) + }) + }))) + } + + pub fn keys(&mut self) -> Result> + '_>> { + let keys = match self { + CompatV5ToV6::V5(v5) => v5.keys()?, + CompatV5ToV6::Compat(compat) => compat.keys(), + }; + + Ok(Box::new(keys.map(|key| { + key.map(|key| v6::Key { + description: key.description, + name: key.name, + uid: key.uid, + actions: key.actions.into_iter().map(|action| action.into()).collect(), + indexes: key + .indexes + .into_iter() + .map(|index| match index { + v5::StarOr::Star => v6::StarOr::Star, + v5::StarOr::Other(uid) => { + v6::StarOr::Other(v6::IndexUid::new_unchecked(uid.as_str())) + } + }) + .collect(), + expires_at: key.expires_at, + created_at: key.created_at, + updated_at: key.updated_at, + }) + }))) + } +} + +pub enum CompatIndexV5ToV6 { + V5(v5::V5IndexReader), + Compat(CompatIndexV4ToV5), +} + +impl From for CompatIndexV5ToV6 { + fn from(index_reader: v5::V5IndexReader) -> Self { + Self::V5(index_reader) + } +} + +impl From for CompatIndexV5ToV6 { + fn from(index_reader: CompatIndexV4ToV5) -> Self { + Self::Compat(index_reader) + } +} + +impl CompatIndexV5ToV6 { + pub fn new_v5(v5: v5::V5IndexReader) -> CompatIndexV5ToV6 { + CompatIndexV5ToV6::V5(v5) + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + CompatIndexV5ToV6::V5(v5) => v5.metadata(), + CompatIndexV5ToV6::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + CompatIndexV5ToV6::V5(v5) => v5 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + CompatIndexV5ToV6::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + match self { + CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()), + CompatIndexV5ToV6::Compat(compat) => Ok(v6::Settings::from(compat.settings()?).check()), + } + } +} + +impl From> for v6::Setting { + fn from(setting: v5::Setting) -> Self { + match setting { + v5::Setting::Set(t) => v6::Setting::Set(t), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + } + } +} + +impl From for v6::ResponseError { + fn from(error: v5::ResponseError) -> Self { + let code = match error.error_code.as_ref() { + "index_creation_failed" => 
v6::Code::CreateIndex, + "index_already_exists" => v6::Code::IndexAlreadyExists, + "index_not_found" => v6::Code::IndexNotFound, + "invalid_index_uid" => v6::Code::InvalidIndexUid, + "invalid_min_word_length_for_typo" => v6::Code::InvalidMinWordLengthForTypo, + "invalid_state" => v6::Code::InvalidState, + "primary_key_inference_failed" => v6::Code::MissingPrimaryKey, + "index_primary_key_already_exists" => v6::Code::PrimaryKeyAlreadyPresent, + "max_fields_limit_exceeded" => v6::Code::MaxFieldsLimitExceeded, + "missing_document_id" => v6::Code::MissingDocumentId, + "invalid_document_id" => v6::Code::InvalidDocumentId, + "invalid_filter" => v6::Code::Filter, + "invalid_sort" => v6::Code::Sort, + "bad_parameter" => v6::Code::BadParameter, + "bad_request" => v6::Code::BadRequest, + "database_size_limit_reached" => v6::Code::DatabaseSizeLimitReached, + "document_not_found" => v6::Code::DocumentNotFound, + "internal" => v6::Code::Internal, + "invalid_geo_field" => v6::Code::InvalidGeoField, + "invalid_ranking_rule" => v6::Code::InvalidRankingRule, + "invalid_store_file" => v6::Code::InvalidStore, + "invalid_api_key" => v6::Code::InvalidToken, + "missing_authorization_header" => v6::Code::MissingAuthorizationHeader, + "no_space_left_on_device" => v6::Code::NoSpaceLeftOnDevice, + "dump_not_found" => v6::Code::DumpNotFound, + "task_not_found" => v6::Code::TaskNotFound, + "payload_too_large" => v6::Code::PayloadTooLarge, + "unretrievable_document" => v6::Code::RetrieveDocument, + "search_error" => v6::Code::SearchDocuments, + "unsupported_media_type" => v6::Code::UnsupportedMediaType, + "dump_already_processing" => v6::Code::DumpAlreadyInProgress, + "dump_process_failed" => v6::Code::DumpProcessFailed, + "invalid_content_type" => v6::Code::InvalidContentType, + "missing_content_type" => v6::Code::MissingContentType, + "malformed_payload" => v6::Code::MalformedPayload, + "missing_payload" => v6::Code::MissingPayload, + "api_key_not_found" => v6::Code::ApiKeyNotFound, + "missing_parameter" => v6::Code::MissingParameter, + "invalid_api_key_actions" => v6::Code::InvalidApiKeyActions, + "invalid_api_key_indexes" => v6::Code::InvalidApiKeyIndexes, + "invalid_api_key_expires_at" => v6::Code::InvalidApiKeyExpiresAt, + "invalid_api_key_description" => v6::Code::InvalidApiKeyDescription, + "invalid_api_key_name" => v6::Code::InvalidApiKeyName, + "invalid_api_key_uid" => v6::Code::InvalidApiKeyUid, + "immutable_field" => v6::Code::ImmutableField, + "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists, + other => { + log::warn!("Unknown error code {}", other); + v6::Code::UnretrievableErrorCode + } + }; + v6::ResponseError::from_msg(error.message, code) + } +} + +impl From> for v6::Settings { + fn from(settings: v5::Settings) -> Self { + v6::Settings { + displayed_attributes: settings.displayed_attributes.into(), + searchable_attributes: settings.searchable_attributes.into(), + filterable_attributes: settings.filterable_attributes.into(), + sortable_attributes: settings.sortable_attributes.into(), + ranking_rules: settings.ranking_rules.into(), + stop_words: settings.stop_words.into(), + synonyms: settings.synonyms.into(), + distinct_attribute: settings.distinct_attribute.into(), + typo_tolerance: match settings.typo_tolerance { + v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance { + enabled: typo.enabled.into(), + min_word_size_for_typos: match typo.min_word_size_for_typos { + v5::Setting::Set(t) => v6::Setting::Set(v6::MinWordSizeForTypos { + one_typo: t.one_typo.into(), + two_typos: 
t.two_typos.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + disable_on_words: typo.disable_on_words.into(), + disable_on_attributes: typo.disable_on_attributes.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + faceting: match settings.faceting { + v5::Setting::Set(faceting) => v6::Setting::Set(v6::FacetingSettings { + max_values_per_facet: faceting.max_values_per_facet.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + pagination: match settings.pagination { + v5::Setting::Set(pagination) => v6::Setting::Set(v6::PaginationSettings { + max_total_hits: pagination.max_total_hits.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + _kind: std::marker::PhantomData, + } + } +} + +impl From for v6::Action { + fn from(key: v5::Action) -> Self { + match key { + v5::Action::All => v6::Action::All, + v5::Action::Search => v6::Action::Search, + v5::Action::DocumentsAll => v6::Action::DocumentsAll, + v5::Action::DocumentsAdd => v6::Action::DocumentsAdd, + v5::Action::DocumentsGet => v6::Action::DocumentsGet, + v5::Action::DocumentsDelete => v6::Action::DocumentsDelete, + v5::Action::IndexesAll => v6::Action::IndexesAll, + v5::Action::IndexesAdd => v6::Action::IndexesAdd, + v5::Action::IndexesGet => v6::Action::IndexesGet, + v5::Action::IndexesUpdate => v6::Action::IndexesUpdate, + v5::Action::IndexesDelete => v6::Action::IndexesDelete, + v5::Action::TasksAll => v6::Action::TasksAll, + v5::Action::TasksGet => v6::Action::TasksGet, + v5::Action::SettingsAll => v6::Action::SettingsAll, + v5::Action::SettingsGet => v6::Action::SettingsGet, + v5::Action::SettingsUpdate => v6::Action::SettingsUpdate, + v5::Action::StatsAll => v6::Action::StatsAll, + v5::Action::StatsGet => v6::Action::StatsGet, + v5::Action::MetricsAll => v6::Action::MetricsAll, + v5::Action::MetricsGet => v6::Action::MetricsGet, + v5::Action::DumpsAll => v6::Action::DumpsAll, + v5::Action::DumpsCreate => v6::Action::DumpsCreate, + v5::Action::Version => v6::Action::Version, + v5::Action::KeysAdd => v6::Action::KeysAdd, + v5::Action::KeysGet => v6::Action::KeysGet, + v5::Action::KeysUpdate => v6::Action::KeysUpdate, + v5::Action::KeysDelete => v6::Action::KeysDelete, + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v5_v6() { + let dump = File::open("tests/assets/v5.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v5::V5Reader::open(dir).unwrap().to_v6(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"42d4200cf6d92a6449989ca48cd8e28a"); + assert_eq!(update_files.len(), 22); + assert!(update_files[0].is_none()); // the dump creation + 
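// only tasks that were still enqueued when the dump was taken keep their payload; anything already processed has no update file +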
assert!(update_files[1].is_some()); // the enqueued document addition + assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"c9d2b467fe2fca0b35580d8a999808fb"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"8e5cadabf74aebe1160bf51c3d489efe"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4894ac1e74b9e1069ed5ee262b7a1aca"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 200); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"054dbf08a79e08bb9becba6f5d090f13"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/meilisearch-lib/src/dump/error.rs b/dump/src/reader/error.rs similarity index 100% rename from meilisearch-lib/src/dump/error.rs rename to dump/src/reader/error.rs diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs new file mode 100644 index 000000000..9d0f6ae88 --- /dev/null +++ b/dump/src/reader/mod.rs @@ -0,0 +1,535 @@ +use std::fs::File; +use std::io::{BufReader, Read}; + +use flate2::bufread::GzDecoder; +use serde::Deserialize; +use tempfile::TempDir; + +use self::compat::v4_to_v5::CompatV4ToV5; +use self::compat::v5_to_v6::{CompatIndexV5ToV6, CompatV5ToV6}; +use self::v5::V5Reader; +use self::v6::{V6IndexReader, V6Reader}; +use crate::{Error, Result, Version}; + +mod compat; + +// pub(self) mod v1; +pub(self) mod v2; +pub(self) mod v3; +pub(self) mod v4; +pub(self) mod v5; +pub(self) mod v6; + +pub type Document = serde_json::Map; +pub type UpdateFile = dyn Iterator>; + +pub enum DumpReader { + Current(V6Reader), + Compat(CompatV5ToV6), +} + +impl DumpReader { + pub fn open(dump: impl Read) -> Result { + let path = TempDir::new()?; + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + 
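// a dump is a gzipped tar archive: unpack it into a temporary directory, then read the dump version from metadata.json to pick the right reader +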
archive.unpack(path.path())?; + + #[derive(Deserialize)] + #[serde(rename_all = "camelCase")] + struct MetadataVersion { + pub dump_version: Version, + } + let mut meta_file = File::open(path.path().join("metadata.json"))?; + let MetadataVersion { dump_version } = serde_json::from_reader(&mut meta_file)?; + + match dump_version { + // Version::V1 => Ok(Box::new(v1::Reader::open(path)?)), + Version::V1 => Err(Error::DumpV1Unsupported), + Version::V2 => Ok(v2::V2Reader::open(path)?.to_v3().to_v4().to_v5().to_v6().into()), + Version::V3 => Ok(v3::V3Reader::open(path)?.to_v4().to_v5().to_v6().into()), + Version::V4 => Ok(v4::V4Reader::open(path)?.to_v5().to_v6().into()), + Version::V5 => Ok(v5::V5Reader::open(path)?.to_v6().into()), + Version::V6 => Ok(v6::V6Reader::open(path)?.into()), + } + } + + pub fn version(&self) -> crate::Version { + match self { + DumpReader::Current(current) => current.version(), + DumpReader::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + DumpReader::Current(current) => current.date(), + DumpReader::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + match self { + DumpReader::Current(current) => current.instance_uid(), + DumpReader::Compat(compat) => compat.instance_uid(), + } + } + + pub fn indexes(&self) -> Result> + '_>> { + match self { + DumpReader::Current(current) => { + let indexes = Box::new(current.indexes()?.map(|res| res.map(DumpIndexReader::from))) + as Box> + '_>; + Ok(indexes) + } + DumpReader::Compat(compat) => { + let indexes = Box::new(compat.indexes()?.map(|res| res.map(DumpIndexReader::from))) + as Box> + '_>; + Ok(indexes) + } + } + } + + pub fn tasks( + &mut self, + ) -> Result>)>> + '_>> { + match self { + DumpReader::Current(current) => Ok(current.tasks()), + DumpReader::Compat(compat) => compat.tasks(), + } + } + + pub fn keys(&mut self) -> Result> + '_>> { + match self { + DumpReader::Current(current) => Ok(current.keys()), + DumpReader::Compat(compat) => compat.keys(), + } + } +} + +impl From for DumpReader { + fn from(value: V6Reader) -> Self { + DumpReader::Current(value) + } +} + +impl From for DumpReader { + fn from(value: CompatV5ToV6) -> Self { + DumpReader::Compat(value) + } +} + +impl From for DumpReader { + fn from(value: V5Reader) -> Self { + DumpReader::Compat(value.to_v6()) + } +} + +impl From for DumpReader { + fn from(value: CompatV4ToV5) -> Self { + DumpReader::Compat(value.to_v6()) + } +} + +pub enum DumpIndexReader { + Current(v6::V6IndexReader), + Compat(Box), +} + +impl DumpIndexReader { + pub fn new_v6(v6: v6::V6IndexReader) -> DumpIndexReader { + DumpIndexReader::Current(v6) + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + DumpIndexReader::Current(v6) => v6.metadata(), + DumpIndexReader::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + DumpIndexReader::Current(v6) => v6 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + DumpIndexReader::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + match self { + DumpIndexReader::Current(v6) => v6.settings(), + DumpIndexReader::Compat(compat) => compat.settings(), + } + } +} + +impl From for DumpIndexReader { + fn from(value: V6IndexReader) -> Self { + DumpIndexReader::Current(value) + } +} + +impl From for DumpIndexReader { + fn from(value: CompatIndexV5ToV6) -> Self { + 
DumpIndexReader::Compat(Box::new(value)) + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + + use meili_snap::insta; + + use super::*; + + #[test] + #[ignore] + fn import_dump_v5() { + let dump = File::open("tests/assets/v5.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"42d4200cf6d92a6449989ca48cd8e28a"); + assert_eq!(update_files.len(), 22); + assert!(update_files[0].is_none()); // the dump creation + assert!(update_files[1].is_some()); // the enqueued document addition + assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"c9d2b467fe2fca0b35580d8a999808fb"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"8e5cadabf74aebe1160bf51c3d489efe"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4894ac1e74b9e1069ed5ee262b7a1aca"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 200); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"054dbf08a79e08bb9becba6f5d090f13"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } + + #[test] + #[ignore] + fn import_dump_v4() { + let dump = File::open("tests/assets/v4.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); + 
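// unlike the v3 and v2 dumps tested below, a v4 dump already records the instance uid +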
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"491e244a80a19fe2a900b809d310c24a"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"1f9da51a4518166fb440def5437eafdb"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"488816aba82c1bd65f1609630055c611"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7b4f66dad597dc651650f35fe34be27f"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } + + #[test] + #[ignore] + fn import_dump_v3() { + let dump = File::open("tests/assets/v3.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); + assert_eq!(dump.instance_uid().unwrap(), None); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"7cacce2e21702be696b866808c726946"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let 
keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"855f3165dec609b919171ff83f82b364"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"43e0bf1746c3ea1d64c1e10ea544c190"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"5fd06a5038f49311600379d43412b655"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"5fd06a5038f49311600379d43412b655"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } + + #[test] + #[ignore] + fn import_dump_v2() { + let dump = File::open("tests/assets/v2.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); + assert_eq!(dump.instance_uid().unwrap(), None); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6cabec4e252b74c8f3a2c8517622e85f"); + assert_eq!(update_files.len(), 9); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = 
dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"b15b71f56dd082d8e8ec5182e688bf36"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"5389153ddf5527fa79c54b6a6e9c21f6"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"8aebab01301d266acf3e18dd449c008f"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"8aebab01301d266acf3e18dd449c008f"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v1/mod.rs b/dump/src/reader/v1/mod.rs new file mode 100644 index 000000000..f638262cc --- /dev/null +++ b/dump/src/reader/v1/mod.rs @@ -0,0 +1,173 @@ +use std::{ + convert::Infallible, + fs::{self, File}, + io::{BufRead, BufReader}, + path::Path, +}; + +use tempfile::TempDir; +use time::OffsetDateTime; + +use self::update::UpdateStatus; + +use super::{DumpReader, IndexReader}; +use crate::{Error, Result, Version}; + +pub mod settings; +pub mod update; +pub mod v1; + +pub struct V1Reader { + dump: TempDir, + metadata: v1::Metadata, + indexes: Vec, +} + +struct V1IndexReader { + name: String, + documents: BufReader, + settings: BufReader, + updates: BufReader, + + current_update: Option, +} + +impl V1IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let mut ret = 
V1IndexReader { + name, + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + settings: BufReader::new(File::open(path.join("settings.json"))?), + updates: BufReader::new(File::open(path.join("updates.jsonl"))?), + current_update: None, + }; + ret.next_update(); + + Ok(ret) + } + + pub fn next_update(&mut self) -> Result> { + let current_update = if let Some(line) = self.updates.lines().next() { + Some(serde_json::from_str(&line?)?) + } else { + None + }; + + Ok(std::mem::replace(&mut self.current_update, current_update)) + } +} + +impl V1Reader { + pub fn open(dump: TempDir) -> Result { + let mut meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + + let mut indexes = Vec::new(); + + let entries = fs::read_dir(dump.path())?; + for entry in entries { + let entry = entry?; + if entry.file_type()?.is_dir() { + indexes.push(V1IndexReader::new( + entry + .file_name() + .to_str() + .ok_or(Error::BadIndexName)? + .to_string(), + &entry.path(), + )?); + } + } + + Ok(V1Reader { + dump, + metadata, + indexes, + }) + } + + fn next_update(&mut self) -> Result> { + if let Some((idx, _)) = self + .indexes + .iter() + .map(|index| index.current_update) + .enumerate() + .filter_map(|(idx, update)| update.map(|u| (idx, u))) + .min_by_key(|(_, update)| update.enqueued_at()) + { + self.indexes[idx].next_update() + } else { + Ok(None) + } + } +} + +impl IndexReader for &V1IndexReader { + type Document = serde_json::Map; + type Settings = settings::Settings; + + fn name(&self) -> &str { + todo!() + } + + fn documents(&self) -> Result>>> { + todo!() + } + + fn settings(&self) -> Result { + todo!() + } +} + +impl DumpReader for V1Reader { + type Document = serde_json::Map; + type Settings = settings::Settings; + + type Task = update::UpdateStatus; + type UpdateFile = Infallible; + + type Key = Infallible; + + fn date(&self) -> Option { + None + } + + fn version(&self) -> Version { + Version::V1 + } + + fn indexes( + &self, + ) -> Result< + Box< + dyn Iterator< + Item = Result< + Box< + dyn super::IndexReader< + Document = Self::Document, + Settings = Self::Settings, + >, + >, + >, + >, + >, + > { + Ok(Box::new(self.indexes.iter().map(|index| { + let index = Box::new(index) + as Box>; + Ok(index) + }))) + } + + fn tasks(&self) -> Box)>>> { + Box::new(std::iter::from_fn(|| { + self.next_update() + .transpose() + .map(|result| result.map(|task| (task, None))) + })) + } + + fn keys(&self) -> Box>> { + Box::new(std::iter::empty()) + } +} diff --git a/dump/src/reader/v1/settings.rs b/dump/src/reader/v1/settings.rs new file mode 100644 index 000000000..0065d3f97 --- /dev/null +++ b/dump/src/reader/v1/settings.rs @@ -0,0 +1,63 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::result::Result as StdResult; + +use serde::{Deserialize, Deserializer, Serialize}; + +#[derive(Default, Clone, Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct Settings { + #[serde(default, deserialize_with = "deserialize_some")] + pub ranking_rules: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub distinct_attribute: Option>, + #[serde(default, deserialize_with = "deserialize_some")] + pub searchable_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub displayed_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub stop_words: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub synonyms: 
Option>>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub attributes_for_faceting: Option>>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SettingsUpdate { + pub ranking_rules: UpdateState>, + pub distinct_attribute: UpdateState, + pub primary_key: UpdateState, + pub searchable_attributes: UpdateState>, + pub displayed_attributes: UpdateState>, + pub stop_words: UpdateState>, + pub synonyms: UpdateState>>, + pub attributes_for_faceting: UpdateState>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum UpdateState { + Update(T), + Clear, + Nothing, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum RankingRule { + Typo, + Words, + Proximity, + Attribute, + WordsPosition, + Exactness, + Asc(String), + Desc(String), +} + +// Any value that is present is considered Some value, including null. +fn deserialize_some<'de, T, D>(deserializer: D) -> StdResult, D::Error> +where + T: Deserialize<'de>, + D: Deserializer<'de>, +{ + Deserialize::deserialize(deserializer).map(Some) +} diff --git a/dump/src/reader/v1/update.rs b/dump/src/reader/v1/update.rs new file mode 100644 index 000000000..c9ccaf309 --- /dev/null +++ b/dump/src/reader/v1/update.rs @@ -0,0 +1,120 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use time::OffsetDateTime; + +use super::settings::SettingsUpdate; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Update { + data: UpdateData, + #[serde(with = "time::serde::rfc3339")] + enqueued_at: OffsetDateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum UpdateData { + ClearAll, + Customs(Vec), + // (primary key, documents) + DocumentsAddition { + primary_key: Option, + documents: Vec>, + }, + DocumentsPartial { + primary_key: Option, + documents: Vec>, + }, + DocumentsDeletion(Vec), + Settings(Box), +} + +impl UpdateData { + pub fn update_type(&self) -> UpdateType { + match self { + UpdateData::ClearAll => UpdateType::ClearAll, + UpdateData::Customs(_) => UpdateType::Customs, + UpdateData::DocumentsAddition { documents, .. } => UpdateType::DocumentsAddition { + number: documents.len(), + }, + UpdateData::DocumentsPartial { documents, .. 
} => UpdateType::DocumentsPartial { + number: documents.len(), + }, + UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion { + number: deletion.len(), + }, + UpdateData::Settings(update) => UpdateType::Settings { + settings: update.clone(), + }, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "name")] +pub enum UpdateType { + ClearAll, + Customs, + DocumentsAddition { number: usize }, + DocumentsPartial { number: usize }, + DocumentsDeletion { number: usize }, + Settings { settings: Box }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ProcessedUpdateResult { + pub update_id: u64, + #[serde(rename = "type")] + pub update_type: UpdateType, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_code: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_link: Option, + pub duration: f64, // in seconds + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub processed_at: OffsetDateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct EnqueuedUpdateResult { + pub update_id: u64, + #[serde(rename = "type")] + pub update_type: UpdateType, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase", tag = "status")] +pub enum UpdateStatus { + Enqueued { + #[serde(flatten)] + content: EnqueuedUpdateResult, + }, + Failed { + #[serde(flatten)] + content: ProcessedUpdateResult, + }, + Processed { + #[serde(flatten)] + content: ProcessedUpdateResult, + }, +} + +impl UpdateStatus { + pub fn enqueued_at(&self) -> &OffsetDateTime { + match self { + UpdateStatus::Enqueued { content } => &content.enqueued_at, + UpdateStatus::Failed { content } | UpdateStatus::Processed { content } => { + &content.enqueued_at + } + } + } +} diff --git a/dump/src/reader/v1/v1.rs b/dump/src/reader/v1/v1.rs new file mode 100644 index 000000000..0f4312508 --- /dev/null +++ b/dump/src/reader/v1/v1.rs @@ -0,0 +1,22 @@ +use serde::Deserialize; +use time::OffsetDateTime; + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Index { + pub name: String, + pub uid: String, + #[serde(with = "time::serde::rfc3339")] + created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + updated_at: OffsetDateTime, + pub primary_key: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + indexes: Vec, + db_version: String, + dump_version: crate::Version, +} diff --git a/dump/src/reader/v2/errors.rs b/dump/src/reader/v2/errors.rs new file mode 100644 index 000000000..dc9740f90 --- /dev/null +++ b/dump/src/reader/v2/errors.rs @@ -0,0 +1,14 @@ +use http::StatusCode; +use serde::Deserialize; + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct ResponseError { + #[serde(skip)] + pub code: StatusCode, + pub message: String, + pub error_code: String, + pub error_type: String, + pub error_link: String, +} diff --git a/dump/src/reader/v2/meta.rs b/dump/src/reader/v2/meta.rs new file mode 100644 index 000000000..f83762914 --- /dev/null +++ b/dump/src/reader/v2/meta.rs @@ -0,0 +1,18 @@ +use 
serde::Deserialize; +use uuid::Uuid; + +use super::Settings; + +#[derive(Deserialize, Debug, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub uuid: Uuid, +} + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} diff --git a/dump/src/reader/v2/mod.rs b/dump/src/reader/v2/mod.rs new file mode 100644 index 000000000..3a9e3e587 --- /dev/null +++ b/dump/src/reader/v2/mod.rs @@ -0,0 +1,310 @@ +//! ```text +//! . +//! ├── indexes +//! │   ├── index-40d14c5f-37ae-4873-9d51-b69e014a0d30 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── index-88202369-4524-4410-9b3d-3e924c867fec +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── index-b7f2d03b-bf9b-40d9-a25b-94dc5ec60c32 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   └── index-dc9070b3-572d-4f30-ab45-d4903ab71708 +//! │   ├── documents.jsonl +//! │   └── meta.json +//! ├── index_uuids +//! │   └── data.jsonl +//! ├── metadata.json +//! └── updates +//! ├── data.jsonl +//! └── update_files +//! └── update_202573df-718b-4d80-9a65-2ee397c23dc3 +//! ``` + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; + +pub mod errors; +pub mod meta; +pub mod settings; +pub mod updates; + +use self::meta::{DumpMeta, IndexUuid}; +use super::compat::v2_to_v3::CompatV2ToV3; +use super::Document; +use crate::{IndexMetadata, Result, Version}; + +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = updates::UpdateEntry; + +// everything related to the errors +pub type ResponseError = errors::ResponseError; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V2Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + pub index_uuid: Vec, +} + +impl V2Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
}) + .collect::>>()?; + + Ok(V2Reader { + metadata, + tasks: BufReader::new( + File::open(dump.path().join("updates").join("data.jsonl")).unwrap(), + ), + index_uuid, + dump, + }) + } + + pub fn to_v3(self) -> CompatV2ToV3 { + CompatV2ToV3::new(self) + } + + pub fn index_uuid(&self) -> Vec { + self.index_uuid.clone() + } + + pub fn version(&self) -> Version { + Version::V2 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V2IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)), + ) + })) + } + + pub fn tasks(&mut self) -> Box)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("update_files") + .join(format!("update_{}", uuid)); + Ok((task, Some(UpdateFile::new(&update_file_path)?))) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } +} + +pub struct V2IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V2IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. + created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V2IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
})) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + documents: Vec, + index: usize, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + let reader = BufReader::new(File::open(path)?); + Ok(UpdateFile { documents: serde_json::from_reader(reader)?, index: 0 }) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + self.index += 1; + self.documents.get(self.index - 1).cloned().map(Ok) + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v2() { + let dump = File::open("tests/assets/v2.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V2Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ec5fc0a14bf735ad4e361d5aa8a89ac6"); + assert_eq!(update_files.len(), 9); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"c41bf7315d404da46c99b9e3a2a3cc1e"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"3d1d96c85b6bab46e957bc8d2532a910"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + 
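// movies_2 was created but never populated: no primary key and, below, an empty document list +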
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"4f04afc086828d8da0da57a7d598ddba"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"4f04afc086828d8da0da57a7d598ddba"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v2/settings.rs b/dump/src/reader/v2/settings.rs new file mode 100644 index 000000000..62e5c05f9 --- /dev/null +++ b/dump/src/reader/v2/settings.rs @@ -0,0 +1,176 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; +use std::str::FromStr; + +use once_cell::sync::Lazy; +use regex::Regex; +use serde::{Deserialize, Deserializer}; + +#[cfg(test)] +fn serialize_with_wildcard( + field: &Option>>, + s: S, +) -> std::result::Result +where + S: serde::Serializer, +{ + let wildcard = vec!["*".to_string()]; + s.serialize_some(&field.as_ref().map(|o| o.as_ref().unwrap_or(&wildcard))) +} + +fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result, D::Error> +where + T: Deserialize<'de>, + D: Deserializer<'de>, +{ + Deserialize::deserialize(deserializer).map(Some) +} + +#[derive(Clone, Default, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Checked; +#[derive(Clone, Default, Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Unchecked; + +#[derive(Debug, Clone, Default, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde( + default, + deserialize_with = "deserialize_some", + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Option::is_none" + )] + pub displayed_attributes: Option>>, + + #[serde( + default, + deserialize_with = "deserialize_some", + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Option::is_none" + )] + pub searchable_attributes: Option>>, + + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub filterable_attributes: Option>>, + + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub ranking_rules: Option>>, + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub stop_words: Option>>, + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub synonyms: Option>>>, + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub distinct_attribute: Option>, + + #[serde(skip)] + pub _kind: PhantomData, +} + +impl Settings { + pub fn check(mut self) -> Settings { + let displayed_attributes = match self.displayed_attributes.take() { + Some(Some(fields)) => { + if fields.iter().any(|f| f == "*") { + Some(None) + } else { + 
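// no wildcard: keep the explicit list of displayed attributes +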
Some(Some(fields)) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes.take() { + Some(Some(fields)) => { + if fields.iter().any(|f| f == "*") { + Some(None) + } else { + Some(Some(fields)) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + _kind: PhantomData, + } + } +} + +static ASC_DESC_REGEX: Lazy = + Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()); + +#[derive(Debug, Deserialize, Clone, PartialEq, Eq)] +pub enum Criterion { + /// Sorted by decreasing number of matched query terms. + /// Query words at the front of an attribute is considered better than if it was at the back. + Words, + /// Sorted by increasing number of typos. + Typo, + /// Sorted by increasing distance between matched query terms. + Proximity, + /// Documents with quey words contained in more important + /// attributes are considred better. + Attribute, + /// Sorted by the similarity of the matched words with the query words. + Exactness, + /// Sorted by the increasing value of the field specified. + Asc(String), + /// Sorted by the decreasing value of the field specified. + Desc(String), +} + +impl FromStr for Criterion { + type Err = (); + + fn from_str(txt: &str) -> Result { + match txt { + "words" => Ok(Criterion::Words), + "typo" => Ok(Criterion::Typo), + "proximity" => Ok(Criterion::Proximity), + "attribute" => Ok(Criterion::Attribute), + "exactness" => Ok(Criterion::Exactness), + text => { + let caps = ASC_DESC_REGEX.captures(text).ok_or(())?; + let order = caps.get(1).unwrap().as_str(); + let field_name = caps.get(2).unwrap().as_str(); + match order { + "asc" => Ok(Criterion::Asc(field_name.to_string())), + "desc" => Ok(Criterion::Desc(field_name.to_string())), + _text => Err(()), + } + } + } + } +} diff --git a/dump/src/reader/v2/updates.rs b/dump/src/reader/v2/updates.rs new file mode 100644 index 000000000..33d88d46f --- /dev/null +++ b/dump/src/reader/v2/updates.rs @@ -0,0 +1,230 @@ +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::{ResponseError, Settings, Unchecked}; + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct UpdateEntry { + pub uuid: Uuid, + pub update: UpdateStatus, +} + +impl UpdateEntry { + pub fn is_finished(&self) -> bool { + match self.update { + UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false, + UpdateStatus::Processed(_) | UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) => true, + } + } + + pub fn get_content_uuid(&self) -> Option<&Uuid> { + match &self.update { + UpdateStatus::Enqueued(enqueued) => enqueued.content.as_ref(), + UpdateStatus::Processing(processing) => processing.from.content.as_ref(), + UpdateStatus::Processed(processed) => processed.from.from.content.as_ref(), + UpdateStatus::Aborted(aborted) => aborted.from.content.as_ref(), + UpdateStatus::Failed(failed) => failed.from.from.content.as_ref(), + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum UpdateResult { + DocumentsAddition(DocumentAdditionResult), + DocumentDeletion { deleted: u64 }, + Other, +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DocumentAdditionResult { + pub nb_documents: usize, +} + +#[derive(Debug, 
Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[non_exhaustive] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. + UpdateDocuments, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[non_exhaustive] +pub enum UpdateFormat { + /// The given update is a real **comma seperated** CSV with headers on the first line. + Csv, + /// The given update is a JSON array with documents inside. + Json, + /// The given update is a JSON stream with a document on each line. + JsonStream, +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(tag = "type")] +pub enum UpdateMeta { + DocumentsAddition { + method: IndexDocumentsMethod, + format: UpdateFormat, + primary_key: Option, + }, + ClearDocuments, + DeleteDocuments { + ids: Vec, + }, + Settings(Settings), +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Enqueued { + pub update_id: u64, + pub meta: UpdateMeta, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + pub content: Option, +} + +impl Enqueued { + pub fn meta(&self) -> &UpdateMeta { + &self.meta + } + + pub fn id(&self) -> u64 { + self.update_id + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processed { + pub success: UpdateResult, + #[serde(with = "time::serde::rfc3339")] + pub processed_at: OffsetDateTime, + #[serde(flatten)] + pub from: Processing, +} + +impl Processed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processing { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub started_processing_at: OffsetDateTime, +} + +impl Processing { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Aborted { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub aborted_at: OffsetDateTime, +} + +impl Aborted { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Failed { + #[serde(flatten)] + pub from: Processing, + pub error: ResponseError, + #[serde(with = "time::serde::rfc3339")] + pub failed_at: OffsetDateTime, +} + +impl Failed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(tag = "status", rename_all = "camelCase")] +pub enum UpdateStatus { + Processing(Processing), + Enqueued(Enqueued), + Processed(Processed), + Aborted(Aborted), + 
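// the update errored while it was being processed; the error is kept alongside the processing history +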
Failed(Failed), +} + +impl UpdateStatus { + pub fn id(&self) -> u64 { + match self { + UpdateStatus::Processing(u) => u.id(), + UpdateStatus::Enqueued(u) => u.id(), + UpdateStatus::Processed(u) => u.id(), + UpdateStatus::Aborted(u) => u.id(), + UpdateStatus::Failed(u) => u.id(), + } + } + + pub fn meta(&self) -> &UpdateMeta { + match self { + UpdateStatus::Processing(u) => u.meta(), + UpdateStatus::Enqueued(u) => u.meta(), + UpdateStatus::Processed(u) => u.meta(), + UpdateStatus::Aborted(u) => u.meta(), + UpdateStatus::Failed(u) => u.meta(), + } + } + + pub fn processed(&self) -> Option<&Processed> { + match self { + UpdateStatus::Processed(p) => Some(p), + _ => None, + } + } +} diff --git a/dump/src/reader/v3/errors.rs b/dump/src/reader/v3/errors.rs new file mode 100644 index 000000000..40c4d2c8d --- /dev/null +++ b/dump/src/reader/v3/errors.rs @@ -0,0 +1,51 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug, Clone, Copy)] +pub enum Code { + // index related error + CreateIndex, + IndexAlreadyExists, + IndexNotFound, + InvalidIndexUid, + + // invalid state error + InvalidState, + MissingPrimaryKey, + PrimaryKeyAlreadyPresent, + + MaxFieldsLimitExceeded, + MissingDocumentId, + InvalidDocumentId, + + Filter, + Sort, + + BadParameter, + BadRequest, + DatabaseSizeLimitReached, + DocumentNotFound, + Internal, + InvalidGeoField, + InvalidRankingRule, + InvalidStore, + InvalidToken, + MissingAuthorizationHeader, + NoSpaceLeftOnDevice, + DumpNotFound, + TaskNotFound, + PayloadTooLarge, + RetrieveDocument, + SearchDocuments, + UnsupportedMediaType, + + DumpAlreadyInProgress, + DumpProcessFailed, + + InvalidContentType, + MissingContentType, + MalformedPayload, + MissingPayload, + + MalformedDump, + UnretrievableErrorCode, +} diff --git a/dump/src/reader/v3/meta.rs b/dump/src/reader/v3/meta.rs new file mode 100644 index 000000000..f83762914 --- /dev/null +++ b/dump/src/reader/v3/meta.rs @@ -0,0 +1,18 @@ +use serde::Deserialize; +use uuid::Uuid; + +use super::Settings; + +#[derive(Deserialize, Debug, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub uuid: Uuid, +} + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} diff --git a/dump/src/reader/v3/mod.rs b/dump/src/reader/v3/mod.rs new file mode 100644 index 000000000..d363a76f1 --- /dev/null +++ b/dump/src/reader/v3/mod.rs @@ -0,0 +1,326 @@ +//! ```text +//! . +//! ├── indexes +//! │   ├── 01d7dd17-8241-4f1f-a7d1-2d1cb255f5b0 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── 78be64a3-cae1-449e-b7ed-13e77c9a8a0c +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── ba553439-18fe-4733-ba53-44eed898280c +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   └── c408bc22-5859-49d1-8e9f-c88e2fa95cb0 +//! │   ├── documents.jsonl +//! │   └── meta.json +//! ├── index_uuids +//! │   └── data.jsonl +//! ├── metadata.json +//! └── updates +//! ├── data.jsonl +//! └── updates_files +//! └── 66d3f12d-fcf3-4b53-88cb-407017373de7 +//! 
``` + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; + +pub mod errors; +pub mod meta; +pub mod settings; +pub mod updates; + +use self::meta::{DumpMeta, IndexUuid}; +use super::compat::v3_to_v4::CompatV3ToV4; +use super::Document; +use crate::{Error, IndexMetadata, Result, Version}; + +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = updates::UpdateEntry; + +// ===== Other types to clarify the code of the compat module +// everything related to the tasks +pub type Status = updates::UpdateStatus; +pub type Kind = updates::Update; + +// everything related to the settings +pub type Setting = settings::Setting; + +// everything related to the errors +pub type Code = errors::Code; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V3Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + index_uuid: Vec, +} + +impl V3Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }) + .collect::>>()?; + + Ok(V3Reader { + metadata, + tasks: BufReader::new(File::open(dump.path().join("updates").join("data.jsonl"))?), + index_uuid, + dump, + }) + } + + pub fn index_uuid(&self) -> Vec { + self.index_uuid.clone() + } + + pub fn to_v4(self) -> CompatV3ToV4 { + CompatV3ToV4::new(self) + } + + pub fn version(&self) -> Version { + Version::V3 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V3IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(index.uuid.to_string()), + ) + })) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("updates_files") + .join(uuid.to_string()); + Ok(( + task, + Some( + Box::new(UpdateFile::new(&update_file_path)?) as Box + ), + )) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } +} + +pub struct V3IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V3IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. 
+ created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V3IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) }) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v3() { + let dump = File::open("tests/assets/v3.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V3Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"63086d59c3f2074e4ab3fff7e8cc36c1"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"f309b009608cc0b770b2f74516f92647"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), 
@"95dff22ba3a7019616c12df9daa35e1e"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"1dafc4b123e3a8e14a889719cc01f6e5"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"1dafc4b123e3a8e14a889719cc01f6e5"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v3/settings.rs b/dump/src/reader/v3/settings.rs new file mode 100644 index 000000000..0027bf4ff --- /dev/null +++ b/dump/src/reader/v3/settings.rs @@ -0,0 +1,233 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; +use std::num::NonZeroUsize; + +use serde::{Deserialize, Deserializer}; + +#[cfg(test)] +fn serialize_with_wildcard( + field: &Setting>, + s: S, +) -> std::result::Result +where + S: serde::Serializer, +{ + use serde::Serialize; + + let wildcard = vec!["*".to_string()]; + match field { + Setting::Set(value) => Some(value), + Setting::Reset => Some(&wildcard), + Setting::NotSet => None, + } + .serialize(s) +} + +#[derive(Clone, Default, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Checked; + +#[derive(Clone, Default, Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Unchecked; + +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. 
+#[derive(Debug, Clone, Default, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub displayed_attributes: Setting>, + + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub searchable_attributes: Setting>, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub filterable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub sortable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub ranking_rules: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub stop_words: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub synonyms: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub distinct_attribute: Setting, + + #[serde(skip)] + pub _kind: PhantomData, +} + +impl Settings { + pub fn cleared() -> Settings { + Settings { + displayed_attributes: Setting::Reset, + searchable_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, + sortable_attributes: Setting::Reset, + ranking_rules: Setting::Reset, + stop_words: Setting::Reset, + synonyms: Setting::Reset, + distinct_attribute: Setting::Reset, + _kind: PhantomData, + } + } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + .. 
+ } = self; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + _kind: PhantomData, + } + } +} + +impl Settings { + pub fn check(self) -> Settings { + let displayed_attributes = match self.displayed_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + sortable_attributes: self.sortable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + _kind: PhantomData, + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct Facets { + pub level_group_size: Option, + pub min_level_size: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Setting { + Set(T), + Reset, + NotSet, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub fn map(self, f: F) -> Setting + where + F: FnOnce(T) -> U, + { + match self { + Setting::Set(t) => Setting::Set(f(t)), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + } + } + + pub fn set(self) -> Option { + match self { + Self::Set(value) => Some(value), + _ => None, + } + } + + pub const fn as_ref(&self) -> Setting<&T> { + match *self { + Self::Set(ref value) => Setting::Set(value), + Self::Reset => Setting::Reset, + Self::NotSet => Setting::NotSet, + } + } + + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } +} + +#[cfg(test)] +impl serde::Serialize for Setting { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + match self { + Self::Set(value) => Some(value), + // Usually not_set isn't serialized by setting skip_serializing_if field attribute + Self::NotSet | Self::Reset => None, + } + .serialize(serializer) + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} diff --git a/dump/src/reader/v3/updates.rs b/dump/src/reader/v3/updates.rs new file mode 100644 index 000000000..2f9e49c1a --- /dev/null +++ b/dump/src/reader/v3/updates.rs @@ -0,0 +1,227 @@ +use std::fmt::Display; + +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::{Code, Settings, Unchecked}; + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct UpdateEntry { + pub uuid: Uuid, + pub update: UpdateStatus, +} + +impl UpdateEntry { + pub fn is_finished(&self) -> bool { + match self.update { + UpdateStatus::Processed(_) | UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) => true, + UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false, + } + } + + pub fn get_content_uuid(&self) -> Option<&Uuid> { + match self.update.meta() { 
+ Update::DocumentAddition { content_uuid, .. } => Some(content_uuid), + Update::DeleteDocuments(_) | Update::Settings(_) | Update::ClearDocuments => None, + } + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(tag = "status", rename_all = "camelCase")] +pub enum UpdateStatus { + Processing(Processing), + Enqueued(Enqueued), + Processed(Processed), + Aborted(Aborted), + Failed(Failed), +} + +impl UpdateStatus { + pub fn id(&self) -> u64 { + match self { + UpdateStatus::Processing(u) => u.id(), + UpdateStatus::Enqueued(u) => u.id(), + UpdateStatus::Processed(u) => u.id(), + UpdateStatus::Aborted(u) => u.id(), + UpdateStatus::Failed(u) => u.id(), + } + } + + pub fn meta(&self) -> &Update { + match self { + UpdateStatus::Processing(u) => u.meta(), + UpdateStatus::Enqueued(u) => u.meta(), + UpdateStatus::Processed(u) => u.meta(), + UpdateStatus::Aborted(u) => u.meta(), + UpdateStatus::Failed(u) => u.meta(), + } + } + + pub fn is_finished(&self) -> bool { + match self { + UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false, + UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) | UpdateStatus::Processed(_) => true, + } + } + + pub fn processed(&self) -> Option<&Processed> { + match self { + UpdateStatus::Processed(p) => Some(p), + _ => None, + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Enqueued { + pub update_id: u64, + pub meta: Update, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, +} + +impl Enqueued { + pub fn meta(&self) -> &Update { + &self.meta + } + + pub fn id(&self) -> u64 { + self.update_id + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processed { + pub success: UpdateResult, + #[serde(with = "time::serde::rfc3339")] + pub processed_at: OffsetDateTime, + #[serde(flatten)] + pub from: Processing, +} + +impl Processed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processing { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub started_processing_at: OffsetDateTime, +} + +impl Processing { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Aborted { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub aborted_at: OffsetDateTime, +} + +impl Aborted { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Failed { + #[serde(flatten)] + pub from: Processing, + pub msg: String, + pub code: Code, + #[serde(with = "time::serde::rfc3339")] + pub failed_at: OffsetDateTime, +} + +impl Display for Failed { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.msg.fmt(f) + } +} + +impl Failed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug, 
Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum Update { + DeleteDocuments(Vec), + DocumentAddition { + primary_key: Option, + method: IndexDocumentsMethod, + content_uuid: Uuid, + }, + Settings(Settings), + ClearDocuments, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[non_exhaustive] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. + UpdateDocuments, +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum UpdateResult { + DocumentsAddition(DocumentAdditionResult), + DocumentDeletion { deleted: u64 }, + Other, +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DocumentAdditionResult { + pub nb_documents: usize, +} diff --git a/dump/src/reader/v4/errors.rs b/dump/src/reader/v4/errors.rs new file mode 100644 index 000000000..5a9a8d5df --- /dev/null +++ b/dump/src/reader/v4/errors.rs @@ -0,0 +1,311 @@ +use std::fmt; + +use http::StatusCode; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))] +pub struct ResponseError { + #[serde(skip)] + #[cfg_attr(feature = "test-traits", proptest(strategy = "strategy::status_code_strategy()"))] + pub code: StatusCode, + pub message: String, + #[serde(rename = "code")] + pub error_code: String, + #[serde(rename = "type")] + pub error_type: String, + #[serde(rename = "link")] + pub error_link: String, +} + +impl ResponseError { + pub fn from_msg(message: String, code: Code) -> Self { + Self { + code: code.http(), + message, + error_code: code.err_code().error_name.to_string(), + error_type: code.type_(), + error_link: code.url(), + } + } +} + +impl fmt::Display for ResponseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.message.fmt(f) + } +} + +impl std::error::Error for ResponseError {} + +impl From for ResponseError +where + T: ErrorCode, +{ + fn from(other: T) -> Self { + Self { + code: other.http_status(), + message: other.to_string(), + error_code: other.error_name(), + error_type: other.error_type(), + error_link: other.error_url(), + } + } +} + +pub trait ErrorCode: std::error::Error { + fn error_code(&self) -> Code; + + /// returns the HTTP status code ascociated with the error + fn http_status(&self) -> StatusCode { + self.error_code().http() + } + + /// returns the doc url ascociated with the error + fn error_url(&self) -> String { + self.error_code().url() + } + + /// returns error name, used as error code + fn error_name(&self) -> String { + self.error_code().name() + } + + /// return the error type + fn error_type(&self) -> String { + self.error_code().type_() + } +} + +#[allow(clippy::enum_variant_names)] +enum ErrorType { + InternalError, + InvalidRequestError, + AuthenticationError, +} + +impl fmt::Display for ErrorType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ErrorType::*; + + match self { + InternalError => write!(f, "internal"), + InvalidRequestError => write!(f, "invalid_request"), + AuthenticationError => write!(f, "auth"), + } + } +} + +#[derive(Serialize, Deserialize, Debug, 
Clone, Copy)] +pub enum Code { + // index related error + CreateIndex, + IndexAlreadyExists, + IndexNotFound, + InvalidIndexUid, + InvalidMinWordLengthForTypo, + + // invalid state error + InvalidState, + MissingPrimaryKey, + PrimaryKeyAlreadyPresent, + + MaxFieldsLimitExceeded, + MissingDocumentId, + InvalidDocumentId, + + Filter, + Sort, + + BadParameter, + BadRequest, + DatabaseSizeLimitReached, + DocumentNotFound, + Internal, + InvalidGeoField, + InvalidRankingRule, + InvalidStore, + InvalidToken, + MissingAuthorizationHeader, + NoSpaceLeftOnDevice, + DumpNotFound, + TaskNotFound, + PayloadTooLarge, + RetrieveDocument, + SearchDocuments, + UnsupportedMediaType, + + DumpAlreadyInProgress, + DumpProcessFailed, + + InvalidContentType, + MissingContentType, + MalformedPayload, + MissingPayload, + + ApiKeyNotFound, + MissingParameter, + InvalidApiKeyActions, + InvalidApiKeyIndexes, + InvalidApiKeyExpiresAt, + InvalidApiKeyDescription, + + UnretrievableErrorCode, + MalformedDump, +} + +impl Code { + /// ascociate a `Code` variant to the actual ErrCode + fn err_code(&self) -> ErrCode { + use Code::*; + + match self { + // index related errors + // create index is thrown on internal error while creating an index. + CreateIndex => { + ErrCode::internal("index_creation_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::CONFLICT), + // thrown when requesting an unexisting index + IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND), + InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST), + + // invalid state error + InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR), + // thrown when no primary key has been set + MissingPrimaryKey => { + ErrCode::invalid("primary_key_inference_failed", StatusCode::BAD_REQUEST) + } + // error thrown when trying to set an already existing primary key + PrimaryKeyAlreadyPresent => { + ErrCode::invalid("index_primary_key_already_exists", StatusCode::BAD_REQUEST) + } + // invalid ranking rule + InvalidRankingRule => ErrCode::invalid("invalid_ranking_rule", StatusCode::BAD_REQUEST), + + // invalid database + InvalidStore => { + ErrCode::internal("invalid_store_file", StatusCode::INTERNAL_SERVER_ERROR) + } + + // invalid document + MaxFieldsLimitExceeded => { + ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST) + } + MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST), + InvalidDocumentId => ErrCode::invalid("invalid_document_id", StatusCode::BAD_REQUEST), + + // error related to filters + Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST), + // error related to sorts + Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST), + + BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST), + BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), + DatabaseSizeLimitReached => { + ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR) + } + DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND), + Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR), + InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST), + InvalidToken => ErrCode::authentication("invalid_api_key", StatusCode::FORBIDDEN), + MissingAuthorizationHeader => { + ErrCode::authentication("missing_authorization_header", 
StatusCode::UNAUTHORIZED) + } + TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND), + DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND), + NoSpaceLeftOnDevice => { + ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR) + } + PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE), + RetrieveDocument => { + ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST) + } + SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST), + UnsupportedMediaType => { + ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + + // error related to dump + DumpAlreadyInProgress => { + ErrCode::invalid("dump_already_processing", StatusCode::CONFLICT) + } + DumpProcessFailed => { + ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + MissingContentType => { + ErrCode::invalid("missing_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MalformedPayload => ErrCode::invalid("malformed_payload", StatusCode::BAD_REQUEST), + InvalidContentType => { + ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST), + + // error related to keys + ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND), + MissingParameter => ErrCode::invalid("missing_parameter", StatusCode::BAD_REQUEST), + InvalidApiKeyActions => { + ErrCode::invalid("invalid_api_key_actions", StatusCode::BAD_REQUEST) + } + InvalidApiKeyIndexes => { + ErrCode::invalid("invalid_api_key_indexes", StatusCode::BAD_REQUEST) + } + InvalidApiKeyExpiresAt => { + ErrCode::invalid("invalid_api_key_expires_at", StatusCode::BAD_REQUEST) + } + InvalidApiKeyDescription => { + ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST) + } + InvalidMinWordLengthForTypo => { + ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) + } + UnretrievableErrorCode => { + ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST) + } + MalformedDump => ErrCode::invalid("malformed_dump", StatusCode::BAD_REQUEST), + } + } + + /// return the HTTP status code ascociated with the `Code` + fn http(&self) -> StatusCode { + self.err_code().status_code + } + + /// return error name, used as error code + fn name(&self) -> String { + self.err_code().error_name.to_string() + } + + /// return the error type + fn type_(&self) -> String { + self.err_code().error_type.to_string() + } + + /// return the doc url ascociated with the error + fn url(&self) -> String { + format!("https://docs.meilisearch.com/errors#{}", self.name()) + } +} + +/// Internal structure providing a convenient way to create error codes +struct ErrCode { + status_code: StatusCode, + error_type: ErrorType, + error_name: &'static str, +} + +impl ErrCode { + fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError } + } + + fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InternalError } + } + + fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError } + } +} diff --git a/dump/src/reader/v4/keys.rs b/dump/src/reader/v4/keys.rs new file mode 100644 index 
000000000..26e5cad7d --- /dev/null +++ b/dump/src/reader/v4/keys.rs @@ -0,0 +1,77 @@ +use serde::Deserialize; +use time::OffsetDateTime; + +pub const KEY_ID_LENGTH: usize = 8; +pub type KeyId = [u8; KEY_ID_LENGTH]; + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Key { + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + pub id: KeyId, + pub actions: Vec, + pub indexes: Vec, + #[serde(with = "time::serde::rfc3339::option")] + pub expires_at: Option, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +#[derive(Copy, Clone, Deserialize, Debug, Eq, PartialEq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[repr(u8)] +pub enum Action { + #[serde(rename = "*")] + All = 0, + #[serde(rename = "search")] + Search = actions::SEARCH, + #[serde(rename = "documents.add")] + DocumentsAdd = actions::DOCUMENTS_ADD, + #[serde(rename = "documents.get")] + DocumentsGet = actions::DOCUMENTS_GET, + #[serde(rename = "documents.delete")] + DocumentsDelete = actions::DOCUMENTS_DELETE, + #[serde(rename = "indexes.create")] + IndexesAdd = actions::INDEXES_CREATE, + #[serde(rename = "indexes.get")] + IndexesGet = actions::INDEXES_GET, + #[serde(rename = "indexes.update")] + IndexesUpdate = actions::INDEXES_UPDATE, + #[serde(rename = "indexes.delete")] + IndexesDelete = actions::INDEXES_DELETE, + #[serde(rename = "tasks.get")] + TasksGet = actions::TASKS_GET, + #[serde(rename = "settings.get")] + SettingsGet = actions::SETTINGS_GET, + #[serde(rename = "settings.update")] + SettingsUpdate = actions::SETTINGS_UPDATE, + #[serde(rename = "stats.get")] + StatsGet = actions::STATS_GET, + #[serde(rename = "dumps.create")] + DumpsCreate = actions::DUMPS_CREATE, + #[serde(rename = "dumps.get")] + DumpsGet = actions::DUMPS_GET, + #[serde(rename = "version")] + Version = actions::VERSION, +} + +pub mod actions { + pub const SEARCH: u8 = 1; + pub const DOCUMENTS_ADD: u8 = 2; + pub const DOCUMENTS_GET: u8 = 3; + pub const DOCUMENTS_DELETE: u8 = 4; + pub const INDEXES_CREATE: u8 = 5; + pub const INDEXES_GET: u8 = 6; + pub const INDEXES_UPDATE: u8 = 7; + pub const INDEXES_DELETE: u8 = 8; + pub const TASKS_GET: u8 = 9; + pub const SETTINGS_GET: u8 = 10; + pub const SETTINGS_UPDATE: u8 = 11; + pub const STATS_GET: u8 = 12; + pub const DUMPS_CREATE: u8 = 13; + pub const DUMPS_GET: u8 = 14; + pub const VERSION: u8 = 15; +} diff --git a/dump/src/reader/v4/meta.rs b/dump/src/reader/v4/meta.rs new file mode 100644 index 000000000..cec05f57c --- /dev/null +++ b/dump/src/reader/v4/meta.rs @@ -0,0 +1,139 @@ +use std::fmt::{self, Display, Formatter}; +use std::marker::PhantomData; +use std::str::FromStr; + +use serde::de::Visitor; +use serde::{Deserialize, Deserializer}; +use uuid::Uuid; + +use super::settings::{Settings, Unchecked}; + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub index_meta: IndexMeta, +} + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexMeta { + pub uuid: Uuid, + pub creation_task_id: usize, +} + +// There is one in each indexes under `meta.json`. 
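Reviewer note, illustrative only: to make the comment above concrete, here is a hypothetical `meta.json` payload that the `DumpMeta` struct defined just below would accept. Field names follow the derives in this patch (`primary_key` stays snake_case, the nested settings are camelCase); the contents of a real v4 dump may differ.

    use crate::reader::v4::meta::DumpMeta;

    fn parse_example_meta() -> serde_json::Result<DumpMeta> {
        let raw = r#"{
            "settings": { "displayedAttributes": ["*"], "stopWords": [] },
            "primary_key": "id"
        }"#;
        serde_json::from_str(raw)
    }
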
+#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUid(pub String); + +impl TryFrom for IndexUid { + type Error = IndexUidFormatError; + + fn try_from(uid: String) -> Result { + if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') + || uid.is_empty() + || uid.len() > 400 + { + Err(IndexUidFormatError { invalid_uid: uid }) + } else { + Ok(IndexUid(uid)) + } + } +} + +impl FromStr for IndexUid { + type Err = IndexUidFormatError; + + fn from_str(uid: &str) -> Result { + uid.to_string().try_into() + } +} + +impl From for String { + fn from(uid: IndexUid) -> Self { + uid.into_inner() + } +} + +#[derive(Debug)] +pub struct IndexUidFormatError { + pub invalid_uid: String, +} + +impl Display for IndexUidFormatError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "invalid index uid `{}`, the uid must be an integer \ + or a string containing only alphanumeric characters \ + a-z A-Z 0-9, hyphens - and underscores _.", + self.invalid_uid, + ) + } +} + +impl std::error::Error for IndexUidFormatError {} + +/// A type that tries to match either a star (*) or +/// any other thing that implements `FromStr`. +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum StarOr { + Star, + Other(T), +} + +impl<'de, T, E> Deserialize<'de> for StarOr +where + T: FromStr, + E: Display, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + /// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag. + /// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to + /// deserialize everything as a `StarOr::Other`, including "*". + /// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is + /// not supported on untagged enums. 
+ struct StarOrVisitor(PhantomData); + + impl<'de, T, FE> Visitor<'de> for StarOrVisitor + where + T: FromStr, + FE: Display, + { + type Value = StarOr; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + SE: serde::de::Error, + { + match v { + "*" => Ok(StarOr::Star), + v => { + let other = FromStr::from_str(v).map_err(|e: T::Err| { + SE::custom(format!("Invalid `other` value: {}", e)) + })?; + Ok(StarOr::Other(other)) + } + } + } + } + + deserializer.deserialize_str(StarOrVisitor(PhantomData)) + } +} diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs new file mode 100644 index 000000000..3aad71ddb --- /dev/null +++ b/dump/src/reader/v4/mod.rs @@ -0,0 +1,307 @@ +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, ErrorKind}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +pub mod errors; +pub mod keys; +pub mod meta; +pub mod settings; +pub mod tasks; + +use self::meta::{DumpMeta, IndexUuid}; +use super::compat::v4_to_v5::CompatV4ToV5; +use crate::{Error, IndexMetadata, Result, Version}; + +pub type Document = serde_json::Map; +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = tasks::Task; +pub type Key = keys::Key; + +// everything related to the settings +pub type Setting = settings::Setting; + +// everything related to the api keys +pub type Action = keys::Action; + +// everything related to the errors +pub type ResponseError = errors::ResponseError; +pub type Code = errors::Code; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V4Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + keys: BufReader, + index_uuid: Vec, +} + +impl V4Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
}) + .collect::>>()?; + + Ok(V4Reader { + metadata, + tasks: BufReader::new( + File::open(dump.path().join("updates").join("data.jsonl")).unwrap(), + ), + keys: BufReader::new(File::open(dump.path().join("keys"))?), + index_uuid, + dump, + }) + } + + pub fn to_v5(self) -> CompatV4ToV5 { + CompatV4ToV5::new(self) + } + + pub fn version(&self) -> Version { + Version::V4 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn instance_uid(&self) -> Result> { + match fs::read_to_string(self.dump.path().join("instance-uid")) { + Ok(uuid) => Ok(Some(Uuid::parse_str(&uuid)?)), + Err(e) if e.kind() == ErrorKind::NotFound => Ok(None), + Err(e) => Err(e.into()), + } + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V4IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), + ) + })) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("updates_files") + .join(uuid.to_string()); + Ok(( + task, + Some( + Box::new(UpdateFile::new(&update_file_path)?) as Box + ), + )) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } + + pub fn keys(&mut self) -> Box> + '_> { + Box::new( + (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + ) + } +} + +pub struct V4IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V4IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. + created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V4IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) 
}) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v4() { + let dump = File::open("tests/assets/v4.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V4Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"f4efacbea0c1a4400873f4b2ee33f975"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // keys + let keys = dump.keys().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"9240300dca8f962cdf58359ef4c76f09"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"65b139c6b9fc251e187073c8557803e2"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"06aa1988493485d9b2cda7c751e6bb15"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + 
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7d722fc2629eaa45032ed3deb0c9b4ce"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v4/settings.rs b/dump/src/reader/v4/settings.rs new file mode 100644 index 000000000..964cd1152 --- /dev/null +++ b/dump/src/reader/v4/settings.rs @@ -0,0 +1,261 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; +use std::num::NonZeroUsize; + +use serde::{Deserialize, Deserializer}; + +#[cfg(test)] +fn serialize_with_wildcard( + field: &Setting>, + s: S, +) -> std::result::Result +where + S: serde::Serializer, +{ + use serde::Serialize; + + let wildcard = vec!["*".to_string()]; + match field { + Setting::Set(value) => Some(value), + Setting::Reset => Some(&wildcard), + Setting::NotSet => None, + } + .serialize(s) +} + +#[derive(Clone, Default, Debug, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Checked; + +#[derive(Clone, Default, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Unchecked; + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct MinWordSizeTyposSetting { + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub one_typo: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub two_typos: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct TypoSettings { + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub enabled: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub min_word_size_for_typos: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_words: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_attributes: Setting>, +} +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. 
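Reviewer note, illustrative only: the `Setting` wrapper used by the struct below distinguishes three states when a settings payload is deserialized. An absent field stays `NotSet` (via `#[serde(default)]`), an explicit `null` becomes `Reset`, and any other value becomes `Set`. A small sketch, assuming the module path from this patch:

    use crate::reader::v4::settings::{Setting, Settings, Unchecked};

    fn setting_states() -> serde_json::Result<()> {
        // `distinctAttribute` is absent, `displayedAttributes` is null, `stopWords` has a value.
        let raw = r#"{ "displayedAttributes": null, "stopWords": ["the"] }"#;
        let settings: Settings<Unchecked> = serde_json::from_str(raw)?;

        assert!(matches!(settings.distinct_attribute, Setting::NotSet)); // missing field
        assert!(matches!(settings.displayed_attributes, Setting::Reset)); // explicit null
        assert!(matches!(settings.stop_words, Setting::Set(_))); // concrete value
        Ok(())
    }
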
+#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub displayed_attributes: Setting>, + + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub searchable_attributes: Setting>, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub filterable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub sortable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub ranking_rules: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub stop_words: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub synonyms: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub distinct_attribute: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub typo_tolerance: Setting, + + #[serde(skip)] + pub _kind: PhantomData, +} + +impl Settings { + pub fn cleared() -> Settings { + Settings { + displayed_attributes: Setting::Reset, + searchable_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, + sortable_attributes: Setting::Reset, + ranking_rules: Setting::Reset, + stop_words: Setting::Reset, + synonyms: Setting::Reset, + distinct_attribute: Setting::Reset, + typo_tolerance: Setting::Reset, + _kind: PhantomData, + } + } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + .. 
+ } = self; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + _kind: PhantomData, + } + } +} + +impl Settings { + pub fn check(self) -> Settings { + let displayed_attributes = match self.displayed_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + sortable_attributes: self.sortable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + typo_tolerance: self.typo_tolerance, + _kind: PhantomData, + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct Facets { + pub level_group_size: Option, + pub min_level_size: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +pub enum Setting { + Set(T), + Reset, + NotSet, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub fn set(self) -> Option { + match self { + Self::Set(value) => Some(value), + _ => None, + } + } + + pub const fn as_ref(&self) -> Setting<&T> { + match *self { + Self::Set(ref value) => Setting::Set(value), + Self::Reset => Setting::Reset, + Self::NotSet => Setting::NotSet, + } + } + + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } + + /// If `Self` is `Reset`, then map self to `Set` with the provided `val`. 
+ pub fn or_reset(self, val: T) -> Self { + match self { + Self::Reset => Self::Set(val), + otherwise => otherwise, + } + } +} + +#[cfg(test)] +impl serde::Serialize for Setting { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + match self { + Self::Set(value) => Some(value), + // Usually not_set isn't serialized by setting skip_serializing_if field attribute + Self::NotSet | Self::Reset => None, + } + .serialize(serializer) + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} diff --git a/dump/src/reader/v4/tasks.rs b/dump/src/reader/v4/tasks.rs new file mode 100644 index 000000000..e1bdde0c7 --- /dev/null +++ b/dump/src/reader/v4/tasks.rs @@ -0,0 +1,135 @@ +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::errors::ResponseError; +use super::meta::IndexUid; +use super::settings::{Settings, Unchecked}; + +pub type TaskId = u32; +pub type BatchId = u32; + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Task { + pub id: TaskId, + pub index_uid: IndexUid, + pub content: TaskContent, + pub events: Vec, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[allow(clippy::large_enum_variant)] +pub enum TaskContent { + DocumentAddition { + content_uuid: Uuid, + merge_strategy: IndexDocumentsMethod, + primary_key: Option, + documents_count: usize, + allow_index_creation: bool, + }, + DocumentDeletion(DocumentDeletion), + SettingsUpdate { + settings: Settings, + /// Indicates whether the task was a deletion + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion, + IndexCreation { + primary_key: Option, + }, + IndexUpdate { + primary_key: Option, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. 
+ UpdateDocuments, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum DocumentDeletion { + Clear, + Ids(Vec), +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskEvent { + Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Batched { + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + batch_id: BatchId, + }, + Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Succeded { + result: TaskResult, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, + Failed { + error: ResponseError, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskResult { + DocumentAddition { indexed_documents: u64 }, + DocumentDeletion { deleted_documents: u64 }, + ClearAll { deleted_documents: u64 }, + Other, +} + +impl Task { + /// Return true when a task is finished. + /// A task is finished when its last state is either `Succeeded` or `Failed`. + pub fn is_finished(&self) -> bool { + self.events.last().map_or(false, |event| { + matches!(event, TaskEvent::Succeded { .. } | TaskEvent::Failed { .. }) + }) + } + + /// Return the content_uuid of the `Task` if there is one. + pub fn get_content_uuid(&self) -> Option { + match self { + Task { content: TaskContent::DocumentAddition { content_uuid, .. }, .. } => { + Some(*content_uuid) + } + _ => None, + } + } +} + +impl IndexUid { + pub fn into_inner(self) -> String { + self.0 + } + + /// Return a reference over the inner str. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::ops::Deref for IndexUid { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/dump/src/reader/v5/errors.rs b/dump/src/reader/v5/errors.rs new file mode 100644 index 000000000..c918c301c --- /dev/null +++ b/dump/src/reader/v5/errors.rs @@ -0,0 +1,272 @@ +use std::fmt; + +use http::StatusCode; +use serde::Deserialize; + +#[derive(Debug, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct ResponseError { + #[serde(skip)] + code: StatusCode, + + pub message: String, + #[serde(rename = "code")] + pub error_code: String, + #[serde(rename = "type")] + pub error_type: String, + #[serde(rename = "link")] + pub error_link: String, +} + +impl ResponseError { + pub fn from_msg(message: String, code: Code) -> Self { + Self { + code: code.http(), + message, + error_code: code.err_code().error_name.to_string(), + error_type: code.type_(), + error_link: code.url(), + } + } +} + +#[derive(Deserialize, Debug, Clone, Copy)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum Code { + // index related error + CreateIndex, + IndexAlreadyExists, + IndexNotFound, + InvalidIndexUid, + InvalidMinWordLengthForTypo, + + // invalid state error + InvalidState, + MissingPrimaryKey, + PrimaryKeyAlreadyPresent, + + MaxFieldsLimitExceeded, + MissingDocumentId, + InvalidDocumentId, + + Filter, + Sort, + + BadParameter, + BadRequest, + DatabaseSizeLimitReached, + DocumentNotFound, + Internal, + InvalidGeoField, + InvalidRankingRule, + InvalidStore, + InvalidToken, + MissingAuthorizationHeader, + NoSpaceLeftOnDevice, + DumpNotFound, + TaskNotFound, + PayloadTooLarge, + RetrieveDocument, + 
SearchDocuments, + UnsupportedMediaType, + + DumpAlreadyInProgress, + DumpProcessFailed, + + InvalidContentType, + MissingContentType, + MalformedPayload, + MissingPayload, + + ApiKeyNotFound, + MissingParameter, + InvalidApiKeyActions, + InvalidApiKeyIndexes, + InvalidApiKeyExpiresAt, + InvalidApiKeyDescription, + InvalidApiKeyName, + InvalidApiKeyUid, + ImmutableField, + ApiKeyAlreadyExists, + + UnretrievableErrorCode, +} + +impl Code { + /// associate a `Code` variant to the actual ErrCode + fn err_code(&self) -> ErrCode { + use Code::*; + + match self { + // index related errors + // create index is thrown on internal error while creating an index. + CreateIndex => { + ErrCode::internal("index_creation_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::CONFLICT), + // thrown when requesting an unexisting index + IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND), + InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST), + + // invalid state error + InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR), + // thrown when no primary key has been set + MissingPrimaryKey => { + ErrCode::invalid("primary_key_inference_failed", StatusCode::BAD_REQUEST) + } + // error thrown when trying to set an already existing primary key + PrimaryKeyAlreadyPresent => { + ErrCode::invalid("index_primary_key_already_exists", StatusCode::BAD_REQUEST) + } + // invalid ranking rule + InvalidRankingRule => ErrCode::invalid("invalid_ranking_rule", StatusCode::BAD_REQUEST), + + // invalid database + InvalidStore => { + ErrCode::internal("invalid_store_file", StatusCode::INTERNAL_SERVER_ERROR) + } + + // invalid document + MaxFieldsLimitExceeded => { + ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST) + } + MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST), + InvalidDocumentId => ErrCode::invalid("invalid_document_id", StatusCode::BAD_REQUEST), + + // error related to filters + Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST), + // error related to sorts + Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST), + + BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST), + BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), + DatabaseSizeLimitReached => { + ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR) + } + DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND), + Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR), + InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST), + InvalidToken => ErrCode::authentication("invalid_api_key", StatusCode::FORBIDDEN), + MissingAuthorizationHeader => { + ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED) + } + TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND), + DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND), + NoSpaceLeftOnDevice => { + ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR) + } + PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE), + RetrieveDocument => { + ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST) + } + SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST), + 
UnsupportedMediaType => { + ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + + // error related to dump + DumpAlreadyInProgress => { + ErrCode::invalid("dump_already_processing", StatusCode::CONFLICT) + } + DumpProcessFailed => { + ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + MissingContentType => { + ErrCode::invalid("missing_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MalformedPayload => ErrCode::invalid("malformed_payload", StatusCode::BAD_REQUEST), + InvalidContentType => { + ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST), + + // error related to keys + ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND), + MissingParameter => ErrCode::invalid("missing_parameter", StatusCode::BAD_REQUEST), + InvalidApiKeyActions => { + ErrCode::invalid("invalid_api_key_actions", StatusCode::BAD_REQUEST) + } + InvalidApiKeyIndexes => { + ErrCode::invalid("invalid_api_key_indexes", StatusCode::BAD_REQUEST) + } + InvalidApiKeyExpiresAt => { + ErrCode::invalid("invalid_api_key_expires_at", StatusCode::BAD_REQUEST) + } + InvalidApiKeyDescription => { + ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST) + } + InvalidApiKeyName => ErrCode::invalid("invalid_api_key_name", StatusCode::BAD_REQUEST), + InvalidApiKeyUid => ErrCode::invalid("invalid_api_key_uid", StatusCode::BAD_REQUEST), + ApiKeyAlreadyExists => ErrCode::invalid("api_key_already_exists", StatusCode::CONFLICT), + ImmutableField => ErrCode::invalid("immutable_field", StatusCode::BAD_REQUEST), + InvalidMinWordLengthForTypo => { + ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) + } + UnretrievableErrorCode => { + ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST) + } + } + } + + /// return the HTTP status code associated with the `Code` + fn http(&self) -> StatusCode { + self.err_code().status_code + } + + /// return error name, used as error code + fn name(&self) -> String { + self.err_code().error_name.to_string() + } + + /// return the error type + fn type_(&self) -> String { + self.err_code().error_type.to_string() + } + + /// return the doc url associated with the error + fn url(&self) -> String { + format!("https://docs.meilisearch.com/errors#{}", self.name()) + } +} + +/// Internal structure providing a convenient way to create error codes +struct ErrCode { + status_code: StatusCode, + error_type: ErrorType, + error_name: &'static str, +} + +impl ErrCode { + fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError } + } + + fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InternalError } + } + + fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError } + } +} + +#[allow(clippy::enum_variant_names)] +enum ErrorType { + InternalError, + InvalidRequestError, + AuthenticationError, +} + +impl fmt::Display for ErrorType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ErrorType::*; + + match self { + InternalError => write!(f, "internal"), + InvalidRequestError => write!(f, "invalid_request"), + AuthenticationError => write!(f, "auth"), + } + } +} 
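To make the mapping above concrete, here is a minimal sketch of how a caller might turn a v5 `Code` into a fully populated `ResponseError`. It assumes the `errors` module above is in scope; the index name in the message is a made-up example.

```rust
use super::errors::{Code, ResponseError};

// `from_msg` derives the machine-readable name ("index_not_found"), the error
// type ("invalid_request") and the documentation link
// (https://docs.meilisearch.com/errors#index_not_found) from the `Code`
// variant, so the caller only provides the human-readable message.
fn index_not_found_error() -> ResponseError {
    ResponseError::from_msg("index `movies` not found".to_string(), Code::IndexNotFound)
}
```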
diff --git a/dump/src/reader/v5/keys.rs b/dump/src/reader/v5/keys.rs new file mode 100644 index 000000000..12e44d85a --- /dev/null +++ b/dump/src/reader/v5/keys.rs @@ -0,0 +1,83 @@ +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::meta::{IndexUid, StarOr}; + +pub type KeyId = Uuid; + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Key { + pub description: Option, + pub name: Option, + pub uid: KeyId, + pub actions: Vec, + pub indexes: Vec>, + #[serde(with = "time::serde::rfc3339::option")] + pub expires_at: Option, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +#[derive(Copy, Clone, Deserialize, Debug, Eq, PartialEq, Hash)] +#[cfg_attr(test, derive(serde::Serialize))] +#[repr(u8)] +pub enum Action { + #[serde(rename = "*")] + All = 0, + #[serde(rename = "search")] + Search, + #[serde(rename = "documents.*")] + DocumentsAll, + #[serde(rename = "documents.add")] + DocumentsAdd, + #[serde(rename = "documents.get")] + DocumentsGet, + #[serde(rename = "documents.delete")] + DocumentsDelete, + #[serde(rename = "indexes.*")] + IndexesAll, + #[serde(rename = "indexes.create")] + IndexesAdd, + #[serde(rename = "indexes.get")] + IndexesGet, + #[serde(rename = "indexes.update")] + IndexesUpdate, + #[serde(rename = "indexes.delete")] + IndexesDelete, + #[serde(rename = "tasks.*")] + TasksAll, + #[serde(rename = "tasks.get")] + TasksGet, + #[serde(rename = "settings.*")] + SettingsAll, + #[serde(rename = "settings.get")] + SettingsGet, + #[serde(rename = "settings.update")] + SettingsUpdate, + #[serde(rename = "stats.*")] + StatsAll, + #[serde(rename = "stats.get")] + StatsGet, + #[serde(rename = "metrics.*")] + MetricsAll, + #[serde(rename = "metrics.get")] + MetricsGet, + #[serde(rename = "dumps.*")] + DumpsAll, + #[serde(rename = "dumps.create")] + DumpsCreate, + #[serde(rename = "version")] + Version, + #[serde(rename = "keys.create")] + KeysAdd, + #[serde(rename = "keys.get")] + KeysGet, + #[serde(rename = "keys.update")] + KeysUpdate, + #[serde(rename = "keys.delete")] + KeysDelete, +} diff --git a/dump/src/reader/v5/meta.rs b/dump/src/reader/v5/meta.rs new file mode 100644 index 000000000..cec05f57c --- /dev/null +++ b/dump/src/reader/v5/meta.rs @@ -0,0 +1,139 @@ +use std::fmt::{self, Display, Formatter}; +use std::marker::PhantomData; +use std::str::FromStr; + +use serde::de::Visitor; +use serde::{Deserialize, Deserializer}; +use uuid::Uuid; + +use super::settings::{Settings, Unchecked}; + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub index_meta: IndexMeta, +} + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexMeta { + pub uuid: Uuid, + pub creation_task_id: usize, +} + +// There is one in each indexes under `meta.json`. 
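For illustration, a minimal sketch of how one record of `index_uuids/data.jsonl` deserializes into the `IndexUuid`/`IndexMeta` pair defined above; the uid and uuid are taken from the sample layout documented in `mod.rs` below.

```rust
// One line of `index_uuids/data.jsonl`: it maps an index uid to its internal
// uuid and to the id of the task that created the index.
fn parse_index_uuid_line() -> serde_json::Result<IndexUuid> {
    let line = r#"{"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}}"#;
    serde_json::from_str(line)
}
```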
+#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUid(pub String); + +impl TryFrom for IndexUid { + type Error = IndexUidFormatError; + + fn try_from(uid: String) -> Result { + if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') + || uid.is_empty() + || uid.len() > 400 + { + Err(IndexUidFormatError { invalid_uid: uid }) + } else { + Ok(IndexUid(uid)) + } + } +} + +impl FromStr for IndexUid { + type Err = IndexUidFormatError; + + fn from_str(uid: &str) -> Result { + uid.to_string().try_into() + } +} + +impl From for String { + fn from(uid: IndexUid) -> Self { + uid.into_inner() + } +} + +#[derive(Debug)] +pub struct IndexUidFormatError { + pub invalid_uid: String, +} + +impl Display for IndexUidFormatError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "invalid index uid `{}`, the uid must be an integer \ + or a string containing only alphanumeric characters \ + a-z A-Z 0-9, hyphens - and underscores _.", + self.invalid_uid, + ) + } +} + +impl std::error::Error for IndexUidFormatError {} + +/// A type that tries to match either a star (*) or +/// any other thing that implements `FromStr`. +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum StarOr { + Star, + Other(T), +} + +impl<'de, T, E> Deserialize<'de> for StarOr +where + T: FromStr, + E: Display, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + /// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag. + /// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to + /// deserialize everything as a `StarOr::Other`, including "*". + /// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is + /// not supported on untagged enums. + struct StarOrVisitor(PhantomData); + + impl<'de, T, FE> Visitor<'de> for StarOrVisitor + where + T: FromStr, + FE: Display, + { + type Value = StarOr; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + SE: serde::de::Error, + { + match v { + "*" => Ok(StarOr::Star), + v => { + let other = FromStr::from_str(v).map_err(|e: T::Err| { + SE::custom(format!("Invalid `other` value: {}", e)) + })?; + Ok(StarOr::Other(other)) + } + } + } + } + + deserializer.deserialize_str(StarOrVisitor(PhantomData)) + } +} diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs new file mode 100644 index 000000000..2265cbc63 --- /dev/null +++ b/dump/src/reader/v5/mod.rs @@ -0,0 +1,350 @@ +//! Here is what a dump v5 look like. +//! +//! ```text +//! . +//! ├── indexes +//! │   ├── 22c269d8-fbbd-4416-bd46-7c7c02849325 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa +//! │   ├── documents.jsonl +//! │   └── meta.json +//! ├── index_uuids +//! │   └── data.jsonl +//! ├── instance-uid +//! ├── keys +//! ├── metadata.json +//! └── updates +//! ├── data.jsonl +//! └── updates_files +//! └── c83a004a-da98-4b94-b245-3256266c7281 +//! ``` +//! +//! Here is what `index_uuids/data.jsonl` looks like; +//! +//! ```json +//! 
{"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}} +//! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}} +//! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}} +//! ``` +//! + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, ErrorKind, Seek, SeekFrom}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::compat::v5_to_v6::CompatV5ToV6; +use super::Document; +use crate::{Error, IndexMetadata, Result, Version}; + +pub mod errors; +pub mod keys; +pub mod meta; +pub mod settings; +pub mod tasks; + +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = tasks::Task; +pub type Key = keys::Key; + +// ===== Other types to clarify the code of the compat module +// everything related to the tasks +pub type Status = tasks::TaskStatus; +pub type Details = tasks::TaskDetails; + +// everything related to the settings +pub type Setting = settings::Setting; +pub type TypoTolerance = settings::TypoSettings; +pub type MinWordSizeForTypos = settings::MinWordSizeTyposSetting; + +// everything related to the api keys +pub type Action = keys::Action; +pub type StarOr = meta::StarOr; + +// everything related to the errors +pub type ResponseError = errors::ResponseError; +pub type Code = errors::Code; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V5Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + keys: BufReader, + index_uuid: Vec, +} + +impl V5Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
}) + .collect::>>()?; + + Ok(V5Reader { + metadata, + tasks: BufReader::new( + File::open(dump.path().join("updates").join("data.jsonl")).unwrap(), + ), + keys: BufReader::new(File::open(dump.path().join("keys"))?), + index_uuid, + dump, + }) + } + + pub fn to_v6(self) -> CompatV5ToV6 { + CompatV5ToV6::new_v5(self) + } + + pub fn version(&self) -> Version { + Version::V5 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn instance_uid(&self) -> Result> { + match fs::read_to_string(self.dump.path().join("instance-uid")) { + Ok(uuid) => Ok(Some(Uuid::parse_str(&uuid)?)), + Err(e) if e.kind() == ErrorKind::NotFound => Ok(None), + Err(e) => Err(e.into()), + } + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V5IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), + ) + })) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("updates_files") + .join(uuid.to_string()); + Ok(( + task, + Some( + Box::new(UpdateFile::new(&update_file_path)?) as Box + ), + )) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } + + pub fn keys(&mut self) -> Result> + '_>> { + self.keys.seek(SeekFrom::Start(0))?; + Ok(Box::new( + (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + )) + } +} + +pub struct V5IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V5IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: meta::DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. + created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V5IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) 
}) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v5() { + let dump = File::open("tests/assets/v5.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V5Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e159863f0442b2e987ce37fbd57af76b"); + assert_eq!(update_files.len(), 22); + assert!(update_files[0].is_none()); // the dump creation + assert!(update_files[1].is_some()); // the enqueued document addition + assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(1).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"091ddad754f3cc7cf1d03a477855e819"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"b392b928dab63468318b2bdaad844c5a"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"2f881248b7c3623e2ba2885dbf0b2c18"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 200); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + 
"updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"ade154e63ab713de67919892917d3d9d"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v5/settings.rs b/dump/src/reader/v5/settings.rs new file mode 100644 index 000000000..9a542149f --- /dev/null +++ b/dump/src/reader/v5/settings.rs @@ -0,0 +1,239 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; + +use serde::{Deserialize, Deserializer, Serialize}; + +#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)] +pub struct Checked; + +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Unchecked; + +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde(default)] + pub displayed_attributes: Setting>, + + #[serde(default)] + pub searchable_attributes: Setting>, + + #[serde(default)] + pub filterable_attributes: Setting>, + #[serde(default)] + pub sortable_attributes: Setting>, + #[serde(default)] + pub ranking_rules: Setting>, + #[serde(default)] + pub stop_words: Setting>, + #[serde(default)] + pub synonyms: Setting>>, + #[serde(default)] + pub distinct_attribute: Setting, + #[serde(default)] + pub typo_tolerance: Setting, + #[serde(default)] + pub faceting: Setting, + #[serde(default)] + pub pagination: Setting, + + #[serde(skip)] + pub _kind: PhantomData, +} + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum Setting { + Set(T), + Reset, + NotSet, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub fn set(self) -> Option { + match self { + Self::Set(value) => Some(value), + _ => None, + } + } + + pub const fn as_ref(&self) -> Setting<&T> { + match *self { + Self::Set(ref value) => Setting::Set(value), + Self::Reset => Setting::Reset, + Self::NotSet => Setting::NotSet, + } + } + + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } + + /// If `Self` is `Reset`, then map self to `Set` with the provided `val`. 
+ pub fn or_reset(self, val: T) -> Self { + match self { + Self::Reset => Self::Set(val), + otherwise => otherwise, + } + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct MinWordSizeTyposSetting { + #[serde(default)] + pub one_typo: Setting, + #[serde(default)] + pub two_typos: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct TypoSettings { + #[serde(default)] + pub enabled: Setting, + #[serde(default)] + pub min_word_size_for_typos: Setting, + #[serde(default)] + pub disable_on_words: Setting>, + #[serde(default)] + pub disable_on_attributes: Setting>, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct FacetingSettings { + #[serde(default)] + pub max_values_per_facet: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct PaginationSettings { + #[serde(default)] + pub max_total_hits: Setting, +} + +impl Settings { + pub fn cleared() -> Settings { + Settings { + displayed_attributes: Setting::Reset, + searchable_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, + sortable_attributes: Setting::Reset, + ranking_rules: Setting::Reset, + stop_words: Setting::Reset, + synonyms: Setting::Reset, + distinct_attribute: Setting::Reset, + typo_tolerance: Setting::Reset, + faceting: Setting::Reset, + pagination: Setting::Reset, + _kind: PhantomData, + } + } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + faceting, + pagination, + .. 
+ } = self; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + faceting, + pagination, + _kind: PhantomData, + } + } +} + +impl Settings { + pub fn check(self) -> Settings { + let displayed_attributes = match self.displayed_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + sortable_attributes: self.sortable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + typo_tolerance: self.typo_tolerance, + faceting: self.faceting, + pagination: self.pagination, + _kind: PhantomData, + } + } +} diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs new file mode 100644 index 000000000..125e20559 --- /dev/null +++ b/dump/src/reader/v5/tasks.rs @@ -0,0 +1,413 @@ +use serde::Deserialize; +use time::{Duration, OffsetDateTime}; +use uuid::Uuid; + +use super::errors::ResponseError; +use super::meta::IndexUid; +use super::settings::{Settings, Unchecked}; + +pub type TaskId = u32; +pub type BatchId = u32; + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Task { + pub id: TaskId, + /// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task) + /// then this is None + // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of + // the TaskContent. + pub content: TaskContent, + pub events: Vec, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[allow(clippy::large_enum_variant)] +pub enum TaskContent { + DocumentAddition { + index_uid: IndexUid, + content_uuid: Uuid, + merge_strategy: IndexDocumentsMethod, + primary_key: Option, + documents_count: usize, + allow_index_creation: bool, + }, + DocumentDeletion { + index_uid: IndexUid, + deletion: DocumentDeletion, + }, + SettingsUpdate { + index_uid: IndexUid, + settings: Settings, + /// Indicates whether the task was a deletion + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion { + index_uid: IndexUid, + }, + IndexCreation { + index_uid: IndexUid, + primary_key: Option, + }, + IndexUpdate { + index_uid: IndexUid, + primary_key: Option, + }, + Dump { + uid: String, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. 
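    /// A small sketch of the difference (the field names are arbitrary): re-sending
    /// `{"id": 1, "title": "Carol"}` for a stored `{"id": 1, "title": "Carol", "genre": "drama"}`
    /// drops `genre` with `ReplaceDocuments` but keeps it with `UpdateDocuments`.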
+ UpdateDocuments, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum DocumentDeletion { + Clear, + Ids(Vec), +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskEvent { + Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Batched { + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + batch_id: BatchId, + }, + Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Succeeded { + result: TaskResult, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, + Failed { + error: ResponseError, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskResult { + DocumentAddition { indexed_documents: u64 }, + DocumentDeletion { deleted_documents: u64 }, + ClearAll { deleted_documents: u64 }, + Other, +} + +impl Task { + /// Return true when a task is finished. + /// A task is finished when its last state is either `Succeeded` or `Failed`. + pub fn is_finished(&self) -> bool { + self.events.last().map_or(false, |event| { + matches!(event, TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }) + }) + } + + /// Return the content_uuid of the `Task` if there is one. + pub fn get_content_uuid(&self) -> Option { + match self { + Task { content: TaskContent::DocumentAddition { content_uuid, .. }, .. } => { + Some(*content_uuid) + } + _ => None, + } + } + + pub fn index_uid(&self) -> Option<&str> { + match &self.content { + TaskContent::DocumentAddition { index_uid, .. } + | TaskContent::DocumentDeletion { index_uid, .. } + | TaskContent::SettingsUpdate { index_uid, .. } + | TaskContent::IndexDeletion { index_uid } + | TaskContent::IndexCreation { index_uid, .. } + | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()), + TaskContent::Dump { .. } => None, + } + } +} + +impl IndexUid { + pub fn into_inner(self) -> String { + self.0 + } + + /// Return a reference over the inner str. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::ops::Deref for IndexUid { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +#[cfg_attr(test, serde(rename_all = "camelCase"))] +pub struct TaskView { + pub uid: TaskId, + pub index_uid: Option, + pub status: TaskStatus, + #[cfg_attr(test, serde(rename = "type"))] + pub task_type: TaskType, + #[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))] + pub details: Option, + #[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))] + pub error: Option, + #[cfg_attr(test, serde(serialize_with = "serialize_duration"))] + pub duration: Option, + #[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::serialize"))] + pub enqueued_at: OffsetDateTime, + #[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::option::serialize"))] + pub started_at: Option, + #[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::option::serialize"))] + pub finished_at: Option, +} + +impl From for TaskView { + fn from(task: Task) -> Self { + let index_uid = task.index_uid().map(String::from); + let Task { id, content, events } = task; + + let (task_type, mut details) = match content { + TaskContent::DocumentAddition { documents_count, .. 
} => { + let details = TaskDetails::DocumentAddition { + received_documents: documents_count, + indexed_documents: None, + }; + + (TaskType::DocumentAdditionOrUpdate, Some(details)) + } + TaskContent::DocumentDeletion { deletion: DocumentDeletion::Ids(ids), .. } => ( + TaskType::DocumentDeletion, + Some(TaskDetails::DocumentDeletion { + received_document_ids: ids.len(), + deleted_documents: None, + }), + ), + TaskContent::DocumentDeletion { deletion: DocumentDeletion::Clear, .. } => ( + TaskType::DocumentDeletion, + Some(TaskDetails::ClearAll { deleted_documents: None }), + ), + TaskContent::IndexDeletion { .. } => { + (TaskType::IndexDeletion, Some(TaskDetails::ClearAll { deleted_documents: None })) + } + TaskContent::SettingsUpdate { settings, .. } => { + (TaskType::SettingsUpdate, Some(TaskDetails::Settings { settings })) + } + TaskContent::IndexCreation { primary_key, .. } => { + (TaskType::IndexCreation, Some(TaskDetails::IndexInfo { primary_key })) + } + TaskContent::IndexUpdate { primary_key, .. } => { + (TaskType::IndexUpdate, Some(TaskDetails::IndexInfo { primary_key })) + } + TaskContent::Dump { uid } => { + (TaskType::DumpCreation, Some(TaskDetails::Dump { dump_uid: uid })) + } + }; + + // An event always has at least one event: "Created" + let (status, error, finished_at) = match events.last().unwrap() { + TaskEvent::Created(_) => (TaskStatus::Enqueued, None, None), + TaskEvent::Batched { .. } => (TaskStatus::Enqueued, None, None), + TaskEvent::Processing(_) => (TaskStatus::Processing, None, None), + TaskEvent::Succeeded { timestamp, result } => { + match (result, &mut details) { + ( + TaskResult::DocumentAddition { indexed_documents: num, .. }, + Some(TaskDetails::DocumentAddition { ref mut indexed_documents, .. }), + ) => { + indexed_documents.replace(*num); + } + ( + TaskResult::DocumentDeletion { deleted_documents: docs, .. }, + Some(TaskDetails::DocumentDeletion { ref mut deleted_documents, .. }), + ) => { + deleted_documents.replace(*docs); + } + ( + TaskResult::ClearAll { deleted_documents: docs }, + Some(TaskDetails::ClearAll { ref mut deleted_documents }), + ) => { + deleted_documents.replace(*docs); + } + _ => (), + } + (TaskStatus::Succeeded, None, Some(*timestamp)) + } + TaskEvent::Failed { timestamp, error } => { + match details { + Some(TaskDetails::DocumentDeletion { ref mut deleted_documents, .. }) => { + deleted_documents.replace(0); + } + Some(TaskDetails::ClearAll { ref mut deleted_documents, .. }) => { + deleted_documents.replace(0); + } + Some(TaskDetails::DocumentAddition { ref mut indexed_documents, .. 
}) => { + indexed_documents.replace(0); + } + _ => (), + } + (TaskStatus::Failed, Some(error.clone()), Some(*timestamp)) + } + }; + + let enqueued_at = match events.first() { + Some(TaskEvent::Created(ts)) => *ts, + _ => unreachable!("A task must always have a creation event."), + }; + + let started_at = events.iter().find_map(|e| match e { + TaskEvent::Processing(ts) => Some(*ts), + _ => None, + }); + + let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts)); + + Self { + uid: id, + index_uid, + status, + task_type, + details, + error, + duration, + enqueued_at, + started_at, + finished_at, + } + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub enum TaskType { + IndexCreation, + IndexUpdate, + IndexDeletion, + DocumentAdditionOrUpdate, + DocumentDeletion, + SettingsUpdate, + DumpCreation, +} + +impl From for TaskType { + fn from(other: TaskContent) -> Self { + match other { + TaskContent::IndexCreation { .. } => TaskType::IndexCreation, + TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate, + TaskContent::IndexDeletion { .. } => TaskType::IndexDeletion, + TaskContent::DocumentAddition { .. } => TaskType::DocumentAdditionOrUpdate, + TaskContent::DocumentDeletion { .. } => TaskType::DocumentDeletion, + TaskContent::SettingsUpdate { .. } => TaskType::SettingsUpdate, + TaskContent::Dump { .. } => TaskType::DumpCreation, + } + } +} + +#[derive(Debug, PartialEq, Eq, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub enum TaskStatus { + Enqueued, + Processing, + Succeeded, + Failed, +} + +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +#[cfg_attr(test, serde(untagged))] +#[allow(clippy::large_enum_variant)] +pub enum TaskDetails { + #[cfg_attr(test, serde(rename_all = "camelCase"))] + DocumentAddition { received_documents: usize, indexed_documents: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + Settings { + #[cfg_attr(test, serde(flatten))] + settings: Settings, + }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + IndexInfo { primary_key: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + DocumentDeletion { received_document_ids: usize, deleted_documents: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + ClearAll { deleted_documents: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + Dump { dump_uid: String }, +} + +/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for +/// https://github.com/time-rs/time/issues/378. +/// This code is a port of the old code of time that was removed in 0.2. 
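/// A few sample outputs of the helper below (the durations are arbitrary examples):
/// ```ignore
/// Duration::seconds(2)                      // -> "PT2S"
/// Duration::milliseconds(1500)              // -> "PT1.500S"
/// Duration::days(1) + Duration::seconds(2)  // -> "P1DT2S"
/// Duration::seconds(-1)                     // negative: serialized as `null`
/// ```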
+#[cfg(test)] +fn serialize_duration( + duration: &Option, + serializer: S, +) -> Result { + use std::fmt::Write; + + match duration { + Some(duration) => { + // technically speaking, negative duration is not valid ISO 8601 + if duration.is_negative() { + return serializer.serialize_none(); + } + + const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds(); + let secs = duration.whole_seconds(); + let days = secs / SECS_PER_DAY; + let secs = secs - days * SECS_PER_DAY; + let hasdate = days != 0; + let nanos = duration.subsec_nanoseconds(); + let hastime = (secs != 0 || nanos != 0) || !hasdate; + + // all the following unwrap can't fail + let mut res = String::new(); + write!(&mut res, "P").unwrap(); + + if hasdate { + write!(&mut res, "{}D", days).unwrap(); + } + + const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds(); + const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds(); + + if hastime { + if nanos == 0 { + write!(&mut res, "T{}S", secs).unwrap(); + } else if nanos % NANOS_PER_MILLI == 0 { + write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap(); + } else if nanos % NANOS_PER_MICRO == 0 { + write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap(); + } else { + write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap(); + } + } + + serializer.serialize_str(&res) + } + None => serializer.serialize_none(), + } +} diff --git a/dump/src/reader/v6/mod.rs b/dump/src/reader/v6/mod.rs new file mode 100644 index 000000000..4b08c6f8d --- /dev/null +++ b/dump/src/reader/v6/mod.rs @@ -0,0 +1,189 @@ +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::path::Path; +use std::str::FromStr; + +pub use meilisearch_types::milli; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::Document; +use crate::{Error, IndexMetadata, Result, Version}; + +pub type Metadata = crate::Metadata; + +pub type Settings = meilisearch_types::settings::Settings; +pub type Checked = meilisearch_types::settings::Checked; +pub type Unchecked = meilisearch_types::settings::Unchecked; + +pub type Task = crate::TaskDump; +pub type Key = meilisearch_types::keys::Key; + +// ===== Other types to clarify the code of the compat module +// everything related to the tasks +pub type Status = meilisearch_types::tasks::Status; +pub type Kind = crate::KindDump; +pub type Details = meilisearch_types::tasks::Details; + +// everything related to the settings +pub type Setting = meilisearch_types::milli::update::Setting; +pub type TypoTolerance = meilisearch_types::settings::TypoSettings; +pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting; +pub type FacetingSettings = meilisearch_types::settings::FacetingSettings; +pub type PaginationSettings = meilisearch_types::settings::PaginationSettings; + +// everything related to the api keys +pub type Action = meilisearch_types::keys::Action; +pub type StarOr = meilisearch_types::star_or::StarOr; +pub type IndexUid = meilisearch_types::index_uid::IndexUid; + +// everything related to the errors +pub type ResponseError = meilisearch_types::error::ResponseError; +pub type Code = meilisearch_types::error::Code; + +pub struct V6Reader { + dump: TempDir, + instance_uid: Uuid, + metadata: Metadata, + tasks: BufReader, + keys: BufReader, +} + +impl V6Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?; + let instance_uid = 
Uuid::from_str(&instance_uid)?; + + Ok(V6Reader { + metadata: serde_json::from_reader(&*meta_file)?, + instance_uid, + tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), + keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), + dump, + }) + } + + pub fn version(&self) -> Version { + Version::V6 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn instance_uid(&self) -> Result> { + Ok(Some(self.instance_uid)) + } + + pub fn indexes(&self) -> Result> + '_>> { + let entries = fs::read_dir(self.dump.path().join("indexes"))?; + Ok(Box::new( + entries + .map(|entry| -> Result> { + let entry = entry?; + if entry.file_type()?.is_dir() { + let index = V6IndexReader::new( + entry.file_name().to_str().ok_or(Error::BadIndexName)?.to_string(), + &entry.path(), + )?; + Ok(Some(index)) + } else { + Ok(None) + } + }) + .filter_map(|entry| entry.transpose()), + )) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?).unwrap(); + + let update_file_path = self + .dump + .path() + .join("tasks") + .join("update_files") + .join(format!("{}.jsonl", task.uid)); + + if update_file_path.exists() { + Ok(( + task, + Some(Box::new(UpdateFile::new(&update_file_path).unwrap()) + as Box), + )) + } else { + Ok((task, None)) + } + })) + } + + pub fn keys(&mut self) -> Box> + '_> { + Box::new( + (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + ) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) }) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +pub struct V6IndexReader { + metadata: IndexMetadata, + documents: BufReader, + settings: BufReader, +} + +impl V6IndexReader { + pub fn new(_name: String, path: &Path) -> Result { + let metadata = File::open(path.join("metadata.json"))?; + + let ret = V6IndexReader { + metadata: serde_json::from_reader(metadata)?, + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + settings: BufReader::new(File::open(path.join("settings.json"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
})) + } + + pub fn settings(&mut self) -> Result> { + let settings: Settings = serde_json::from_reader(&mut self.settings)?; + Ok(settings.check()) + } +} diff --git a/dump/src/writer.rs b/dump/src/writer.rs new file mode 100644 index 000000000..29aa2508d --- /dev/null +++ b/dump/src/writer.rs @@ -0,0 +1,350 @@ +use std::fs::{self, File}; +use std::io::{BufWriter, Write}; +use std::path::PathBuf; + +use flate2::write::GzEncoder; +use flate2::Compression; +use meilisearch_types::keys::Key; +use meilisearch_types::settings::{Checked, Settings}; +use serde_json::{Map, Value}; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::reader::Document; +use crate::{IndexMetadata, Metadata, Result, TaskDump, CURRENT_DUMP_VERSION}; + +pub struct DumpWriter { + dir: TempDir, +} + +impl DumpWriter { + pub fn new(instance_uuid: Option) -> Result { + let dir = TempDir::new()?; + + if let Some(instance_uuid) = instance_uuid { + fs::write( + dir.path().join("instance_uid.uuid"), + &instance_uuid.as_hyphenated().to_string(), + )?; + } + + let metadata = Metadata { + dump_version: CURRENT_DUMP_VERSION, + db_version: env!("CARGO_PKG_VERSION").to_string(), + dump_date: OffsetDateTime::now_utc(), + }; + fs::write(dir.path().join("metadata.json"), serde_json::to_string(&metadata)?)?; + + std::fs::create_dir(&dir.path().join("indexes"))?; + + Ok(DumpWriter { dir }) + } + + pub fn create_index(&self, index_name: &str, metadata: &IndexMetadata) -> Result { + IndexWriter::new(self.dir.path().join("indexes").join(index_name), metadata) + } + + pub fn create_keys(&self) -> Result { + KeyWriter::new(self.dir.path().to_path_buf()) + } + + pub fn create_tasks_queue(&self) -> Result { + TaskWriter::new(self.dir.path().join("tasks")) + } + + pub fn persist_to(self, mut writer: impl Write) -> Result<()> { + let gz_encoder = GzEncoder::new(&mut writer, Compression::default()); + let mut tar_encoder = tar::Builder::new(gz_encoder); + tar_encoder.append_dir_all(".", self.dir.path())?; + let gz_encoder = tar_encoder.into_inner()?; + gz_encoder.finish()?; + writer.flush()?; + + Ok(()) + } +} + +pub struct KeyWriter { + keys: BufWriter, +} + +impl KeyWriter { + pub(crate) fn new(path: PathBuf) -> Result { + let keys = File::create(path.join("keys.jsonl"))?; + Ok(KeyWriter { keys: BufWriter::new(keys) }) + } + + pub fn push_key(&mut self, key: &Key) -> Result<()> { + self.keys.write_all(&serde_json::to_vec(key)?)?; + self.keys.write_all(b"\n")?; + Ok(()) + } + + pub fn flush(mut self) -> Result<()> { + self.keys.flush()?; + Ok(()) + } +} + +pub struct TaskWriter { + queue: BufWriter, + update_files: PathBuf, +} + +impl TaskWriter { + pub(crate) fn new(path: PathBuf) -> Result { + std::fs::create_dir(&path)?; + + let queue = File::create(path.join("queue.jsonl"))?; + let update_files = path.join("update_files"); + std::fs::create_dir(&update_files)?; + + Ok(TaskWriter { queue: BufWriter::new(queue), update_files }) + } + + /// Pushes tasks in the dump. + /// If the tasks has an associated `update_file` it'll use the `task_id` as its name. 
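    /// A minimal sketch of the intended call pattern (the task and document values
    /// are placeholders):
    /// ```ignore
    /// let mut update_file = task_writer.push_task(&task)?;
    /// update_file.push_document(&document)?;
    /// update_file.flush()?;
    /// ```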
+ pub fn push_task(&mut self, task: &TaskDump) -> Result { + self.queue.write_all(&serde_json::to_vec(task)?)?; + self.queue.write_all(b"\n")?; + + Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid)))) + } + + pub fn flush(mut self) -> Result<()> { + self.queue.flush()?; + Ok(()) + } +} + +pub struct UpdateFile { + path: PathBuf, + writer: Option>, +} + +impl UpdateFile { + pub(crate) fn new(path: PathBuf) -> UpdateFile { + UpdateFile { path, writer: None } + } + + pub fn push_document(&mut self, document: &Document) -> Result<()> { + if let Some(writer) = self.writer.as_mut() { + writer.write_all(&serde_json::to_vec(document)?)?; + writer.write_all(b"\n")?; + } else { + let file = File::create(&self.path).unwrap(); + self.writer = Some(BufWriter::new(file)); + self.push_document(document)?; + } + Ok(()) + } + + pub fn flush(self) -> Result<()> { + if let Some(mut writer) = self.writer { + writer.flush()?; + } + Ok(()) + } +} + +pub struct IndexWriter { + documents: BufWriter, + settings: File, +} + +impl IndexWriter { + pub(self) fn new(path: PathBuf, metadata: &IndexMetadata) -> Result { + std::fs::create_dir(&path)?; + + let metadata_file = File::create(path.join("metadata.json"))?; + serde_json::to_writer(metadata_file, metadata)?; + + let documents = File::create(path.join("documents.jsonl"))?; + let settings = File::create(path.join("settings.json"))?; + + Ok(IndexWriter { documents: BufWriter::new(documents), settings }) + } + + pub fn push_document(&mut self, document: &Map) -> Result<()> { + serde_json::to_writer(&mut self.documents, document)?; + self.documents.write_all(b"\n")?; + Ok(()) + } + + pub fn flush(&mut self) -> Result<()> { + self.documents.flush()?; + Ok(()) + } + + pub fn settings(mut self, settings: &Settings) -> Result<()> { + self.settings.write_all(&serde_json::to_vec(&settings)?)?; + Ok(()) + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fmt::Write; + use std::io::BufReader; + use std::path::Path; + use std::str::FromStr; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use meilisearch_types::settings::Unchecked; + + use super::*; + use crate::reader::Document; + use crate::test::{ + create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid, + create_test_settings, create_test_tasks, + }; + + fn create_directory_hierarchy(dir: &Path) -> String { + let mut ret = String::new(); + writeln!(ret, ".").unwrap(); + ret.push_str(&_create_directory_hierarchy(dir, 0)); + ret + } + + fn _create_directory_hierarchy(dir: &Path, depth: usize) -> String { + let mut ret = String::new(); + + // the entries are not guarenteed to be returned in the same order thus we need to sort them. + let mut entries = + fs::read_dir(dir).unwrap().collect::, _>>().unwrap(); + + // I want the directories first and then sort by name. 
+ entries.sort_by(|a, b| { + let (aft, bft) = (a.file_type().unwrap(), b.file_type().unwrap()); + + if aft.is_dir() && bft.is_dir() { + a.file_name().cmp(&b.file_name()) + } else if aft.is_file() && bft.is_dir() { + std::cmp::Ordering::Greater + } else if bft.is_file() && aft.is_dir() { + std::cmp::Ordering::Less + } else { + a.file_name().cmp(&b.file_name()) + } + }); + + for (idx, entry) in entries.iter().enumerate() { + let mut ident = String::new(); + + for _ in 0..depth { + ident.push('│'); + ident.push_str(&" ".repeat(4)); + } + if idx == entries.len() - 1 { + ident.push('└'); + } else { + ident.push('├'); + } + ident.push_str(&"-".repeat(4)); + + let name = entry.file_name().into_string().unwrap(); + let file_type = entry.file_type().unwrap(); + let is_dir = if file_type.is_dir() { "/" } else { "" }; + + assert!(!file_type.is_symlink()); + writeln!(ret, "{ident} {name}{is_dir}").unwrap(); + + if file_type.is_dir() { + ret.push_str(&_create_directory_hierarchy(&entry.path(), depth + 1)); + } + } + ret + } + + #[test] + #[ignore] + fn test_creating_dump() { + let file = create_test_dump(); + let mut file = BufReader::new(file); + + // ============ ensuring we wrote everything in the correct place. + let dump = tempfile::tempdir().unwrap(); + + let gz = GzDecoder::new(&mut file); + let mut tar = tar::Archive::new(gz); + tar.unpack(dump.path()).unwrap(); + + let dump_path = dump.path(); + + // ==== checking global file hierarchy (we want to be sure there isn't too many files or too few) + insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###" + . + ├---- indexes/ + │ └---- doggos/ + │ │ ├---- documents.jsonl + │ │ ├---- metadata.json + │ │ └---- settings.json + ├---- tasks/ + │ ├---- update_files/ + │ │ └---- 1.jsonl + │ └---- queue.jsonl + ├---- instance_uid.uuid + ├---- keys.jsonl + └---- metadata.json + "###); + + // ==== checking the top level infos + let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap(); + let metadata: Metadata = serde_json::from_str(&metadata).unwrap(); + insta::assert_json_snapshot!(metadata, { ".dumpDate" => "[date]" }, @r###" + { + "dumpVersion": "V6", + "dbVersion": "0.29.0", + "dumpDate": "[date]" + } + "###); + + let instance_uid = fs::read_to_string(dump_path.join("instance_uid.uuid")).unwrap(); + assert_eq!(Uuid::from_str(&instance_uid).unwrap(), create_test_instance_uid()); + + // ==== checking the index + let docs = fs::read_to_string(dump_path.join("indexes/doggos/documents.jsonl")).unwrap(); + for (document, expected) in docs.lines().zip(create_test_documents()) { + assert_eq!(serde_json::from_str::>(document).unwrap(), expected); + } + let test_settings = + fs::read_to_string(dump_path.join("indexes/doggos/settings.json")).unwrap(); + assert_eq!( + serde_json::from_str::>(&test_settings).unwrap(), + create_test_settings().into_unchecked() + ); + let metadata = fs::read_to_string(dump_path.join("indexes/doggos/metadata.json")).unwrap(); + let metadata: IndexMetadata = serde_json::from_str(&metadata).unwrap(); + insta::assert_json_snapshot!(metadata, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }, @r###" + { + "uid": "doggo", + "primaryKey": null, + "createdAt": "[date]", + "updatedAt": "[date]" + } + "###); + + // ==== checking the task queue + let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap(); + for (task, expected) in tasks_queue.lines().zip(create_test_tasks()) { + assert_eq!(serde_json::from_str::(task).unwrap(), expected.0); + + if let Some(expected_update) = 
expected.1 { + let path = dump_path.join(format!("tasks/update_files/{}.jsonl", expected.0.uid)); + println!("trying to open {}", path.display()); + let update = fs::read_to_string(path).unwrap(); + let documents: Vec = + update.lines().map(|line| serde_json::from_str(line).unwrap()).collect(); + assert_eq!(documents, expected_update); + } + } + + // ==== checking the keys + let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap(); + for (key, expected) in keys.lines().zip(create_test_api_keys()) { + assert_eq!(serde_json::from_str::(key).unwrap(), expected); + } + } +} diff --git a/dump/tests/assets/v2.dump b/dump/tests/assets/v2.dump new file mode 100644 index 000000000..eacea80a5 Binary files /dev/null and b/dump/tests/assets/v2.dump differ diff --git a/dump/tests/assets/v3.dump b/dump/tests/assets/v3.dump new file mode 100644 index 000000000..abf6fdf9f Binary files /dev/null and b/dump/tests/assets/v3.dump differ diff --git a/dump/tests/assets/v4.dump b/dump/tests/assets/v4.dump new file mode 100644 index 000000000..9dd276243 Binary files /dev/null and b/dump/tests/assets/v4.dump differ diff --git a/dump/tests/assets/v5.dump b/dump/tests/assets/v5.dump new file mode 100644 index 000000000..9b60049e4 Binary files /dev/null and b/dump/tests/assets/v5.dump differ diff --git a/file-store/Cargo.toml b/file-store/Cargo.toml new file mode 100644 index 000000000..3dafcbecc --- /dev/null +++ b/file-store/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "file-store" +version = "0.1.0" +edition = "2021" + +[dependencies] +tempfile = "3.3.0" +thiserror = "1.0.30" +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +faux = "0.1.8" diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs new file mode 100644 index 000000000..e05694c92 --- /dev/null +++ b/file-store/src/lib.rs @@ -0,0 +1,132 @@ +use std::collections::BTreeSet; +use std::fs::File as StdFile; +use std::ops::{Deref, DerefMut}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +use tempfile::NamedTempFile; +use uuid::Uuid; + +const UPDATE_FILES_PATH: &str = "updates/updates_files"; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + IoError(#[from] std::io::Error), + #[error(transparent)] + PersistError(#[from] tempfile::PersistError), +} + +pub type Result = std::result::Result; + +impl Deref for File { + type Target = NamedTempFile; + + fn deref(&self) -> &Self::Target { + &self.file + } +} + +impl DerefMut for File { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.file + } +} + +#[cfg_attr(test, faux::create)] +#[derive(Clone, Debug)] +pub struct FileStore { + path: PathBuf, +} + +#[cfg(not(test))] +impl FileStore { + pub fn new(path: impl AsRef) -> Result { + let path = path.as_ref().to_path_buf(); + std::fs::create_dir_all(&path)?; + Ok(FileStore { path }) + } +} + +#[cfg_attr(test, faux::methods)] +impl FileStore { + /// Creates a new temporary update file. + /// A call to `persist` is needed to persist the file in the database. + pub fn new_update(&self) -> Result<(Uuid, File)> { + let file = NamedTempFile::new_in(&self.path)?; + let uuid = Uuid::new_v4(); + let path = self.path.join(uuid.to_string()); + let update_file = File { file, path }; + + Ok((uuid, update_file)) + } + + /// Creates a new temporary update file with the given Uuid. + /// A call to `persist` is needed to persist the file in the database. 
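    /// A minimal sketch of the intended flow (the path, payload and uuid are placeholders):
    /// ```ignore
    /// use std::io::Write;
    ///
    /// let store = FileStore::new("/tmp/update_files")?;
    /// let (uuid, mut file) = store.new_update_with_uuid(42)?;
    /// file.write_all(b"{\"id\": 1}\n")?; // `File` derefs to `NamedTempFile`
    /// file.persist()?;                   // moves the temp file to `<path>/<uuid>`
    /// let update = store.get_update(uuid)?;
    /// ```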
+ pub fn new_update_with_uuid(&self, uuid: u128) -> Result<(Uuid, File)> { + let file = NamedTempFile::new_in(&self.path)?; + let uuid = Uuid::from_u128(uuid); + let path = self.path.join(uuid.to_string()); + let update_file = File { file, path }; + + Ok((uuid, update_file)) + } + + /// Returns the file corresponding to the requested uuid. + pub fn get_update(&self, uuid: Uuid) -> Result { + let path = self.get_update_path(uuid); + let file = StdFile::open(path)?; + Ok(file) + } + + /// Returns the path that correspond to this uuid, the path could not exists. + pub fn get_update_path(&self, uuid: Uuid) -> PathBuf { + self.path.join(uuid.to_string()) + } + + /// Copies the content of the update file pointed to by `uuid` to the `dst` directory. + pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { + let src = self.path.join(uuid.to_string()); + let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); + std::fs::create_dir_all(&dst)?; + dst.push(uuid.to_string()); + std::fs::copy(src, dst)?; + Ok(()) + } + + pub fn get_size(&self, uuid: Uuid) -> Result { + Ok(self.get_update(uuid)?.metadata()?.len()) + } + + pub fn delete(&self, uuid: Uuid) -> Result<()> { + let path = self.path.join(uuid.to_string()); + std::fs::remove_file(path)?; + Ok(()) + } + + /// List the Uuids of the files in the FileStore + /// + /// This function is meant to be used by tests only. + #[doc(hidden)] + pub fn __all_uuids(&self) -> BTreeSet { + let mut uuids = BTreeSet::new(); + for entry in self.path.read_dir().unwrap() { + let entry = entry.unwrap(); + let uuid = Uuid::from_str(entry.file_name().to_str().unwrap()).unwrap(); + uuids.insert(uuid); + } + uuids + } +} + +pub struct File { + path: PathBuf, + file: NamedTempFile, +} + +impl File { + pub fn persist(self) -> Result<()> { + self.file.persist(&self.path)?; + Ok(()) + } +} diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml new file mode 100644 index 000000000..3b29d158f --- /dev/null +++ b/index-scheduler/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "index-scheduler" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.64" +bincode = "1.3.3" +csv = "1.1.6" +derive_builder = "0.11.2" +dump = { path = "../dump" } +enum-iterator = "1.1.3" +file-store = { path = "../file-store" } +log = "0.4.14" +meilisearch-types = { path = "../meilisearch-types" } +roaring = { version = "0.10.0", features = ["serde"] } +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.85", features = ["preserve_order"] } +synchronoise = "1.0.1" +tempfile = "3.3.0" +thiserror = "1.0.30" +time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +big_s = "1.0.2" +crossbeam = "0.8.2" +insta = { version = "1.19.1", features = ["json", "redactions"] } +meili-snap = { path = "../meili-snap" } +nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs new file mode 100644 index 000000000..d1ed691c6 --- /dev/null +++ b/index-scheduler/src/autobatcher.rs @@ -0,0 +1,727 @@ +/*! +The autobatcher is responsible for combining the next enqueued +tasks affecting a single index into a [batch](crate::batch::Batch). + +The main function of the autobatcher is [`next_autobatch`]. 
+*/ + +use std::ops::ControlFlow::{self, Break, Continue}; + +use meilisearch_types::milli::update::IndexDocumentsMethod::{ + self, ReplaceDocuments, UpdateDocuments, +}; +use meilisearch_types::tasks::TaskId; + +use crate::KindWithContent; + +/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind) +/// for the purpose of simplifying the implementation of the autobatcher. +/// +/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`] +enum AutobatchKind { + DocumentImport { method: IndexDocumentsMethod, allow_index_creation: bool }, + DocumentDeletion, + DocumentClear, + Settings { allow_index_creation: bool }, + IndexCreation, + IndexDeletion, + IndexUpdate, + IndexSwap, +} + +impl AutobatchKind { + #[rustfmt::skip] + fn allow_index_creation(&self) -> Option { + match self { + AutobatchKind::DocumentImport { allow_index_creation, .. } + | AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation), + _ => None, + } + } +} + +impl From for AutobatchKind { + fn from(kind: KindWithContent) -> Self { + match kind { + KindWithContent::DocumentAdditionOrUpdate { method, allow_index_creation, .. } => { + AutobatchKind::DocumentImport { method, allow_index_creation } + } + KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion, + KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear, + KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => { + AutobatchKind::Settings { + allow_index_creation: allow_index_creation && !is_deletion, + } + } + KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion, + KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation, + KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate, + KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap, + KindWithContent::TaskCancelation { .. } + | KindWithContent::TaskDeletion { .. } + | KindWithContent::DumpCreation { .. } + | KindWithContent::SnapshotCreation => { + panic!("The autobatcher should never be called with tasks that don't apply to an index.") + } + } + } +} + +#[derive(Debug)] +pub enum BatchKind { + DocumentClear { + ids: Vec, + }, + DocumentImport { + method: IndexDocumentsMethod, + allow_index_creation: bool, + import_ids: Vec, + }, + DocumentDeletion { + deletion_ids: Vec, + }, + ClearAndSettings { + other: Vec, + allow_index_creation: bool, + settings_ids: Vec, + }, + SettingsAndDocumentImport { + settings_ids: Vec, + method: IndexDocumentsMethod, + allow_index_creation: bool, + import_ids: Vec, + }, + Settings { + allow_index_creation: bool, + settings_ids: Vec, + }, + IndexDeletion { + ids: Vec, + }, + IndexCreation { + id: TaskId, + }, + IndexUpdate { + id: TaskId, + }, + IndexSwap { + id: TaskId, + }, +} + +impl BatchKind { + #[rustfmt::skip] + fn allow_index_creation(&self) -> Option { + match self { + BatchKind::DocumentImport { allow_index_creation, .. } + | BatchKind::ClearAndSettings { allow_index_creation, .. } + | BatchKind::SettingsAndDocumentImport { allow_index_creation, .. } + | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation), + _ => None, + } + } +} + +impl BatchKind { + /// Returns a `ControlFlow::Break` if you must stop right now. + /// The boolean tell you if an index has been created by the batched task. + /// To ease the writting of the code. `true` can be returned when you don't need to create an index + /// but false can't be returned if you needs to create an index. 
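Both `new` and `accumulate` drive a fold over the enqueued tasks using the standard `ControlFlow` type: `Continue` keeps the current batch growing, `Break` closes it immediately. A stripped-down sketch of that pattern, with a deliberately arbitrary stop condition standing in for the real task-kind rules:

```rust
use std::ops::ControlFlow::{self, Break, Continue};

/// Toy version of the accumulation loop: fold items into an accumulator
/// until one of them decides the batch must be closed.
fn accumulate_until_break(items: &[u32]) -> Vec<u32> {
    let mut acc = Vec::new();
    for &item in items {
        acc = match step(acc, item) {
            Continue(acc) => acc,     // keep batching
            Break(acc) => return acc, // close the batch right now
        };
    }
    acc
}

fn step(mut acc: Vec<u32>, item: u32) -> ControlFlow<Vec<u32>, Vec<u32>> {
    acc.push(item);
    // In the real autobatcher the decision depends on the task kinds;
    // here we arbitrarily stop on a zero.
    if item == 0 { Break(acc) } else { Continue(acc) }
}
```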
+ // TODO use an AutoBatchKind as input + pub fn new( + task_id: TaskId, + kind: KindWithContent, + ) -> (ControlFlow, bool) { + use AutobatchKind as K; + + match AutobatchKind::from(kind) { + K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true), + K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false), + K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false), + K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false), + K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false), + K::DocumentImport { method, allow_index_creation } => ( + Continue(BatchKind::DocumentImport { + method, + allow_index_creation, + import_ids: vec![task_id], + }), + allow_index_creation, + ), + K::DocumentDeletion => { + (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false) + } + K::Settings { allow_index_creation } => ( + Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), + allow_index_creation, + ), + } + } + + /// Returns a `ControlFlow::Break` if you must stop right now. + /// The boolean tell you if an index has been created by the batched task. + /// To ease the writting of the code. `true` can be returned when you don't need to create an index + /// but false can't be returned if you needs to create an index. + #[rustfmt::skip] + fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool) -> ControlFlow { + use AutobatchKind as K; + + match (self, kind) { + // We don't batch any of these operations + (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this), + // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. + (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { + Break(this) + }, + // The index deletion can batch with everything but must stop after + ( + BatchKind::DocumentClear { mut ids } + | BatchKind::DocumentDeletion { deletion_ids: mut ids } + | BatchKind::DocumentImport { method: _, allow_index_creation: _, import_ids: mut ids } + | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids }, + K::IndexDeletion, + ) => { + ids.push(id); + Break(BatchKind::IndexDeletion { ids }) + } + ( + BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other } + | BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, settings_ids: mut other }, + K::IndexDeletion, + ) => { + ids.push(id); + ids.append(&mut other); + Break(BatchKind::IndexDeletion { ids }) + } + + ( + BatchKind::DocumentClear { mut ids }, + K::DocumentClear | K::DocumentDeletion, + ) => { + ids.push(id); + Continue(BatchKind::DocumentClear { ids }) + } + ( + this @ BatchKind::DocumentClear { .. }, + K::DocumentImport { .. } | K::Settings { .. }, + ) => Break(this), + ( + BatchKind::DocumentImport { method: _, allow_index_creation: _, import_ids: mut ids }, + K::DocumentClear, + ) => { + ids.push(id); + Continue(BatchKind::DocumentClear { ids }) + } + + // we can autobatch the same kind of document additions / updates + ( + BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, mut import_ids }, + K::DocumentImport { method: ReplaceDocuments, .. 
}, + ) => { + import_ids.push(id); + Continue(BatchKind::DocumentImport { + method: ReplaceDocuments, + allow_index_creation, + import_ids, + }) + } + ( + BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, mut import_ids }, + K::DocumentImport { method: UpdateDocuments, .. }, + ) => { + import_ids.push(id); + Continue(BatchKind::DocumentImport { + method: UpdateDocuments, + allow_index_creation, + import_ids, + }) + } + + // but we can't autobatch documents if it's not the same kind + // this match branch MUST be AFTER the previous one + ( + this @ BatchKind::DocumentImport { .. }, + K::DocumentDeletion | K::DocumentImport { .. }, + ) => Break(this), + + ( + BatchKind::DocumentImport { method, allow_index_creation, import_ids }, + K::Settings { .. }, + ) => Continue(BatchKind::SettingsAndDocumentImport { + settings_ids: vec![id], + method, + allow_index_creation, + import_ids, + }), + + (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => { + deletion_ids.push(id); + Continue(BatchKind::DocumentClear { ids: deletion_ids }) + } + (this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this), + (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => { + deletion_ids.push(id); + Continue(BatchKind::DocumentDeletion { deletion_ids }) + } + (this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this), + + ( + BatchKind::Settings { settings_ids, allow_index_creation }, + K::DocumentClear, + ) => Continue(BatchKind::ClearAndSettings { + settings_ids, + allow_index_creation, + other: vec![id], + }), + ( + this @ BatchKind::Settings { .. }, + K::DocumentImport { .. } | K::DocumentDeletion, + ) => Break(this), + ( + BatchKind::Settings { mut settings_ids, allow_index_creation }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::Settings { + allow_index_creation, + settings_ids, + }) + } + + ( + BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation }, + K::DocumentClear, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + (this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this), + ( + BatchKind::ClearAndSettings { + mut other, + settings_ids, + allow_index_creation, + }, + K::DocumentDeletion, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + ( + BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + ( + BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation }, + K::DocumentClear, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + settings_ids, + other, + allow_index_creation, + }) + } + + ( + BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation }, + K::DocumentImport { method: ReplaceDocuments, .. }, + ) => { + import_ids.push(id); + Continue(BatchKind::SettingsAndDocumentImport { + settings_ids, + method: ReplaceDocuments, + allow_index_creation, + import_ids, + }) + } + ( + BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, mut import_ids }, + K::DocumentImport { method: UpdateDocuments, .. 
}, + ) => { + import_ids.push(id); + Continue(BatchKind::SettingsAndDocumentImport { + settings_ids, + method: UpdateDocuments, + allow_index_creation, + import_ids, + }) + } + // But we can't batch a settings and a doc op with another doc op + // this MUST be AFTER the two previous branch + ( + this @ BatchKind::SettingsAndDocumentImport { .. }, + K::DocumentDeletion | K::DocumentImport { .. }, + ) => Break(this), + ( + BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation, import_ids }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::SettingsAndDocumentImport { + settings_ids, + method, + allow_index_creation, + import_ids, + }) + } + ( + BatchKind::IndexCreation { .. } + | BatchKind::IndexDeletion { .. } + | BatchKind::IndexUpdate { .. } + | BatchKind::IndexSwap { .. }, + _, + ) => { + unreachable!() + } + } + } +} + +/// Create a batch from an ordered list of tasks. +/// +/// ## Preconditions +/// 1. The tasks must be enqueued and given in the order in which they were enqueued +/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion) +/// 3. The tasks must all be related to the same index +/// +/// ## Return +/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents +/// a subset of the given tasks. +pub fn autobatch( + enqueued: Vec<(TaskId, KindWithContent)>, + index_already_exists: bool, +) -> Option<(BatchKind, bool)> { + let mut enqueued = enqueued.into_iter(); + let (id, kind) = enqueued.next()?; + + // index_exist will keep track of if the index should exist at this point after the tasks we batched. + let mut index_exist = index_already_exists; + + let (mut acc, must_create_index) = match BatchKind::new(id, kind) { + (Continue(acc), create) => (acc, create), + (Break(acc), create) => return Some((acc, create)), + }; + + // if an index has been created in the previous step we can consider it as existing. 
+ index_exist |= must_create_index; + + for (id, kind) in enqueued { + acc = match acc.accumulate(id, kind.into(), index_exist) { + Continue(acc) => acc, + Break(acc) => return Some((acc, must_create_index)), + }; + } + + Some((acc, must_create_index)) +} + +#[cfg(test)] +mod tests { + use meilisearch_types::tasks::IndexSwap; + use uuid::Uuid; + + use super::*; + use crate::debug_snapshot; + + fn autobatch_from( + index_already_exists: bool, + input: impl IntoIterator, + ) -> Option<(BatchKind, bool)> { + autobatch( + input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(), + index_already_exists, + ) + } + + fn doc_imp(method: IndexDocumentsMethod, allow_index_creation: bool) -> KindWithContent { + KindWithContent::DocumentAdditionOrUpdate { + index_uid: String::from("doggo"), + primary_key: None, + method, + content_file: Uuid::new_v4(), + documents_count: 0, + allow_index_creation, + } + } + + fn doc_del() -> KindWithContent { + KindWithContent::DocumentDeletion { + index_uid: String::from("doggo"), + documents_ids: Vec::new(), + } + } + + fn doc_clr() -> KindWithContent { + KindWithContent::DocumentClear { index_uid: String::from("doggo") } + } + + fn settings(allow_index_creation: bool) -> KindWithContent { + KindWithContent::SettingsUpdate { + index_uid: String::from("doggo"), + new_settings: Default::default(), + is_deletion: false, + allow_index_creation, + } + } + + fn idx_create() -> KindWithContent { + KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None } + } + + fn idx_update() -> KindWithContent { + KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None } + } + + fn idx_del() -> KindWithContent { + KindWithContent::IndexDeletion { index_uid: String::from("doggo") } + } + + fn idx_swap() -> KindWithContent { + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }], + } + } + + #[test] + fn autobatch_simple_operation_together() { + // we can autobatch one or multiple `ReplaceDocuments` together. + // if the index exists. + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp( ReplaceDocuments, false ), doc_imp(ReplaceDocuments, false )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))"); + + // if it doesn't exists. 
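The `debug_snapshot!` expectations compare the `Debug` rendering of `autobatch_from`'s result against the inline string. Spelled out without the macro, the three-import case above is roughly equivalent to:

```rust
let result = autobatch_from(
    true,
    [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)],
);
// The three imports share the same method and index-creation rights,
// so they collapse into a single DocumentImport batch.
assert_eq!(
    format!("{result:?}"),
    "Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"
);
```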
+ debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + + // we can autobatch one or multiple `UpdateDocuments` together. + // if the index exists. + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))"); + + // if it doesn't exists. 
+ debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))"); + + // we can autobatch one or multiple DocumentDeletion together + debug_snapshot!(autobatch_from(true, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); + + // we can autobatch one or multiple Settings together + debug_snapshot!(autobatch_from(true, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); + + debug_snapshot!(autobatch_from(false, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); + } + + #[test] + fn simple_document_operation_dont_autobatch_with_other() { + // addition, updates and deletion can't batch together + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + 
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_del(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + } + + #[test] + fn document_addition_batch_with_settings() { + // simple case + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + + // multiple settings and doc addition + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + + // addition and setting unordered + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, 
[doc_imp(UpdateDocuments, true), settings(true), doc_imp(UpdateDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 2] }, true))"); + + // We ensure this kind of batch doesn't batch with forbidden operations + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_imp(UpdateDocuments, true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_imp(ReplaceDocuments, true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + } + + #[test] + fn clear_and_additions() { + // these two doesn't need to batch + debug_snapshot!(autobatch_from(true, [doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentClear { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentClear { ids: [0] }, false))"); + + // Basic use case + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + + // This batch kind doesn't mix with other document addition + 
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + + // But you can batch multiple clear together + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); + } + + #[test] + fn clear_and_additions_and_settings() { + // A clear don't need to autobatch the settings that happens AFTER there is no documents + debug_snapshot!(autobatch_from(true, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))"); + } + + #[test] + fn anything_and_index_deletion() { + // The `IndexDeletion` doesn't batch with anything that happens AFTER. 
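The expectations that start with `idx_del()` all stop at a single task because `BatchKind::new` maps an `IndexDeletion` straight to `Break`, so `accumulate` is never reached for the tasks enqueued after it, whether the index exists or not:

```rust
// 0: idx_del()  -> BatchKind::new returns Break(IndexDeletion { ids: [0] })
// 1: anything   -> never reaches `accumulate`; the batch is already closed.
```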
+ debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(UpdateDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(ReplaceDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(UpdateDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(UpdateDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(ReplaceDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(UpdateDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + + // The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`. 
+ // First, the basic cases + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + + // Then the mixed cases. + // The index already exists, whatever is the right of the tasks it shouldn't change the result. 
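The id ordering in the mixed-case expectations just below follows from the `IndexDeletion` arm of `accumulate`: the deletion id is pushed onto the primary id list first, then the other id list is appended. Traced for `[doc_imp(_, true), settings(true), idx_del()]` with task ids 0, 1, 2:

```rust
// 0: doc_imp(_, true) -> DocumentImport            { import_ids: [0] }
// 1: settings(true)   -> SettingsAndDocumentImport { import_ids: [0], settings_ids: [1] }
// 2: idx_del()        -> push 2 onto import_ids ([0, 2]), append settings_ids ([1]),
//                        Break(IndexDeletion { ids: [0, 2, 1] })
```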
+ debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + + // When the index doesn't exists yet it's more complicated. + // Either the first task we encounter create it, in which case we can create a big batch with everything. + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + // The right of the tasks following isn't really important. 
+ debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + // Or, the second case; the first task doesn't create the index and thus we wants to batch it with only tasks that can't create an index. + // that can be a second task that don't have the right to create an index. Or anything that can't create an index like an index deletion, document deletion, document clear, etc. + // All theses tasks are going to throw an error `Index doesn't exist` once the batch is processed. + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + // The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whith what + // follows because we first need to process the erronous batch. + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + } + + #[test] + fn allowed_and_disallowed_index_creation() { + // `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists. 
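The `autobatch_from(false, ...)` cases in this test exercise the `allow_index_creation` guard in `accumulate`: when the index does not exist yet, a task that is allowed to create it must not ride along with an earlier batch that is not. Traced for `[doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]`:

```rust
// index_already_exists == false
// 0: doc_imp(ReplaceDocuments, false) -> DocumentImport { allow_index_creation: false, import_ids: [0] }
// 1: doc_imp(ReplaceDocuments, true)  -> guard matches (!index_exists, Some(false) vs Some(true)),
//                                        accumulate returns Break, batch stops at import_ids: [0].
// With index_already_exists == true the guard is skipped and both imports batch as [0, 1].
```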
+ debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + } +} diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs new file mode 100644 index 000000000..edbf2cae0 --- /dev/null +++ b/index-scheduler/src/batch.rs @@ -0,0 +1,1206 @@ +/*! +This module handles the creation and processing of batch operations. + +A batch is a combination of multiple tasks that can be processed at once. +Executing a batch operation should always be functionally equivalent to +executing each of its tasks' operations individually and in order. + +For example, if the user sends two tasks: +1. import documents X +2. import documents Y + +We can combine the two tasks in a single batch: +1. import documents X and Y + +Processing this batch is functionally equivalent to processing the two +tasks individally, but should be much faster since we are only performing +one indexing operation. 
+*/ + +use std::collections::HashSet; +use std::ffi::OsStr; +use std::fs::{self, File}; +use std::io::BufWriter; + +use dump::IndexMetadata; +use log::{debug, error, info}; +use meilisearch_types::heed::{RoTxn, RwTxn}; +use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; +use meilisearch_types::milli::heed::CompactionOption; +use meilisearch_types::milli::update::{ + DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, + Settings as MilliSettings, +}; +use meilisearch_types::milli::{self, BEU32}; +use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; +use meilisearch_types::tasks::{Details, Kind, KindWithContent, Status, Task}; +use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; +use roaring::RoaringBitmap; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::autobatcher::{self, BatchKind}; +use crate::utils::{self, swap_index_uid_in_task}; +use crate::{Error, IndexScheduler, Query, Result, TaskId}; + +/// Represents a combination of tasks that can all be processed at the same time. +/// +/// A batch contains the set of tasks that it represents (accessible through +/// [`self.ids()`](Batch::ids)), as well as additional information on how to +/// be processed. +#[derive(Debug)] +pub(crate) enum Batch { + TaskCancelation(Task), + TaskDeletion(Task), + SnapshotCreation(Vec), + Dump(Task), + IndexOperation { op: IndexOperation, must_create_index: bool }, + IndexCreation { index_uid: String, primary_key: Option, task: Task }, + IndexUpdate { index_uid: String, primary_key: Option, task: Task }, + IndexDeletion { index_uid: String, tasks: Vec, index_has_been_created: bool }, + IndexSwap { task: Task }, +} + +/// A [batch](Batch) that combines multiple tasks operating on an index. +#[derive(Debug)] +pub(crate) enum IndexOperation { + DocumentImport { + index_uid: String, + primary_key: Option, + method: IndexDocumentsMethod, + documents_counts: Vec, + content_files: Vec, + tasks: Vec, + }, + DocumentDeletion { + index_uid: String, + // The vec associated with each document deletion tasks. + documents: Vec>, + tasks: Vec, + }, + DocumentClear { + index_uid: String, + tasks: Vec, + }, + Settings { + index_uid: String, + // TODO what's that boolean, does it mean that it removes things or what? + settings: Vec<(bool, Settings)>, + tasks: Vec, + }, + DocumentClearAndSetting { + index_uid: String, + cleared_tasks: Vec, + + // TODO what's that boolean, does it mean that it removes things or what? + settings: Vec<(bool, Settings)>, + settings_tasks: Vec, + }, + SettingsAndDocumentImport { + index_uid: String, + + primary_key: Option, + method: IndexDocumentsMethod, + documents_counts: Vec, + content_files: Vec, + document_import_tasks: Vec, + + // TODO what's that boolean, does it mean that it removes things or what? + settings: Vec<(bool, Settings)>, + settings_tasks: Vec, + }, +} + +impl Batch { + /// Return the task ids associated with this batch. + pub fn ids(&self) -> Vec { + match self { + Batch::TaskCancelation(task) + | Batch::TaskDeletion(task) + | Batch::Dump(task) + | Batch::IndexCreation { task, .. } + | Batch::IndexUpdate { task, .. } => vec![task.uid], + Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => { + tasks.iter().map(|task| task.uid).collect() + } + Batch::IndexOperation { op, .. } => match op { + IndexOperation::DocumentImport { tasks, .. } + | IndexOperation::DocumentDeletion { tasks, .. } + | IndexOperation::Settings { tasks, .. 
} + | IndexOperation::DocumentClear { tasks, .. } => { + tasks.iter().map(|task| task.uid).collect() + } + IndexOperation::SettingsAndDocumentImport { + document_import_tasks: tasks, + settings_tasks: other, + .. + } + | IndexOperation::DocumentClearAndSetting { + cleared_tasks: tasks, + settings_tasks: other, + .. + } => tasks.iter().chain(other).map(|task| task.uid).collect(), + }, + Batch::IndexSwap { task } => vec![task.uid], + } + } +} + +impl IndexOperation { + pub fn index_uid(&self) -> &str { + match self { + IndexOperation::DocumentImport { index_uid, .. } + | IndexOperation::DocumentDeletion { index_uid, .. } + | IndexOperation::DocumentClear { index_uid, .. } + | IndexOperation::Settings { index_uid, .. } + | IndexOperation::DocumentClearAndSetting { index_uid, .. } + | IndexOperation::SettingsAndDocumentImport { index_uid, .. } => index_uid, + } + } +} + +impl IndexScheduler { + /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. + /// + /// ## Arguments + /// - `rtxn`: read transaction + /// - `index_uid`: name of the index affected by the operations of the autobatch + /// - `batch`: the result of the autobatcher + pub(crate) fn create_next_batch_index( + &self, + rtxn: &RoTxn, + index_uid: String, + batch: BatchKind, + must_create_index: bool, + ) -> Result> { + match batch { + BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClear { + tasks: self.get_existing_tasks(rtxn, ids)?, + index_uid, + }, + must_create_index, + })), + BatchKind::DocumentImport { method, import_ids, .. } => { + let tasks = self.get_existing_tasks(rtxn, import_ids)?; + let primary_key = match &tasks[0].kind { + KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => { + primary_key.clone() + } + _ => unreachable!(), + }; + + let mut documents_counts = Vec::new(); + let mut content_files = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::DocumentAdditionOrUpdate { + content_file, + documents_count, + .. + } => { + documents_counts.push(documents_count); + content_files.push(content_file); + } + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + tasks, + }, + must_create_index, + })) + } + BatchKind::DocumentDeletion { deletion_ids } => { + let tasks = self.get_existing_tasks(rtxn, deletion_ids)?; + + let mut documents = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::DocumentDeletion { ref documents_ids, .. } => { + documents.push(documents_ids.clone()) + } + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentDeletion { index_uid, documents, tasks }, + must_create_index, + })) + } + BatchKind::Settings { settings_ids, .. } => { + let tasks = self.get_existing_tasks(rtxn, settings_ids)?; + + let mut settings = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::SettingsUpdate { + ref new_settings, is_deletion, .. 
+ } => settings.push((is_deletion, *new_settings.clone())), + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks }, + must_create_index, + })) + } + BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => { + let (index_uid, settings, settings_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::Settings { settings_ids, allow_index_creation }, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks, .. }, + .. + } => (index_uid, settings, tasks), + _ => unreachable!(), + }; + let (index_uid, cleared_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::DocumentClear { ids: other }, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::DocumentClear { index_uid, tasks }, + .. + } => (index_uid, tasks), + _ => unreachable!(), + }; + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + }, + must_create_index, + })) + } + BatchKind::SettingsAndDocumentImport { + settings_ids, + method, + allow_index_creation, + import_ids, + } => { + let settings = self.create_next_batch_index( + rtxn, + index_uid.clone(), + BatchKind::Settings { settings_ids, allow_index_creation }, + must_create_index, + )?; + + let document_import = self.create_next_batch_index( + rtxn, + index_uid.clone(), + BatchKind::DocumentImport { method, allow_index_creation, import_ids }, + must_create_index, + )?; + + match (document_import, settings) { + ( + Some(Batch::IndexOperation { + op: + IndexOperation::DocumentImport { + primary_key, + documents_counts, + content_files, + tasks: document_import_tasks, + .. + }, + .. + }), + Some(Batch::IndexOperation { + op: IndexOperation::Settings { settings, tasks: settings_tasks, .. }, + .. + }), + ) => Ok(Some(Batch::IndexOperation { + op: IndexOperation::SettingsAndDocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + document_import_tasks, + settings, + settings_tasks, + }, + must_create_index, + })), + _ => unreachable!(), + } + } + BatchKind::IndexCreation { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + let (index_uid, primary_key) = match &task.kind { + KindWithContent::IndexCreation { index_uid, primary_key } => { + (index_uid.clone(), primary_key.clone()) + } + _ => unreachable!(), + }; + Ok(Some(Batch::IndexCreation { index_uid, primary_key, task })) + } + BatchKind::IndexUpdate { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + let primary_key = match &task.kind { + KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(), + _ => unreachable!(), + }; + Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task })) + } + BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion { + index_uid, + index_has_been_created: must_create_index, + tasks: self.get_existing_tasks(rtxn, ids)?, + })), + BatchKind::IndexSwap { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + Ok(Some(Batch::IndexSwap { task })) + } + } + } + + /// Create the next batch to be processed; + /// 1. We get the *last* task to cancel. + /// 2. We get the *next* task to delete. + /// 3. We get the *next* snapshot to process. + /// 4. We get the *next* dump to process. + /// 5. 
We get the *next* tasks to process for a specific index. + pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result> { + #[cfg(test)] + self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; + + let enqueued = &self.get_status(rtxn, Status::Enqueued)?; + let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; + + // 1. we get the last task to cancel. + if let Some(task_id) = to_cancel.max() { + return Ok(Some(Batch::TaskCancelation( + self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?, + ))); + } + + // 2. we get the next task to delete + let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued; + if let Some(task_id) = to_delete.min() { + let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + return Ok(Some(Batch::TaskDeletion(task))); + } + + // 3. we batch the snapshot. + let to_snapshot = self.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; + if !to_snapshot.is_empty() { + return Ok(Some(Batch::SnapshotCreation(self.get_existing_tasks(rtxn, to_snapshot)?))); + } + + // 4. we batch the dumps. + let to_dump = self.get_kind(rtxn, Kind::DumpCreation)? & enqueued; + if let Some(to_dump) = to_dump.min() { + return Ok(Some(Batch::Dump( + self.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?, + ))); + } + + // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. + let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; + let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + // If the task is not associated with any index, verify that it is an index swap and + // create the batch directly. Otherwise, get the index name associated with the task + // and use the autobatcher to batch the enqueued tasks associated with it + + let index_name = if let Some(&index_name) = task.indexes().first() { + index_name + } else { + assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); + return Ok(Some(Batch::IndexSwap { task })); + }; + + let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; + + let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued; + + // If autobatching is disabled we only take one task at a time. + let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 }; + + let enqueued = index_tasks + .into_iter() + .take(tasks_limit) + .map(|task_id| { + self.get_task(rtxn, task_id) + .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + .map(|task| (task.uid, task.kind)) + }) + .collect::>>()?; + + if let Some((batchkind, create_index)) = + autobatcher::autobatch(enqueued, index_already_exists) + { + return self.create_next_batch_index( + rtxn, + index_name.to_string(), + batchkind, + create_index, + ); + } + + // If we found no tasks then we were notified for something that got autobatched + // somehow and there is nothing to do. + Ok(None) + } + + /// Apply the operation associated with the given batch. + /// + /// ## Return + /// The list of tasks that were processed. The metadata of each task in the returned + /// list is updated accordingly, with the exception of the its date fields + /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). 
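For orientation, a rough sketch of how `create_next_batch` and `process_batch` might be chained by the scheduler's internal run loop. Only those two calls come from the code in this diff; the transaction handling, wake-up, and write-back of the returned tasks are simplified assumptions:

```rust
// Hypothetical tick: everything except the two scheduler calls is illustrative.
fn tick(scheduler: &IndexScheduler) -> Result<()> {
    let rtxn = scheduler.env.read_txn()?;
    let batch = match scheduler.create_next_batch(&rtxn)? {
        Some(batch) => batch,
        None => return Ok(()), // nothing enqueued, go back to sleep
    };
    drop(rtxn);

    // Process the batch; the returned tasks carry their updated status and
    // details and would then be written back to the task store.
    let _finished_tasks = scheduler.process_batch(batch)?;
    Ok(())
}
```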
+ pub(crate) fn process_batch(&self, batch: Batch) -> Result> { + #[cfg(test)] + { + self.maybe_fail(crate::tests::FailureLocation::InsideProcessBatch)?; + self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?; + self.breakpoint(crate::Breakpoint::InsideProcessBatch); + } + match batch { + Batch::TaskCancelation(mut task) => { + // 1. Retrieve the tasks that matched the query at enqueue-time. + let matched_tasks = + if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind { + tasks + } else { + unreachable!() + }; + + let mut wtxn = self.env.write_txn()?; + let canceled_tasks_content_uuids = + self.cancel_matched_tasks(&mut wtxn, task.uid, matched_tasks)?; + + task.status = Status::Succeeded; + match &mut task.details { + Some(Details::TaskCancelation { + matched_tasks: _, + canceled_tasks, + original_query: _, + }) => { + *canceled_tasks = Some(canceled_tasks_content_uuids.len() as u64); + } + _ => unreachable!(), + } + + // We must only remove the content files if the transaction is successfully committed + // and if errors occurs when we are deleting files we must do our best to delete + // everything. We do not return the encountered errors when deleting the content + // files as it is not a breaking operation and we can safely continue our job. + match wtxn.commit() { + Ok(()) => { + for content_uuid in canceled_tasks_content_uuids { + if let Err(error) = self.delete_update_file(content_uuid) { + error!( + "We failed deleting the content file indentified as {}: {}", + content_uuid, error + ) + } + } + } + Err(e) => return Err(e.into()), + } + + Ok(vec![task]) + } + Batch::TaskDeletion(mut task) => { + // 1. Retrieve the tasks that matched the query at enqueue-time. + let matched_tasks = + if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { + tasks + } else { + unreachable!() + }; + + let mut wtxn = self.env.write_txn()?; + let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?; + + task.status = Status::Succeeded; + match &mut task.details { + Some(Details::TaskDeletion { + matched_tasks: _, + deleted_tasks, + original_query: _, + }) => { + *deleted_tasks = Some(deleted_tasks_count); + } + _ => unreachable!(), + } + wtxn.commit()?; + Ok(vec![task]) + } + Batch::SnapshotCreation(mut tasks) => { + fs::create_dir_all(&self.snapshots_path)?; + let temp_snapshot_dir = tempfile::tempdir()?; + + // 1. Snapshot the version file. + let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME); + fs::copy(&self.version_file_path, dst)?; + + // 2. Snapshot the index-scheduler LMDB env + // + // When we call copy_to_path, LMDB opens a read transaction by itself, + // we can't provide our own. It is an issue as we would like to know + // the update files to copy but new ones can be enqueued between the copy + // of the env and the new transaction we open to retrieve the enqueued tasks. + // So we prefer opening a new transaction after copying the env and copy more + // update files than not enough. + // + // Note that there cannot be any update files deleted between those + // two read operations as the task processing is synchronous. 
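A detail worth calling out in the cancelation branch: the LMDB transaction is committed *before* any update file is removed, and file-deletion failures are logged rather than propagated. The sketch below isolates that ordering; `commit` and the paths are hypothetical stand-ins for the real `RwTxn` and `FileStore`.

```rust
use std::fs;
use std::io;
use std::path::PathBuf;

/// Stand-in for committing a write transaction; the real code commits an LMDB `RwTxn`.
fn commit() -> io::Result<()> {
    Ok(())
}

/// Persist the state change first, then clean up files best-effort:
/// failing to remove a file must not fail the whole task, it is only reported.
fn cancel_and_cleanup(content_files: Vec<PathBuf>) -> io::Result<()> {
    // 1. Commit. If this fails we keep the files, since the tasks still reference them.
    commit()?;

    // 2. Only now is it safe to delete the content files of the canceled tasks.
    for path in content_files {
        if let Err(error) = fs::remove_file(&path) {
            // Logged and ignored in the real scheduler (`error!(...)`), never propagated.
            eprintln!("failed to delete the content file {}: {}", path.display(), error);
        }
    }
    Ok(())
}

fn main() -> io::Result<()> {
    // A file that does not exist: the error is reported but the call still succeeds.
    cancel_and_cleanup(vec![PathBuf::from("/tmp/does-not-exist.update")])
}
```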
+ + // 2.1 First copy the LMDB env of the index-scheduler + let dst = temp_snapshot_dir.path().join("tasks"); + fs::create_dir_all(&dst)?; + self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 2.2 Create a read transaction on the index-scheduler + let rtxn = self.env.read_txn()?; + + // 2.3 Create the update files directory + let update_files_dir = temp_snapshot_dir.path().join("update_files"); + fs::create_dir_all(&update_files_dir)?; + + // 2.4 Only copy the update files of the enqueued tasks + for task_id in self.get_status(&rtxn, Status::Enqueued)? { + let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + if let Some(content_uuid) = task.content_uuid() { + let src = self.file_store.get_update_path(content_uuid); + let dst = update_files_dir.join(content_uuid.to_string()); + fs::copy(src, dst)?; + } + } + + // 3. Snapshot every indexes + // TODO we are opening all of the indexes it can be too much we should unload all + // of the indexes we are trying to open. It would be even better to only unload + // the ones that were opened by us. Or maybe use a LRU in the index mapper. + for result in self.index_mapper.index_mapping.iter(&rtxn)? { + let (name, uuid) = result?; + let index = self.index_mapper.index(&rtxn, name)?; + let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); + fs::create_dir_all(&dst)?; + index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + } + + drop(rtxn); + + // 4. Snapshot the auth LMDB env + let dst = temp_snapshot_dir.path().join("auth"); + fs::create_dir_all(&dst)?; + // TODO We can't use the open_auth_store_env function here but we should + let auth = milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) // 1 GiB + .max_dbs(2) + .open(&self.auth_path)?; + auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 5. Copy and tarball the flat snapshot + // 5.1 Find the original name of the database + // TODO find a better way to get this path + let mut base_path = self.env.path().to_owned(); + base_path.pop(); + let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms"); + + // 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension + let snapshot_path = self.snapshots_path.join(format!("{}.snapshot", db_name)); + let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.snapshots_path)?; + compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?; + let file = temp_snapshot_file.persist(&snapshot_path)?; + + // 5.3 Change the permission to make the snapshot readonly + let mut permissions = file.metadata()?.permissions(); + permissions.set_readonly(true); + file.set_permissions(permissions)?; + + for task in &mut tasks { + task.status = Status::Succeeded; + } + + Ok(tasks) + } + Batch::Dump(mut task) => { + let started_at = OffsetDateTime::now_utc(); + let (keys, instance_uid, dump_uid) = + if let KindWithContent::DumpCreation { keys, instance_uid, dump_uid } = + &task.kind + { + (keys, instance_uid, dump_uid) + } else { + unreachable!(); + }; + let dump = dump::DumpWriter::new(*instance_uid)?; + + // 1. dump the keys + let mut dump_keys = dump.create_keys()?; + for key in keys { + dump_keys.push_key(key)?; + } + dump_keys.flush()?; + + let rtxn = self.env.read_txn()?; + + // 2. dump the tasks + let mut dump_tasks = dump.create_tasks_queue()?; + for ret in self.all_tasks.iter(&rtxn)? 
{ + let (_, mut t) = ret?; + let status = t.status; + let content_file = t.content_uuid(); + + // In the case we're dumping ourselves we want to be marked as finished + // to not loop over ourselves indefinitely. + if t.uid == task.uid { + let finished_at = OffsetDateTime::now_utc(); + + // We're going to fake the date because we don't know if everything is going to go well. + // But we need to dump the task as finished and successful. + // If something fail everything will be set appropriately in the end. + t.status = Status::Succeeded; + t.started_at = Some(started_at); + t.finished_at = Some(finished_at); + } + let mut dump_content_file = dump_tasks.push_task(&t.into())?; + + // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. + if let Some(content_file) = content_file { + if status == Status::Enqueued { + let content_file = self.file_store.get_update(content_file)?; + + let reader = DocumentsBatchReader::from_reader(content_file) + .map_err(milli::Error::from)?; + + let (mut cursor, documents_batch_index) = + reader.into_cursor_and_fields_index(); + + while let Some(doc) = + cursor.next_document().map_err(milli::Error::from)? + { + dump_content_file.push_document(&obkv_to_object( + &doc, + &documents_batch_index, + )?)?; + } + dump_content_file.flush()?; + } + } + } + dump_tasks.flush()?; + + // 3. Dump the indexes + for (uid, index) in self.index_mapper.indexes(&rtxn)? { + let rtxn = index.read_txn()?; + let metadata = IndexMetadata { + uid: uid.clone(), + primary_key: index.primary_key(&rtxn)?.map(String::from), + created_at: index.created_at(&rtxn)?, + updated_at: index.updated_at(&rtxn)?, + }; + let mut index_dumper = dump.create_index(&uid, &metadata)?; + + let fields_ids_map = index.fields_ids_map(&rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + // 3.1. Dump the documents + for ret in index.all_documents(&rtxn)? { + let (_id, doc) = ret?; + let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; + index_dumper.push_document(&document)?; + } + + // 3.2. Dump the settings + let settings = meilisearch_types::settings::settings(&index, &rtxn)?; + index_dumper.settings(&settings)?; + } + + let path = self.dumps_path.join(format!("{}.dump", dump_uid)); + let file = File::create(path)?; + dump.persist_to(BufWriter::new(file))?; + + // if we reached this step we can tell the scheduler we succeeded to dump ourselves. + task.status = Status::Succeeded; + Ok(vec![task]) + } + Batch::IndexOperation { op, must_create_index } => { + let index_uid = op.index_uid(); + let index = if must_create_index { + // create the index if it doesn't already exist + let wtxn = self.env.write_txn()?; + self.index_mapper.create_index(wtxn, index_uid)? + } else { + let rtxn = self.env.read_txn()?; + self.index_mapper.index(&rtxn, index_uid)? + }; + + let mut index_wtxn = index.write_txn()?; + let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; + index_wtxn.commit()?; + + Ok(tasks) + } + Batch::IndexCreation { index_uid, primary_key, task } => { + let wtxn = self.env.write_txn()?; + if self.index_mapper.exists(&wtxn, &index_uid)? 
{ + return Err(Error::IndexAlreadyExists(index_uid)); + } + self.index_mapper.create_index(wtxn, &index_uid)?; + + self.process_batch(Batch::IndexUpdate { index_uid, primary_key, task }) + } + Batch::IndexUpdate { index_uid, primary_key, mut task } => { + let rtxn = self.env.read_txn()?; + let index = self.index_mapper.index(&rtxn, &index_uid)?; + + if let Some(primary_key) = primary_key.clone() { + let mut index_wtxn = index.write_txn()?; + let mut builder = MilliSettings::new( + &mut index_wtxn, + &index, + self.index_mapper.indexer_config(), + ); + builder.set_primary_key(primary_key); + let must_stop_processing = self.must_stop_processing.clone(); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.get(), + )?; + index_wtxn.commit()?; + } + task.status = Status::Succeeded; + task.details = Some(Details::IndexInfo { primary_key }); + + Ok(vec![task]) + } + Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { + let wtxn = self.env.write_txn()?; + + // it's possible that the index doesn't exist + let number_of_documents = || -> Result { + let index = self.index_mapper.index(&wtxn, &index_uid)?; + let index_rtxn = index.read_txn()?; + Ok(index.number_of_documents(&index_rtxn)?) + }() + .unwrap_or_default(); + + // The write transaction is directly owned and commited inside. + match self.index_mapper.delete_index(wtxn, &index_uid) { + Ok(()) => (), + Err(Error::IndexNotFound(_)) if index_has_been_created => (), + Err(e) => return Err(e), + } + + // We set all the tasks details to the default value. + for task in &mut tasks { + task.status = Status::Succeeded; + task.details = match &task.kind { + KindWithContent::IndexDeletion { .. } => { + Some(Details::ClearAll { deleted_documents: Some(number_of_documents) }) + } + otherwise => otherwise.default_finished_details(), + }; + } + + Ok(tasks) + } + Batch::IndexSwap { mut task } => { + let mut wtxn = self.env.write_txn()?; + let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind { + swaps + } else { + unreachable!() + }; + for swap in swaps { + self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?; + } + wtxn.commit()?; + task.status = Status::Succeeded; + Ok(vec![task]) + } + } + } + + /// Swap the index `lhs` with the index `rhs`. + fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> { + // 1. Verify that both lhs and rhs are existing indexes + let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?; + if !index_lhs_exists { + return Err(Error::IndexNotFound(lhs.to_owned())); + } + let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?; + if !index_rhs_exists { + return Err(Error::IndexNotFound(rhs.to_owned())); + } + + // 2. Get the task set for index = name. + let mut index_lhs_task_ids = + self.get_task_ids(&Query::default().with_index(lhs.to_owned()))?; + index_lhs_task_ids.remove_range(task_id..); + let mut index_rhs_task_ids = + self.get_task_ids(&Query::default().with_index(rhs.to_owned()))?; + index_rhs_task_ids.remove_range(task_id..); + + // 3. before_name -> new_name in the task's KindWithContent + for task_id in &index_lhs_task_ids | &index_rhs_task_ids { + let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + swap_index_uid_in_task(&mut task, (lhs, rhs)); + self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?; + } + + // 4. remove the task from indexuid = before_name + // 5. 
add the task to indexuid = after_name + self.update_index(wtxn, lhs, |lhs_tasks| { + *lhs_tasks -= &index_lhs_task_ids; + *lhs_tasks |= &index_rhs_task_ids; + })?; + self.update_index(wtxn, rhs, |rhs_tasks| { + *rhs_tasks -= &index_rhs_task_ids; + *rhs_tasks |= &index_lhs_task_ids; + })?; + + // 6. Swap in the index mapper + self.index_mapper.swap(wtxn, lhs, rhs)?; + + Ok(()) + } + + /// Process the index operation on the given index. + /// + /// ## Return + /// The list of processed tasks. + fn apply_index_operation<'txn, 'i>( + &self, + index_wtxn: &'txn mut RwTxn<'i, '_>, + index: &'i Index, + operation: IndexOperation, + ) -> Result> { + match operation { + IndexOperation::DocumentClear { mut tasks, .. } => { + let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?; + + let mut first_clear_found = false; + for task in &mut tasks { + task.status = Status::Succeeded; + // The first document clear will effectively delete every documents + // in the database but the next ones will clear 0 documents. + task.details = match &task.kind { + KindWithContent::DocumentClear { .. } => { + let count = if first_clear_found { 0 } else { count }; + first_clear_found = true; + Some(Details::ClearAll { deleted_documents: Some(count) }) + } + otherwise => otherwise.default_details(), + }; + } + + Ok(tasks) + } + IndexOperation::DocumentImport { + index_uid: _, + primary_key, + method, + documents_counts, + content_files, + mut tasks, + } => { + let mut primary_key_has_been_set = false; + let must_stop_processing = self.must_stop_processing.clone(); + let indexer_config = self.index_mapper.indexer_config(); + // TODO use the code from the IndexCreate operation + if let Some(primary_key) = primary_key { + if index.primary_key(index_wtxn)?.is_none() { + let mut builder = + milli::update::Settings::new(index_wtxn, index, indexer_config); + builder.set_primary_key(primary_key); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.clone().get(), + )?; + primary_key_has_been_set = true; + } + } + + let config = IndexDocumentsConfig { update_method: method, ..Default::default() }; + + let mut builder = milli::update::IndexDocuments::new( + index_wtxn, + index, + indexer_config, + config, + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.get(), + )?; + + let mut results = Vec::new(); + for content_uuid in content_files.into_iter() { + let content_file = self.file_store.get_update(content_uuid)?; + let reader = DocumentsBatchReader::from_reader(content_file) + .map_err(milli::Error::from)?; + let (new_builder, user_result) = builder.add_documents(reader)?; + builder = new_builder; + + let user_result = match user_result { + Ok(count) => Ok(DocumentAdditionResult { + indexed_documents: count, + number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details. + }), + Err(e) => Err(milli::Error::from(e)), + }; + + results.push(user_result); + } + + if results.iter().any(|res| res.is_ok()) { + let addition = builder.execute()?; + info!("document addition done: {:?}", addition); + } else if primary_key_has_been_set { + // Everything failed but we've set a primary key. + // We need to remove it. 
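The import loop above threads the builder through `add_documents` once per content file, keeps a per-file result, and only pays for the final `execute()` when at least one file was accepted (otherwise the primary key that may have been set is rolled back). A toy model of that control flow, where `Builder`, `add_chunk` and `execute` are invented stand-ins rather than milli's API:

```rust
/// Toy stand-in for milli's `IndexDocuments` builder: it only counts accepted documents.
struct Builder {
    accepted: u64,
}

impl Builder {
    fn new() -> Self {
        Builder { accepted: 0 }
    }

    /// Each chunk either contributes a document count or an error message.
    /// The builder is consumed and returned, like `add_documents` does.
    fn add_chunk(mut self, chunk: Result<u64, String>) -> (Self, Result<u64, String>) {
        if let Ok(count) = &chunk {
            self.accepted += *count;
        }
        (self, chunk)
    }

    fn execute(self) -> u64 {
        self.accepted
    }
}

fn import(chunks: Vec<Result<u64, String>>) -> Option<u64> {
    let mut builder = Builder::new();
    let mut results = Vec::new();

    for chunk in chunks {
        let (new_builder, result) = builder.add_chunk(chunk);
        builder = new_builder;
        results.push(result);
    }

    // Only run the expensive indexing step if at least one chunk was accepted;
    // the real code otherwise resets the primary key it may have set earlier.
    if results.iter().any(|r| r.is_ok()) {
        Some(builder.execute())
    } else {
        None
    }
}

fn main() {
    assert_eq!(import(vec![Ok(3), Err("bad document".into()), Ok(2)]), Some(5));
    assert_eq!(import(vec![Err("bad".into())]), None);
    println!("ok");
}
```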
+ let mut builder = + milli::update::Settings::new(index_wtxn, index, indexer_config); + builder.reset_primary_key(); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.clone().get(), + )?; + } + + for (task, (ret, count)) in + tasks.iter_mut().zip(results.into_iter().zip(documents_counts)) + { + match ret { + Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) => { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentAdditionOrUpdate { + received_documents: number_of_documents, + indexed_documents: Some(indexed_documents), + }); + } + Err(error) => { + task.status = Status::Failed; + task.details = Some(Details::DocumentAdditionOrUpdate { + received_documents: count, + indexed_documents: Some(count), + }); + task.error = Some(error.into()) + } + } + } + + Ok(tasks) + } + IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => { + let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?; + documents.iter().flatten().for_each(|id| { + builder.delete_external_id(id); + }); + + let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?; + + for (task, documents) in tasks.iter_mut().zip(documents) { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentDeletion { + matched_documents: documents.len(), + deleted_documents: Some(deleted_documents.min(documents.len() as u64)), + }); + } + + Ok(tasks) + } + IndexOperation::Settings { index_uid: _, settings, mut tasks } => { + let indexer_config = self.index_mapper.indexer_config(); + // TODO merge the settings to only do *one* reindexation. + for (task, (_, settings)) in tasks.iter_mut().zip(settings) { + let checked_settings = settings.clone().check(); + task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); + + let mut builder = + milli::update::Settings::new(index_wtxn, index, indexer_config); + apply_settings_to_builder(&checked_settings, &mut builder); + let must_stop_processing = self.must_stop_processing.clone(); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.get(), + )?; + + task.status = Status::Succeeded; + } + + Ok(tasks) + } + IndexOperation::SettingsAndDocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + document_import_tasks, + settings, + settings_tasks, + } => { + let settings_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::Settings { + index_uid: index_uid.clone(), + settings, + tasks: settings_tasks, + }, + )?; + + let mut import_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::DocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + tasks: document_import_tasks, + }, + )?; + + let mut tasks = settings_tasks; + tasks.append(&mut import_tasks); + Ok(tasks) + } + IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + } => { + let mut import_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::DocumentClear { + index_uid: index_uid.clone(), + tasks: cleared_tasks, + }, + )?; + + let settings_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, + )?; + + let mut tasks = settings_tasks; + tasks.append(&mut import_tasks); + Ok(tasks) + } + } + } + + /// Delete each given task from all the databases (if it is 
deleteable). + /// + /// Return the number of tasks that were actually deleted. + fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result { + // 1. Remove from this list the tasks that we are not allowed to delete + let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; + let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); + + let all_task_ids = self.all_task_ids(wtxn)?; + let mut to_delete_tasks = all_task_ids & matched_tasks; + to_delete_tasks -= processing_tasks; + to_delete_tasks -= enqueued_tasks; + + // 2. We now have a list of tasks to delete, delete them + + let mut affected_indexes = HashSet::new(); + let mut affected_statuses = HashSet::new(); + let mut affected_kinds = HashSet::new(); + + for task_id in to_delete_tasks.iter() { + let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned())); + affected_statuses.insert(task.status); + affected_kinds.insert(task.kind.as_kind()); + // Note: don't delete the persisted task data since + // we can only delete succeeded, failed, and canceled tasks. + // In each of those cases, the persisted data is supposed to + // have been deleted already. + utils::remove_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; + if let Some(started_at) = task.started_at { + utils::remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?; + } + if let Some(finished_at) = task.finished_at { + utils::remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; + } + } + + for index in affected_indexes { + self.update_index(wtxn, &index, |bitmap| *bitmap -= &to_delete_tasks)?; + } + + for status in affected_statuses { + self.update_status(wtxn, status, |bitmap| *bitmap -= &to_delete_tasks)?; + } + + for kind in affected_kinds { + self.update_kind(wtxn, kind, |bitmap| *bitmap -= &to_delete_tasks)?; + } + + for task in to_delete_tasks.iter() { + self.all_tasks.delete(wtxn, &BEU32::new(task))?; + } + + Ok(to_delete_tasks.len()) + } + + /// Cancel each given task from all the databases (if it is cancelable). + /// + /// Returns the content files that the transaction owner must delete if the commit is successful. + fn cancel_matched_tasks( + &self, + wtxn: &mut RwTxn, + cancel_task_id: TaskId, + matched_tasks: &RoaringBitmap, + ) -> Result> { + let now = OffsetDateTime::now_utc(); + + // 1. Remove from this list the tasks that we are not allowed to cancel + // Notice that only the _enqueued_ ones are cancelable and we should + // have already aborted the indexation of the _processing_ ones + let cancelable_tasks = self.get_status(wtxn, Status::Enqueued)?; + let tasks_to_cancel = cancelable_tasks & matched_tasks; + + // 2. We now have a list of tasks to cancel, cancel them + let mut content_files_to_delete = Vec::new(); + for mut task in self.get_existing_tasks(wtxn, tasks_to_cancel.iter())? 
{ + if let Some(uuid) = task.content_uuid() { + content_files_to_delete.push(uuid); + } + task.status = Status::Canceled; + task.canceled_by = Some(cancel_task_id); + task.finished_at = Some(now); + self.update_task(wtxn, &task)?; + } + + Ok(content_files_to_delete) + } +} diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs new file mode 100644 index 000000000..4e404685d --- /dev/null +++ b/index-scheduler/src/error.rs @@ -0,0 +1,77 @@ +use meilisearch_types::error::{Code, ErrorCode}; +use meilisearch_types::{heed, milli}; +use thiserror::Error; + +use crate::TaskId; + +#[allow(clippy::large_enum_variant)] +#[derive(Error, Debug)] +pub enum Error { + #[error("Index `{0}` not found.")] + IndexNotFound(String), + #[error("Index `{0}` already exists.")] + IndexAlreadyExists(String), + #[error("Corrupted dump.")] + CorruptedDump, + #[error("Task `{0}` not found.")] + TaskNotFound(TaskId), + #[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uid`, `indexUid`, `status`, `type`.")] + TaskDeletionWithEmptyQuery, + #[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uid`, `indexUid`, `status`, `type`.")] + TaskCancelationWithEmptyQuery, + + #[error(transparent)] + Dump(#[from] dump::Error), + #[error(transparent)] + Heed(#[from] heed::Error), + #[error(transparent)] + Milli(#[from] milli::Error), + #[error("An unexpected crash occurred when processing the task.")] + ProcessBatchPanicked, + #[error(transparent)] + FileStore(#[from] file_store::Error), + #[error(transparent)] + IoError(#[from] std::io::Error), + #[error(transparent)] + Persist(#[from] tempfile::PersistError), + + #[error(transparent)] + Anyhow(#[from] anyhow::Error), + + // Irrecoverable errors: + #[error(transparent)] + CreateBatch(Box), + #[error("Corrupted task queue.")] + CorruptedTaskQueue, + #[error(transparent)] + TaskDatabaseUpdate(Box), + #[error(transparent)] + HeedTransaction(heed::Error), +} + +impl ErrorCode for Error { + fn error_code(&self) -> Code { + match self { + Error::IndexNotFound(_) => Code::IndexNotFound, + Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists, + Error::TaskNotFound(_) => Code::TaskNotFound, + Error::TaskDeletionWithEmptyQuery => Code::TaskDeletionWithEmptyQuery, + Error::TaskCancelationWithEmptyQuery => Code::TaskCancelationWithEmptyQuery, + + Error::Dump(e) => e.error_code(), + Error::Milli(e) => e.error_code(), + Error::ProcessBatchPanicked => Code::Internal, + // TODO: TAMO: are all these errors really internal? 
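`error.rs` funnels every failure of the scheduler into a single enum and then flattens it into a user-facing code, so callers never have to match on heed or I/O errors directly. A reduced, std-only sketch of that pattern (the `Code` variants below are placeholders, not the real `meilisearch_types::error::Code`):

```rust
use std::fmt;

#[derive(Debug, PartialEq)]
enum Code {
    IndexNotFound,
    TaskNotFound,
    Internal,
}

#[derive(Debug)]
enum Error {
    IndexNotFound(String),
    TaskNotFound(u32),
    Io(std::io::Error),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::IndexNotFound(name) => write!(f, "Index `{name}` not found."),
            Error::TaskNotFound(uid) => write!(f, "Task `{uid}` not found."),
            Error::Io(e) => write!(f, "{e}"),
        }
    }
}

// `?` can promote lower-level errors automatically, like the `#[from]` attributes do.
impl From<std::io::Error> for Error {
    fn from(e: std::io::Error) -> Self {
        Error::Io(e)
    }
}

impl Error {
    /// User-facing code: domain errors keep a precise code,
    /// everything infrastructural collapses into `Internal`.
    fn error_code(&self) -> Code {
        match self {
            Error::IndexNotFound(_) => Code::IndexNotFound,
            Error::TaskNotFound(_) => Code::TaskNotFound,
            Error::Io(_) => Code::Internal,
        }
    }
}

fn main() {
    let err = Error::IndexNotFound("movies".to_string());
    println!("{err} -> {:?}", err.error_code());
}
```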
+ Error::Heed(_) => Code::Internal, + Error::FileStore(_) => Code::Internal, + Error::IoError(_) => Code::Internal, + Error::Persist(_) => Code::Internal, + Error::Anyhow(_) => Code::Internal, + Error::CorruptedTaskQueue => Code::Internal, + Error::CorruptedDump => Code::Internal, + Error::TaskDatabaseUpdate(_) => Code::Internal, + Error::CreateBatch(_) => Code::Internal, + Error::HeedTransaction(_) => Code::Internal, + } + } +} diff --git a/index-scheduler/src/index_mapper.rs b/index-scheduler/src/index_mapper.rs new file mode 100644 index 000000000..80e4127c0 --- /dev/null +++ b/index-scheduler/src/index_mapper.rs @@ -0,0 +1,230 @@ +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; +use std::{fs, thread}; + +use log::error; +use meilisearch_types::heed::types::{SerdeBincode, Str}; +use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn}; +use meilisearch_types::milli::update::IndexerConfig; +use meilisearch_types::milli::Index; +use uuid::Uuid; + +use self::IndexStatus::{Available, BeingDeleted}; +use crate::{Error, Result}; + +const INDEX_MAPPING: &str = "index-mapping"; + +/// Structure managing meilisearch's indexes. +/// +/// It is responsible for: +/// 1. Creating new indexes +/// 2. Opening indexes and storing references to these opened indexes +/// 3. Accessing indexes through their uuid +/// 4. Mapping a user-defined name to each index uuid. +#[derive(Clone)] +pub struct IndexMapper { + /// Keep track of the opened indexes. Used mainly by the index resolver. + index_map: Arc>>, + + // TODO create a UUID Codec that uses the 16 bytes representation + /// Map an index name with an index uuid currently available on disk. + pub(crate) index_mapping: Database>, + + /// Path to the folder where the LMDB environments of each index are. + base_path: PathBuf, + index_size: usize, + pub indexer_config: Arc, +} + +/// Whether the index is available for use or is forbidden to be inserted back in the index map +#[allow(clippy::large_enum_variant)] +#[derive(Clone)] +pub enum IndexStatus { + /// Do not insert it back in the index map as it is currently being deleted. + BeingDeleted, + /// You can use the index without worrying about anything. + Available(Index), +} + +impl IndexMapper { + pub fn new( + env: &Env, + base_path: PathBuf, + index_size: usize, + indexer_config: IndexerConfig, + ) -> Result { + Ok(Self { + index_map: Arc::default(), + index_mapping: env.create_database(Some(INDEX_MAPPING))?, + base_path, + index_size, + indexer_config: Arc::new(indexer_config), + }) + } + + /// Create or open an index in the specified path. + /// The path *must* exists or an error will be thrown. + fn create_or_open_index(&self, path: &Path) -> Result { + let mut options = EnvOpenOptions::new(); + options.map_size(self.index_size); + options.max_readers(1024); + Ok(Index::new(options, path)?) + } + + /// Get or create the index. + pub fn create_index(&self, mut wtxn: RwTxn, name: &str) -> Result { + match self.index(&wtxn, name) { + Ok(index) => { + wtxn.commit()?; + Ok(index) + } + Err(Error::IndexNotFound(_)) => { + let uuid = Uuid::new_v4(); + self.index_mapping.put(&mut wtxn, name, &uuid)?; + + let index_path = self.base_path.join(uuid.to_string()); + fs::create_dir_all(&index_path)?; + let index = self.create_or_open_index(&index_path)?; + + wtxn.commit()?; + // TODO: it would be better to lazily create the index. But we need an Index::open function for milli. 
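The TODO above touches the central design of `IndexMapper`: indexes are opened lazily and cached in an `Arc<RwLock<HashMap<..>>>`, so a lookup first checks under a read lock and only re-checks under the write lock before actually opening anything. A self-contained sketch of that double-checked pattern, with a `String` standing in for the opened `Index` handle:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

#[derive(Clone)]
struct Mapper {
    // uuid -> opened handle; the real map stores an `IndexStatus` (Available / BeingDeleted).
    cache: Arc<RwLock<HashMap<u128, String>>>,
}

impl Mapper {
    /// Stand-in for `create_or_open_index`, which opens an LMDB environment on disk.
    fn open_on_disk(&self, uuid: u128) -> String {
        format!("index-{uuid}")
    }

    fn index(&self, uuid: u128) -> String {
        // Fast path: only a read lock, and clone the handle so the lock is dropped right away.
        {
            let cache = self.cache.read().unwrap();
            if let Some(handle) = cache.get(&uuid) {
                return handle.clone();
            }
        } // read lock released here, before we try to take the write lock

        // Slow path: take the write lock, but check again before opening, because another
        // thread may have opened the same index between our two locks.
        let mut cache = self.cache.write().unwrap();
        match cache.entry(uuid) {
            Entry::Occupied(entry) => entry.get().clone(),
            Entry::Vacant(entry) => {
                let handle = self.open_on_disk(uuid);
                entry.insert(handle.clone());
                handle
            }
        }
    }
}

fn main() {
    let mapper = Mapper { cache: Arc::default() };
    assert_eq!(mapper.index(42), "index-42"); // opened lazily on first access
    assert_eq!(mapper.index(42), "index-42"); // served from the cache afterwards
    println!("ok");
}
```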
+ if let Some(BeingDeleted) = + self.index_map.write().unwrap().insert(uuid, Available(index.clone())) + { + panic!("Uuid v4 conflict."); + } + + Ok(index) + } + error => error, + } + } + + /// Removes the index from the mapping table and the in-memory index map + /// but keeps the associated tasks. + pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> { + let uuid = self + .index_mapping + .get(&wtxn, name)? + .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; + + // Once we retrieved the UUID of the index we remove it from the mapping table. + assert!(self.index_mapping.delete(&mut wtxn, name)?); + + wtxn.commit()?; + // We remove the index from the in-memory index map. + let mut lock = self.index_map.write().unwrap(); + let closing_event = match lock.insert(uuid, BeingDeleted) { + Some(Available(index)) => Some(index.prepare_for_closing()), + _ => None, + }; + + drop(lock); + + let index_map = self.index_map.clone(); + let index_path = self.base_path.join(uuid.to_string()); + let index_name = name.to_string(); + thread::spawn(move || { + // We first wait to be sure that the previously opened index is effectively closed. + // This can take a lot of time, this is why we do that in a seperate thread. + if let Some(closing_event) = closing_event { + closing_event.wait(); + } + + // Then we remove the content from disk. + if let Err(e) = fs::remove_dir_all(&index_path) { + error!( + "An error happened when deleting the index {} ({}): {}", + index_name, uuid, e + ); + } + + // Finally we remove the entry from the index map. + assert!(matches!(index_map.write().unwrap().remove(&uuid), Some(BeingDeleted))); + }); + + Ok(()) + } + + pub fn exists(&self, rtxn: &RoTxn, name: &str) -> Result { + Ok(self.index_mapping.get(rtxn, name)?.is_some()) + } + + /// Return an index, may open it if it wasn't already opened. + pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result { + let uuid = self + .index_mapping + .get(rtxn, name)? + .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; + + // we clone here to drop the lock before entering the match + let index = self.index_map.read().unwrap().get(&uuid).cloned(); + let index = match index { + Some(Available(index)) => index, + Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())), + // since we're lazy, it's possible that the index has not been opened yet. + None => { + let mut index_map = self.index_map.write().unwrap(); + // between the read lock and the write lock it's not impossible + // that someone already opened the index (eg if two search happens + // at the same time), thus before opening it we check a second time + // if it's not already there. + // Since there is a good chance it's not already there we can use + // the entry method. + match index_map.entry(uuid) { + Entry::Vacant(entry) => { + let index_path = self.base_path.join(uuid.to_string()); + let index = self.create_or_open_index(&index_path)?; + entry.insert(Available(index.clone())); + index + } + Entry::Occupied(entry) => match entry.get() { + Available(index) => index.clone(), + BeingDeleted => return Err(Error::IndexNotFound(name.to_string())), + }, + } + } + }; + + Ok(index) + } + + /// Return all indexes, may open them if they weren't already opened. + pub fn indexes(&self, rtxn: &RoTxn) -> Result> { + self.index_mapping + .iter(rtxn)? + .map(|ret| { + ret.map_err(Error::from).and_then(|(name, _)| { + self.index(rtxn, name).map(|index| (name.to_string(), index)) + }) + }) + .collect() + } + + /// Swap two index names. 
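`delete_index` removes the mapping entry synchronously but defers the expensive part, waiting for the environment to close and wiping the directory, to a background thread. A reduced sketch of that hand-off; the closing event is modeled with a `JoinHandle`, whereas the real code waits on the event returned by `prepare_for_closing`:

```rust
use std::fs;
use std::path::PathBuf;
use std::thread::{self, JoinHandle};

/// Delete an index directory without blocking the caller:
/// wait for the "index closed" signal, then remove the files, then report.
fn delete_in_background(
    closing_event: Option<JoinHandle<()>>,
    index_path: PathBuf,
    index_name: String,
) -> JoinHandle<()> {
    thread::spawn(move || {
        // 1. Wait until every open handle to the index is gone; this can take a while,
        //    which is exactly why it happens on a separate thread.
        if let Some(closing_event) = closing_event {
            let _ = closing_event.join();
        }

        // 2. Remove the data from disk; failures are only logged, never propagated.
        if let Err(e) = fs::remove_dir_all(&index_path) {
            eprintln!("error while deleting index {index_name}: {e}");
        }
    })
}

fn main() {
    // Simulate an index that takes a moment to close.
    let closing = thread::spawn(|| thread::sleep(std::time::Duration::from_millis(10)));
    let handle = delete_in_background(
        Some(closing),
        PathBuf::from("/tmp/does-not-exist-index"),
        "movies".to_string(),
    );
    handle.join().unwrap(); // joined here only so the example terminates deterministically
    println!("deletion scheduled and completed");
}
```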
+ pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> { + let lhs_uuid = self + .index_mapping + .get(wtxn, lhs)? + .ok_or_else(|| Error::IndexNotFound(lhs.to_string()))?; + let rhs_uuid = self + .index_mapping + .get(wtxn, rhs)? + .ok_or_else(|| Error::IndexNotFound(rhs.to_string()))?; + + self.index_mapping.put(wtxn, lhs, &rhs_uuid)?; + self.index_mapping.put(wtxn, rhs, &lhs_uuid)?; + + Ok(()) + } + + pub fn index_exists(&self, rtxn: &RoTxn, name: &str) -> Result { + Ok(self.index_mapping.get(rtxn, name)?.is_some()) + } + + pub fn indexer_config(&self) -> &IndexerConfig { + &self.indexer_config + } +} diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs new file mode 100644 index 000000000..50846c555 --- /dev/null +++ b/index-scheduler/src/insta_snapshot.rs @@ -0,0 +1,238 @@ +use std::fmt::Write; + +use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; +use meilisearch_types::heed::{Database, RoTxn}; +use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Details, Task}; +use roaring::RoaringBitmap; + +use crate::index_mapper::IndexMapper; +use crate::{IndexScheduler, Kind, Status, BEI128}; + +pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { + let IndexScheduler { + autobatching_enabled, + must_stop_processing: _, + processing_tasks, + file_store, + env, + all_tasks, + status, + kind, + index_tasks, + enqueued_at, + started_at, + finished_at, + index_mapper, + wake_up: _, + dumps_path: _, + snapshots_path: _, + auth_path: _, + version_file_path: _, + test_breakpoint_sdr: _, + planned_failures: _, + run_loop_iteration: _, + } = scheduler; + + let rtxn = env.read_txn().unwrap(); + + let mut snap = String::new(); + + let processing_tasks = processing_tasks.read().unwrap().processing.clone(); + snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n")); + snap.push_str("### Processing Tasks:\n"); + snap.push_str(&snapshot_bitmap(&processing_tasks)); + snap.push_str("\n----------------------------------------------------------------------\n"); + + snap.push_str("### All Tasks:\n"); + snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Status:\n"); + snap.push_str(&snapshot_status(&rtxn, *status)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Kind:\n"); + snap.push_str(&snapshot_kind(&rtxn, *kind)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Index Tasks:\n"); + snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Index Mapper:\n"); + snap.push_str(&snapshot_index_mapper(&rtxn, index_mapper)); + snap.push_str("\n----------------------------------------------------------------------\n"); + + snap.push_str("### Enqueued At:\n"); + snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Started At:\n"); + snap.push_str(&snapshot_date_db(&rtxn, *started_at)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Finished At:\n"); + 
snap.push_str(&snapshot_date_db(&rtxn, *finished_at)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### File Store:\n"); + snap.push_str(&snapshot_file_store(file_store)); + snap.push_str("\n----------------------------------------------------------------------\n"); + + snap +} + +pub fn snapshot_file_store(file_store: &file_store::FileStore) -> String { + let mut snap = String::new(); + for uuid in file_store.__all_uuids() { + snap.push_str(&format!("{uuid}\n")); + } + snap +} + +pub fn snapshot_bitmap(r: &RoaringBitmap) -> String { + let mut snap = String::new(); + snap.push('['); + for x in r { + snap.push_str(&format!("{x},")); + } + snap.push(']'); + snap +} + +pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database, SerdeJson>) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (task_id, task) = next.unwrap(); + snap.push_str(&format!("{task_id} {}\n", snapshot_task(&task))); + } + snap +} + +pub fn snapshot_date_db( + rtxn: &RoTxn, + db: Database, CboRoaringBitmapCodec>, +) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (_timestamp, task_ids) = next.unwrap(); + snap.push_str(&format!("[timestamp] {}\n", snapshot_bitmap(&task_ids))); + } + snap +} + +pub fn snapshot_task(task: &Task) -> String { + let mut snap = String::new(); + let Task { + uid, + enqueued_at: _, + started_at: _, + finished_at: _, + error, + canceled_by, + details, + status, + kind, + } = task; + snap.push('{'); + snap.push_str(&format!("uid: {uid}, ")); + snap.push_str(&format!("status: {status}, ")); + if let Some(canceled_by) = canceled_by { + snap.push_str(&format!("canceled_by: {canceled_by}, ")); + } + if let Some(error) = error { + snap.push_str(&format!("error: {error:?}, ")); + } + if let Some(details) = details { + snap.push_str(&format!("details: {}, ", &snapshot_details(details))); + } + snap.push_str(&format!("kind: {kind:?}")); + + snap.push('}'); + snap +} + +fn snapshot_details(d: &Details) -> String { + match d { + Details::DocumentAdditionOrUpdate { + received_documents, + indexed_documents, + } => { + format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}") + } + Details::SettingsUpdate { settings } => { + format!("{{ settings: {settings:?} }}") + } + Details::IndexInfo { primary_key } => { + format!("{{ primary_key: {primary_key:?} }}") + } + Details::DocumentDeletion { + matched_documents: received_document_ids, + deleted_documents, + } => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"), + Details::ClearAll { deleted_documents } => { + format!("{{ deleted_documents: {deleted_documents:?} }}") + }, + Details::TaskCancelation { + matched_tasks, + canceled_tasks, + original_query, + } => { + format!("{{ matched_tasks: {matched_tasks:?}, canceled_tasks: {canceled_tasks:?}, original_query: {original_query:?} }}") + } + Details::TaskDeletion { + matched_tasks, + deleted_tasks, + original_query, + } => { + format!("{{ matched_tasks: {matched_tasks:?}, deleted_tasks: {deleted_tasks:?}, original_query: {original_query:?} }}") + }, + Details::Dump { dump_uid } => { + format!("{{ dump_uid: {dump_uid:?} }}") + }, + Details::IndexSwap { swaps } => { + format!("{{ swaps: {swaps:?} }}") + } + } +} + +pub fn snapshot_status( + rtxn: &RoTxn, + db: Database, RoaringBitmapCodec>, +) -> String { + let mut snap = String::new(); + let iter 
= db.iter(rtxn).unwrap(); + for next in iter { + let (status, task_ids) = next.unwrap(); + writeln!(snap, "{status} {}", snapshot_bitmap(&task_ids)).unwrap(); + } + snap +} +pub fn snapshot_kind(rtxn: &RoTxn, db: Database, RoaringBitmapCodec>) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (kind, task_ids) = next.unwrap(); + let kind = serde_json::to_string(&kind).unwrap(); + writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap(); + } + snap +} + +pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (index, task_ids) = next.unwrap(); + writeln!(snap, "{index} {}", snapshot_bitmap(&task_ids)).unwrap(); + } + snap +} + +pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String { + let names = mapper.indexes(rtxn).unwrap().into_iter().map(|(n, _)| n).collect::>(); + format!("{names:?}") +} diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs new file mode 100644 index 000000000..b60ea8718 --- /dev/null +++ b/index-scheduler/src/lib.rs @@ -0,0 +1,2772 @@ +/*! +This crate defines the index scheduler, which is responsible for: +1. Keeping references to meilisearch's indexes and mapping them to their +user-defined names. +2. Scheduling tasks given by the user and executing them, in batch if possible. + +When an `IndexScheduler` is created, a new thread containing a reference to the +scheduler is created. This thread runs the scheduler's run loop, where the +scheduler waits to be woken up to process new tasks. It wakes up when: + +1. it is launched for the first time +2. a new task is registered +3. a batch of tasks has been processed + +It is only within this thread that the scheduler is allowed to process tasks. +On the other hand, the publicly accessible methods of the scheduler can be +called asynchronously from any thread. These methods can either query the +content of the scheduler or enqueue new tasks. +*/ + +mod autobatcher; +mod batch; +pub mod error; +mod index_mapper; +#[cfg(test)] +mod insta_snapshot; +mod utils; + +pub type Result = std::result::Result; +pub type TaskId = u32; + +use std::ops::{Bound, RangeBounds}; +use std::path::PathBuf; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering::Relaxed; +use std::sync::{Arc, RwLock}; +use std::time::Duration; + +use dump::{KindDump, TaskDump, UpdateFile}; +pub use error::Error; +use file_store::FileStore; +use meilisearch_types::error::ResponseError; +use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; +use meilisearch_types::heed::{self, Database, Env}; +use meilisearch_types::milli; +use meilisearch_types::milli::documents::DocumentsBatchBuilder; +use meilisearch_types::milli::update::IndexerConfig; +use meilisearch_types::milli::{CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use roaring::RoaringBitmap; +use synchronoise::SignalEvent; +use time::OffsetDateTime; +use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, map_bound}; +use uuid::Uuid; + +use crate::index_mapper::IndexMapper; + +pub(crate) type BEI128 = + meilisearch_types::heed::zerocopy::I128; + +/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. +/// +/// An empty/default query (where each field is set to `None`) matches all tasks. +/// Each non-null field restricts the set of tasks further. 
+#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Query { + /// The maximum number of tasks to be matched + pub limit: Option, + /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched + pub from: Option, + /// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls + pub status: Option>, + /// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks. + /// + /// The kind of a task is given by: + /// ``` + /// # use meilisearch_types::tasks::{Task, Kind}; + /// # fn doc_func(task: Task) -> Kind { + /// task.kind.as_kind() + /// # } + /// ``` + pub kind: Option>, + /// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks + pub index_uid: Option>, + /// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched + pub uid: Option>, + + /// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub before_enqueued_at: Option, + /// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub after_enqueued_at: Option, + /// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub before_started_at: Option, + /// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub after_started_at: Option, + /// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. + pub before_finished_at: Option, + /// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. + pub after_finished_at: Option, +} + +impl Query { + /// Return `true` iff every field of the query is set to `None`, such that the query + /// matches all tasks. + pub fn is_empty(&self) -> bool { + matches!( + self, + Query { + limit: None, + from: None, + status: None, + kind: None, + index_uid: None, + uid: None, + before_enqueued_at: None, + after_enqueued_at: None, + before_started_at: None, + after_started_at: None, + before_finished_at: None, + after_finished_at: None, + } + ) + } + + /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes. + pub fn with_index(self, index_uid: String) -> Self { + let mut index_vec = self.index_uid.unwrap_or_default(); + index_vec.push(index_uid); + Self { index_uid: Some(index_vec), ..self } + } +} + +#[derive(Debug, Clone)] +struct ProcessingTasks { + /// The date and time at which the indexation started. + started_at: OffsetDateTime, + /// The list of tasks ids that are currently running. + processing: RoaringBitmap, +} + +impl ProcessingTasks { + /// Creates an empty `ProcessingAt` struct. + fn new() -> ProcessingTasks { + ProcessingTasks { started_at: OffsetDateTime::now_utc(), processing: RoaringBitmap::new() } + } + + /// Stores the currently processing tasks, and the date time at which it started. + fn start_processing_at(&mut self, started_at: OffsetDateTime, processing: RoaringBitmap) { + self.started_at = started_at; + self.processing = processing; + } + + /// Set the processing tasks to an empty list. + fn stop_processing_at(&mut self, stopped_at: OffsetDateTime) { + self.started_at = stopped_at; + self.processing = RoaringBitmap::new(); + } + + /// Returns `true` if there, at least, is one task that is currently processing we must stop. 
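The `Query` type above follows one convention throughout: a `None` field places no constraint and every `Some(..)` narrows the match, so the default query selects all tasks. A minimal illustration of that convention on a cut-down task type (both `Task` and `Filter` here are simplified stand-ins):

```rust
#[derive(Clone, Copy, PartialEq, Eq)]
enum Status {
    Enqueued,
    Succeeded,
}

struct Task {
    uid: u32,
    status: Status,
    index: &'static str,
}

/// Every `None` field means "no constraint": the default filter matches all tasks.
#[derive(Default)]
struct Filter {
    statuses: Option<Vec<Status>>,
    indexes: Option<Vec<&'static str>>,
    max_uid: Option<u32>, // upper bound on the task uid, e.g. for paginating downwards
}

impl Filter {
    fn matches(&self, task: &Task) -> bool {
        self.statuses.as_ref().map_or(true, |s| s.contains(&task.status))
            && self.indexes.as_ref().map_or(true, |i| i.contains(&task.index))
            && self.max_uid.map_or(true, |max| task.uid <= max)
    }
}

fn main() {
    let tasks = [
        Task { uid: 0, status: Status::Succeeded, index: "movies" },
        Task { uid: 1, status: Status::Enqueued, index: "movies" },
        Task { uid: 2, status: Status::Enqueued, index: "books" },
    ];

    // An empty filter matches every task.
    assert_eq!(tasks.iter().filter(|t| Filter::default().matches(t)).count(), 3);

    // Each `Some(..)` field narrows the selection.
    let filter = Filter {
        statuses: Some(vec![Status::Enqueued]),
        indexes: Some(vec!["movies"]),
        ..Default::default()
    };
    let matched: Vec<u32> = tasks.iter().filter(|t| filter.matches(t)).map(|t| t.uid).collect();
    assert_eq!(matched, vec![1]);
    println!("ok");
}
```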
+ fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { + !self.processing.is_disjoint(canceled_tasks) + } +} + +#[derive(Default, Clone, Debug)] +struct MustStopProcessing(Arc); + +impl MustStopProcessing { + fn get(&self) -> bool { + self.0.load(Relaxed) + } + + fn must_stop(&self) { + self.0.store(true, Relaxed); + } + + fn reset(&self) { + self.0.store(false, Relaxed); + } +} + +/// Database const names for the `IndexScheduler`. +mod db_name { + pub const ALL_TASKS: &str = "all-tasks"; + pub const STATUS: &str = "status"; + pub const KIND: &str = "kind"; + pub const INDEX_TASKS: &str = "index-tasks"; + pub const ENQUEUED_AT: &str = "enqueued-at"; + pub const STARTED_AT: &str = "started-at"; + pub const FINISHED_AT: &str = "finished-at"; +} + +#[cfg(test)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Breakpoint { + Start, + BatchCreated, + BeforeProcessing, + AfterProcessing, + AbortedIndexation, + ProcessBatchSucceeded, + ProcessBatchFailed, + InsideProcessBatch, +} + +#[derive(Debug)] +pub struct IndexSchedulerOptions { + /// The path to the version file of Meilisearch. + pub version_file_path: PathBuf, + /// The path to the folder containing the auth LMDB env. + pub auth_path: PathBuf, + /// The path to the folder containing the task databases. + pub tasks_path: PathBuf, + /// The path to the file store containing the files associated to the tasks. + pub update_file_path: PathBuf, + /// The path to the folder containing meilisearch's indexes. + pub indexes_path: PathBuf, + /// The path to the folder containing the snapshots. + pub snapshots_path: PathBuf, + /// The path to the folder containing the dumps. + pub dumps_path: PathBuf, + /// The maximum size, in bytes, of each meilisearch index. + pub task_db_size: usize, + /// The maximum size, in bytes, of the tasks index. + pub index_size: usize, + /// Configuration used during indexing for each meilisearch index. + pub indexer_config: IndexerConfig, + /// Set to `true` iff the index scheduler is allowed to automatically + /// batch tasks together, to process multiple tasks at once. + pub autobatching_enabled: bool, +} + +/// Structure which holds meilisearch's indexes and schedules the tasks +/// to be performed on them. +pub struct IndexScheduler { + /// The LMDB environment which the DBs are associated with. + pub(crate) env: Env, + + /// A boolean that can be set to true to stop the currently processing tasks. + pub(crate) must_stop_processing: MustStopProcessing, + + /// The list of tasks currently processing + pub(crate) processing_tasks: Arc>, + + /// The list of files referenced by the tasks + pub(crate) file_store: FileStore, + + // The main database, it contains all the tasks accessible by their Id. + pub(crate) all_tasks: Database, SerdeJson>, + + /// All the tasks ids grouped by their status. + // TODO we should not be able to serialize a `Status::Processing` in this database. + pub(crate) status: Database, RoaringBitmapCodec>, + /// All the tasks ids grouped by their kind. + pub(crate) kind: Database, RoaringBitmapCodec>, + /// Store the tasks associated to an index. 
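`MustStopProcessing` is just a shared `AtomicBool`: the scheduler flips it when a newly registered cancelation targets the batch being processed, and the indexing code polls it between steps through the `must_stop_processing.get()` closures seen earlier. A self-contained sketch of that cooperative-cancellation handshake:

```rust
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use std::thread;
use std::time::Duration;

/// Shared stop flag, mirroring `MustStopProcessing`.
#[derive(Default, Clone)]
struct MustStop(Arc<AtomicBool>);

impl MustStop {
    fn get(&self) -> bool {
        self.0.load(Relaxed)
    }
    fn must_stop(&self) {
        self.0.store(true, Relaxed);
    }
}

/// A long-running "indexing" loop that checks the flag between steps,
/// the same way milli checks the closure between indexing steps.
fn process(must_stop: MustStop) -> usize {
    let mut steps_done = 0;
    for _ in 0..1_000 {
        if must_stop.get() {
            break; // abort cooperatively; the task is then reported as canceled
        }
        thread::sleep(Duration::from_millis(1));
        steps_done += 1;
    }
    steps_done
}

fn main() {
    let flag = MustStop::default();
    let worker = {
        let flag = flag.clone();
        thread::spawn(move || process(flag))
    };

    // Simulate a cancelation task being registered while the batch is running.
    thread::sleep(Duration::from_millis(20));
    flag.must_stop();

    let steps = worker.join().unwrap();
    assert!(steps < 1_000);
    println!("stopped after {steps} steps");
}
```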
+ pub(crate) index_tasks: Database, + + /// Store the task ids of tasks which were enqueued at a specific date + pub(crate) enqueued_at: Database, CboRoaringBitmapCodec>, + + /// Store the task ids of finished tasks which started being processed at a specific date + pub(crate) started_at: Database, CboRoaringBitmapCodec>, + + /// Store the task ids of tasks which finished at a specific date + pub(crate) finished_at: Database, CboRoaringBitmapCodec>, + + /// In charge of creating, opening, storing and returning indexes. + pub(crate) index_mapper: IndexMapper, + + /// Get a signal when a batch needs to be processed. + pub(crate) wake_up: Arc, + + /// Whether auto-batching is enabled or not. + pub(crate) autobatching_enabled: bool, + + /// The path used to create the dumps. + pub(crate) dumps_path: PathBuf, + + /// The path used to create the snapshots. + pub(crate) snapshots_path: PathBuf, + + /// The path to the folder containing the auth LMDB env. + pub(crate) auth_path: PathBuf, + + /// The path to the version file of Meilisearch. + pub(crate) version_file_path: PathBuf, + + // ================= test + // The next entry is dedicated to the tests. + /// Provide a way to set a breakpoint in multiple part of the scheduler. + /// + /// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation. + #[cfg(test)] + test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>, + + /// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler. + /// + /// The first field is the iteration index and the second field identifies a location in the code. + #[cfg(test)] + planned_failures: Vec<(usize, tests::FailureLocation)>, + + /// A counter that is incremented before every call to [`tick`](IndexScheduler::tick) + #[cfg(test)] + run_loop_iteration: Arc>, +} + +impl IndexScheduler { + fn private_clone(&self) -> IndexScheduler { + IndexScheduler { + env: self.env.clone(), + must_stop_processing: self.must_stop_processing.clone(), + processing_tasks: self.processing_tasks.clone(), + file_store: self.file_store.clone(), + all_tasks: self.all_tasks, + status: self.status, + kind: self.kind, + index_tasks: self.index_tasks, + enqueued_at: self.enqueued_at, + started_at: self.started_at, + finished_at: self.finished_at, + index_mapper: self.index_mapper.clone(), + wake_up: self.wake_up.clone(), + autobatching_enabled: self.autobatching_enabled, + snapshots_path: self.snapshots_path.clone(), + dumps_path: self.dumps_path.clone(), + auth_path: self.auth_path.clone(), + version_file_path: self.version_file_path.clone(), + #[cfg(test)] + test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), + #[cfg(test)] + planned_failures: self.planned_failures.clone(), + #[cfg(test)] + run_loop_iteration: self.run_loop_iteration.clone(), + } + } +} + +impl IndexScheduler { + /// Create an index scheduler and start its run loop. 
+ pub fn new( + options: IndexSchedulerOptions, + #[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>, + #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>, + ) -> Result { + std::fs::create_dir_all(&options.tasks_path)?; + std::fs::create_dir_all(&options.update_file_path)?; + std::fs::create_dir_all(&options.indexes_path)?; + std::fs::create_dir_all(&options.dumps_path)?; + + let env = heed::EnvOpenOptions::new() + .max_dbs(9) + .map_size(options.task_db_size) + .open(options.tasks_path)?; + let file_store = FileStore::new(&options.update_file_path)?; + + // allow unreachable_code to get rids of the warning in the case of a test build. + let this = Self { + must_stop_processing: MustStopProcessing::default(), + processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), + file_store, + all_tasks: env.create_database(Some(db_name::ALL_TASKS))?, + status: env.create_database(Some(db_name::STATUS))?, + kind: env.create_database(Some(db_name::KIND))?, + index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?, + enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?, + started_at: env.create_database(Some(db_name::STARTED_AT))?, + finished_at: env.create_database(Some(db_name::FINISHED_AT))?, + index_mapper: IndexMapper::new( + &env, + options.indexes_path, + options.index_size, + options.indexer_config, + )?, + env, + // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things + wake_up: Arc::new(SignalEvent::auto(true)), + autobatching_enabled: options.autobatching_enabled, + dumps_path: options.dumps_path, + snapshots_path: options.snapshots_path, + auth_path: options.auth_path, + version_file_path: options.version_file_path, + + #[cfg(test)] + test_breakpoint_sdr, + #[cfg(test)] + planned_failures, + #[cfg(test)] + run_loop_iteration: Arc::new(RwLock::new(0)), + }; + + this.run(); + Ok(this) + } + + /// Start the run loop for the given index scheduler. + /// + /// This function will execute in a different thread and must be called + /// only once per index scheduler. + fn run(&self) { + let run = self.private_clone(); + + std::thread::spawn(move || loop { + run.wake_up.wait(); + + match run.tick() { + Ok(0) => (), + Ok(_) => run.wake_up.signal(), + Err(e) => { + log::error!("{}", e); + // Wait one second when an irrecoverable error occurs. + if matches!( + e, + Error::CorruptedTaskQueue + | Error::TaskDatabaseUpdate(_) + | Error::HeedTransaction(_) + | Error::CreateBatch(_) + ) { + std::thread::sleep(Duration::from_secs(1)); + } + } + } + }); + } + + pub fn indexer_config(&self) -> &IndexerConfig { + &self.index_mapper.indexer_config + } + + /// Return the index corresponding to the name. + /// + /// * If the index wasn't opened before, the index will be opened. + /// * If the index doesn't exist on disk, the `IndexNotFoundError` is thrown. + pub fn index(&self, name: &str) -> Result { + let rtxn = self.env.read_txn()?; + self.index_mapper.index(&rtxn, name) + } + + /// Return and open all the indexes. + pub fn indexes(&self) -> Result> { + let rtxn = self.env.read_txn()?; + self.index_mapper.indexes(&rtxn) + } + + /// Return the task ids matched by the given query. 
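The `run` method above is the entire scheduling loop: block on the wake-up signal, process one tick, and signal again right away if work was done so the queue keeps draining, with a one-second back-off on irrecoverable errors. A std-only sketch of that loop, using an mpsc channel as the wake-up signal where the real code uses `synchronoise::SignalEvent`:

```rust
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thread;
use std::time::Duration;

/// One scheduler tick: pops at most one "batch" from the queue and processes it,
/// returning the number of tasks handled.
fn tick(queue: &mut Vec<&'static str>) -> Result<usize, String> {
    match queue.pop() {
        Some(task) => {
            println!("processed {task}");
            Ok(1)
        }
        None => Ok(0),
    }
}

/// The run loop: wait for a wake-up, tick, and re-signal if work was done
/// so the queue keeps draining without waiting for a new registration.
fn run(wake_up: Receiver<()>, self_signal: Sender<()>, mut queue: Vec<&'static str>) {
    while wake_up.recv().is_ok() {
        match tick(&mut queue) {
            Ok(0) => (),                             // nothing to do: go back to sleep
            Ok(_) => self_signal.send(()).unwrap(),  // more work may remain: wake up again
            Err(e) => {
                eprintln!("{e}");
                thread::sleep(Duration::from_secs(1)); // back off on irrecoverable errors
            }
        }
    }
}

fn main() {
    let (sender, receiver) = channel();
    let self_signal = sender.clone();
    thread::spawn(move || run(receiver, self_signal, vec!["settings", "import"]));

    sender.send(()).unwrap();                  // a task was registered: wake the scheduler once
    thread::sleep(Duration::from_millis(100)); // give the loop time to drain the queue
}
```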
+ pub fn get_task_ids(&self, query: &Query) -> Result { + let rtxn = self.env.read_txn()?; + + let ProcessingTasks { started_at: started_at_processing, processing: processing_tasks } = + self.processing_tasks.read().unwrap().clone(); + + let mut tasks = self.all_task_ids(&rtxn)?; + + if let Some(from) = &query.from { + tasks.remove_range(from.saturating_add(1)..); + } + + if let Some(status) = &query.status { + let mut status_tasks = RoaringBitmap::new(); + for status in status { + match status { + // special case for Processing tasks + Status::Processing => { + status_tasks |= &processing_tasks; + } + status => status_tasks |= &self.get_status(&rtxn, *status)?, + }; + } + if !status.contains(&Status::Processing) { + tasks -= &processing_tasks; + } + tasks &= status_tasks; + } + + if let Some(uids) = &query.uid { + let uids = RoaringBitmap::from_iter(uids); + tasks &= &uids; + } + + if let Some(kind) = &query.kind { + let mut kind_tasks = RoaringBitmap::new(); + for kind in kind { + kind_tasks |= self.get_kind(&rtxn, *kind)?; + } + tasks &= &kind_tasks; + } + + if let Some(index) = &query.index_uid { + let mut index_tasks = RoaringBitmap::new(); + for index in index { + index_tasks |= self.index_tasks(&rtxn, index)?; + } + tasks &= &index_tasks; + } + + // For the started_at filter, we need to treat the part of the tasks that are processing from the part of the + // tasks that are not processing. The non-processing ones are filtered normally while the processing ones + // are entirely removed unless the in-memory startedAt variable falls within the date filter. + // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. + tasks = { + let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = + (&tasks - &processing_tasks, &tasks & &processing_tasks); + + // special case for Processing tasks + // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds + let mut clear_filtered_processing_tasks = + |start: Bound, end: Bound| { + let start = map_bound(start, |b| b.unix_timestamp_nanos()); + let end = map_bound(end, |b| b.unix_timestamp_nanos()); + let is_within_dates = RangeBounds::contains( + &(start, end), + &started_at_processing.unix_timestamp_nanos(), + ); + if !is_within_dates { + filtered_processing_tasks.clear(); + } + }; + match (query.after_started_at, query.before_started_at) { + (None, None) => (), + (None, Some(before)) => { + clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(before)) + } + (Some(after), None) => { + clear_filtered_processing_tasks(Bound::Excluded(after), Bound::Unbounded) + } + (Some(after), Some(before)) => { + clear_filtered_processing_tasks(Bound::Excluded(after), Bound::Excluded(before)) + } + }; + + keep_tasks_within_datetimes( + &rtxn, + &mut filtered_non_processing_tasks, + self.started_at, + query.after_started_at, + query.before_started_at, + )?; + filtered_non_processing_tasks | filtered_processing_tasks + }; + + keep_tasks_within_datetimes( + &rtxn, + &mut tasks, + self.enqueued_at, + query.after_enqueued_at, + query.before_enqueued_at, + )?; + + keep_tasks_within_datetimes( + &rtxn, + &mut tasks, + self.finished_at, + query.after_finished_at, + query.before_finished_at, + )?; + + if let Some(limit) = query.limit { + tasks = tasks.into_iter().rev().take(limit as usize).collect(); + } + + Ok(tasks) + } + + /// Returns the tasks matched by the given query. 
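`get_task_ids` above is mostly bitmap algebra: start from every task id, intersect with the union of ids allowed by each filter, and special-case `Status::Processing`, which only lives in memory. A small sketch of that shape, with `BTreeSet` standing in for `RoaringBitmap` and only the status filter modeled:

```rust
use std::collections::BTreeSet;

type Ids = BTreeSet<u32>;

struct Store {
    all: Ids,
    enqueued: Ids,
    succeeded: Ids,
    /// Ids currently being processed; kept in memory only, not in a status database.
    processing: Ids,
}

#[derive(Clone, Copy, PartialEq)]
enum Status {
    Enqueued,
    Succeeded,
    Processing,
}

fn task_ids(store: &Store, statuses: Option<&[Status]>) -> Ids {
    let mut tasks = store.all.clone();

    if let Some(statuses) = statuses {
        // Union of the ids allowed by each requested status...
        let mut allowed = Ids::new();
        for status in statuses {
            match status {
                Status::Enqueued => allowed.extend(&store.enqueued),
                Status::Succeeded => allowed.extend(&store.succeeded),
                // ...with `Processing` served from the in-memory set.
                Status::Processing => allowed.extend(&store.processing),
            }
        }
        // If `Processing` was not requested, processing tasks are excluded even though
        // the persisted databases may still list them as enqueued.
        if !statuses.contains(&Status::Processing) {
            tasks = &tasks - &store.processing;
        }
        tasks = &tasks & &allowed;
    }

    tasks
}

fn main() {
    let store = Store {
        all: Ids::from([0, 1, 2, 3]),
        enqueued: Ids::from([2, 3]),
        succeeded: Ids::from([0, 1]),
        processing: Ids::from([2]),
    };
    assert_eq!(task_ids(&store, Some(&[Status::Enqueued])), Ids::from([3]));
    assert_eq!(task_ids(&store, Some(&[Status::Processing])), Ids::from([2]));
    assert_eq!(task_ids(&store, None), Ids::from([0, 1, 2, 3]));
    println!("ok");
}
```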
+    pub fn get_tasks(&self, query: Query) -> Result<Vec<Task>> {
+        let tasks = self.get_task_ids(&query)?;
+        let rtxn = self.env.read_txn()?;
+
+        let tasks = self.get_existing_tasks(
+            &rtxn,
+            tasks.into_iter().rev().take(query.limit.unwrap_or(u32::MAX) as usize),
+        )?;
+
+        let ProcessingTasks { started_at, processing, .. } =
+            self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone();
+
+        let ret = tasks.into_iter();
+        if processing.is_empty() {
+            Ok(ret.collect())
+        } else {
+            Ok(ret
+                .map(|task| match processing.contains(task.uid) {
+                    true => {
+                        Task { status: Status::Processing, started_at: Some(started_at), ..task }
+                    }
+                    false => task,
+                })
+                .collect())
+        }
+    }
+
+    /// Register a new task in the scheduler.
+    ///
+    /// If it fails and data was associated with the task, it tries to delete the associated data.
+    pub fn register(&self, kind: KindWithContent) -> Result<Task> {
+        let mut wtxn = self.env.write_txn()?;
+
+        let mut task = Task {
+            uid: self.next_task_id(&wtxn)?,
+            enqueued_at: time::OffsetDateTime::now_utc(),
+            started_at: None,
+            finished_at: None,
+            error: None,
+            canceled_by: None,
+            details: kind.default_details(),
+            status: Status::Enqueued,
+            kind: kind.clone(),
+        };
+        // For deletion and cancelation tasks, we want to make extra sure that they
+        // don't attempt to delete/cancel tasks that are newer than themselves.
+        filter_out_references_to_newer_tasks(&mut task);
+        // Get rid of the mutability.
+        let task = task;
+
+        self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
+
+        for index in task.indexes() {
+            self.update_index(&mut wtxn, index, |bitmap| {
+                bitmap.insert(task.uid);
+            })?;
+        }
+
+        self.update_status(&mut wtxn, Status::Enqueued, |bitmap| {
+            bitmap.insert(task.uid);
+        })?;
+
+        self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
+            bitmap.insert(task.uid);
+        })?;
+
+        utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;
+
+        if let Err(e) = wtxn.commit() {
+            self.delete_persisted_task_data(&task)?;
+            return Err(e.into());
+        }
+
+        // If the registered task is a task cancelation
+        // we inform the processing tasks to stop (if necessary).
+        if let KindWithContent::TaskCancelation { tasks, .. } = kind {
+            let tasks_to_cancel = RoaringBitmap::from_iter(tasks);
+            if self.processing_tasks.read().unwrap().must_cancel_processing_tasks(&tasks_to_cancel)
+            {
+                self.must_stop_processing.must_stop();
+            }
+        }
+
+        // notify the scheduler loop to execute a new tick
+        self.wake_up.signal();
+
+        Ok(task)
+    }
+
+    /// Register a new task coming from a dump in the scheduler.
+    /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
+    pub fn register_dumped_task(
+        &mut self,
+        task: TaskDump,
+        content_file: Option<Box<UpdateFile>>,
+    ) -> Result<Task> {
+        // Currently we don't need to access the tasks queue while loading a dump, thus we can block everything.
+        let mut wtxn = self.env.write_txn()?;
+
+        let content_uuid = match content_file {
+            Some(content_file) if task.status == Status::Enqueued => {
+                let (uuid, mut file) = self.create_update_file()?;
+                let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
+                for doc in content_file {
+                    builder.append_json_object(&doc?)?;
+                }
+                builder.into_inner()?;
+                file.persist()?;
+
+                Some(uuid)
+            }
+            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
+            // in case we try to open it later.
+            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
+            _ => None,
+        };
+
+        let task = Task {
+            uid: task.uid,
+            enqueued_at: task.enqueued_at,
+            started_at: task.started_at,
+            finished_at: task.finished_at,
+            error: task.error,
+            canceled_by: task.canceled_by,
+            details: task.details,
+            status: task.status,
+            kind: match task.kind {
+                KindDump::DocumentImport {
+                    primary_key,
+                    method,
+                    documents_count,
+                    allow_index_creation,
+                } => KindWithContent::DocumentAdditionOrUpdate {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                    primary_key,
+                    method,
+                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
+                    documents_count,
+                    allow_index_creation,
+                },
+                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
+                    documents_ids,
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                },
+                KindDump::DocumentClear => KindWithContent::DocumentClear {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                },
+                KindDump::Settings { settings, is_deletion, allow_index_creation } => {
+                    KindWithContent::SettingsUpdate {
+                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                        new_settings: settings,
+                        is_deletion,
+                        allow_index_creation,
+                    }
+                }
+                KindDump::IndexDeletion => KindWithContent::IndexDeletion {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                },
+                KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                    primary_key,
+                },
+                KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                    primary_key,
+                },
+                KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
+                KindDump::TaskCancelation { query, tasks } => {
+                    KindWithContent::TaskCancelation { query, tasks }
+                }
+                KindDump::TasksDeletion { query, tasks } => {
+                    KindWithContent::TaskDeletion { query, tasks }
+                }
+                KindDump::DumpCreation { dump_uid, keys, instance_uid } => {
+                    KindWithContent::DumpCreation { dump_uid, keys, instance_uid }
+                }
+                KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
+            },
+        };
+
+        self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task)?;
+
+        for index in task.indexes() {
+            self.update_index(&mut wtxn, index, |bitmap| {
+                bitmap.insert(task.uid);
+            })?;
+        }
+
+        self.update_status(&mut wtxn, task.status, |bitmap| {
+            bitmap.insert(task.uid);
+        })?;
+
+        self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
+            bitmap.insert(task.uid);
+        })?;
+
+        wtxn.commit()?;
+        self.wake_up.signal();
+
+        Ok(task)
+    }
+
+    /// Create a new index without any associated task.
+    pub fn create_raw_index(&self, name: &str) -> Result<Index> {
+        let wtxn = self.env.write_txn()?;
+        let index = self.index_mapper.create_index(wtxn, name)?;
+
+        Ok(index)
+    }
+
+    /// Create a file and register it in the index scheduler.
+    ///
+    /// The returned file and uuid can be used to associate
+    /// some data to a task. The file will be kept until
+    /// the task has been fully processed.
+    pub fn create_update_file(&self) -> Result<(Uuid, file_store::File)> {
+        Ok(self.file_store.new_update()?)
+    }
+
+    #[cfg(test)]
+    pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> {
+        Ok(self.file_store.new_update_with_uuid(uuid)?)
+    }
+
+    /// Delete a file from the index scheduler.
+    ///
+    /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
+    pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> {
+        Ok(self.file_store.delete(uuid)?)
+    }
+
+    /// Perform one iteration of the run loop.
+    ///
+    /// 1. Find the next batch of tasks to be processed.
+    /// 2. Update the information of these tasks following the start of their processing.
+    /// 3. Update the in-memory list of processed tasks accordingly.
+    /// 4. Process the batch:
+    ///    - perform the actions of each batched task
+    ///    - update the information of each batched task following the end
+    ///      of their processing.
+    /// 5. Reset the in-memory list of processed tasks.
+    ///
+    /// Returns the number of processed tasks.
+    fn tick(&self) -> Result<usize> {
+        #[cfg(test)]
+        {
+            *self.run_loop_iteration.write().unwrap() += 1;
+            self.breakpoint(Breakpoint::Start);
+        }
+
+        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
+        let batch =
+            match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
+                Some(batch) => batch,
+                None => return Ok(0),
+            };
+        drop(rtxn);
+
+        // 1. store the starting date with the bitmap of processing tasks.
+        let mut ids = batch.ids();
+        ids.sort_unstable();
+        let processed_tasks = ids.len();
+        let processing_tasks = RoaringBitmap::from_sorted_iter(ids.iter().copied()).unwrap();
+        let started_at = OffsetDateTime::now_utc();
+
+        // We reset the must_stop flag to be sure that we don't stop processing tasks
+        self.must_stop_processing.reset();
+        self.processing_tasks.write().unwrap().start_processing_at(started_at, processing_tasks);
+
+        #[cfg(test)]
+        self.breakpoint(Breakpoint::BatchCreated);
+
+        // 2. Process the tasks
+        let res = {
+            let cloned_index_scheduler = self.private_clone();
+            let handle = std::thread::spawn(move || cloned_index_scheduler.process_batch(batch));
+            handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
+        };
+
+        #[cfg(test)]
+        self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
+
+        let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
+
+        let finished_at = OffsetDateTime::now_utc();
+        match res {
+            Ok(tasks) => {
+                #[cfg(test)]
+                self.breakpoint(Breakpoint::ProcessBatchSucceeded);
+                #[allow(unused_variables)]
+                for (i, mut task) in tasks.into_iter().enumerate() {
+                    task.started_at = Some(started_at);
+                    task.finished_at = Some(finished_at);
+
+                    #[cfg(test)]
+                    self.maybe_fail(
+                        tests::FailureLocation::UpdatingTaskAfterProcessBatchSuccess {
+                            task_uid: i as u32,
+                        },
+                    )?;
+
+                    self.update_task(&mut wtxn, &task)
+                        .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
+                    if let Err(e) = self.delete_persisted_task_data(&task) {
+                        log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
+                    }
+                }
+                log::info!("A batch of tasks was successfully completed.");
+            }
+            // If we have an abort error we must stop the tick here and re-schedule the tasks.
+            Err(Error::Milli(milli::Error::InternalError(
+                milli::InternalError::AbortedIndexation,
+            ))) => {
+                #[cfg(test)]
+                self.breakpoint(Breakpoint::AbortedIndexation);
+                wtxn.abort().map_err(Error::HeedTransaction)?;
+                return Ok(0);
+            }
+            // In case of a failure we must get back and patch all the tasks with the error.
+            Err(err) => {
+                #[cfg(test)]
+                self.breakpoint(Breakpoint::ProcessBatchFailed);
+                let error: ResponseError = err.into();
+                for id in ids {
+                    let mut task = self
+                        .get_task(&wtxn, id)
+                        .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
+                        .ok_or(Error::CorruptedTaskQueue)?;
+                    task.started_at = Some(started_at);
+                    task.finished_at = Some(finished_at);
+                    task.status = Status::Failed;
+                    task.error = Some(error.clone());
+
+                    #[cfg(test)]
+                    self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
+
+                    if let Err(e) = self.delete_persisted_task_data(&task) {
+                        log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
+                    }
+                    self.update_task(&mut wtxn, &task)
+                        .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
+                }
+            }
+        }
+
+        self.processing_tasks.write().unwrap().stop_processing_at(finished_at);
+
+        #[cfg(test)]
+        self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
+
+        wtxn.commit().map_err(Error::HeedTransaction)?;
+
+        #[cfg(test)]
+        self.breakpoint(Breakpoint::AfterProcessing);
+
+        Ok(processed_tasks)
+    }
+
+    pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
+        match task.content_uuid() {
+            Some(content_file) => self.delete_update_file(content_file),
+            None => Ok(()),
+        }
+    }
+
+    /// Blocks the thread until the test handle asks to progress to/through this breakpoint.
+    ///
+    /// Two messages are sent through the channel for each breakpoint.
+    /// The first message is `(b, false)` and the second message is `(b, true)`.
+    ///
+    /// Since the channel has a capacity of zero, the `send` and `recv` calls wait for each other.
+    /// So when the index scheduler calls `test_breakpoint_sdr.send(b, false)`, it blocks
+    /// the thread until the test catches up by calling `test_breakpoint_rcv.recv()` enough times.
+    /// From the test side, we call `recv()` repeatedly until we find the message `(breakpoint, false)`.
+    /// As soon as we find it, the index scheduler is unblocked but then waits again on the call to
+    /// `test_breakpoint_sdr.send(b, true)`. This message will only be sent once the
+    /// test asks to progress to the next `(b2, false)`.
+    #[cfg(test)]
+    fn breakpoint(&self, b: Breakpoint) {
+        // We send two messages. The first one will sync with the call
+        // to `handle.wait_till(b)`. The second one will block until the
+        // next call to `handle.wait_till(..)`.
+        self.test_breakpoint_sdr.send((b, false)).unwrap();
+        // This one will only be able to be sent if the test handle stays alive.
+        // If it fails, then it means that we have exited the test.
+        // By crashing with `unwrap`, we kill the run loop.
+ self.test_breakpoint_sdr.send((b, true)).unwrap(); + } +} + +#[cfg(test)] +mod tests { + use std::time::Instant; + + use big_s::S; + use file_store::File; + use meili_snap::snapshot; + use meilisearch_types::milli::obkv_to_json; + use meilisearch_types::milli::update::IndexDocumentsMethod::{ + ReplaceDocuments, UpdateDocuments, + }; + use meilisearch_types::tasks::IndexSwap; + use meilisearch_types::VERSION_FILE_NAME; + use tempfile::TempDir; + use time::Duration; + use uuid::Uuid; + + use super::*; + use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; + + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum FailureLocation { + InsideCreateBatch, + InsideProcessBatch, + PanicInsideProcessBatch, + AcquiringWtxn, + UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, + UpdatingTaskAfterProcessBatchFailure, + CommittingWtxn, + } + + impl IndexScheduler { + pub fn test( + autobatching_enabled: bool, + planned_failures: Vec<(usize, FailureLocation)>, + ) -> (Self, IndexSchedulerHandle) { + let tempdir = TempDir::new().unwrap(); + let (sender, receiver) = crossbeam::channel::bounded(0); + + let options = IndexSchedulerOptions { + version_file_path: tempdir.path().join(VERSION_FILE_NAME), + auth_path: tempdir.path().join("auth"), + tasks_path: tempdir.path().join("db_path"), + update_file_path: tempdir.path().join("file_store"), + indexes_path: tempdir.path().join("indexes"), + snapshots_path: tempdir.path().join("snapshots"), + dumps_path: tempdir.path().join("dumps"), + task_db_size: 1024 * 1024, // 1 MiB + index_size: 1024 * 1024, // 1 MiB + indexer_config: IndexerConfig::default(), + autobatching_enabled, + }; + + let index_scheduler = Self::new(options, sender, planned_failures).unwrap(); + + let index_scheduler_handle = + IndexSchedulerHandle { _tempdir: tempdir, test_breakpoint_rcv: receiver }; + + (index_scheduler, index_scheduler_handle) + } + + /// Return a [`CorruptedTaskQueue`](Error::CorruptedTaskQueue) error if a failure is planned + /// for the given location and current run loop iteration. + pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> { + if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location)) + { + match location { + FailureLocation::PanicInsideProcessBatch => { + panic!("simulated panic") + } + _ => Err(Error::CorruptedTaskQueue), + } + } else { + Ok(()) + } + } + } + + /// Return a `KindWithContent::IndexCreation` task + fn index_creation_task(index: &'static str, primary_key: &'static str) -> KindWithContent { + KindWithContent::IndexCreation { index_uid: S(index), primary_key: Some(S(primary_key)) } + } + /// Create a `KindWithContent::DocumentImport` task that imports documents. + /// + /// - `index_uid` is given as parameter + /// - `primary_key` is given as parameter + /// - `method` is set to `ReplaceDocuments` + /// - `content_file` is given as parameter + /// - `documents_count` is given as parameter + /// - `allow_index_creation` is set to `true` + fn replace_document_import_task( + index: &'static str, + primary_key: Option<&'static str>, + content_file_uuid: u128, + documents_count: u64, + ) -> KindWithContent { + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S(index), + primary_key: primary_key.map(ToOwned::to_owned), + method: ReplaceDocuments, + content_file: Uuid::from_u128(content_file_uuid), + documents_count, + allow_index_creation: true, + } + } + + /// Create an update file with the given file uuid. 
+ /// + /// The update file contains just one simple document whose id is given by `document_id`. + /// + /// The uuid of the file and its documents count is returned. + fn sample_documents( + index_scheduler: &IndexScheduler, + file_uuid: u128, + document_id: usize, + ) -> (File, u64) { + let content = format!( + r#" + {{ + "id" : "{document_id}" + }}"# + ); + + let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + (file, documents_count) + } + + pub struct IndexSchedulerHandle { + _tempdir: TempDir, + test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>, + } + + impl IndexSchedulerHandle { + /// Wait until the provided breakpoint is reached. + fn wait_till(&self, breakpoint: Breakpoint) { + self.test_breakpoint_rcv.iter().find(|b| *b == (breakpoint, false)); + } + + /// Wait for `n` tasks. + fn advance_n_batch(&self, n: usize) { + for _ in 0..n { + self.wait_till(Breakpoint::AfterProcessing); + } + } + } + + #[test] + fn register() { + // In this test, the handle doesn't make any progress, we only check that the tasks are registered + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kinds = [ + index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, 12), + replace_document_import_task("catto", None, 1, 50), + replace_document_import_task("doggo", Some("bone"), 2, 5000), + ]; + let (_, file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + file.persist().unwrap(); + let (_, file) = index_scheduler.create_update_file_with_uuid(1).unwrap(); + file.persist().unwrap(); + let (_, file) = index_scheduler.create_update_file_with_uuid(2).unwrap(); + file.persist().unwrap(); + + for (idx, kind) in kinds.into_iter().enumerate() { + let k = kind.as_kind(); + let task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + + assert_eq!(task.uid, idx as u32); + assert_eq!(task.status, Status::Enqueued); + assert_eq!(task.kind.as_kind(), k); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + } + + #[test] + fn insert_task_while_another_task_is_processing() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::BatchCreated); + index_scheduler.assert_internally_consistent(); + + // while the task is processing can we register another task? + index_scheduler.register(index_creation_task("index_b", "id")).unwrap(); + index_scheduler.assert_internally_consistent(); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + } + + /// We send a lot of tasks but notify the tasks scheduler only once as + /// we send them very fast, we must make sure that they are all processed. 
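+    /// (This relies on the run loop re-signalling `wake_up` whenever a `tick` processed at
+    /// least one task, so the remaining enqueued tasks are picked up by later iterations.)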
+ #[test] + fn process_tasks_inserted_without_new_signal() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::Start); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + let mut tasks = index_scheduler.get_tasks(Query::default()).unwrap(); + tasks.reverse(); + assert_eq!(tasks.len(), 3); + assert_eq!(tasks[0].status, Status::Succeeded); + assert_eq!(tasks[1].status, Status::Succeeded); + assert_eq!(tasks[2].status, Status::Succeeded); + } + + #[test] + fn process_tasks_without_autobatching() { + let (index_scheduler, handle) = IndexScheduler::test(false, vec![]); + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + let mut tasks = index_scheduler.get_tasks(Query::default()).unwrap(); + tasks.reverse(); + assert_eq!(tasks.len(), 4); + assert_eq!(tasks[0].status, Status::Succeeded); + assert_eq!(tasks[1].status, Status::Succeeded); + assert_eq!(tasks[2].status, Status::Succeeded); + assert_eq!(tasks[3].status, Status::Succeeded); + } + + #[test] + fn task_deletion_undeleteable() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + // here we have registered all the tasks, but the index scheduler + // has not 
progressed at all + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + index_scheduler + .register(KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1]), + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + // again, no progress made at all, but one more task is registered + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); + + // now we create the first batch + handle.wait_till(Breakpoint::BatchCreated); + index_scheduler.assert_internally_consistent(); + + // the task deletion should now be "processing" + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing"); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + // after the task deletion is processed, no task should actually have been deleted, + // because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable + // the "task deletion" task should be marked as "succeeded" and, in its details, the + // number of deleted tasks should be 0 + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done"); + } + + #[test] + fn task_deletion_deleteable() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + // first addition of documents should be successful + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task + index_scheduler + .register(KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); + } + + #[test] + fn task_deletion_delete_same_task_twice() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + // first addition of documents should be successful + 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task multiple times in a row + for _ in 0..2 { + index_scheduler + .register(KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + for _ in 0..2 { + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); + } + + #[test] + fn document_addition() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + handle.wait_till(Breakpoint::BatchCreated); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + } + + #[test] + fn document_addition_and_index_deletion() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + handle.wait_till(Breakpoint::Start); // The index creation. + handle.wait_till(Breakpoint::Start); // before anything happens. + handle.wait_till(Breakpoint::Start); // after the execution of the two tasks in a single batch. 
+ + snapshot!(snapshot_index_scheduler(&index_scheduler)); + } + + #[test] + fn do_not_batch_task_of_different_indexes() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + let index_names = ["doggos", "cattos", "girafos"]; + + for name in index_names { + index_scheduler + .register(KindWithContent::IndexCreation { + index_uid: name.to_string(), + primary_key: None, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for name in index_names { + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: name.to_string() }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for _ in 0..(index_names.len() * 2) { + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + } + + let mut tasks = index_scheduler.get_tasks(Query::default()).unwrap(); + tasks.reverse(); + assert_eq!(tasks.len(), 6); + assert_eq!(tasks[0].status, Status::Succeeded); + assert_eq!(tasks[1].status, Status::Succeeded); + assert_eq!(tasks[2].status, Status::Succeeded); + assert_eq!(tasks[3].status, Status::Succeeded); + assert_eq!(tasks[4].status, Status::Succeeded); + assert_eq!(tasks[5].status, Status::Succeeded); + } + + #[test] + fn swap_indexes() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let to_enqueue = [ + index_creation_task("a", "id"), + index_creation_task("b", "id"), + index_creation_task("c", "id"), + index_creation_task("d", "id"), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + index_scheduler + .register(KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, + ], + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed"); + + index_scheduler + .register(KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); + + index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }).unwrap(); + handle.wait_till(Breakpoint::AfterProcessing); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); + } + + #[test] + fn document_addition_and_index_deletion_on_unexisting_index() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + 
meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + handle.wait_till(Breakpoint::Start); // before anything happens. + handle.wait_till(Breakpoint::Start); // after the execution of the two tasks in a single batch. + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + } + + #[test] + fn cancel_enqueued_task() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } + + #[test] + fn cancel_succeeded_task() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0)) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); + + index_scheduler + .register(KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } + + #[test] + fn cancel_processing_task() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0)) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::InsideProcessBatch); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); + + index_scheduler + .register(KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + // Now we check that we can reach the AbortedIndexation error handling + handle.wait_till(Breakpoint::AbortedIndexation); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + + index_scheduler.assert_internally_consistent(); + + 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } + + #[test] + fn cancel_mix_of_tasks() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file1.persist().unwrap(); + let (file2, documents_count2) = sample_documents(&index_scheduler, 2, 2); + file2.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("beavero", None, 1, documents_count1), + replace_document_import_task("wolfo", None, 2, documents_count2), + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_processed"); + + handle.wait_till(Breakpoint::InsideProcessBatch); + index_scheduler + .register(KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1, 2]), + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); + + handle.wait_till(Breakpoint::AbortedIndexation); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } + + #[test] + fn test_document_replace() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + index_scheduler.assert_internally_consistent(); + // everything should be batched together. + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? 
+        let index = index_scheduler.index("doggos").unwrap();
+        let rtxn = index.read_txn().unwrap();
+        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let documents = index
+            .all_documents(&rtxn)
+            .unwrap()
+            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .collect::<Vec<_>>();
+        snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+    }
+
+    #[test]
+    fn test_document_update() {
+        let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
+
+        for i in 0..10 {
+            let content = format!(
+                r#"{{
+                    "id": {},
+                    "doggo": "bob {}"
+                }}"#,
+                i, i
+            );
+
+            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+            let documents_count = meilisearch_types::document_formats::read_json(
+                content.as_bytes(),
+                file.as_file_mut(),
+            )
+            .unwrap() as u64;
+            file.persist().unwrap();
+            index_scheduler
+                .register(KindWithContent::DocumentAdditionOrUpdate {
+                    index_uid: S("doggos"),
+                    primary_key: Some(S("id")),
+                    method: UpdateDocuments,
+                    content_file: uuid,
+                    documents_count,
+                    allow_index_creation: true,
+                })
+                .unwrap();
+            index_scheduler.assert_internally_consistent();
+        }
+
+        snapshot!(snapshot_index_scheduler(&index_scheduler));
+
+        index_scheduler.assert_internally_consistent();
+        // everything should be batched together.
+        handle.advance_n_batch(1);
+        index_scheduler.assert_internally_consistent();
+
+        snapshot!(snapshot_index_scheduler(&index_scheduler));
+
+        // Has everything been pushed successfully to milli?
+        let index = index_scheduler.index("doggos").unwrap();
+        let rtxn = index.read_txn().unwrap();
+        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let documents = index
+            .all_documents(&rtxn)
+            .unwrap()
+            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .collect::<Vec<_>>();
+        snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+    }
+
+    #[test]
+    fn test_mixed_document_addition() {
+        let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
+
+        for i in 0..10 {
+            let method = if i % 2 == 0 { UpdateDocuments } else { ReplaceDocuments };
+
+            let content = format!(
+                r#"{{
+                    "id": {},
+                    "doggo": "bob {}"
+                }}"#,
+                i, i
+            );
+
+            let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+            let documents_count = meilisearch_types::document_formats::read_json(
+                content.as_bytes(),
+                file.as_file_mut(),
+            )
+            .unwrap() as u64;
+            file.persist().unwrap();
+            index_scheduler
+                .register(KindWithContent::DocumentAdditionOrUpdate {
+                    index_uid: S("doggos"),
+                    primary_key: Some(S("id")),
+                    method,
+                    content_file: uuid,
+                    documents_count,
+                    allow_index_creation: true,
+                })
+                .unwrap();
+            index_scheduler.assert_internally_consistent();
+        }
+
+        snapshot!(snapshot_index_scheduler(&index_scheduler));
+
+        // Only half of the tasks should have been processed since we can't autobatch replace and update together.
+        handle.advance_n_batch(5);
+        index_scheduler.assert_internally_consistent();
+
+        snapshot!(snapshot_index_scheduler(&index_scheduler));
+
+        handle.advance_n_batch(5);
+        index_scheduler.assert_internally_consistent();
+
+        // Has everything been pushed successfully to milli?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn test_document_replace_without_autobatching() { + let (index_scheduler, handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything is processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn test_document_update_without_autobatching() { + let (index_scheduler, handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything is processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[macro_export] + macro_rules! debug_snapshot { + ($value:expr, @$snapshot:literal) => {{ + let value = format!("{:?}", $value); + meili_snap::snapshot!(value, @$snapshot); + }}; + } + + #[test] + fn simple_new() { + crate::IndexScheduler::test(true, vec![]); + } + + #[test] + fn query_tasks_from_and_limit() { + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + let kind = index_creation_task("doggo", "bone"); + let _task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + let kind = index_creation_task("whalo", "plankton"); + let _task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + let kind = index_creation_task("catto", "his_own_vomit"); + let _task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_n_batch(3); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "finished"); + + let query = Query { limit: Some(0), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { limit: Some(1), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { limit: Some(2), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); + + let query = Query { from: Some(1), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query { from: Some(2), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); + + let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,]"); + + let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + } + + #[test] + fn query_processing_tasks() { + let start_time = OffsetDateTime::now_utc(); + + let (index_scheduler, handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind).unwrap(); + let kind = index_creation_task("whalo", "fish"); + let _task = index_scheduler.register(kind).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.wait_till(Breakpoint::BatchCreated); + + let query = Query { status: Some(vec![Status::Processing]), ..Default::default() }; + let tasks = 
index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick + + let query = Query { status: Some(vec![Status::Enqueued]), ..Default::default() }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick + + let query = Query { + status: Some(vec![Status::Enqueued, Status::Processing]), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick + + let query = Query { + status: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test, which should excludes the enqueued tasks + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + let query = Query { + status: Some(vec![Status::Enqueued, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should excludes all of them + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + status: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should exclude the enqueued tasks and include the only processing task + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + handle.wait_till(Breakpoint::BatchCreated); + + let second_start_time = OffsetDateTime::now_utc(); + + let query = Query { + status: Some(vec![Status::Succeeded, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should include all tasks + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query { + status: Some(vec![Status::Succeeded, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + status: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // both succeeded and 
processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the second part of the test and before one minute after the + // second start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.wait_till(Breakpoint::BatchCreated); + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // we run the same query to verify that, and indeed find that the last task is matched + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + status: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // enqueued, succeeded, or processing tasks started after the second part of the test, should + // again only return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + handle.wait_till(Breakpoint::AfterProcessing); + // now the last task should have failed + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // so running the last query should return nothing + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + status: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + status: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + status: Some(vec![Status::Failed]), + uid: Some(vec![1]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // same query but with an invalid uid + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + status: Some(vec![Status::Failed]), + uid: Some(vec![2]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = index_scheduler.get_task_ids(&query).unwrap(); + // same query but with a valid uid + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + } + + #[test] + fn fail_in_create_batch_for_index_creation() { + let (index_scheduler, handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::InsideCreateBatch)]); + + let kinds = [index_creation_task("catto", "mouse")]; + + for kind in kinds { + let _task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.wait_till(Breakpoint::BatchCreated); + + // We skipped an iteration of `tick` to reach BatchCreated + assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 2); + // Otherwise nothing weird happened + 
index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler)); + } + + #[test] + fn fail_in_process_batch_for_index_creation() { + let (index_scheduler, handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + + let _task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + + // Still in the first iteration + assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); + // No matter what happens in process_batch, the index_scheduler should be internally consistent + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); + } + + #[test] + fn fail_in_process_batch_for_document_addition() { + let (index_scheduler, handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + handle.wait_till(Breakpoint::BatchCreated); + + snapshot!( + snapshot_index_scheduler(&index_scheduler), + name: "document_addition_batch_created" + ); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_failed"); + } + + #[test] + fn fail_in_update_task_after_process_batch_success_for_document_addition() { + let (index_scheduler, handle) = IndexScheduler::test( + true, + vec![(1, FailureLocation::UpdatingTaskAfterProcessBatchSuccess { task_uid: 0 })], + ); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + + // This tests that the index scheduler pauses for one second when an irrecoverable failure occurs + let start_time = Instant::now(); + + index_scheduler.assert_internally_consistent(); + handle.wait_till(Breakpoint::Start); + + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_succeeded_but_index_scheduler_not_updated"); + + handle.wait_till(Breakpoint::AfterProcessing); + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_iteration"); + + let test_duration = start_time.elapsed(); + assert!(test_duration.as_millis() > 1000); + } + + #[test] + fn test_document_addition_cant_create_index_without_index() { + // We're going to 
autobatch multiple document additions that don't have + // the right to create an index while there is no index currently. + // Thus, everything should be batched together and an IndexDoesNotExists + // error should be thrown. + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything should be batched together. + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // The index should not exist. + snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found."); + } + + #[test] + fn test_document_addition_cant_create_index_without_index_without_autobatching() { + // We're going to execute multiple document additions that don't have + // the right to create an index while there is no index currently. + // Since autobatching is disabled, every task should be processed + // sequentially and throw an IndexDoesNotExists. + let (index_scheduler, handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything is processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // The index should not exist. + snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found."); + } + + #[test] + fn test_document_addition_cant_create_index_with_index() { + // We're going to autobatch multiple document additions that don't have + // the right to create an index while there is already an index. + // Thus, everything should be batched together and no error should be + // thrown. + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + // Create the index.
+ index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything should be batched together. + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Has everything been pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn test_document_addition_cant_create_index_with_index_without_autobatching() { + // We're going to execute multiple document additions that don't have + // the right to create an index while there is already an index. + // Since autobatching is disabled, every task should be processed + // sequentially and no error should be thrown. + let (index_scheduler, handle) = IndexScheduler::test(false, vec![]); + + // Create the index. + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything is processed. + handle.advance_n_batch(5); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Has everything been pushed successfully in milli?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn test_document_addition_mixed_rights_with_index() { + // We're going to autobatch multiple document additions. + // - The index already exists + // - The first document addition doesn't have the right to create an index + // Can it batch with the other one? + let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + // Create the index. + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + let allow_index_creation = i % 2 != 0; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything should be batched together. + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Has everything been pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn test_document_addition_mixed_right_without_index_starts_with_cant_create() { + // We're going to autobatch multiple document additions. + // - The index does not exist + // - The first document addition doesn't have the right to create an index + // - The second does. They should not batch together. + // - The second should batch with everything else as it's going to create an index.
+ let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + let allow_index_creation = i % 2 != 0; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // A first batch should be processed with only the first documentAddition that's going to fail. + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Everything else should be batched together. + handle.advance_n_batch(1); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // Has everything been pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn panic_in_process_batch_for_index_creation() { + let (index_scheduler, handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::PanicInsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + + let _task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + + handle.wait_till(Breakpoint::AfterProcessing); + + // Still in the first iteration + assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); + // No matter what happens in process_batch, the index_scheduler should be internally consistent + index_scheduler.assert_internally_consistent(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); + } +} diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap new file mode 100644 index 000000000..659a325c5 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_query: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
+---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,] +canceled [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap new file mode 100644 index 000000000..6b44b0acc --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_query: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap new file mode 100644 index 000000000..e398ab205 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap @@ -0,0 +1,49 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { 
received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_query: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,3,] +canceled [1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,] +"taskCancelation" [3,] +---------------------------------------------------------------------- +### Index Tasks: +beavero [1,] +catto [0,] +wolfo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["beavero", "catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap new file mode 100644 index 000000000..8e3ef1692 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, 
documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +beavero [1,] +catto [0,] +wolfo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap new file mode 100644 index 000000000..219ea9968 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[1,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_query: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,] +"taskCancelation" [3,] +---------------------------------------------------------------------- +### Index Tasks: +beavero [1,] +catto [0,] +wolfo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] 
+---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap new file mode 100644 index 000000000..f0706934b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_query: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,] +canceled [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap new file mode 100644 index 000000000..9bcfbd2b3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap @@ -0,0 +1,34 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto 
[0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap new file mode 100644 index 000000000..7f071b2f2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_query: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap new file mode 100644 index 000000000..d16658b72 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: 
+enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap new file mode 100644 index 000000000..6abb00f81 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/1.snap @@ -0,0 +1,34 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap new file mode 100644 index 000000000..b9e745cf0 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/2.snap @@ -0,0 +1,34 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] 
+---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap new file mode 100644 index 000000000..2bcc9368d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/3.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/1.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/1.snap new file mode 100644 index 000000000..448988c8c --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/1.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] 
+---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"indexCreation" [0,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/2.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/2.snap new file mode 100644 index 000000000..6954d37e0 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/2.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"indexCreation" [0,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap new file mode 100644 index 000000000..3f921934d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] 
+---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"indexDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap new file mode 100644 index 000000000..2abd3e4cf --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"indexDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_create_batch_for_index_creation/1.snap 
b/index-scheduler/src/snapshots/lib.rs/fail_in_create_batch_for_index_creation/1.snap new file mode 100644 index 000000000..b78d63444 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_create_batch_for_index_creation/1.snap @@ -0,0 +1,33 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap new file mode 100644 index 000000000..b9e745cf0 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap @@ -0,0 +1,34 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap new file mode 
100644 index 000000000..750edbbf2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap new file mode 100644 index 000000000..11bfb09c1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + 
+---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap new file mode 100644 index 000000000..6abb00f81 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap @@ -0,0 +1,34 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/second_iteration.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/second_iteration.snap new file mode 100644 index 000000000..2bcc9368d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/second_iteration.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap new file mode 100644 index 000000000..ddac65249 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/1.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_b", primary_key: Some("id") }} +2 {uid: 2, status: enqueued, kind: IndexDeletion { index_uid: "index_a" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +index_a [0,2,] +index_b [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap b/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap new file mode 100644 index 000000000..211c67326 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] 
+---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_processing_tasks/end.snap b/index-scheduler/src/snapshots/lib.rs/query_processing_tasks/end.snap new file mode 100644 index 000000000..6b7ec2a2a --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_processing_tasks/end.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +failed [2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +whalo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["catto", "doggo"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_processing_tasks/start.snap b/index-scheduler/src/snapshots/lib.rs/query_processing_tasks/start.snap new file mode 100644 index 000000000..60c8de558 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_processing_tasks/start.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { 
primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }} +2 {uid: 2, status: enqueued, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +whalo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/finished.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/finished.snap new file mode 100644 index 000000000..dff6707f4 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/finished.snap @@ -0,0 +1,46 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("plankton") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("plankton") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("his_own_vomit") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("his_own_vomit") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [2,] +doggo [0,] +whalo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto", "doggo", "whalo"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/start.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/start.snap new file mode 100644 index 000000000..2717569f4 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/start.snap @@ -0,0 +1,39 @@ +--- +source: 
index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("plankton") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("plankton") }} +2 {uid: 2, status: enqueued, details: { primary_key: Some("his_own_vomit") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("his_own_vomit") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [2,] +doggo [0,] +whalo [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/register/1.snap b/index-scheduler/src/snapshots/lib.rs/register/1.snap new file mode 100644 index 000000000..95eaa11c5 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/register/1.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 50, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 50, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 5000, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,2,] +doggo [3,] +---------------------------------------------------------------------- +### Index Mapper: +[] 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap new file mode 100644 index 000000000..7fafc3db5 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap @@ -0,0 +1,56 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +4 {uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,] +---------------------------------------------------------------------- +### Index Tasks: +a [1,4,] +b [0,4,] +c [3,4,] +d [2,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/initial_tasks_processed.snap new file mode 100644 index 000000000..073f280f3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/initial_tasks_processed.snap @@ -0,0 +1,51 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] 
+---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,] +b [1,] +c [2,] +d [3,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap new file mode 100644 index 000000000..d820e04e6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap @@ -0,0 +1,60 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +4 {uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }} +5 {uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,5,] +---------------------------------------------------------------------- +### Index Tasks: +a [3,4,5,] +b [0,4,] +c [1,4,5,] 
+d [2,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap new file mode 100644 index 000000000..26bd1b0d3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +4 {uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }} +5 {uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }} +6 {uid: 6, status: succeeded, details: { swaps: [] }, kind: IndexSwap { swaps: [] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,5,6,] +---------------------------------------------------------------------- +### Index Tasks: +a [3,4,5,] +b [0,4,] +c [1,4,5,] +d [2,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] 
+[timestamp] [6,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap new file mode 100644 index 000000000..162cffd2b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap @@ -0,0 +1,38 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap new file mode 100644 index 000000000..c33926a04 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} 
+---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap new file mode 100644 index 000000000..bbea9ff8b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"taskDeletion" [2,3,] +---------------------------------------------------------------------- +### Index Tasks: +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap new file mode 100644 index 
000000000..162cffd2b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap @@ -0,0 +1,38 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap new file mode 100644 index 000000000..c33926a04 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap new file mode 100644 index 000000000..3ae98f06f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"taskDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap new file mode 100644 index 000000000..b22cad0ca --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, 
documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap new file mode 100644 index 000000000..acf3b752c --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +succeeded [3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +"taskDeletion" [3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] 
+---------------------------------------------------------------------- +### Started At: +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap new file mode 100644 index 000000000..f41fae458 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +"taskDeletion" [3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap new file mode 100644 index 000000000..15638b4b4 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[3,] 
+---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +"taskDeletion" [3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/1.snap new file mode 100644 index 000000000..5a1d5e749 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/1.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: 
ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,10,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 
+00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/2.snap new file mode 100644 index 000000000..1fac082df --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/2.snap @@ -0,0 +1,59 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", 
primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/3.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/3.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/1.snap new file mode 100644 index 000000000..ae959d293 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/1.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 
00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,10,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] 
+[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/2.snap new file mode 100644 index 000000000..6261c5f78 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/2.snap @@ -0,0 +1,72 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), 
method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [6,7,8,9,10,] +succeeded [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/3.snap new file mode 100644 index 000000000..f27170870 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/3.snap @@ -0,0 +1,77 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: 
ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] 
+[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/4.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/4.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/4.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/1.snap new file mode 100644 index 000000000..a6e6954fa --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: 
enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/2.snap new file mode 100644 index 000000000..983bde528 --- /dev/null +++ 
b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/2.snap @@ -0,0 +1,54 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { 
received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/1.snap new file mode 100644 index 000000000..2f21c9ef2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] 
+---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/2.snap new file mode 100644 index 000000000..40dfd4fd1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/2.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, 
content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +failed [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + 
+---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/3.snap new file mode 100644 index 000000000..9540e40bc --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/3.snap @@ -0,0 +1,72 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { 
index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### File Store: + 
+---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/1.snap new file mode 100644 index 000000000..1ea5cf1e6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", 
primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/2.snap new file mode 100644 index 000000000..8a6eb23e9 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/2.snap @@ -0,0 +1,63 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, 
content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/3.snap 
b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/3.snap new file mode 100644 index 000000000..88a3866a7 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/3.snap @@ -0,0 +1,57 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: 
DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,2,3,4,5,6,7,8,9,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/4.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/4.snap new file mode 100644 index 000000000..cbd8d175a --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/4.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/1.snap new file mode 100644 index 000000000..83f67d737 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/1.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, 
allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,10,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: 
+00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/2.snap new file mode 100644 index 000000000..09e43e490 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/2.snap @@ -0,0 +1,59 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, 
allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/3.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/3.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap new file mode 100644 index 000000000..3ef17fe8a --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { 
received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished 
At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap new file mode 100644 index 000000000..06c8fb066 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap @@ -0,0 +1,54 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, 
kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace/3.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace/3.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/1.snap new file mode 100644 index 000000000..f37b613e8 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, 
allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 
+00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/2.snap new file mode 100644 index 000000000..37aedde10 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/2.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, 
documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/3.snap new file mode 100644 index 000000000..028ec3e0b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/3.snap @@ -0,0 +1,72 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 
1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/4.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/4.snap new file mode 100644 index 000000000..5a839838d --- 
/dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/4.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap new file mode 100644 index 000000000..cfaccc46f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { 
index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap new file mode 100644 index 000000000..68d640fea --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap @@ -0,0 +1,54 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 
00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update/3.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update/3.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + 
"doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/1.snap new file mode 100644 index 000000000..ceee17298 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, 
indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/2.snap new file mode 100644 index 000000000..62cb62c71 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/2.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: 
DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/3.snap new file mode 100644 index 000000000..ed9f09f93 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/3.snap @@ -0,0 +1,72 @@ +--- +source: 
index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos 
[0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/4.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/4.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/4.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/1.snap b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/1.snap new file mode 100644 index 000000000..2875c299c --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/1.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, 
documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/2.snap b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/2.snap new file mode 100644 index 000000000..0f9af60e7 --- /dev/null +++ 
b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/2.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] 
+---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/3.snap b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/3.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/3.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs new file mode 100644 index 000000000..8828f102f --- /dev/null +++ b/index-scheduler/src/utils.rs @@ -0,0 +1,479 @@ +//! Utility functions on the DBs. Mainly getter and setters. + +use std::ops::Bound; + +use meilisearch_types::heed::types::{DecodeIgnore, OwnedType}; +use meilisearch_types::heed::{Database, RoTxn, RwTxn}; +use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status}; +use roaring::{MultiOps, RoaringBitmap}; +use time::OffsetDateTime; + +use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128}; + +impl IndexScheduler { + pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result { + enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() + } + + pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result> { + Ok(self.all_tasks.remap_data_type::().last(rtxn)?.map(|(k, _)| k.get() + 1)) + } + + pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result { + Ok(self.last_task_id(rtxn)?.unwrap_or_default()) + } + + pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result> { + Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?) + } + + /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a + /// `CorruptedTaskQueue` error will be throwed. 
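All of the accessors in this utility module hand back `RoaringBitmap`s of task ids, so cross-cutting queries reduce to plain bitmap algebra. A minimal sketch of that pattern (the helper below is illustrative only and not part of this patch; it assumes the `index_tasks` and `get_status` accessors defined in this same file):

```rust
use meilisearch_types::heed::RoTxn;
use meilisearch_types::tasks::Status;
use roaring::RoaringBitmap;

use crate::{IndexScheduler, Result};

impl IndexScheduler {
    /// Illustrative helper (not part of the patch): task ids that belong to
    /// `index` and already succeeded, obtained by intersecting two of the
    /// inverted-index bitmaps stored in LMDB.
    pub(crate) fn succeeded_tasks_of_index(
        &self,
        rtxn: &RoTxn,
        index: &str,
    ) -> Result<RoaringBitmap> {
        let of_index = self.index_tasks(rtxn, index)?; // every task id of `index`
        let succeeded = self.get_status(rtxn, Status::Succeeded)?; // every succeeded task id
        Ok(of_index & succeeded) // plain bitmap intersection
    }
}
```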
+ pub(crate) fn get_existing_tasks( + &self, + rtxn: &RoTxn, + tasks: impl IntoIterator, + ) -> Result> { + tasks + .into_iter() + .map(|task_id| { + self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + }) + .collect::>() + } + + pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { + let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; + + debug_assert_eq!(old_task.uid, task.uid); + + if old_task == *task { + return Ok(()); + } + + if old_task.status != task.status { + self.update_status(wtxn, old_task.status, |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_status(wtxn, task.status, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + if old_task.kind.as_kind() != task.kind.as_kind() { + self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { + bitmap.insert(task.uid); + })?; + } + + assert_eq!( + old_task.enqueued_at, task.enqueued_at, + "Cannot update a task's enqueued_at time" + ); + if old_task.started_at != task.started_at { + assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time"); + if let Some(started_at) = task.started_at { + insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?; + } + } + if old_task.finished_at != task.finished_at { + assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time"); + if let Some(finished_at) = task.finished_at { + insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; + } + } + + self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?; + Ok(()) + } + + /// Returns the whole set of tasks that belongs to this index. + pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result { + Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) + } + + pub(crate) fn update_index( + &self, + wtxn: &mut RwTxn, + index: &str, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.index_tasks(wtxn, index)?; + f(&mut tasks); + if tasks.is_empty() { + self.index_tasks.delete(wtxn, index)?; + } else { + self.index_tasks.put(wtxn, index, &tasks)?; + } + + Ok(()) + } + + pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result { + Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) + } + + pub(crate) fn put_status( + &self, + wtxn: &mut RwTxn, + status: Status, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.status.put(wtxn, &status, bitmap)?) + } + + pub(crate) fn update_status( + &self, + wtxn: &mut RwTxn, + status: Status, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_status(wtxn, status)?; + f(&mut tasks); + self.put_status(wtxn, status, &tasks)?; + + Ok(()) + } + + pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { + Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) + } + + pub(crate) fn put_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.kind.put(wtxn, &kind, bitmap)?) 
+ } + + pub(crate) fn update_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_kind(wtxn, kind)?; + f(&mut tasks); + self.put_kind(wtxn, kind, &tasks)?; + + Ok(()) + } +} + +pub(crate) fn insert_task_datetime( + wtxn: &mut RwTxn, + database: Database, CboRoaringBitmapCodec>, + time: OffsetDateTime, + task_id: TaskId, +) -> Result<()> { + let timestamp = BEI128::new(time.unix_timestamp_nanos()); + let mut task_ids = database.get(wtxn, ×tamp)?.unwrap_or_default(); + task_ids.insert(task_id); + database.put(wtxn, ×tamp, &RoaringBitmap::from_iter(task_ids))?; + Ok(()) +} + +pub(crate) fn remove_task_datetime( + wtxn: &mut RwTxn, + database: Database, CboRoaringBitmapCodec>, + time: OffsetDateTime, + task_id: TaskId, +) -> Result<()> { + let timestamp = BEI128::new(time.unix_timestamp_nanos()); + if let Some(mut existing) = database.get(wtxn, ×tamp)? { + existing.remove(task_id); + if existing.is_empty() { + database.delete(wtxn, ×tamp)?; + } else { + database.put(wtxn, ×tamp, &RoaringBitmap::from_iter(existing))?; + } + } + + Ok(()) +} + +pub(crate) fn keep_tasks_within_datetimes( + rtxn: &RoTxn, + tasks: &mut RoaringBitmap, + database: Database, CboRoaringBitmapCodec>, + after: Option, + before: Option, +) -> Result<()> { + let (start, end) = match (&after, &before) { + (None, None) => return Ok(()), + (None, Some(before)) => (Bound::Unbounded, Bound::Excluded(*before)), + (Some(after), None) => (Bound::Excluded(*after), Bound::Unbounded), + (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)), + }; + let mut collected_task_ids = RoaringBitmap::new(); + let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos())); + let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos())); + let iter = database.range(rtxn, &(start, end))?; + for r in iter { + let (_timestamp, task_ids) = r?; + collected_task_ids |= task_ids; + } + *tasks &= collected_task_ids; + Ok(()) +} + +// TODO: remove when Bound::map ( https://github.com/rust-lang/rust/issues/86026 ) is available on stable +pub(crate) fn map_bound(bound: Bound, map: impl FnOnce(T) -> U) -> Bound { + match bound { + Bound::Included(x) => Bound::Included(map(x)), + Bound::Excluded(x) => Bound::Excluded(map(x)), + Bound::Unbounded => Bound::Unbounded, + } +} + +pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) { + use KindWithContent as K; + let mut index_uids = vec![]; + match &mut task.kind { + K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid), + K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid), + K::DocumentClear { index_uid } => index_uids.push(index_uid), + K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid), + K::IndexDeletion { index_uid } => index_uids.push(index_uid), + K::IndexCreation { index_uid, .. } => index_uids.push(index_uid), + K::IndexUpdate { index_uid, .. } => index_uids.push(index_uid), + K::IndexSwap { swaps } => { + for IndexSwap { indexes: (lhs, rhs) } in swaps.iter_mut() { + if lhs == swap.0 || lhs == swap.1 { + index_uids.push(lhs); + } + if rhs == swap.0 || rhs == swap.1 { + index_uids.push(rhs); + } + } + } + K::TaskCancelation { .. } + | K::TaskDeletion { .. } + | K::DumpCreation { .. 
} + | K::SnapshotCreation => (), + }; + if let Some(Details::IndexSwap { swaps }) = &mut task.details { + for IndexSwap { indexes: (lhs, rhs) } in swaps.iter_mut() { + if lhs == swap.0 || lhs == swap.1 { + index_uids.push(lhs); + } + if rhs == swap.0 || rhs == swap.1 { + index_uids.push(rhs); + } + } + } + for index_uid in index_uids { + if index_uid == swap.0 { + *index_uid = swap.1.to_owned(); + } else if index_uid == swap.1 { + *index_uid = swap.0.to_owned(); + } + } +} + +/// Remove references to task ids that are greater than the id of the given task. +pub(crate) fn filter_out_references_to_newer_tasks(task: &mut Task) { + let new_nbr_of_matched_tasks = match &mut task.kind { + KindWithContent::TaskCancelation { tasks, .. } + | KindWithContent::TaskDeletion { tasks, .. } => { + tasks.remove_range(task.uid..); + tasks.len() + } + _ => return, + }; + if let Some( + Details::TaskCancelation { matched_tasks, .. } + | Details::TaskDeletion { matched_tasks, .. }, + ) = &mut task.details + { + *matched_tasks = new_nbr_of_matched_tasks; + } +} + +#[cfg(test)] +impl IndexScheduler { + /// Asserts that the index scheduler's content is internally consistent. + pub fn assert_internally_consistent(&self) { + let rtxn = self.env.read_txn().unwrap(); + for task in self.all_tasks.iter(&rtxn).unwrap() { + let (task_id, task) = task.unwrap(); + let task_id = task_id.get(); + + let task_index_uid = task.index_uid().map(ToOwned::to_owned); + + let Task { + uid, + enqueued_at, + started_at, + finished_at, + error: _, + canceled_by, + details, + status, + kind, + } = task; + assert_eq!(uid, task.uid); + if let Some(task_index_uid) = &task_index_uid { + assert!(self + .index_tasks + .get(&rtxn, task_index_uid.as_str()) + .unwrap() + .unwrap() + .contains(task.uid)); + } + let db_enqueued_at = self + .enqueued_at + .get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos())) + .unwrap() + .unwrap(); + assert!(db_enqueued_at.contains(task_id)); + if let Some(started_at) = started_at { + let db_started_at = self + .started_at + .get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos())) + .unwrap() + .unwrap(); + assert!(db_started_at.contains(task_id)); + } + if let Some(finished_at) = finished_at { + let db_finished_at = self + .finished_at + .get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos())) + .unwrap() + .unwrap(); + assert!(db_finished_at.contains(task_id)); + } + if let Some(canceled_by) = canceled_by { + let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap(); + assert!(db_canceled_tasks.contains(uid)); + let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap(); + assert_eq!(db_canceling_task.status, Status::Succeeded); + match db_canceling_task.kind { + KindWithContent::TaskCancelation { query: _, tasks } => { + assert!(tasks.contains(uid)); + } + _ => panic!(), + } + } + if let Some(details) = details { + match details { + Details::IndexSwap { swaps: sw1 } => { + if let KindWithContent::IndexSwap { swaps: sw2 } = &kind { + assert_eq!(&sw1, sw2); + } + } + Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => { + assert_eq!(kind.as_kind(), Kind::DocumentAdditionOrUpdate); + if let Some(indexed_documents) = indexed_documents { + assert_eq!(status, Status::Succeeded); + assert!(indexed_documents <= received_documents); + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::SettingsUpdate { settings: _ } => { + assert_eq!(kind.as_kind(), Kind::SettingsUpdate); + } + Details::IndexInfo { primary_key: pk1 } => match 
&kind { + KindWithContent::IndexCreation { index_uid, primary_key: pk2 } + | KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => { + self.index_tasks + .get(&rtxn, index_uid.as_str()) + .unwrap() + .unwrap() + .contains(uid); + assert_eq!(&pk1, pk2); + } + _ => panic!(), + }, + Details::DocumentDeletion { + matched_documents: received_document_ids, + deleted_documents, + } => { + if let Some(deleted_documents) = deleted_documents { + assert_eq!(status, Status::Succeeded); + assert!(deleted_documents <= received_document_ids as u64); + assert_eq!(kind.as_kind(), Kind::DocumentDeletion); + + match &kind { + KindWithContent::DocumentDeletion { index_uid, documents_ids } => { + assert_eq!(&task_index_uid.unwrap(), index_uid); + assert!(documents_ids.len() >= received_document_ids); + } + _ => panic!(), + } + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::ClearAll { deleted_documents } => { + assert!(matches!( + kind.as_kind(), + Kind::DocumentDeletion | Kind::IndexDeletion + )); + if deleted_documents.is_some() { + assert_eq!(status, Status::Succeeded); + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::TaskCancelation { matched_tasks, canceled_tasks, original_query } => { + if let Some(canceled_tasks) = canceled_tasks { + assert_eq!(status, Status::Succeeded); + assert!(canceled_tasks <= matched_tasks); + match &kind { + KindWithContent::TaskCancelation { query, tasks } => { + assert_eq!(query, &original_query); + assert_eq!(tasks.len(), matched_tasks); + } + _ => panic!(), + } + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::TaskDeletion { matched_tasks, deleted_tasks, original_query } => { + if let Some(deleted_tasks) = deleted_tasks { + assert_eq!(status, Status::Succeeded); + assert!(deleted_tasks <= matched_tasks); + match &kind { + KindWithContent::TaskDeletion { query, tasks } => { + assert_eq!(query, &original_query); + assert_eq!(tasks.len(), matched_tasks); + } + _ => panic!(), + } + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::Dump { dump_uid: d1 } => { + assert!( + matches!(&kind, KindWithContent::DumpCreation { dump_uid: d2, keys: _, instance_uid: _ } if &d1 == d2 ) + ); + } + } + } + + assert!(self.get_status(&rtxn, status).unwrap().contains(uid)); + assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid)); + + if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. 
} = kind { + match status { + Status::Enqueued | Status::Processing => { + assert!(self.file_store.__all_uuids().contains(&content_file)); + } + Status::Succeeded | Status::Failed | Status::Canceled => { + assert!(!self.file_store.__all_uuids().contains(&content_file)); + } + } + } + } + } +} diff --git a/meili-snap/Cargo.toml b/meili-snap/Cargo.toml new file mode 100644 index 000000000..292b60cfa --- /dev/null +++ b/meili-snap/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "meili-snap" +version = "0.1.0" +edition = "2021" + +[dependencies] +insta = { version = "^1.19.1", features = ["json", "redactions"] } +md5 = "0.7.0" +once_cell = "1.15" diff --git a/meili-snap/src/lib.rs b/meili-snap/src/lib.rs new file mode 100644 index 000000000..a2abd0cea --- /dev/null +++ b/meili-snap/src/lib.rs @@ -0,0 +1,276 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; + +pub use insta; +use once_cell::sync::Lazy; + +static SNAPSHOT_NAMES: Lazy>> = Lazy::new(Mutex::default); + +/// Return the md5 hash of the given string +pub fn hash_snapshot(snap: &str) -> String { + let hash = md5::compute(snap.as_bytes()); + let hash_str = format!("{hash:x}"); + hash_str +} + +#[track_caller] +pub fn default_snapshot_settings_for_test<'a>( + test_name: &str, + name: Option<&'a str>, +) -> (insta::Settings, Cow<'a, str>, bool) { + let mut settings = insta::Settings::clone_current(); + settings.set_prepend_module_to_snapshot(false); + let path = Path::new(std::panic::Location::caller().file()); + let filename = path.file_name().unwrap().to_str().unwrap(); + settings.set_omit_expression(true); + + let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name); + let test_name = test_name.rsplit("::").next().unwrap().to_owned(); + + let path = Path::new("snapshots").join(filename).join(&test_name); + settings.set_snapshot_path(path.clone()); + let snap_name = if let Some(name) = name { + Cow::Borrowed(name) + } else { + let mut snapshot_names = SNAPSHOT_NAMES.lock().unwrap(); + let counter = snapshot_names.entry(path).or_default(); + *counter += 1; + Cow::Owned(format!("{counter}")) + }; + + let store_whole_snapshot = + std::env::var("MEILI_TEST_FULL_SNAPS").unwrap_or_else(|_| "false".to_owned()); + let store_whole_snapshot: bool = store_whole_snapshot.parse().unwrap(); + + (settings, snap_name, store_whole_snapshot) +} + +/** +Create a hashed snapshot test. + +## Arguments: + +1. The content of the snapshot. It is an expression whose result implements the `fmt::Display` trait. +2. `name: `: the identifier for the snapshot test (optional) +3. `@""` to write the hash of the snapshot inline + +## Behaviour +The content of the snapshot will be saved both in full and as a hash. The full snapshot will +be saved with the name `.full.snap` but will not be saved to the git repository. The hashed +snapshot will be saved inline. If `` is not specified, then a global counter is used to give an +identifier to the snapshot. + +Running `cargo test` will check whether the old snapshot is identical to the +current one. If they are equal, the test passes. Otherwise, the test fails. + +Use the command line `cargo insta` to approve or reject new snapshots. 
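The inline value accepted by `snapshot_hash!` is simply the md5 hex digest produced by `hash_snapshot` above, so an expected hash can be reproduced by hand; a minimal sketch, assuming the same `md5` crate listed in `meili-snap`'s dependencies:

```rust
fn main() {
    // `snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820")` first renders the
    // value with `format!("{}", ...)`, then hashes the resulting string with md5.
    let rendered = format!("{}", 10);
    let digest = format!("{:x}", md5::compute(rendered.as_bytes()));
    assert_eq!(digest, "d3d9446802a44259755d38e6d163e820");
}
```

Setting `MEILI_TEST_FULL_SNAPS=true` when running the tests additionally writes the unhashed `<name>.full.snap` file next to the hashed value, as handled by `default_snapshot_settings_for_test` above.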
+ +## Example +```ignore +// The full snapshot is saved under 1.full.snap and contains `10` +snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820"); +// The full snapshot is saved under snap_name.full.snap and contains `hello world` +snapshot_hash!("hello world", name: "snap_name", @"5f93f983524def3dca464469d2cf9f3e"); +``` +*/ +#[macro_export] +macro_rules! snapshot_hash { + ($value:expr, @$inline:literal) => { + let test_name = { + fn f() {} + fn type_name_of_val(_: T) -> &'static str { + std::any::type_name::() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + let test_name = test_name + .strip_suffix("::{{closure}}") + .unwrap_or(test_name); + + let (settings, snap_name, store_whole_snapshot) = $crate::default_snapshot_settings_for_test(test_name, None); + settings.bind(|| { + let snap = format!("{}", $value); + let hash_snap = $crate::hash_snapshot(&snap); + meili_snap::insta::assert_snapshot!(hash_snap, @$inline); + if store_whole_snapshot { + meili_snap::insta::assert_snapshot!(format!("{}.full", snap_name), snap); + } + }); + }; + ($value:expr, name: $name:expr, @$inline:literal) => { + let test_name = { + fn f() {} + fn type_name_of_val(_: T) -> &'static str { + std::any::type_name::() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + + let snap_name = format!("{}", $name); + let (settings, snap_name, store_whole_snapshot) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name)); + settings.bind(|| { + let snap = format!("{}", $value); + let hash_snap = $crate::hash_snapshot(&snap); + meili_snap::insta::assert_snapshot!(hash_snap, @$inline); + if store_whole_snapshot { + meili_snap::insta::assert_snapshot!(format!("{}.full", snap_name), snap); + } + }); + }; +} + +/** +Create a hashed snapshot test. + +## Arguments: +1. The content of the snapshot. It is an expression whose result implements the `fmt::Display` trait. +2. Optionally one of: + 1. `name: `: the identifier for the snapshot test + 2. `@""` to write the hash of the snapshot inline + +## Behaviour +The content of the snapshot will be saved in full with the given name +or using a global counter to give it an identifier. + +Running `cargo test` will check whether the old snapshot is identical to the +current one. If they are equal, the test passes. Otherwise, the test fails. + +Use the command line `cargo insta` to approve or reject new snapshots. + +## Example +```ignore +// The full snapshot is saved under 1.snap and contains `10` +snapshot!(10); +// The full snapshot is saved under snap_name.snap and contains `10` +snapshot!("hello world", name: "snap_name"); +// The full snapshot is saved inline +snapshot!(format!("{:?}", vec![1, 2]), @"[1, 2]"); +``` +*/ +#[macro_export] +macro_rules! 
snapshot { + ($value:expr, name: $name:expr) => { + let test_name = { + fn f() {} + fn type_name_of_val(_: T) -> &'static str { + std::any::type_name::() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + let test_name = test_name + .strip_suffix("::{{closure}}") + .unwrap_or(test_name); + + let snap_name = format!("{}", $name); + let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name)); + settings.bind(|| { + let snap = format!("{}", $value); + meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + }); + }; + ($value:expr, @$inline:literal) => { + // Note that the name given as argument does not matter since it is only an inline snapshot + // We don't pass None because otherwise `meili-snap` will try to assign it a unique identifier + let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument")); + settings.bind(|| { + let snap = format!("{}", $value); + meili_snap::insta::assert_snapshot!(snap, @$inline); + }); + }; + ($value:expr) => { + let test_name = { + fn f() {} + fn type_name_of_val(_: T) -> &'static str { + std::any::type_name::() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + let test_name = test_name + .strip_suffix("::{{closure}}") + .unwrap_or(test_name); + + let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None); + settings.bind(|| { + let snap = format!("{}", $value); + meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + }); + }; +} + +#[cfg(test)] +mod tests { + use crate as meili_snap; + #[test] + fn snap() { + snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820"); + snapshot_hash!(20, @"98f13708210194c475687be6106a3b84"); + snapshot_hash!(30, @"34173cb38f07f89ddbebc2ac9128303f"); + + snapshot!(40, @"40"); + snapshot!(50, @"50"); + snapshot!(60, @"60"); + + snapshot!(70); + snapshot!(80); + snapshot!(90); + + snapshot!(100, name: "snap_name_1"); + snapshot_hash!(110, name: "snap_name_2", @"5f93f983524def3dca464469d2cf9f3e"); + + snapshot!(120); + snapshot!(format!("{:?}", vec![1, 2]), @"[1, 2]"); + } + + // Currently the name of this module is not part of the snapshot path + // It does not bother me, but maybe it is worth changing later on. + mod snap { + use crate as meili_snap; + #[test] + fn some_test() { + snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820"); + snapshot_hash!(20, @"98f13708210194c475687be6106a3b84"); + snapshot_hash!(30, @"34173cb38f07f89ddbebc2ac9128303f"); + + snapshot!(40, @"40"); + snapshot!(50, @"50"); + snapshot!(60, @"60"); + + snapshot!(70); + snapshot!(80); + snapshot!(90); + + snapshot!(100, name: "snap_name_1"); + snapshot_hash!(110, name: "snap_name_2", @"5f93f983524def3dca464469d2cf9f3e"); + + snapshot!(120); + + // snapshot_hash!("", name: "", @"d41d8cd98f00b204e9800998ecf8427e"); + } + } +} + +/// Create a string from the value by serializing it as Json, optionally +/// redacting some parts of it. +/// +/// The second argument to the macro can be an object expression for redaction. +/// It's in the form { selector => replacement }. For more information about redactions +/// refer to the redactions feature in the `insta` guide. +#[macro_export] +macro_rules! 
json_string { + ($value:expr, {$($k:expr => $v:expr),*$(,)?}) => { + { + let (_, snap) = meili_snap::insta::_prepare_snapshot_for_redaction!($value, {$($k => $v),*}, Json, File); + snap + } + }; + ($value:expr) => {{ + let value = meili_snap::insta::_macro_support::serialize_value( + &$value, + meili_snap::insta::_macro_support::SerializationFormat::Json, + meili_snap::insta::_macro_support::SnapshotLocation::File + ); + value + }}; +} diff --git a/meili-snap/src/snapshots/lib.rs/snap/4.snap b/meili-snap/src/snapshots/lib.rs/snap/4.snap new file mode 100644 index 000000000..5d0878f16 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/4.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +70 diff --git a/meili-snap/src/snapshots/lib.rs/snap/5.snap b/meili-snap/src/snapshots/lib.rs/snap/5.snap new file mode 100644 index 000000000..ea547b823 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/5.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +80 diff --git a/meili-snap/src/snapshots/lib.rs/snap/6.snap b/meili-snap/src/snapshots/lib.rs/snap/6.snap new file mode 100644 index 000000000..e91bbe6f7 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/6.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +90 diff --git a/meili-snap/src/snapshots/lib.rs/snap/7.snap b/meili-snap/src/snapshots/lib.rs/snap/7.snap new file mode 100644 index 000000000..5ae6bb922 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/7.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +120 diff --git a/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap b/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap new file mode 100644 index 000000000..3964679e6 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +100 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/4.snap b/meili-snap/src/snapshots/lib.rs/some_test/4.snap new file mode 100644 index 000000000..5d0878f16 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/4.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +70 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/5.snap b/meili-snap/src/snapshots/lib.rs/some_test/5.snap new file mode 100644 index 000000000..ea547b823 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/5.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +80 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/6.snap b/meili-snap/src/snapshots/lib.rs/some_test/6.snap new file mode 100644 index 000000000..e91bbe6f7 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/6.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +90 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/7.snap b/meili-snap/src/snapshots/lib.rs/some_test/7.snap new file mode 100644 index 000000000..5ae6bb922 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/7.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +120 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap b/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap new file mode 100644 index 000000000..3964679e6 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +100 diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index a872b4e9a..e673c2f9a 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -4,11 +4,11 @@ version = "0.29.1" 
edition = "2021" [dependencies] -enum-iterator = "1.1.2" +enum-iterator = "1.1.3" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false } rand = "0.8.5" +roaring = { version = "0.10.0", features = ["serde"] } serde = { version = "1.0.145", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } sha2 = "0.10.6" diff --git a/meilisearch-auth/src/action.rs b/meilisearch-auth/src/action.rs deleted file mode 100644 index 19944d882..000000000 --- a/meilisearch-auth/src/action.rs +++ /dev/null @@ -1,135 +0,0 @@ -use serde::{Deserialize, Serialize}; -use std::hash::Hash; - -#[derive( - enum_iterator::Sequence, Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, -)] -#[repr(u8)] -pub enum Action { - #[serde(rename = "*")] - All = 0, - #[serde(rename = "search")] - Search, - #[serde(rename = "documents.*")] - DocumentsAll, - #[serde(rename = "documents.add")] - DocumentsAdd, - #[serde(rename = "documents.get")] - DocumentsGet, - #[serde(rename = "documents.delete")] - DocumentsDelete, - #[serde(rename = "indexes.*")] - IndexesAll, - #[serde(rename = "indexes.create")] - IndexesAdd, - #[serde(rename = "indexes.get")] - IndexesGet, - #[serde(rename = "indexes.update")] - IndexesUpdate, - #[serde(rename = "indexes.delete")] - IndexesDelete, - #[serde(rename = "tasks.*")] - TasksAll, - #[serde(rename = "tasks.get")] - TasksGet, - #[serde(rename = "settings.*")] - SettingsAll, - #[serde(rename = "settings.get")] - SettingsGet, - #[serde(rename = "settings.update")] - SettingsUpdate, - #[serde(rename = "stats.*")] - StatsAll, - #[serde(rename = "stats.get")] - StatsGet, - #[serde(rename = "metrics.*")] - MetricsAll, - #[serde(rename = "metrics.get")] - MetricsGet, - #[serde(rename = "dumps.*")] - DumpsAll, - #[serde(rename = "dumps.create")] - DumpsCreate, - #[serde(rename = "version")] - Version, - #[serde(rename = "keys.create")] - KeysAdd, - #[serde(rename = "keys.get")] - KeysGet, - #[serde(rename = "keys.update")] - KeysUpdate, - #[serde(rename = "keys.delete")] - KeysDelete, -} - -impl Action { - pub const fn from_repr(repr: u8) -> Option { - use actions::*; - match repr { - ALL => Some(Self::All), - SEARCH => Some(Self::Search), - DOCUMENTS_ALL => Some(Self::DocumentsAll), - DOCUMENTS_ADD => Some(Self::DocumentsAdd), - DOCUMENTS_GET => Some(Self::DocumentsGet), - DOCUMENTS_DELETE => Some(Self::DocumentsDelete), - INDEXES_ALL => Some(Self::IndexesAll), - INDEXES_CREATE => Some(Self::IndexesAdd), - INDEXES_GET => Some(Self::IndexesGet), - INDEXES_UPDATE => Some(Self::IndexesUpdate), - INDEXES_DELETE => Some(Self::IndexesDelete), - TASKS_ALL => Some(Self::TasksAll), - TASKS_GET => Some(Self::TasksGet), - SETTINGS_ALL => Some(Self::SettingsAll), - SETTINGS_GET => Some(Self::SettingsGet), - SETTINGS_UPDATE => Some(Self::SettingsUpdate), - STATS_ALL => Some(Self::StatsAll), - STATS_GET => Some(Self::StatsGet), - METRICS_ALL => Some(Self::MetricsAll), - METRICS_GET => Some(Self::MetricsGet), - DUMPS_ALL => Some(Self::DumpsAll), - DUMPS_CREATE => Some(Self::DumpsCreate), - VERSION => Some(Self::Version), - KEYS_CREATE => Some(Self::KeysAdd), - KEYS_GET => Some(Self::KeysGet), - KEYS_UPDATE => Some(Self::KeysUpdate), - KEYS_DELETE => Some(Self::KeysDelete), - _otherwise => None, - } - } - - pub const fn repr(&self) -> u8 { - *self as u8 - } -} - -pub mod actions { - use super::Action::*; - - pub(crate) const ALL: u8 = All.repr(); - pub const 
SEARCH: u8 = Search.repr(); - pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr(); - pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr(); - pub const DOCUMENTS_GET: u8 = DocumentsGet.repr(); - pub const DOCUMENTS_DELETE: u8 = DocumentsDelete.repr(); - pub const INDEXES_ALL: u8 = IndexesAll.repr(); - pub const INDEXES_CREATE: u8 = IndexesAdd.repr(); - pub const INDEXES_GET: u8 = IndexesGet.repr(); - pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr(); - pub const INDEXES_DELETE: u8 = IndexesDelete.repr(); - pub const TASKS_ALL: u8 = TasksAll.repr(); - pub const TASKS_GET: u8 = TasksGet.repr(); - pub const SETTINGS_ALL: u8 = SettingsAll.repr(); - pub const SETTINGS_GET: u8 = SettingsGet.repr(); - pub const SETTINGS_UPDATE: u8 = SettingsUpdate.repr(); - pub const STATS_ALL: u8 = StatsAll.repr(); - pub const STATS_GET: u8 = StatsGet.repr(); - pub const METRICS_ALL: u8 = MetricsAll.repr(); - pub const METRICS_GET: u8 = MetricsGet.repr(); - pub const DUMPS_ALL: u8 = DumpsAll.repr(); - pub const DUMPS_CREATE: u8 = DumpsCreate.repr(); - pub const VERSION: u8 = Version.repr(); - pub const KEYS_CREATE: u8 = KeysAdd.repr(); - pub const KEYS_GET: u8 = KeysGet.repr(); - pub const KEYS_UPDATE: u8 = KeysUpdate.repr(); - pub const KEYS_DELETE: u8 = KeysDelete.repr(); -} diff --git a/meilisearch-auth/src/dump.rs b/meilisearch-auth/src/dump.rs index 7e607e574..0b26bf7da 100644 --- a/meilisearch-auth/src/dump.rs +++ b/meilisearch-auth/src/dump.rs @@ -1,10 +1,9 @@ -use serde_json::Deserializer; - use std::fs::File; -use std::io::BufReader; -use std::io::Write; +use std::io::{BufReader, Write}; use std::path::Path; +use serde_json::Deserializer; + use crate::{AuthController, HeedAuthStore, Result}; const KEYS_PATH: &str = "keys"; diff --git a/meilisearch-auth/src/error.rs b/meilisearch-auth/src/error.rs index 46c244a5a..37d3dce60 100644 --- a/meilisearch-auth/src/error.rs +++ b/meilisearch-auth/src/error.rs @@ -1,43 +1,24 @@ use std::error::Error; use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; -use serde_json::Value; +use meilisearch_types::{internal_error, keys}; pub type Result = std::result::Result; #[derive(Debug, thiserror::Error)] pub enum AuthControllerError { - #[error("`{0}` field is mandatory.")] - MissingParameter(&'static str), - #[error("`actions` field value `{0}` is invalid. It should be an array of string representing action names.")] - InvalidApiKeyActions(Value), - #[error( - "`{0}` is not a valid index uid. It should be an array of string representing index names." - )] - InvalidApiKeyIndexes(Value), - #[error("`expiresAt` field value `{0}` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")] - InvalidApiKeyExpiresAt(Value), - #[error("`description` field value `{0}` is invalid. It should be a string or specified as a null value.")] - InvalidApiKeyDescription(Value), - #[error( - "`name` field value `{0}` is invalid. It should be a string or specified as a null value." - )] - InvalidApiKeyName(Value), - #[error("`uid` field value `{0}` is invalid. 
It should be a valid UUID v4 string or omitted.")] - InvalidApiKeyUid(Value), #[error("API key `{0}` not found.")] ApiKeyNotFound(String), #[error("`uid` field value `{0}` is already an existing API key.")] ApiKeyAlreadyExists(String), - #[error("The `{0}` field cannot be modified for the given resource.")] - ImmutableField(String), + #[error(transparent)] + ApiKey(#[from] keys::Error), #[error("Internal error: {0}")] Internal(Box), } internal_error!( - AuthControllerError: milli::heed::Error, + AuthControllerError: meilisearch_types::milli::heed::Error, std::io::Error, serde_json::Error, std::str::Utf8Error @@ -46,16 +27,9 @@ internal_error!( impl ErrorCode for AuthControllerError { fn error_code(&self) -> Code { match self { - Self::MissingParameter(_) => Code::MissingParameter, - Self::InvalidApiKeyActions(_) => Code::InvalidApiKeyActions, - Self::InvalidApiKeyIndexes(_) => Code::InvalidApiKeyIndexes, - Self::InvalidApiKeyExpiresAt(_) => Code::InvalidApiKeyExpiresAt, - Self::InvalidApiKeyDescription(_) => Code::InvalidApiKeyDescription, - Self::InvalidApiKeyName(_) => Code::InvalidApiKeyName, + Self::ApiKey(e) => e.error_code(), Self::ApiKeyNotFound(_) => Code::ApiKeyNotFound, - Self::InvalidApiKeyUid(_) => Code::InvalidApiKeyUid, Self::ApiKeyAlreadyExists(_) => Code::ApiKeyAlreadyExists, - Self::ImmutableField(_) => Code::ImmutableField, Self::Internal(_) => Code::Internal, } } diff --git a/meilisearch-auth/src/key.rs b/meilisearch-auth/src/key.rs deleted file mode 100644 index eb72aaa72..000000000 --- a/meilisearch-auth/src/key.rs +++ /dev/null @@ -1,201 +0,0 @@ -use crate::action::Action; -use crate::error::{AuthControllerError, Result}; -use crate::store::KeyId; - -use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::star_or::StarOr; -use serde::{Deserialize, Serialize}; -use serde_json::{from_value, Value}; -use time::format_description::well_known::Rfc3339; -use time::macros::{format_description, time}; -use time::{Date, OffsetDateTime, PrimitiveDateTime}; -use uuid::Uuid; - -#[derive(Debug, Deserialize, Serialize)] -pub struct Key { - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - pub uid: KeyId, - pub actions: Vec, - pub indexes: Vec>, - #[serde(with = "time::serde::rfc3339::option")] - pub expires_at: Option, - #[serde(with = "time::serde::rfc3339")] - pub created_at: OffsetDateTime, - #[serde(with = "time::serde::rfc3339")] - pub updated_at: OffsetDateTime, -} - -impl Key { - pub fn create_from_value(value: Value) -> Result { - let name = match value.get("name") { - None | Some(Value::Null) => None, - Some(des) => from_value(des.clone()) - .map(Some) - .map_err(|_| AuthControllerError::InvalidApiKeyName(des.clone()))?, - }; - - let description = match value.get("description") { - None | Some(Value::Null) => None, - Some(des) => from_value(des.clone()) - .map(Some) - .map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()))?, - }; - - let uid = value.get("uid").map_or_else( - || Ok(Uuid::new_v4()), - |uid| { - from_value(uid.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyUid(uid.clone())) - }, - )?; - - let actions = value - .get("actions") - .map(|act| { - from_value(act.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyActions(act.clone())) - }) - .ok_or(AuthControllerError::MissingParameter("actions"))??; - - let indexes = value - .get("indexes") - .map(|ind| { - from_value(ind.clone()) - .map_err(|_| 
AuthControllerError::InvalidApiKeyIndexes(ind.clone())) - }) - .ok_or(AuthControllerError::MissingParameter("indexes"))??; - - let expires_at = value - .get("expiresAt") - .map(parse_expiration_date) - .ok_or(AuthControllerError::MissingParameter("expiresAt"))??; - - let created_at = OffsetDateTime::now_utc(); - let updated_at = created_at; - - Ok(Self { - name, - description, - uid, - actions, - indexes, - expires_at, - created_at, - updated_at, - }) - } - - pub fn update_from_value(&mut self, value: Value) -> Result<()> { - if let Some(des) = value.get("description") { - let des = from_value(des.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone())); - self.description = des?; - } - - if let Some(des) = value.get("name") { - let des = from_value(des.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyName(des.clone())); - self.name = des?; - } - - if value.get("uid").is_some() { - return Err(AuthControllerError::ImmutableField("uid".to_string())); - } - - if value.get("actions").is_some() { - return Err(AuthControllerError::ImmutableField("actions".to_string())); - } - - if value.get("indexes").is_some() { - return Err(AuthControllerError::ImmutableField("indexes".to_string())); - } - - if value.get("expiresAt").is_some() { - return Err(AuthControllerError::ImmutableField("expiresAt".to_string())); - } - - if value.get("createdAt").is_some() { - return Err(AuthControllerError::ImmutableField("createdAt".to_string())); - } - - if value.get("updatedAt").is_some() { - return Err(AuthControllerError::ImmutableField("updatedAt".to_string())); - } - - self.updated_at = OffsetDateTime::now_utc(); - - Ok(()) - } - - pub(crate) fn default_admin() -> Self { - let now = OffsetDateTime::now_utc(); - let uid = Uuid::new_v4(); - Self { - name: Some("Default Admin API Key".to_string()), - description: Some("Use it for anything that is not a search operation. Caution! Do not expose it on a public frontend".to_string()), - uid, - actions: vec![Action::All], - indexes: vec![StarOr::Star], - expires_at: None, - created_at: now, - updated_at: now, - } - } - - pub(crate) fn default_search() -> Self { - let now = OffsetDateTime::now_utc(); - let uid = Uuid::new_v4(); - Self { - name: Some("Default Search API Key".to_string()), - description: Some("Use it to search from the frontend".to_string()), - uid, - actions: vec![Action::Search], - indexes: vec![StarOr::Star], - expires_at: None, - created_at: now, - updated_at: now, - } - } -} - -fn parse_expiration_date(value: &Value) -> Result> { - match value { - Value::String(string) => OffsetDateTime::parse(string, &Rfc3339) - .or_else(|_| { - PrimitiveDateTime::parse( - string, - format_description!( - "[year repr:full base:calendar]-[month repr:numerical]-[day]T[hour]:[minute]:[second]" - ), - ).map(|datetime| datetime.assume_utc()) - }) - .or_else(|_| { - PrimitiveDateTime::parse( - string, - format_description!( - "[year repr:full base:calendar]-[month repr:numerical]-[day] [hour]:[minute]:[second]" - ), - ).map(|datetime| datetime.assume_utc()) - }) - .or_else(|_| { - Date::parse(string, format_description!( - "[year repr:full base:calendar]-[month repr:numerical]-[day]" - )).map(|date| PrimitiveDateTime::new(date, time!(00:00)).assume_utc()) - }) - .map_err(|_| AuthControllerError::InvalidApiKeyExpiresAt(value.clone())) - // check if the key is already expired. 
- .and_then(|d| { - if d > OffsetDateTime::now_utc() { - Ok(d) - } else { - Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone())) - } - }) - .map(Option::Some), - Value::Null => Ok(None), - _otherwise => Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone())), - } -} diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs index 43183d4cf..12d810aec 100644 --- a/meilisearch-auth/src/lib.rs +++ b/meilisearch-auth/src/lib.rs @@ -1,7 +1,5 @@ -mod action; mod dump; pub mod error; -mod key; mod store; use std::collections::{HashMap, HashSet}; @@ -9,19 +7,16 @@ use std::ops::Deref; use std::path::Path; use std::sync::Arc; +use error::{AuthControllerError, Result}; +use meilisearch_types::keys::{Action, Key}; +use meilisearch_types::star_or::StarOr; use serde::{Deserialize, Serialize}; use serde_json::Value; +pub use store::open_auth_store_env; +use store::{generate_key_as_hexa, HeedAuthStore}; use time::OffsetDateTime; use uuid::Uuid; -pub use action::{actions, Action}; -use error::{AuthControllerError, Result}; -pub use key::Key; -use meilisearch_types::star_or::StarOr; -use store::generate_key_as_hexa; -pub use store::open_auth_store_env; -use store::HeedAuthStore; - #[derive(Clone)] pub struct AuthController { store: Arc, @@ -36,18 +31,13 @@ impl AuthController { generate_default_keys(&store)?; } - Ok(Self { - store: Arc::new(store), - master_key: master_key.clone(), - }) + Ok(Self { store: Arc::new(store), master_key: master_key.clone() }) } pub fn create_key(&self, value: Value) -> Result { let key = Key::create_from_value(value)?; match self.store.get_api_key(key.uid)? { - Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists( - key.uid.to_string(), - )), + Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(key.uid.to_string())), None => self.store.put_api_key(key), } } @@ -66,9 +56,9 @@ impl AuthController { pub fn get_optional_uid_from_encoded_key(&self, encoded_key: &[u8]) -> Result> { match &self.master_key { - Some(master_key) => self - .store - .get_uid_from_encoded_key(encoded_key, master_key.as_bytes()), + Some(master_key) => { + self.store.get_uid_from_encoded_key(encoded_key, master_key.as_bytes()) + } None => Ok(None), } } @@ -134,9 +124,7 @@ impl AuthController { /// Generate a valid key from a key id using the current master key. /// Returns None if no master key has been set. pub fn generate_key(&self, uid: Uuid) -> Option { - self.master_key - .as_ref() - .map(|master_key| generate_key_as_hexa(uid, master_key.as_bytes())) + self.master_key.as_ref().map(|master_key| generate_key_as_hexa(uid, master_key.as_bytes())) } /// Check if the provided key is authorized to make a specific action @@ -154,8 +142,7 @@ impl AuthController { .or(match index { // else check if the key has access to the requested index. Some(index) => { - self.store - .get_expiration_date(uid, action, Some(index.as_bytes()))? + self.store.get_expiration_date(uid, action, Some(index.as_bytes()))? } // or to any index if no index has been requested. None => self.store.prefix_first_expiration_date(uid, action)?, @@ -168,6 +155,17 @@ impl AuthController { None => Ok(false), } } + + /// Delete all the keys in the DB. + pub fn raw_delete_all_keys(&mut self) -> Result<()> { + self.store.delete_all_keys() + } + + /// Delete all the keys in the DB. 
+ pub fn raw_insert_key(&mut self, key: Key) -> Result<()> { + self.store.put_api_key(key)?; + Ok(()) + } } pub struct AuthFilter { @@ -177,10 +175,7 @@ pub struct AuthFilter { impl Default for AuthFilter { fn default() -> Self { - Self { - search_rules: SearchRules::default(), - allow_index_creation: true, - } + Self { search_rules: SearchRules::default(), allow_index_creation: true } } } @@ -215,10 +210,9 @@ impl SearchRules { None } } - Self::Map(map) => map - .get(index) - .or_else(|| map.get("*")) - .map(|isr| isr.clone().unwrap_or_default()), + Self::Map(map) => { + map.get(index).or_else(|| map.get("*")).map(|isr| isr.clone().unwrap_or_default()) + } } } } diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 847af9d36..b3f9ed672 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -1,8 +1,7 @@ use std::borrow::Cow; use std::cmp::Reverse; use std::collections::HashSet; -use std::convert::TryFrom; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; use std::fs::create_dir_all; use std::ops::Deref; use std::path::Path; @@ -10,9 +9,11 @@ use std::str; use std::sync::Arc; use hmac::{Hmac, Mac}; +use meilisearch_types::keys::KeyId; +use meilisearch_types::milli; +use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; +use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::star_or::StarOr; -use milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; -use milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use sha2::Sha256; use time::OffsetDateTime; use uuid::fmt::Hyphenated; @@ -26,8 +27,6 @@ const AUTH_DB_PATH: &str = "auth"; const KEY_DB_NAME: &str = "api-keys"; const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration"; -pub type KeyId = Uuid; - #[derive(Clone)] pub struct HeedAuthStore { env: Arc, @@ -59,12 +58,7 @@ impl HeedAuthStore { let keys = env.create_database(Some(KEY_DB_NAME))?; let action_keyid_index_expiration = env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; - Ok(Self { - env, - keys, - action_keyid_index_expiration, - should_close_on_drop: true, - }) + Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) } pub fn set_drop_on_close(&mut self, v: bool) { @@ -94,12 +88,8 @@ impl HeedAuthStore { Action::All => actions.extend(enum_iterator::all::()), Action::DocumentsAll => { actions.extend( - [ - Action::DocumentsGet, - Action::DocumentsDelete, - Action::DocumentsAdd, - ] - .iter(), + [Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd] + .iter(), ); } Action::IndexesAll => { @@ -109,6 +99,7 @@ impl HeedAuthStore { Action::IndexesDelete, Action::IndexesGet, Action::IndexesUpdate, + Action::IndexesSwap, ] .iter(), ); @@ -120,7 +111,7 @@ impl HeedAuthStore { actions.insert(Action::DumpsCreate); } Action::TasksAll => { - actions.insert(Action::TasksGet); + actions.extend([Action::TasksGet, Action::TasksDelete, Action::TasksCancel]); } Action::StatsAll => { actions.insert(Action::StatsGet); @@ -197,6 +188,13 @@ impl HeedAuthStore { Ok(existing) } + pub fn delete_all_keys(&self) -> Result<()> { + let mut wtxn = self.env.write_txn()?; + self.keys.clear(&mut wtxn)?; + wtxn.commit()?; + Ok(()) + } + pub fn list_api_keys(&self) -> Result> { let mut list = Vec::new(); let rtxn = self.env.read_txn()?; diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 3522b9d9a..dd1c9d7e5 100644 --- a/meilisearch-http/Cargo.toml +++ 
b/meilisearch-http/Cargo.toml @@ -10,21 +10,10 @@ version = "0.29.1" name = "meilisearch" path = "src/main.rs" -[build-dependencies] -anyhow = { version = "1.0.65", optional = true } -cargo_toml = { version = "0.12.4", optional = true } -hex = { version = "0.4.3", optional = true } -reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true } -sha-1 = { version = "0.10.0", optional = true } -static-files = { version = "0.2.3", optional = true } -tempfile = { version = "3.3.0", optional = true } -vergen = { version = "7.4.2", default-features = false, features = ["git"] } -zip = { version = "0.6.2", optional = true } - [dependencies] actix-cors = "0.6.3" +actix-http = { version = "3.2.2", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] } actix-web = { version = "4.2.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] } -actix-http = "3.2.2" actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } anyhow = { version = "1.0.65", features = ["backtrace"] } async-stream = "0.3.3" @@ -34,28 +23,33 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", " bytes = "1.2.1" clap = { version = "4.0.9", features = ["derive", "env"] } crossbeam-channel = "0.5.6" +dump = { path = "../dump" } either = "1.8.0" env_logger = "0.9.1" +file-store = { path = "../file-store" } flate2 = "1.0.24" fst = "0.4.7" futures = "0.3.24" futures-util = "0.3.24" http = "0.2.8" +index-scheduler = { path = "../index-scheduler" } indexmap = { version = "1.9.1", features = ["serde-1"] } itertools = "0.10.5" jsonwebtoken = "8.1.1" +lazy_static = "1.4.0" log = "0.4.17" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -meilisearch-lib = { path = "../meilisearch-lib", default-features = false } mimalloc = { version = "0.1.29", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" once_cell = "1.15.0" parking_lot = "0.12.1" +permissive-json-pointer = { path = "../permissive-json-pointer" } pin-project-lite = "0.2.9" platform-dirs = "0.3.0" +prometheus = { version = "0.13.2", features = ["process"], optional = true } rand = "0.8.5" rayon = "1.5.3" regex = "1.6.0" @@ -80,8 +74,7 @@ tokio-stream = "0.1.10" toml = "0.5.9" uuid = { version = "1.1.2", features = ["serde", "v4"] } walkdir = "2.3.2" -prometheus = { version = "0.13.2", features = ["process"], optional = true } -lazy_static = "1.4.0" +yaup = "0.2.0" [dev-dependencies] actix-rt = "2.7.0" @@ -89,12 +82,24 @@ assert-json-diff = "2.0.2" brotli = "3.3.4" manifest-dir-macros = "0.1.16" maplit = "1.0.2" +meili-snap = {path = "../meili-snap"} +temp-env = "0.3.1" urlencoding = "2.1.2" yaup = "0.2.1" -temp-env = "0.3.1" + +[build-dependencies] +anyhow = { version = "1.0.65", optional = true } +cargo_toml = { version = "0.12.4", optional = true } +hex = { version = "0.4.3", optional = true } +reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true } +sha-1 = { version = "0.10.0", optional = true } +static-files = { version = "0.2.3", optional = true } +tempfile = { version = "3.3.0", optional = true } +vergen = { version = "7.4.2", default-features = false, features = ["git"] } +zip = { version = "0.6.2", optional = true } [features] -default = ["analytics", "meilisearch-lib/default", "mini-dashboard"] +default 
= ["analytics", "meilisearch-types/default", "mini-dashboard"] metrics = ["prometheus"] analytics = ["segment"] mini-dashboard = [ @@ -108,10 +113,10 @@ mini-dashboard = [ "tempfile", "zip", ] -chinese = ["meilisearch-lib/chinese"] -hebrew = ["meilisearch-lib/hebrew"] -japanese = ["meilisearch-lib/japanese"] -thai = ["meilisearch-lib/thai"] +chinese = ["meilisearch-types/chinese"] +hebrew = ["meilisearch-types/hebrew"] +japanese = ["meilisearch-types/japanese"] +thai = ["meilisearch-types/thai"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.3/build.zip" diff --git a/meilisearch-http/build.rs b/meilisearch-http/build.rs index 1822cae00..e2207561b 100644 --- a/meilisearch-http/build.rs +++ b/meilisearch-http/build.rs @@ -72,11 +72,8 @@ mod mini_dashboard { resource_dir(&dashboard_dir).build()?; // Write the sha1 for the dashboard back to file. - let mut file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(sha1_path)?; + let mut file = + OpenOptions::new().write(true).create(true).truncate(true).open(sha1_path)?; file.write_all(sha1.as_bytes())?; file.flush()?; diff --git a/meilisearch-http/src/analytics/mock_analytics.rs b/meilisearch-http/src/analytics/mock_analytics.rs index 01838f223..ab93f5edc 100644 --- a/meilisearch-http/src/analytics/mock_analytics.rs +++ b/meilisearch-http/src/analytics/mock_analytics.rs @@ -1,16 +1,20 @@ -use std::{any::Any, sync::Arc}; +use std::any::Any; +use std::sync::Arc; use actix_web::HttpRequest; +use meilisearch_types::InstanceUid; use serde_json::Value; -use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt}; - use super::{find_user_id, Analytics}; +use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::Opt; -pub struct MockAnalytics; +pub struct MockAnalytics { + instance_uid: Option, +} #[derive(Default)] -pub struct SearchAggregator {} +pub struct SearchAggregator; #[allow(dead_code)] impl SearchAggregator { @@ -23,13 +27,17 @@ impl SearchAggregator { impl MockAnalytics { #[allow(clippy::new_ret_no_self)] - pub fn new(opt: &Opt) -> (Arc, String) { - let user = find_user_id(&opt.db_path).unwrap_or_default(); - (Arc::new(Self), user) + pub fn new(opt: &Opt) -> Arc { + let instance_uid = find_user_id(&opt.db_path); + Arc::new(Self { instance_uid }) } } impl Analytics for MockAnalytics { + fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> { + self.instance_uid.as_ref() + } + // These methods are noop and should be optimized out fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} fn get_search(&self, _aggregate: super::SearchAggregator) {} diff --git a/meilisearch-http/src/analytics/mod.rs b/meilisearch-http/src/analytics/mod.rs index b51f306a9..ffebaea77 100644 --- a/meilisearch-http/src/analytics/mod.rs +++ b/meilisearch-http/src/analytics/mod.rs @@ -5,16 +5,17 @@ mod segment_analytics; use std::fs; use std::path::{Path, PathBuf}; +use std::str::FromStr; use actix_web::HttpRequest; +use meilisearch_types::InstanceUid; +pub use mock_analytics::MockAnalytics; use once_cell::sync::Lazy; use platform_dirs::AppDirs; use serde_json::Value; use crate::routes::indexes::documents::UpdateDocumentsQuery; -pub use mock_analytics::MockAnalytics; - // if we are in debug mode OR the analytics feature is disabled // the `SegmentAnalytics` point to the mock instead of the real analytics #[cfg(any(debug_assertions, not(feature = "analytics")))] @@ -40,24 +41,22 @@ fn 
config_user_id_path(db_path: &Path) -> Option { db_path .canonicalize() .ok() - .map(|path| { - path.join("instance-uid") - .display() - .to_string() - .replace('/', "-") - }) + .map(|path| path.join("instance-uid").display().to_string().replace('/', "-")) .zip(MEILISEARCH_CONFIG_PATH.as_ref()) .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-'))) } /// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid` -fn find_user_id(db_path: &Path) -> Option { +fn find_user_id(db_path: &Path) -> Option { fs::read_to_string(db_path.join("instance-uid")) .ok() .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok()) + .and_then(|uid| InstanceUid::from_str(&uid).ok()) } pub trait Analytics: Sync + Send { + fn instance_uid(&self) -> Option<&InstanceUid>; + /// The method used to publish most analytics that do not need to be batched every hours fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index 21d41d84f..13dba7896 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -7,13 +7,9 @@ use std::time::{Duration, Instant}; use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use http::header::CONTENT_TYPE; +use index_scheduler::IndexScheduler; use meilisearch_auth::SearchRules; -use meilisearch_lib::index::{ - SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, -}; -use meilisearch_lib::index_controller::Stats; -use meilisearch_lib::MeiliSearch; +use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; use regex::Regex; use segment::message::{Identify, Track, User}; @@ -25,24 +21,27 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; +use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH}; use crate::analytics::Analytics; use crate::option::default_http_addr; use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::routes::{create_all_stats, Stats}; +use crate::search::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, +}; use crate::Opt; -use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH}; - const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; /// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. 
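With this change `find_user_id` yields a typed `InstanceUid` (a UUID) instead of a raw string. Below is a rough sketch of the read-then-parse pattern, using `uuid::Uuid` as a stand-in for `meilisearch_types::InstanceUid` and ignoring the config-directory fallback shown above; `read_instance_uid` is an illustrative name, not a function from the codebase.

    use std::fs;
    use std::path::Path;
    use std::str::FromStr;

    use uuid::Uuid;

    /// Reads `<db_path>/instance-uid` and parses it as a UUID.
    /// Any I/O or parse failure simply yields `None`.
    fn read_instance_uid(db_path: &Path) -> Option<Uuid> {
        fs::read_to_string(db_path.join("instance-uid"))
            .ok()
            .and_then(|raw| Uuid::from_str(&raw).ok())
    }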
-fn write_user_id(db_path: &Path, user_id: &str) { +fn write_user_id(db_path: &Path, user_id: &InstanceUid) { let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes()); - if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH - .as_ref() - .zip(config_user_id_path(db_path)) + if let Some((meilisearch_config_path, user_id_path)) = + MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path)) { let _ = fs::create_dir_all(&meilisearch_config_path); - let _ = fs::write(user_id_path, user_id.as_bytes()); + let _ = fs::write(user_id_path, user_id.to_string()); } } @@ -71,31 +70,28 @@ pub enum AnalyticsMsg { } pub struct SegmentAnalytics { + instance_uid: InstanceUid, sender: Sender, user: User, } impl SegmentAnalytics { - pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> (Arc, String) { - let user_id = super::find_user_id(&opt.db_path); - let first_time_run = user_id.is_none(); - let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string()); - write_user_id(&opt.db_path, &user_id); + pub async fn new(opt: &Opt, index_scheduler: Arc) -> Arc { + let instance_uid = super::find_user_id(&opt.db_path); + let first_time_run = instance_uid.is_none(); + let instance_uid = instance_uid.unwrap_or_else(|| Uuid::new_v4()); + write_user_id(&opt.db_path, &instance_uid); - let client = reqwest::Client::builder() - .connect_timeout(Duration::from_secs(10)) - .build(); + let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build(); // if reqwest throws an error we won't be able to send analytics if client.is_err() { return super::MockAnalytics::new(opt); } - let client = HttpClient::new( - client.unwrap(), - "https://telemetry.meilisearch.com".to_string(), - ); - let user = User::UserId { user_id }; + let client = + HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string()); + let user = User::UserId { user_id: instance_uid.to_string() }; let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string()); // If Meilisearch is Launched for the first time: @@ -104,9 +100,7 @@ impl SegmentAnalytics { if first_time_run { let _ = batcher .push(Track { - user: User::UserId { - user_id: "total_launch".to_string(), - }, + user: User::UserId { user_id: "total_launch".to_string() }, event: "Launched".to_string(), ..Default::default() }) @@ -133,18 +127,19 @@ impl SegmentAnalytics { add_documents_aggregator: DocumentsAggregator::default(), update_documents_aggregator: DocumentsAggregator::default(), }); - tokio::spawn(segment.run(meilisearch.clone())); + tokio::spawn(segment.run(index_scheduler.clone())); - let this = Self { - sender, - user: user.clone(), - }; + let this = Self { instance_uid, sender, user: user.clone() }; - (Arc::new(this), user.to_string()) + Arc::new(this) } } impl super::Analytics for SegmentAnalytics { + fn instance_uid(&self) -> Option<&InstanceUid> { + Some(&self.instance_uid) + } + fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { let user_agent = request.map(|req| extract_user_agents(req)); @@ -155,21 +150,15 @@ impl super::Analytics for SegmentAnalytics { properties: send, ..Default::default() }; - let _ = self - .sender - .try_send(AnalyticsMsg::BatchMessage(event.into())); + let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event.into())); } fn get_search(&self, aggregate: SearchAggregator) { - let _ = self - .sender - .try_send(AnalyticsMsg::AggregateGetSearch(aggregate)); + let _ = 
self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate)); } fn post_search(&self, aggregate: SearchAggregator) { - let _ = self - .sender - .try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); + let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); } fn add_documents( @@ -179,9 +168,7 @@ impl super::Analytics for SegmentAnalytics { request: &HttpRequest, ) { let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self - .sender - .try_send(AnalyticsMsg::AggregateAddDocuments(aggregate)); + let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate)); } fn update_documents( @@ -191,9 +178,7 @@ impl super::Analytics for SegmentAnalytics { request: &HttpRequest, ) { let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self - .sender - .try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); + let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); } } @@ -252,11 +237,8 @@ impl Segment { infos }; - let number_of_documents = stats - .indexes - .values() - .map(|index| index.number_of_documents) - .collect::>(); + let number_of_documents = + stats.indexes.values().map(|index| index.number_of_documents).collect::>(); json!({ "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day @@ -270,7 +252,7 @@ impl Segment { }) } - async fn run(mut self, meilisearch: MeiliSearch) { + async fn run(mut self, index_scheduler: Arc) { const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour // The first batch must be sent after one hour. let mut interval = @@ -279,7 +261,7 @@ impl Segment { loop { select! { _ = interval.tick() => { - self.tick(meilisearch.clone()).await; + self.tick(index_scheduler.clone()).await; }, msg = self.inbox.recv() => { match msg { @@ -295,8 +277,8 @@ impl Segment { } } - async fn tick(&mut self, meilisearch: MeiliSearch) { - if let Ok(stats) = meilisearch.get_all_stats(&SearchRules::default()).await { + async fn tick(&mut self, index_scheduler: Arc) { + if let Ok(stats) = create_all_stats(index_scheduler.into(), &SearchRules::default()) { let _ = self .batcher .push(Identify { @@ -404,11 +386,7 @@ impl SearchAggregator { let syntax = match filter { Value::String(_) => "string".to_string(), Value::Array(values) => { - if values - .iter() - .map(|v| v.to_string()) - .any(|s| RE.is_match(&s)) - { + if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { "mixed".to_string() } else { "array".to_string() @@ -439,8 +417,7 @@ impl SearchAggregator { ret.finite_pagination = 0; } - ret.matching_strategy - .insert(format!("{:?}", query.matching_strategy), 1); + ret.matching_strategy.insert(format!("{:?}", query.matching_strategy), 1); ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); @@ -472,17 +449,14 @@ impl SearchAggregator { self.time_spent.append(&mut other.time_spent); // sort self.sort_with_geo_point |= other.sort_with_geo_point; - self.sort_sum_of_criteria_terms = self - .sort_sum_of_criteria_terms - .saturating_add(other.sort_sum_of_criteria_terms); - self.sort_total_number_of_criteria = self - .sort_total_number_of_criteria - .saturating_add(other.sort_total_number_of_criteria); + self.sort_sum_of_criteria_terms = + self.sort_sum_of_criteria_terms.saturating_add(other.sort_sum_of_criteria_terms); + self.sort_total_number_of_criteria = + 
self.sort_total_number_of_criteria.saturating_add(other.sort_total_number_of_criteria); // filter self.filter_with_geo_radius |= other.filter_with_geo_radius; - self.filter_sum_of_criteria_terms = self - .filter_sum_of_criteria_terms - .saturating_add(other.filter_sum_of_criteria_terms); + self.filter_sum_of_criteria_terms = + self.filter_sum_of_criteria_terms.saturating_add(other.filter_sum_of_criteria_terms); self.filter_total_number_of_criteria = self .filter_total_number_of_criteria .saturating_add(other.filter_total_number_of_criteria); diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 86b7c1964..b0f29f9fd 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -1,6 +1,10 @@ use actix_web as aweb; use aweb::error::{JsonPayloadError, QueryPayloadError}; +use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; +use meilisearch_types::index_uid::IndexUidFormatError; +use serde_json::Value; +use tokio::task::JoinError; #[derive(Debug, thiserror::Error)] pub enum MeilisearchHttpError { @@ -12,13 +16,74 @@ pub enum MeilisearchHttpError { .1.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") )] InvalidContentType(String, Vec), + #[error("Document `{0}` not found.")] + DocumentNotFound(String), + #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] + InvalidExpression(&'static [&'static str], Value), + #[error("A {0} payload is missing.")] + MissingPayload(PayloadType), + #[error("The provided payload reached the size limit.")] + PayloadTooLarge, + #[error( + "Indexes {} not found.", + .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") + )] + IndexesNotFound(Vec), + #[error( + "Indexes must be declared only once during a swap. `{0}` was specified several times." + )] + SwapDuplicateIndexFound(String), + #[error( + "Indexes must be declared only once during a swap. {} were specified several times.", + .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") + )] + SwapDuplicateIndexesFound(Vec), + #[error("Two indexes must be given for each swap. 
The list `{:?}` contains {} indexes.", + .0, .0.len() + )] + SwapIndexPayloadWrongLength(Vec), + #[error(transparent)] + IndexUid(#[from] IndexUidFormatError), + #[error(transparent)] + SerdeJson(#[from] serde_json::Error), + #[error(transparent)] + HeedError(#[from] meilisearch_types::heed::Error), + #[error(transparent)] + IndexScheduler(#[from] index_scheduler::Error), + #[error(transparent)] + Milli(#[from] meilisearch_types::milli::Error), + #[error(transparent)] + Payload(#[from] PayloadError), + #[error(transparent)] + FileStore(#[from] file_store::Error), + #[error(transparent)] + DocumentFormat(#[from] DocumentFormatError), + #[error(transparent)] + Join(#[from] JoinError), } impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, + MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, + MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound, + MeilisearchHttpError::InvalidExpression(_, _) => Code::Filter, + MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge, + MeilisearchHttpError::IndexesNotFound(_) => Code::IndexNotFound, + MeilisearchHttpError::SwapDuplicateIndexFound(_) => Code::DuplicateIndexFound, + MeilisearchHttpError::SwapDuplicateIndexesFound(_) => Code::DuplicateIndexFound, + MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::BadRequest, + MeilisearchHttpError::IndexUid(e) => e.error_code(), + MeilisearchHttpError::SerdeJson(_) => Code::Internal, + MeilisearchHttpError::HeedError(_) => Code::Internal, + MeilisearchHttpError::IndexScheduler(e) => e.error_code(), + MeilisearchHttpError::Milli(e) => e.error_code(), + MeilisearchHttpError::Payload(e) => e.error_code(), + MeilisearchHttpError::FileStore(_) => Code::Internal, + MeilisearchHttpError::DocumentFormat(e) => e.error_code(), + MeilisearchHttpError::Join(_) => Code::Internal, } } } @@ -29,11 +94,19 @@ impl From for aweb::Error { } } +impl From for MeilisearchHttpError { + fn from(error: aweb::error::PayloadError) -> Self { + MeilisearchHttpError::Payload(PayloadError::Payload(error)) + } +} + #[derive(Debug, thiserror::Error)] pub enum PayloadError { - #[error("{0}")] + #[error(transparent)] + Payload(aweb::error::PayloadError), + #[error(transparent)] Json(JsonPayloadError), - #[error("{0}")] + #[error(transparent)] Query(QueryPayloadError), #[error("The json payload provided is malformed. `{0}`.")] MalformedPayload(serde_json::error::Error), @@ -44,6 +117,15 @@ pub enum PayloadError { impl ErrorCode for PayloadError { fn error_code(&self) -> Code { match self { + PayloadError::Payload(e) => match e { + aweb::error::PayloadError::Incomplete(_) => Code::Internal, + aweb::error::PayloadError::EncodingCorrupted => Code::Internal, + aweb::error::PayloadError::Overflow => Code::PayloadTooLarge, + aweb::error::PayloadError::UnknownLength => Code::Internal, + aweb::error::PayloadError::Http2Payload(_) => Code::Internal, + aweb::error::PayloadError::Io(_) => Code::Internal, + _ => todo!(), + }, PayloadError::Json(err) => match err { JsonPayloadError::Overflow { .. 
} => Code::PayloadTooLarge, JsonPayloadError::ContentType => Code::UnsupportedMediaType, diff --git a/meilisearch-http/src/extractors/authentication/mod.rs b/meilisearch-http/src/extractors/authentication/mod.rs index 4107a6194..cd7a43114 100644 --- a/meilisearch-http/src/extractors/authentication/mod.rs +++ b/meilisearch-http/src/extractors/authentication/mod.rs @@ -33,11 +33,7 @@ impl GuardedData { { match Self::authenticate(auth, token, index).await? { Some(filters) => match data { - Some(data) => Ok(Self { - data, - filters, - _marker: PhantomData, - }), + Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, None => Err(AuthenticationError::InvalidToken.into()), @@ -52,12 +48,7 @@ impl GuardedData { match Self::authenticate(auth, String::new(), None).await? { Some(filters) => match data { - Some(data) => Ok(Self { - data, - filters, - _marker: PhantomData, - }), - + Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), @@ -133,14 +124,14 @@ pub trait Policy { pub mod policies { use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation}; + use meilisearch_auth::{AuthController, AuthFilter, SearchRules}; + // reexport actions in policies in order to be used in routes configuration. + pub use meilisearch_types::keys::{actions, Action}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use uuid::Uuid; use crate::extractors::authentication::Policy; - use meilisearch_auth::{Action, AuthController, AuthFilter, SearchRules}; - // reexport actions in policies in order to be used in routes configuration. - pub use meilisearch_auth::actions; fn tenant_token_validation() -> Validation { let mut validation = Validation::default(); @@ -178,10 +169,7 @@ pub mod policies { // authenticate if token is the master key. // master key can only have access to keys routes. // if master key is None only keys routes are inaccessible. 
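The comments above describe the first short-circuit of the tenant-token policy: with no master key configured, everything except the keys routes is allowed, and with one configured the presented token must equal the master key. Below is a simplified, self-contained mirror of that single predicate; the `u8` action constants and `master_key_allows` are illustrative stand-ins, and the real policy goes on to validate tenant tokens and API keys after this check.

    // Stand-ins for the `actions::*` constants re-exported from meilisearch_types.
    const ACTION_SEARCH: u8 = 0;
    const ACTION_KEYS_GET: u8 = 1;

    fn is_keys_action(action: u8) -> bool {
        action == ACTION_KEYS_GET
    }

    // No master key: allow everything except the keys routes without further checks.
    // Master key set: the presented token must be exactly the master key.
    fn master_key_allows(master_key: Option<&str>, token: &str, action: u8) -> bool {
        master_key.map_or_else(|| !is_keys_action(action), |mk| mk == token)
    }

    fn main() {
        assert!(master_key_allows(None, "any-token", ACTION_SEARCH));
        assert!(!master_key_allows(None, "any-token", ACTION_KEYS_GET));
        assert!(master_key_allows(Some("secret"), "secret", ACTION_KEYS_GET));
        assert!(!master_key_allows(Some("secret"), "wrong", ACTION_SEARCH));
    }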
- if auth - .get_master_key() - .map_or_else(|| !is_keys_action(A), |mk| mk == token) - { + if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) { return Some(AuthFilter::default()); } @@ -239,9 +227,7 @@ pub mod policies { } } - return auth - .get_key_filters(uid, Some(data.claims.search_rules)) - .ok(); + return auth.get_key_filters(uid, Some(data.claims.search_rules)).ok(); } None diff --git a/meilisearch-http/src/extractors/payload.rs b/meilisearch-http/src/extractors/payload.rs index f16fdd67b..0ccebe8f9 100644 --- a/meilisearch-http/src/extractors/payload.rs +++ b/meilisearch-http/src/extractors/payload.rs @@ -2,11 +2,12 @@ use std::pin::Pin; use std::task::{Context, Poll}; use actix_http::encoding::Decoder as Decompress; -use actix_web::error::PayloadError; use actix_web::{dev, web, FromRequest, HttpRequest}; use futures::future::{ready, Ready}; use futures::Stream; +use crate::error::MeilisearchHttpError; + pub struct Payload { payload: Decompress, limit: usize, @@ -29,7 +30,7 @@ impl Default for PayloadConfig { } impl FromRequest for Payload { - type Error = PayloadError; + type Error = MeilisearchHttpError; type Future = Ready>; @@ -47,7 +48,7 @@ impl FromRequest for Payload { } impl Stream for Payload { - type Item = Result; + type Item = Result; #[inline] fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { @@ -58,11 +59,11 @@ impl Stream for Payload { self.limit = new_limit; Poll::Ready(Some(Ok(bytes))) } - None => Poll::Ready(Some(Err(PayloadError::Overflow))), + None => Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge))), }, - x => Poll::Ready(Some(x)), + x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))), }, - otherwise => otherwise, + otherwise => otherwise.map(|o| o.map(|o| o.map_err(MeilisearchHttpError::from))), } } } diff --git a/meilisearch-http/src/extractors/sequential_extractor.rs b/meilisearch-http/src/extractors/sequential_extractor.rs index d6cee6083..c04210616 100644 --- a/meilisearch-http/src/extractors/sequential_extractor.rs +++ b/meilisearch-http/src/extractors/sequential_extractor.rs @@ -1,7 +1,10 @@ #![allow(non_snake_case)] -use std::{future::Future, pin::Pin, task::Poll}; +use std::future::Future; +use std::pin::Pin; +use std::task::Poll; -use actix_web::{dev::Payload, FromRequest, Handler, HttpRequest}; +use actix_web::dev::Payload; +use actix_web::{FromRequest, Handler, HttpRequest}; use pin_project_lite::pin_project; /// `SeqHandler` is an actix `Handler` that enforces that extractors errors are returned in the diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 1711fe7ba..9a3ce857e 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -2,86 +2,337 @@ #[macro_use] pub mod error; pub mod analytics; -pub mod task; #[macro_use] pub mod extractors; pub mod option; pub mod routes; +pub mod search; #[cfg(feature = "metrics")] pub mod metrics; #[cfg(feature = "metrics")] pub mod route_metrics; -use std::sync::{atomic::AtomicBool, Arc}; +use std::fs::File; +use std::io::{BufReader, BufWriter}; +use std::path::Path; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; +use std::thread; use std::time::Duration; -use crate::error::MeilisearchHttpError; +use actix_cors::Cors; +use actix_http::body::MessageBody; +use actix_web::dev::{ServiceFactory, ServiceResponse}; use actix_web::error::JsonPayloadError; +use actix_web::web::Data; +use actix_web::{middleware, web, HttpRequest}; use analytics::Analytics; +use anyhow::bail; use 
error::PayloadError; +use extractors::payload::PayloadConfig; use http::header::CONTENT_TYPE; +use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; +use log::error; +use meilisearch_auth::AuthController; +use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; +use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; +use meilisearch_types::settings::apply_settings_to_builder; +use meilisearch_types::tasks::KindWithContent; +use meilisearch_types::versioning::{check_version_file, create_version_file}; +use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; -use actix_web::{web, HttpRequest}; - -use extractors::payload::PayloadConfig; -use meilisearch_auth::AuthController; -use meilisearch_lib::MeiliSearch; +use crate::error::MeilisearchHttpError; pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false); -pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result { - let mut meilisearch = MeiliSearch::builder(); +/// Check if a db is empty. It does not provide any information on the +/// validity of the data in it. +/// We consider a database as non empty when it's a non empty directory. +fn is_empty_db(db_path: impl AsRef) -> bool { + let db_path = db_path.as_ref(); - // disable autobatching? - AUTOBATCHING_ENABLED.store( - !opt.scheduler_options.disable_auto_batching, - std::sync::atomic::Ordering::Relaxed, - ); - - meilisearch - .set_max_index_size(opt.max_index_size.get_bytes() as usize) - .set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize) - // snapshot - .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) - .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) - .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) - .set_snapshot_dir(opt.snapshot_dir.clone()) - // dump - .set_ignore_missing_dump(opt.ignore_missing_dump) - .set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists) - .set_dump_dst(opt.dumps_dir.clone()); - - if let Some(ref path) = opt.import_snapshot { - meilisearch.set_import_snapshot(path.clone()); + if !db_path.exists() { + true + // if we encounter an error or if the db is a file we consider the db non empty + } else if let Ok(dir) = db_path.read_dir() { + dir.count() == 0 + } else { + true } +} - if let Some(ref path) = opt.import_dump { - meilisearch.set_dump_src(path.clone()); - } +pub fn create_app( + index_scheduler: Data, + auth_controller: AuthController, + opt: Opt, + analytics: Arc, + enable_dashboard: bool, +) -> actix_web::App< + impl ServiceFactory< + actix_web::dev::ServiceRequest, + Config = (), + Response = ServiceResponse, + Error = actix_web::Error, + InitError = (), + >, +> { + let app = actix_web::App::new() + .configure(|s| { + configure_data( + s, + index_scheduler.clone(), + auth_controller.clone(), + &opt, + analytics.clone(), + ) + }) + .configure(routes::configure) + .configure(|s| dashboard(s, enable_dashboard)); + #[cfg(feature = "metrics")] + let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route)); - if opt.schedule_snapshot { - meilisearch.set_schedule_snapshot(); - } - - meilisearch.build( - opt.db_path.clone(), - opt.indexer_options.clone(), - opt.scheduler_options.clone(), + #[cfg(feature = "metrics")] + let app = app.wrap(Condition::new(opt.enable_metrics_route, route_metrics::RouteMetrics)); + app.wrap( + Cors::default() + .send_wildcard() + .allow_any_header() + .allow_any_origin() + .allow_any_method() + .max_age(86_400), // 24h ) + 
.wrap(middleware::Logger::default()) + .wrap(middleware::Compress::default()) + .wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim)) +} + +// TODO: TAMO: Finish setting up things +pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, AuthController)> { + // we don't want to create anything in the data.ms yet, thus we + // wrap our two builders in a closure that'll be executed later. + let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key); + let index_scheduler_builder = || { + IndexScheduler::new(IndexSchedulerOptions { + version_file_path: opt.db_path.join(VERSION_FILE_NAME), + auth_path: opt.db_path.join("auth"), + tasks_path: opt.db_path.join("tasks"), + update_file_path: opt.db_path.join("update_files"), + indexes_path: opt.db_path.join("indexes"), + snapshots_path: opt.snapshot_dir.clone(), + dumps_path: opt.dumps_dir.clone(), + task_db_size: opt.max_task_db_size.get_bytes() as usize, + index_size: opt.max_index_size.get_bytes() as usize, + indexer_config: (&opt.indexer_options).try_into()?, + autobatching_enabled: !opt.scheduler_options.disable_auto_batching, + }) + }; + let meilisearch_builder = || -> anyhow::Result<_> { + // if anything wrong happens we delete the `data.ms` entirely. + match ( + index_scheduler_builder().map_err(anyhow::Error::from), + auth_controller_builder().map_err(anyhow::Error::from), + create_version_file(&opt.db_path).map_err(anyhow::Error::from), + ) { + (Ok(i), Ok(a), Ok(())) => Ok((i, a)), + (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { + std::fs::remove_dir_all(&opt.db_path)?; + Err(e) + } + } + }; + + let empty_db = is_empty_db(&opt.db_path); + let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot { + let snapshot_path_exists = snapshot_path.exists(); + if empty_db && snapshot_path_exists { + match compression::from_tar_gz(snapshot_path, &opt.db_path) { + Ok(()) => meilisearch_builder()?, + Err(e) => { + std::fs::remove_dir_all(&opt.db_path)?; + return Err(e); + } + } + } else if !empty_db && !opt.ignore_snapshot_if_db_exists { + bail!( + "database already exists at {:?}, try to delete it or rename it", + opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) + ) + } else if !snapshot_path_exists && !opt.ignore_missing_snapshot { + bail!("snapshot doesn't exist at {}", snapshot_path.display()) + } else { + meilisearch_builder()? + } + } else if let Some(ref path) = opt.import_dump { + let src_path_exists = path.exists(); + if empty_db && src_path_exists { + let (mut index_scheduler, mut auth_controller) = meilisearch_builder()?; + match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { + Ok(()) => (index_scheduler, auth_controller), + Err(e) => { + std::fs::remove_dir_all(&opt.db_path)?; + return Err(e); + } + } + } else if !empty_db && !opt.ignore_dump_if_db_exists { + bail!( + "database already exists at {:?}, try to delete it or rename it", + opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) + ) + } else if !src_path_exists && !opt.ignore_missing_dump { + bail!("dump doesn't exist at {:?}", path) + } else { + let (mut index_scheduler, mut auth_controller) = meilisearch_builder()?; + match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { + Ok(()) => (index_scheduler, auth_controller), + Err(e) => { + std::fs::remove_dir_all(&opt.db_path)?; + return Err(e); + } + } + } + } else { + if !empty_db { + check_version_file(&opt.db_path)?; + } + meilisearch_builder()? 
+ }; + + // We create a loop in a thread that registers snapshotCreation tasks + let index_scheduler = Arc::new(index_scheduler); + if opt.schedule_snapshot { + let snapshot_delay = Duration::from_secs(opt.snapshot_interval_sec); + let index_scheduler = index_scheduler.clone(); + thread::spawn(move || loop { + thread::sleep(snapshot_delay); + if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) { + error!("Error while registering snapshot: {}", e); + } + }); + } + + Ok((index_scheduler, auth_controller)) +} + +fn import_dump( + db_path: &Path, + dump_path: &Path, + index_scheduler: &mut IndexScheduler, + auth: &mut AuthController, +) -> Result<(), anyhow::Error> { + let reader = File::open(dump_path)?; + let mut dump_reader = dump::DumpReader::open(reader)?; + + if let Some(date) = dump_reader.date() { + log::info!( + "Importing a dump of meilisearch `{:?}` from the {}", + dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + date + ); + } else { + log::info!( + "Importing a dump of meilisearch `{:?}`", + dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + ); + } + + let instance_uid = dump_reader.instance_uid()?; + + // 1. Import the instance-uid. + if let Some(ref instance_uid) = instance_uid { + // we don't want to panic if there is an error with the instance-uid. + let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes()); + }; + + // 2. Import the `Key`s. + let mut keys = Vec::new(); + auth.raw_delete_all_keys()?; + for key in dump_reader.keys()? { + let key = key?; + auth.raw_insert_key(key.clone())?; + keys.push(key); + } + + let indexer_config = index_scheduler.indexer_config(); + + // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might + // try to process tasks while we're trying to import the indexes. + + // 3. Import the indexes. + for index_reader in dump_reader.indexes()? { + let mut index_reader = index_reader?; + let metadata = index_reader.metadata(); + log::info!("Importing index `{}`.", metadata.uid); + let index = index_scheduler.create_raw_index(&metadata.uid)?; + + let mut wtxn = index.write_txn()?; + + let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config); + // 3.1 Import the primary key if there is one. + if let Some(ref primary_key) = metadata.primary_key { + builder.set_primary_key(primary_key.to_string()); + } + + // 3.2 Import the settings. + log::info!("Importing the settings."); + let settings = index_reader.settings()?; + apply_settings_to_builder(&settings, &mut builder); + builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?; + + // 3.3 Import the documents. + // 3.3.1 We need to recreate the grenad+obkv format accepted by the index. + log::info!("Importing the documents."); + let file = tempfile::tempfile()?; + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); + for document in index_reader.documents()? { + builder.append_json_object(&document?)?; + } + + // This flush the content of the batch builder. + let file = builder.into_inner()?.into_inner()?; + + // 3.3.2 We feed it to the milli index. 
+ let reader = BufReader::new(file); + let reader = DocumentsBatchReader::from_reader(reader)?; + + let builder = milli::update::IndexDocuments::new( + &mut wtxn, + &index, + indexer_config, + IndexDocumentsConfig { + update_method: IndexDocumentsMethod::ReplaceDocuments, + ..Default::default() + }, + |indexing_step| log::debug!("update: {:?}", indexing_step), + || false, + )?; + + let (builder, user_result) = builder.add_documents(reader)?; + log::info!("{} documents found.", user_result?); + builder.execute()?; + wtxn.commit()?; + log::info!("All documents successfully imported."); + } + + // 4. Import the tasks. + for ret in dump_reader.tasks()? { + let (task, file) = ret?; + index_scheduler.register_dumped_task(task, file)?; + } + Ok(()) } pub fn configure_data( config: &mut web::ServiceConfig, - data: MeiliSearch, + index_scheduler: Data, auth: AuthController, opt: &Opt, analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config - .app_data(data) + .app_data(index_scheduler) .app_data(auth) .app_data(web::Data::from(analytics)) .app_data( @@ -121,9 +372,7 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) { let generated = generated::generate(); // Generate routes for mini-dashboard assets for (path, resource) in generated.into_iter() { - let Resource { - mime_type, data, .. - } = resource; + let Resource { mime_type, data, .. } = resource; // Redirect index.html to / if path == "index.html" { config.service(web::resource("/").route(web::get().to(move || async move { @@ -153,52 +402,3 @@ pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_r ); } } - -#[macro_export] -macro_rules! create_app { - ($data:expr, $auth:expr, $enable_frontend:expr, $opt:expr, $analytics:expr) => {{ - use actix_cors::Cors; - use actix_web::dev::Service; - use actix_web::middleware::Condition; - use actix_web::middleware::TrailingSlash; - use actix_web::App; - use actix_web::{middleware, web}; - use meilisearch_http::error::MeilisearchHttpError; - use meilisearch_http::routes; - use meilisearch_http::{configure_data, dashboard}; - #[cfg(feature = "metrics")] - use meilisearch_http::{configure_metrics_route, metrics, route_metrics}; - use meilisearch_types::error::ResponseError; - - let app = App::new() - .configure(|s| configure_data(s, $data.clone(), $auth.clone(), &$opt, $analytics)) - .configure(routes::configure) - .configure(|s| dashboard(s, $enable_frontend)); - - #[cfg(feature = "metrics")] - let app = app.configure(|s| configure_metrics_route(s, $opt.enable_metrics_route)); - - let app = app - .wrap( - Cors::default() - .send_wildcard() - .allow_any_header() - .allow_any_origin() - .allow_any_method() - .max_age(86_400), // 24h - ) - .wrap(middleware::Logger::default()) - .wrap(middleware::Compress::default()) - .wrap(middleware::NormalizePath::new( - middleware::TrailingSlash::Trim, - )); - - #[cfg(feature = "metrics")] - let app = app.wrap(Condition::new( - $opt.enable_metrics_route, - route_metrics::RouteMetrics, - )); - - app - }}; -} diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index b6f92ae28..087b65247 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -3,12 +3,12 @@ use std::path::PathBuf; use std::sync::Arc; use actix_web::http::KeepAlive; +use actix_web::web::Data; use actix_web::HttpServer; +use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; -use meilisearch_http::analytics; use 
meilisearch_http::analytics::Analytics; -use meilisearch_http::{create_app, setup_meilisearch, Opt}; -use meilisearch_lib::MeiliSearch; +use meilisearch_http::{analytics, create_app, setup_meilisearch, Opt}; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -45,66 +45,64 @@ async fn main() -> anyhow::Result<()> { _ => unreachable!(), } - let meilisearch = setup_meilisearch(&opt)?; - - let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?; + let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?; #[cfg(all(not(debug_assertions), feature = "analytics"))] - let (analytics, user) = if !opt.no_analytics { - analytics::SegmentAnalytics::new(&opt, &meilisearch).await + let analytics = if !opt.no_analytics { + analytics::SegmentAnalytics::new(&opt, index_scheduler.clone()).await } else { analytics::MockAnalytics::new(&opt) }; #[cfg(any(debug_assertions, not(feature = "analytics")))] - let (analytics, user) = analytics::MockAnalytics::new(&opt); + let analytics = analytics::MockAnalytics::new(&opt); - print_launch_resume(&opt, &user, config_read_from); + print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(meilisearch, auth_controller, opt, analytics).await?; + run_http(index_scheduler, auth_controller, opt, analytics).await?; Ok(()) } async fn run_http( - data: MeiliSearch, + index_scheduler: Arc, auth_controller: AuthController, opt: Opt, analytics: Arc, ) -> anyhow::Result<()> { - let _enable_dashboard = &opt.env == "development"; + let enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); + let index_scheduler = Data::from(index_scheduler); + let http_server = HttpServer::new(move || { - create_app!( - data, - auth_controller, - _enable_dashboard, - opt_clone, - analytics.clone() + create_app( + index_scheduler.clone(), + auth_controller.clone(), + opt.clone(), + analytics.clone(), + enable_dashboard, ) }) // Disable signals allows the server to terminate immediately when a user enter CTRL-C .disable_signals() .keep_alive(KeepAlive::Os); - if let Some(config) = opt.get_ssl_config()? { - http_server - .bind_rustls(opt.http_addr, config)? - .run() - .await?; + if let Some(config) = opt_clone.get_ssl_config()? { + http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?; } else { - http_server.bind(&opt.http_addr)?.run().await?; + http_server.bind(&opt_clone.http_addr)?.run().await?; } Ok(()) } -pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option) { +pub fn print_launch_resume( + opt: &Opt, + analytics: Arc, + config_read_from: Option, +) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); - let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { - "https" - } else { - "http" - }; + let protocol = + if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" }; let ascii_name = r#" 888b d888 d8b 888 d8b 888 8888b d8888 Y8P 888 Y8P 888 @@ -129,10 +127,7 @@ pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option, + + /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch + /// uses no more than two thirds of available memory. + #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] + #[serde(default)] + pub max_indexing_memory: MaxMemory, + + /// Sets the maximum number of threads Meilisearch can use during indexation. 
By default, the + /// indexer avoids using more than half of a machine's total processing units. This ensures + /// Meilisearch is always ready to perform searches, even while you are updating an index. + #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] + #[serde(default)] + pub max_indexing_threads: MaxThreads, +} + +impl IndexerOpts { + /// Exports the values to their corresponding env vars if they are not set. + pub fn export_to_env(self) { + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + log_every_n: _, + max_nb_chunks: _, + } = self; + if let Some(max_indexing_memory) = max_indexing_memory.0 { + export_to_env_if_not_present( + MEILI_MAX_INDEXING_MEMORY, + max_indexing_memory.to_string(), + ); + } + export_to_env_if_not_present( + MEILI_MAX_INDEXING_THREADS, + max_indexing_threads.0.to_string(), + ); + } +} + +#[derive(Debug, Clone, Parser, Default, Deserialize, Serialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] +pub struct SchedulerConfig { + /// Deactivates auto-batching when provided. + #[clap(long, env = DISABLE_AUTO_BATCHING)] + #[serde(default)] + pub disable_auto_batching: bool, +} + +impl SchedulerConfig { + pub fn export_to_env(self) { + let SchedulerConfig { disable_auto_batching } = self; + export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); + } +} + +impl TryFrom<&IndexerOpts> for IndexerConfig { + type Error = anyhow::Error; + + fn try_from(other: &IndexerOpts) -> Result { + let thread_pool = + rayon::ThreadPoolBuilder::new().num_threads(*other.max_indexing_threads).build()?; + + Ok(Self { + log_every_n: Some(other.log_every_n), + max_nb_chunks: other.max_nb_chunks, + max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), + thread_pool: Some(thread_pool), + max_positions_per_attributes: None, + ..Default::default() + }) + } +} + +impl Default for IndexerOpts { + fn default() -> Self { + Self { + log_every_n: 100_000, + max_nb_chunks: None, + max_indexing_memory: MaxMemory::default(), + max_indexing_threads: MaxThreads::default(), + } + } +} + +/// A type used to detect the max memory available and use 2/3 of it. +#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +pub struct MaxMemory(Option); + +impl FromStr for MaxMemory { + type Err = ByteError; + + fn from_str(s: &str) -> Result { + Byte::from_str(s).map(Some).map(MaxMemory) + } +} + +impl Default for MaxMemory { + fn default() -> MaxMemory { + MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes)) + } +} + +impl fmt::Display for MaxMemory { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), + None => f.write_str("unknown"), + } + } +} + +impl Deref for MaxMemory { + type Target = Option; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl MaxMemory { + pub fn unlimited() -> Self { + Self(None) + } +} + +/// Returns the total amount of bytes available or `None` if this system isn't supported. 
+fn total_memory_bytes() -> Option { + if System::IS_SUPPORTED { + let memory_kind = RefreshKind::new().with_memory(); + let mut system = System::new_with_specifics(memory_kind); + system.refresh_memory(); + Some(system.total_memory() * 1024) // KiB into bytes + } else { + None + } +} + +#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +pub struct MaxThreads(usize); + +impl FromStr for MaxThreads { + type Err = ParseIntError; + + fn from_str(s: &str) -> Result { + usize::from_str(s).map(Self) + } +} + +impl Default for MaxThreads { + fn default() -> Self { + MaxThreads(num_cpus::get() / 2) + } +} + +impl fmt::Display for MaxThreads { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Deref for MaxThreads { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + fn load_certs(filename: PathBuf) -> anyhow::Result> { let certfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; @@ -485,6 +668,17 @@ fn load_ocsp(filename: &Option) -> anyhow::Result> { Ok(ret) } +/// Checks if the key is defined in the environment variables. +/// If not, inserts it with the given value. +pub fn export_to_env_if_not_present(key: &str, value: T) +where + T: AsRef, +{ + if let Err(VarError::NotPresent) = std::env::var(key) { + std::env::set_var(key, value); + } +} + /// Functions used to get default value for `Opt` fields, needs to be function because of serde's default attribute. fn default_db_path() -> PathBuf { @@ -527,6 +721,10 @@ fn default_log_level() -> String { DEFAULT_LOG_LEVEL.to_string() } +fn default_log_every_n() -> usize { + DEFAULT_LOG_EVERY_N +} + #[cfg(test)] mod test { @@ -538,6 +736,7 @@ mod test { } #[test] + #[ignore] fn test_meilli_config_file_path_valid() { temp_env::with_vars( vec![("MEILI_CONFIG_FILE_PATH", Some("../config.toml"))], // Relative path in meilisearch_http package @@ -548,22 +747,20 @@ mod test { } #[test] + #[ignore] fn test_meilli_config_file_path_invalid() { - temp_env::with_vars( - vec![("MEILI_CONFIG_FILE_PATH", Some("../configgg.toml"))], - || { - let possible_error_messages = [ + temp_env::with_vars(vec![("MEILI_CONFIG_FILE_PATH", Some("../configgg.toml"))], || { + let possible_error_messages = [ "unable to open or read the \"../configgg.toml\" configuration file: No such file or directory (os error 2).", "unable to open or read the \"../configgg.toml\" configuration file: The system cannot find the file specified. 
(os error 2).", // Windows ]; - let error_message = Opt::try_build().unwrap_err().to_string(); - assert!( - possible_error_messages.contains(&error_message.as_str()), - "Expected onf of {:?}, got {:?}.", - possible_error_messages, - error_message - ); - }, - ); + let error_message = Opt::try_build().unwrap_err().to_string(); + assert!( + possible_error_messages.contains(&error_message.as_str()), + "Expected onf of {:?}, got {:?}.", + possible_error_messages, + error_message + ); + }); } } diff --git a/meilisearch-http/src/route_metrics.rs b/meilisearch-http/src/route_metrics.rs index b2b5f4abc..c1d35cf8d 100644 --- a/meilisearch-http/src/route_metrics.rs +++ b/meilisearch-http/src/route_metrics.rs @@ -1,17 +1,13 @@ use std::future::{ready, Ready}; +use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform}; use actix_web::http::header; -use actix_web::HttpResponse; -use actix_web::{ - dev::{self, Service, ServiceRequest, ServiceResponse, Transform}, - Error, -}; +use actix_web::{Error, HttpResponse}; use futures_util::future::LocalBoxFuture; use meilisearch_auth::actions; use meilisearch_lib::MeiliSearch; use meilisearch_types::error::ResponseError; -use prometheus::HistogramTimer; -use prometheus::{Encoder, TextEncoder}; +use prometheus::{Encoder, HistogramTimer, TextEncoder}; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::GuardedData; @@ -33,15 +29,11 @@ pub async fn get_metrics( let encoder = TextEncoder::new(); let mut buffer = vec![]; - encoder - .encode(&prometheus::gather(), &mut buffer) - .expect("Failed to encode metrics"); + encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics"); let response = String::from_utf8(buffer).expect("Failed to convert bytes to string"); - Ok(HttpResponse::Ok() - .insert_header(header::ContentType(mime::TEXT_PLAIN)) - .body(response)) + Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response)) } pub struct RouteMetrics; diff --git a/meilisearch-http/src/routes/api_key.rs b/meilisearch-http/src/routes/api_key.rs index 7605fa644..b53fd3895 100644 --- a/meilisearch-http/src/routes/api_key.rs +++ b/meilisearch-http/src/routes/api_key.rs @@ -1,18 +1,18 @@ use std::str; use actix_web::{web, HttpRequest, HttpResponse}; +use meilisearch_auth::error::AuthControllerError; +use meilisearch_auth::AuthController; +use meilisearch_types::error::{Code, ResponseError}; +use meilisearch_types::keys::{Action, Key}; use serde::{Deserialize, Serialize}; use serde_json::Value; use time::OffsetDateTime; use uuid::Uuid; -use meilisearch_auth::{error::AuthControllerError, Action, AuthController, Key}; -use meilisearch_types::error::{Code, ResponseError}; - -use crate::extractors::{ - authentication::{policies::*, GuardedData}, - sequential_extractor::SeqHandler, -}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::Pagination; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -51,10 +51,8 @@ pub async fn list_api_keys( ) -> Result { let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> { let keys = auth_controller.list_keys()?; - let page_view = paginate.auto_paginate_sized( - keys.into_iter() - .map(|k| KeyView::from_key(k, &auth_controller)), - ); + let page_view = paginate + .auto_paginate_sized(keys.into_iter().map(|k| KeyView::from_key(k, &auth_controller))); 
Ok(page_view) }) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 4d9106ee0..1148cdcb6 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -1,27 +1,46 @@ +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::IndexScheduler; use log::debug; -use meilisearch_lib::MeiliSearch; +use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; +use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use time::macros::format_description; +use time::OffsetDateTime; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::task::SummarizedTaskView; +use crate::routes::SummarizedTaskView; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } pub async fn create_dump( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, + auth_controller: GuardedData, AuthController>, req: HttpRequest, analytics: web::Data, ) -> Result { analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); - let res: SummarizedTaskView = meilisearch.register_dump_task().await?.into(); + let dump_uid = OffsetDateTime::now_utc() + .format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )) + .unwrap(); - debug!("returns: {:?}", res); - Ok(HttpResponse::Accepted().json(res)) + let task = KindWithContent::DumpCreation { + keys: auth_controller.list_keys()?, + instance_uid: analytics.instance_uid().cloned(), + dump_uid, + }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + + debug!("returns: {:?}", task); + Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index 3e3db86b2..0cdb11e8a 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -1,50 +1,38 @@ -use actix_web::error::PayloadError; +use std::io::{Cursor, ErrorKind}; + use actix_web::http::header::CONTENT_TYPE; -use actix_web::web::Bytes; -use actix_web::HttpMessage; -use actix_web::{web, HttpRequest, HttpResponse}; +use actix_web::web::Data; +use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use bstr::ByteSlice; -use futures::{Stream, StreamExt}; +use futures::StreamExt; +use index_scheduler::IndexScheduler; use log::debug; -use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update}; -use meilisearch_lib::milli::update::IndexDocumentsMethod; -use meilisearch_lib::MeiliSearch; +use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::error::ResponseError; +use meilisearch_types::heed::RoTxn; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::star_or::StarOr; +use meilisearch_types::tasks::KindWithContent; +use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; use serde::Deserialize; use serde_cs::vec::CS; use serde_json::Value; -use 
tokio::sync::mpsc; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{fold_star_or, PaginationView}; -use crate::task::SummarizedTaskView; +use crate::routes::{fold_star_or, PaginationView, SummarizedTaskView}; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { - vec![ - "application/json".to_string(), - "application/x-ndjson".to_string(), - "text/csv".to_string(), - ] + vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] }); -/// This is required because Payload is not Sync nor Send -fn payload_to_stream(mut payload: Payload) -> impl Stream> { - let (snd, recv) = mpsc::channel(1); - tokio::task::spawn_local(async move { - while let Some(data) = payload.next().await { - let _ = snd.send(data).await; - } - }); - tokio_stream::wrappers::ReceiverStream::new(recv) -} - /// Extracts the mime type from the content type and return /// a meilisearch error if anything bad happen. fn extract_mime_type(req: &HttpRequest) -> Result, MeilisearchHttpError> { @@ -56,9 +44,7 @@ fn extract_mime_type(req: &HttpRequest) -> Result, MeilisearchHttpE content_type.as_bytes().as_bstr().to_string(), ACCEPTED_CONTENT_TYPE.clone(), )), - None => Err(MeilisearchHttpError::MissingContentType( - ACCEPTED_CONTENT_TYPE.clone(), - )), + None => Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())), }, } } @@ -93,32 +79,27 @@ pub struct GetDocument { } pub async fn get_document( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, params: web::Query, ) -> Result { - let index = path.index_uid.clone(); - let id = path.document_id.clone(); let GetDocument { fields } = params.into_inner(); let attributes_to_retrieve = fields.and_then(fold_star_or); - let document = meilisearch - .document(index, id, attributes_to_retrieve) - .await?; + let index = index_scheduler.index(&path.index_uid)?; + let document = retrieve_document(&index, &path.document_id, attributes_to_retrieve)?; debug!("returns: {:?}", document); Ok(HttpResponse::Ok().json(document)) } pub async fn delete_document( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, ) -> Result { - let DocumentParam { - document_id, - index_uid, - } = path.into_inner(); - let update = Update::DeleteDocuments(vec![document_id]); - let task: SummarizedTaskView = meilisearch.register_update(index_uid, update).await?.into(); + let DocumentParam { document_id, index_uid } = path.into_inner(); + let task = KindWithContent::DocumentDeletion { index_uid, documents_ids: vec![document_id] }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } @@ -134,21 +115,16 @@ pub struct BrowseQuery { } pub async fn get_all_documents( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Query, ) -> Result { debug!("called with params: {:?}", params); - let BrowseQuery { - limit, - offset, - fields, - } = params.into_inner(); + let BrowseQuery { limit, offset, fields } = params.into_inner(); let attributes_to_retrieve 
= fields.and_then(fold_star_or); - let (total, documents) = meilisearch - .documents(path.into_inner(), offset, limit, attributes_to_retrieve) - .await?; + let index = index_scheduler.index(&index_uid)?; + let (total, documents) = retrieve_documents(&index, offset, limit, attributes_to_retrieve)?; let ret = PaginationView::new(offset, limit, total as usize, documents); @@ -163,8 +139,8 @@ pub struct UpdateDocumentsQuery { } pub async fn add_documents( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Query, body: Payload, req: HttpRequest, @@ -172,19 +148,14 @@ pub async fn add_documents( ) -> Result { debug!("called with params: {:?}", params); let params = params.into_inner(); - let index_uid = path.into_inner(); - analytics.add_documents( - ¶ms, - meilisearch.get_index(index_uid.clone()).await.is_err(), - &req, - ); + analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); - let allow_index_creation = meilisearch.filters().allow_index_creation; + let allow_index_creation = index_scheduler.filters().allow_index_creation; let task = document_addition( extract_mime_type(&req)?, - meilisearch, - index_uid, + index_scheduler, + index_uid.into_inner(), params.primary_key, body, IndexDocumentsMethod::ReplaceDocuments, @@ -196,7 +167,7 @@ pub async fn add_documents( } pub async fn update_documents( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, params: web::Query, body: Payload, @@ -206,16 +177,12 @@ pub async fn update_documents( debug!("called with params: {:?}", params); let index_uid = path.into_inner(); - analytics.update_documents( - ¶ms, - meilisearch.get_index(index_uid.clone()).await.is_err(), - &req, - ); + analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); - let allow_index_creation = meilisearch.filters().allow_index_creation; + let allow_index_creation = index_scheduler.filters().allow_index_creation; let task = document_addition( extract_mime_type(&req)?, - meilisearch, + index_scheduler, index_uid, params.into_inner().primary_key, body, @@ -229,83 +196,202 @@ pub async fn update_documents( async fn document_addition( mime_type: Option, - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: String, primary_key: Option, - body: Payload, + mut body: Payload, method: IndexDocumentsMethod, allow_index_creation: bool, -) -> Result { - let format = match mime_type - .as_ref() - .map(|m| (m.type_().as_str(), m.subtype().as_str())) - { - Some(("application", "json")) => DocumentAdditionFormat::Json, - Some(("application", "x-ndjson")) => DocumentAdditionFormat::Ndjson, - Some(("text", "csv")) => DocumentAdditionFormat::Csv, +) -> Result { + let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) { + Some(("application", "json")) => PayloadType::Json, + Some(("application", "x-ndjson")) => PayloadType::Ndjson, + Some(("text", "csv")) => PayloadType::Csv, Some((type_, subtype)) => { return Err(MeilisearchHttpError::InvalidContentType( format!("{}/{}", type_, subtype), ACCEPTED_CONTENT_TYPE.clone(), - ) - .into()) + )) } None => { - return Err( - MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()).into(), - ) + return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())) } }; - let update = Update::DocumentAddition { - payload: Box::new(payload_to_stream(body)), - primary_key, - method, - 
format, - allow_index_creation, + // is your indexUid valid? + let index_uid = IndexUid::try_from(index_uid)?.into_inner(); + + let (uuid, mut update_file) = index_scheduler.create_update_file()?; + + // TODO: this can be slow, maybe we should spawn a thread? But the payload isn't Send+Sync :weary: + // push the entire stream into a `Vec`. + // If someone sends us a never ending stream we're going to block the thread. + // TODO: Maybe we should write it to a file to reduce the RAM consumption + // and then reread it to convert it to obkv? + let mut buffer = Vec::new(); + while let Some(bytes) = body.next().await { + buffer.extend_from_slice(&bytes?); + } + if buffer.is_empty() { + return Err(MeilisearchHttpError::MissingPayload(format)); + } + let reader = Cursor::new(buffer); + + let documents_count = + tokio::task::spawn_blocking(move || -> Result<_, MeilisearchHttpError> { + let documents_count = match format { + PayloadType::Json => read_json(reader, update_file.as_file_mut())?, + PayloadType::Csv => read_csv(reader, update_file.as_file_mut())?, + PayloadType::Ndjson => read_ndjson(reader, update_file.as_file_mut())?, + }; + // we NEED to persist the file here because we moved the `udpate_file` in another task. + update_file.persist()?; + Ok(documents_count) + }) + .await; + + let documents_count = match documents_count { + Ok(Ok(documents_count)) => documents_count as u64, + // in this case the file has not possibly be persisted. + Ok(Err(e)) => return Err(e), + Err(e) => { + // Here the file MAY have been persisted or not. + // We don't know thus we ignore the file not found error. + match index_scheduler.delete_update_file(uuid) { + Ok(()) => (), + Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) + if e.kind() == ErrorKind::NotFound => {} + Err(e) => { + log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}"); + } + } + // We still want to return the original error to the end user. + return Err(e.into()); + } }; - let task = meilisearch.register_update(index_uid, update).await?.into(); + let task = KindWithContent::DocumentAdditionOrUpdate { + method, + content_file: uuid, + documents_count, + primary_key, + allow_index_creation, + index_uid, + }; + + let scheduler = index_scheduler.clone(); + let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? { + Ok(task) => task, + Err(e) => { + index_scheduler.delete_update_file(uuid)?; + return Err(e.into()); + } + }; debug!("returns: {:?}", task); - Ok(task) + Ok(task.into()) } pub async fn delete_documents( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, body: web::Json>, ) -> Result { debug!("called with params: {:?}", body); let ids = body .iter() - .map(|v| { - v.as_str() - .map(String::from) - .unwrap_or_else(|| v.to_string()) - }) + .map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string())) .collect(); - let update = Update::DeleteDocuments(ids); - let task: SummarizedTaskView = meilisearch - .register_update(path.into_inner(), update) - .await? 
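// ---- Editorial sketch (annotation, not part of the patch) ----
// At this point, and in nearly every handler touched by this patch, the old
// `meilisearch.register_update(..).await?` call becomes
// `tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??`:
// registration is synchronous (it writes to the task store), so it is pushed
// onto tokio's blocking pool, and the double `?` unwraps first the JoinError
// of the blocking task, then the scheduler's own Result. The stub types below
// (`SchedulerStub`, `StubError`) are hypothetical stand-ins so the pattern
// compiles on its own; the real types live in the `index_scheduler` crate.
#[derive(Debug)]
struct StubError(String);

impl From<tokio::task::JoinError> for StubError {
    fn from(e: tokio::task::JoinError) -> Self {
        StubError(format!("blocking task failed: {e}"))
    }
}

#[derive(Clone)]
struct SchedulerStub;

impl SchedulerStub {
    // Synchronous registration; in the real scheduler this may block on disk.
    fn register(&self, _task: &str) -> Result<u32, StubError> {
        Ok(42) // pretend 42 is the uid of the newly enqueued task
    }
}

async fn register_off_the_event_loop(scheduler: SchedulerStub) -> Result<u32, StubError> {
    // First `?`: the blocking task panicked or was cancelled (JoinError).
    // Second `?`: `register` itself returned an error.
    let uid = tokio::task::spawn_blocking(move || scheduler.register("documentDeletion")).await??;
    Ok(uid)
}
// ---- end editorial sketch ----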
- .into(); + let task = + KindWithContent::DocumentDeletion { index_uid: path.into_inner(), documents_ids: ids }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn clear_all_documents( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, ) -> Result { - let update = Update::ClearDocuments; - let task: SummarizedTaskView = meilisearch - .register_update(path.into_inner(), update) - .await? - .into(); + let task = KindWithContent::DocumentClear { index_uid: path.into_inner() }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } + +fn all_documents<'a>( + index: &Index, + rtxn: &'a RoTxn, +) -> Result> + 'a, ResponseError> { + let fields_ids_map = index.fields_ids_map(rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + Ok(index.all_documents(rtxn)?.map(move |ret| { + ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> { + Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?) + }) + })) +} + +fn retrieve_documents>( + index: &Index, + offset: usize, + limit: usize, + attributes_to_retrieve: Option>, +) -> Result<(u64, Vec), ResponseError> { + let rtxn = index.read_txn()?; + + let mut documents = Vec::new(); + for document in all_documents(index, &rtxn)?.skip(offset).take(limit) { + let document = match &attributes_to_retrieve { + Some(attributes_to_retrieve) => permissive_json_pointer::select_values( + &document?, + attributes_to_retrieve.iter().map(|s| s.as_ref()), + ), + None => document?, + }; + documents.push(document); + } + + let number_of_documents = index.number_of_documents(&rtxn)?; + Ok((number_of_documents, documents)) +} + +fn retrieve_document>( + index: &Index, + doc_id: &str, + attributes_to_retrieve: Option>, +) -> Result { + let txn = index.read_txn()?; + + let fields_ids_map = index.fields_ids_map(&txn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + let internal_id = index + .external_documents_ids(&txn)? + .get(doc_id.as_bytes()) + .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; + + let document = index + .documents(&txn, std::iter::once(internal_id))? 
+ .into_iter() + .next() + .map(|(_, d)| d) + .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; + + let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?; + let document = match &attributes_to_retrieve { + Some(attributes_to_retrieve) => permissive_json_pointer::select_values( + &document, + attributes_to_retrieve.iter().map(|s| s.as_ref()), + ), + None => document, + }; + + Ok(document) +} diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 3fa0adba8..d370483c6 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -1,18 +1,20 @@ +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::{IndexScheduler, Query}; use log::debug; -use meilisearch_lib::index_controller::Update; -use meilisearch_lib::MeiliSearch; use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::{self, FieldDistribution, Index}; +use meilisearch_types::tasks::{KindWithContent, Status}; use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; +use super::{Pagination, SummarizedTaskView}; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; -use crate::task::SummarizedTaskView; - -use super::Pagination; pub mod documents; pub mod search; @@ -39,19 +41,42 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "camelCase")] +pub struct IndexView { + pub uid: String, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, + pub primary_key: Option, +} + +impl IndexView { + fn new(uid: String, index: &Index) -> Result { + let rtxn = index.read_txn()?; + Ok(IndexView { + uid, + created_at: index.created_at(&rtxn)?, + updated_at: index.updated_at(&rtxn)?, + primary_key: index.primary_key(&rtxn)?.map(String::from), + }) + } +} + pub async fn list_indexes( - data: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, paginate: web::Query, ) -> Result { - let search_rules = &data.filters().search_rules; - let indexes: Vec<_> = data.list_indexes().await?; - let nb_indexes = indexes.len(); - let iter = indexes + let search_rules = &index_scheduler.filters().search_rules; + let indexes: Vec<_> = index_scheduler.indexes()?; + let indexes = indexes .into_iter() - .filter(|i| search_rules.is_index_authorized(&i.uid)); - let ret = paginate - .into_inner() - .auto_paginate_unsized(nb_indexes, iter); + .filter(|(name, _)| search_rules.is_index_authorized(name)) + .map(|(name, index)| IndexView::new(name, &index)) + .collect::, _>>()?; + + let ret = paginate.auto_paginate_sized(indexes.into_iter()); debug!("returns: {:?}", ret); Ok(HttpResponse::Ok().json(ret)) @@ -65,16 +90,15 @@ pub struct IndexCreateRequest { } pub async fn create_index( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, body: web::Json, req: HttpRequest, analytics: web::Data, ) -> Result { - let IndexCreateRequest { - primary_key, uid, .. 
- } = body.into_inner(); + let IndexCreateRequest { primary_key, uid } = body.into_inner(); + let uid = IndexUid::try_from(uid)?.into_inner(); - let allow_index_creation = meilisearch.filters().search_rules.is_index_authorized(&uid); + let allow_index_creation = index_scheduler.filters().search_rules.is_index_authorized(&uid); if allow_index_creation { analytics.publish( "Index Created".to_string(), @@ -82,8 +106,9 @@ pub async fn create_index( Some(&req), ); - let update = Update::CreateIndex { primary_key }; - let task: SummarizedTaskView = meilisearch.register_update(uid, update).await?.into(); + let task = KindWithContent::IndexCreation { index_uid: uid, primary_key }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); Ok(HttpResponse::Accepted().json(task)) } else { @@ -99,30 +124,20 @@ pub struct UpdateIndexRequest { primary_key: Option, } -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct UpdateIndexResponse { - name: String, - uid: String, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - created_at: OffsetDateTime, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - updated_at: OffsetDateTime, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - primary_key: OffsetDateTime, -} - pub async fn get_index( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, ) -> Result { - let meta = meilisearch.get_index(path.into_inner()).await?; - debug!("returns: {:?}", meta); - Ok(HttpResponse::Ok().json(meta)) + let index = index_scheduler.index(&index_uid)?; + let index_view = IndexView::new(index_uid.into_inner(), &index)?; + + debug!("returns: {:?}", index_view); + + Ok(HttpResponse::Ok().json(index_view)) } pub async fn update_index( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, body: web::Json, req: HttpRequest, @@ -136,43 +151,71 @@ pub async fn update_index( Some(&req), ); - let update = Update::UpdateIndex { + let task = KindWithContent::IndexUpdate { + index_uid: path.into_inner(), primary_key: body.primary_key, }; - let task: SummarizedTaskView = meilisearch - .register_update(path.into_inner(), update) - .await? 
- .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn delete_index( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, ) -> Result { - let uid = path.into_inner(); - let update = Update::DeleteIndex; - let task: SummarizedTaskView = meilisearch.register_update(uid, update).await?.into(); + let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); Ok(HttpResponse::Accepted().json(task)) } pub async fn get_index_stats( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, req: HttpRequest, analytics: web::Data, ) -> Result { - analytics.publish( - "Stats Seen".to_string(), - json!({ "per_index_uid": true }), - Some(&req), - ); - let response = meilisearch.get_index_stats(path.into_inner()).await?; + analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": true }), Some(&req)); - debug!("returns: {:?}", response); - Ok(HttpResponse::Ok().json(response)) + let stats = IndexStats::new((*index_scheduler).clone(), index_uid.into_inner())?; + + debug!("returns: {:?}", stats); + Ok(HttpResponse::Ok().json(stats)) +} + +#[derive(Serialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct IndexStats { + pub number_of_documents: u64, + pub is_indexing: bool, + pub field_distribution: FieldDistribution, +} + +impl IndexStats { + pub fn new( + index_scheduler: Data, + index_uid: String, + ) -> Result { + // we check if there is currently a task processing associated with this index. 
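// ---- Editorial sketch (annotation, not part of the patch) ----
// The call right below asks the scheduler for at most one task that is both
// `Processing` and scoped to this index; `is_indexing` is then just "did
// anything come back?". The stub below shows the same test over a plain slice
// of tasks; `TaskStub`, `StatusStub` and `is_indexing` are illustrative names,
// and the real code pushes the filtering into the scheduler query (with
// `limit: Some(1)`) instead of scanning tasks in memory.
#[derive(Clone, Copy, PartialEq, Eq)]
enum StatusStub {
    Enqueued,
    Processing,
    Succeeded,
}

struct TaskStub {
    index_uid: Option<String>,
    status: StatusStub,
}

fn is_indexing(tasks: &[TaskStub], index_uid: &str) -> bool {
    tasks
        .iter()
        .any(|t| t.status == StatusStub::Processing && t.index_uid.as_deref() == Some(index_uid))
}
// ---- end editorial sketch ----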
+ let processing_task = index_scheduler.get_tasks(Query { + status: Some(vec![Status::Processing]), + index_uid: Some(vec![index_uid.clone()]), + limit: Some(1), + ..Query::default() + })?; + let is_processing = !processing_task.is_empty(); + + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + Ok(IndexStats { + number_of_documents: index.number_of_documents(&rtxn)?, + is_indexing: is_processing, + field_distribution: index.field_distribution(&rtxn)?, + }) + } } diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 4b5e0dbca..8c901035d 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -1,20 +1,22 @@ +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::IndexScheduler; use log::debug; use meilisearch_auth::IndexSearchRules; -use meilisearch_lib::index::{ - MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, - DEFAULT_SEARCH_OFFSET, -}; -use meilisearch_lib::MeiliSearch; use meilisearch_types::error::ResponseError; use serde::Deserialize; use serde_cs::vec::CS; use serde_json::Value; use crate::analytics::{Analytics, SearchAggregator}; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::search::{ + perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEARCH_OFFSET, +}; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( @@ -70,14 +72,10 @@ impl From for SearchQuery { limit: other.limit, page: other.page, hits_per_page: other.hits_per_page, - attributes_to_retrieve: other - .attributes_to_retrieve - .map(|o| o.into_iter().collect()), + attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()), attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()), crop_length: other.crop_length, - attributes_to_highlight: other - .attributes_to_highlight - .map(|o| o.into_iter().collect()), + attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), filter, sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), show_matches_position: other.show_matches_position, @@ -136,8 +134,8 @@ fn fix_sort_query_parameters(sort_query: &str) -> Vec { } pub async fn search_with_url_query( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Query, req: HttpRequest, analytics: web::Data, @@ -145,19 +143,17 @@ pub async fn search_with_url_query( debug!("called with params: {:?}", params); let mut query: SearchQuery = params.into_inner().into(); - let index_uid = path.into_inner(); // Tenant token search_rules. 
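// ---- Editorial sketch (annotation, not part of the patch) ----
// The lookup below fetches the per-index search rules carried by a tenant
// token; `add_search_rules` (not shown in this hunk) then merges those rules
// into the incoming query so the token's restriction cannot be bypassed. The
// helper below sketches only the filter-merging part, for string filters, and
// `merge_filters` is an illustrative name rather than the real function.
fn merge_filters(user_filter: Option<String>, token_filter: Option<String>) -> Option<String> {
    match (user_filter, token_filter) {
        // Both present: the token's filter is AND-ed around the user's filter.
        (Some(user), Some(token)) => Some(format!("({user}) AND ({token})")),
        // Only one side present: keep it unchanged.
        (Some(only), None) | (None, Some(only)) => Some(only),
        (None, None) => None,
    }
}
// e.g. merge_filters(Some("genre = horror".into()), Some("user_id = 12".into()))
// yields "(genre = horror) AND (user_id = 12)".
// ---- end editorial sketch ----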
- if let Some(search_rules) = meilisearch - .filters() - .search_rules - .get_index_search_rules(&index_uid) + if let Some(search_rules) = + index_scheduler.filters().search_rules.get_index_search_rules(&index_uid) { add_search_rules(&mut query, search_rules); } let mut aggregate = SearchAggregator::from_query(&query, &req); - let search_result = meilisearch.search(index_uid, query).await; + let index = index_scheduler.index(&index_uid)?; + let search_result = perform_search(&index, query); if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } @@ -170,8 +166,8 @@ pub async fn search_with_url_query( } pub async fn search_with_post( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Json, req: HttpRequest, analytics: web::Data, @@ -179,19 +175,17 @@ pub async fn search_with_post( let mut query = params.into_inner(); debug!("search called with params: {:?}", query); - let index_uid = path.into_inner(); // Tenant token search_rules. - if let Some(search_rules) = meilisearch - .filters() - .search_rules - .get_index_search_rules(&index_uid) + if let Some(search_rules) = + index_scheduler.filters().search_rules.get_index_search_rules(&index_uid) { add_search_rules(&mut query, search_rules); } let mut aggregate = SearchAggregator::from_query(&query, &req); - let search_result = meilisearch.search(index_uid, query).await; + let index = index_scheduler.index(&index_uid)?; + let search_result = perform_search(&index, query); if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } @@ -212,13 +206,7 @@ mod test { let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc"); assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]); let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc"); - assert_eq!( - sort, - vec![ - "doggo:asc".to_string(), - "_geoPoint(12.45,13.56):desc".to_string(), - ] - ); + assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]); let sort = fix_sort_query_parameters( "doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc", ); @@ -232,12 +220,6 @@ mod test { ); let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc"); // This is ugly but eh, I don't want to write a full parser just for this unused route - assert_eq!( - sort, - vec![ - "doggo:asc".to_string(), - "_geoPoint(1,2),catto:desc".to_string(), - ] - ); + assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]); } } diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index bc8642def..4e33cb0b4 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -1,58 +1,68 @@ -use log::debug; - +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; -use meilisearch_lib::index::{Settings, Unchecked}; -use meilisearch_lib::index_controller::Update; -use meilisearch_lib::MeiliSearch; +use index_scheduler::IndexScheduler; +use log::debug; use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::settings::{settings, Settings, Unchecked}; +use meilisearch_types::tasks::KindWithContent; use serde_json::json; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, GuardedData}; -use crate::task::SummarizedTaskView; +use 
crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::routes::SummarizedTaskView; #[macro_export] macro_rules! make_setting_route { ($route:literal, $update_verb:ident, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => { pub mod $attr { + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse, Resource}; + use index_scheduler::IndexScheduler; use log::debug; - - use meilisearch_lib::milli::update::Setting; - use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch}; - use meilisearch_types::error::ResponseError; + use meilisearch_types::index_uid::IndexUid; + use meilisearch_types::milli::update::Setting; + use meilisearch_types::settings::{settings, Settings}; + use meilisearch_types::tasks::KindWithContent; use $crate::analytics::Analytics; - use $crate::extractors::authentication::{policies::*, GuardedData}; + use $crate::extractors::authentication::policies::*; + use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; - use $crate::task::SummarizedTaskView; + use $crate::routes::SummarizedTaskView; pub async fn delete( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_UPDATE }>, + Data, + >, index_uid: web::Path, ) -> Result { - let settings = Settings { - $attr: Setting::Reset, - ..Default::default() - }; + let new_settings = Settings { $attr: Setting::Reset, ..Default::default() }; - let allow_index_creation = meilisearch.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: true, allow_index_creation, }; - let task: SummarizedTaskView = meilisearch - .register_update(index_uid.into_inner(), update) - .await? - .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)) + .await?? + .into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn update( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_UPDATE }>, + Data, + >, index_uid: actix_web::web::Path, body: actix_web::web::Json>, req: HttpRequest, @@ -62,7 +72,7 @@ macro_rules! make_setting_route { $analytics(&body, &req); - let settings = Settings { + let new_settings = Settings { $attr: match body { Some(inner_body) => Setting::Set(inner_body), None => Setting::Reset, @@ -70,26 +80,34 @@ macro_rules! make_setting_route { ..Default::default() }; - let allow_index_creation = meilisearch.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: false, allow_index_creation, }; - let task: SummarizedTaskView = meilisearch - .register_update(index_uid.into_inner(), update) - .await? - .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)) + .await?? 
+ .into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn get( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_GET }>, + Data, + >, index_uid: actix_web::web::Path, ) -> std::result::Result { - let settings = meilisearch.settings(index_uid.into_inner()).await?; + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + let settings = settings(&index, &rtxn)?; + debug!("returns: {:?}", settings); let mut json = serde_json::json!(&settings); let val = json[$camelcase_attr].take(); @@ -175,11 +193,11 @@ make_setting_route!( make_setting_route!( "/typo-tolerance", patch, - meilisearch_lib::index::updates::TypoSettings, + meilisearch_types::settings::TypoSettings, typo_tolerance, "typoTolerance", analytics, - |setting: &Option, req: &HttpRequest| { + |setting: &Option, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -252,13 +270,7 @@ make_setting_route!( "synonyms" ); -make_setting_route!( - "/distinct-attribute", - put, - String, - distinct_attribute, - "distinctAttribute" -); +make_setting_route!("/distinct-attribute", put, String, distinct_attribute, "distinctAttribute"); make_setting_route!( "/ranking-rules", @@ -285,11 +297,11 @@ make_setting_route!( make_setting_route!( "/faceting", patch, - meilisearch_lib::index::updates::FacetingSettings, + meilisearch_types::settings::FacetingSettings, faceting, "faceting", analytics, - |setting: &Option, req: &HttpRequest| { + |setting: &Option, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -307,11 +319,11 @@ make_setting_route!( make_setting_route!( "/pagination", patch, - meilisearch_lib::index::updates::PaginationSettings, + meilisearch_types::settings::PaginationSettings, pagination, "pagination", analytics, - |setting: &Option, req: &HttpRequest| { + |setting: &Option, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -355,46 +367,46 @@ generate_configure!( ); pub async fn update_all( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: web::Path, body: web::Json>, req: HttpRequest, analytics: web::Data, ) -> Result { - let settings = body.into_inner(); + let new_settings = body.into_inner(); analytics.publish( "Settings Updated".to_string(), json!({ "ranking_rules": { - "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")), + "sort_position": new_settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")), }, "searchable_attributes": { - "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), + "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), }, "sortable_attributes": { - "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), - "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), + "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), + "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), }, "filterable_attributes": { - "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), - "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), + "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), + "has_geo": 
new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), }, "typo_tolerance": { - "enabled": settings.typo_tolerance + "enabled": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.enabled.as_ref().set()) .copied(), - "disable_on_attributes": settings.typo_tolerance + "disable_on_attributes": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": settings.typo_tolerance + "disable_on_words": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": settings.typo_tolerance + "min_word_size_for_one_typo": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.min_word_size_for_typos @@ -402,7 +414,7 @@ pub async fn update_all( .set() .map(|s| s.one_typo.set())) .flatten(), - "min_word_size_for_two_typos": settings.typo_tolerance + "min_word_size_for_two_typos": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.min_word_size_for_typos @@ -412,13 +424,13 @@ pub async fn update_all( .flatten(), }, "faceting": { - "max_values_per_facet": settings.faceting + "max_values_per_facet": new_settings.faceting .as_ref() .set() .and_then(|s| s.max_values_per_facet.as_ref().set()), }, "pagination": { - "max_total_hits": settings.pagination + "max_total_hits": new_settings.pagination .as_ref() .set() .and_then(|s| s.max_total_hits.as_ref().set()), @@ -427,46 +439,48 @@ pub async fn update_all( Some(&req), ); - let allow_index_creation = meilisearch.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: false, allow_index_creation, }; - let task: SummarizedTaskView = meilisearch - .register_update(index_uid.into_inner(), update) - .await? - .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn get_all( - data: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: web::Path, ) -> Result { - let settings = data.settings(index_uid.into_inner()).await?; - debug!("returns: {:?}", settings); - Ok(HttpResponse::Ok().json(settings)) + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + let new_settings = settings(&index, &rtxn)?; + debug!("returns: {:?}", new_settings); + Ok(HttpResponse::Ok().json(new_settings)) } pub async fn delete_all( - data: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: web::Path, ) -> Result { - let settings = Settings::cleared().into_unchecked(); + let new_settings = Settings::cleared().into_unchecked(); - let allow_index_creation = data.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: true, allow_index_creation, }; - let task: SummarizedTaskView = data - .register_update(index_uid.into_inner(), update) - .await? 
- .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 6a673f600..4463aee5e 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -1,21 +1,26 @@ -use actix_web::{web, HttpRequest, HttpResponse}; -use log::debug; -use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use actix_web::web::Data; +use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::{IndexScheduler, Query}; +use log::debug; +use meilisearch_types::error::ResponseError; +use meilisearch_types::settings::{Settings, Unchecked}; +use meilisearch_types::star_or::StarOr; +use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; +use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; -use meilisearch_lib::index::{Settings, Unchecked}; -use meilisearch_lib::MeiliSearch; -use meilisearch_types::error::ResponseError; -use meilisearch_types::star_or::StarOr; - +use self::indexes::IndexStats; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; mod api_key; mod dump; pub mod indexes; +mod swap_indexes; mod tasks; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -25,7 +30,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/dumps").configure(dump::configure)) .service(web::resource("/stats").route(web::get().to(get_stats))) .service(web::resource("/version").route(web::get().to(get_version))) - .service(web::scope("/indexes").configure(indexes::configure)); + .service(web::scope("/indexes").configure(indexes::configure)) + .service(web::scope("/swap-indexes").configure(swap_indexes::configure)); } /// Extracts the raw values from the `StarOr` types and @@ -45,6 +51,30 @@ where const PAGINATION_DEFAULT_LIMIT: fn() -> usize = || 20; +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct SummarizedTaskView { + task_uid: TaskId, + index_uid: Option, + status: Status, + #[serde(rename = "type")] + kind: Kind, + #[serde(serialize_with = "time::serde::rfc3339::serialize")] + enqueued_at: OffsetDateTime, +} + +impl From for SummarizedTaskView { + fn from(task: Task) -> Self { + SummarizedTaskView { + task_uid: task.uid, + index_uid: task.index_uid().map(|s| s.to_string()), + status: task.status, + kind: task.kind.as_kind(), + enqueued_at: task.enqueued_at, + } + } +} + #[derive(Debug, Clone, Copy, Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct Pagination { @@ -72,11 +102,7 @@ impl Pagination { T: Serialize, { let total = content.len(); - let content: Vec<_> = content - .into_iter() - .skip(self.offset) - .take(self.limit) - .collect(); + let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect(); self.format_with(total, content) } @@ -89,11 +115,7 @@ impl Pagination { where T: Serialize, { - let content: Vec<_> = content - .into_iter() - .skip(self.offset) - .take(self.limit) - .collect(); + let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect(); self.format_with(total, content) } @@ -103,23 +125,13 @@ impl Pagination { where T: Serialize, { - PaginationView { - results, - offset: self.offset, - limit: self.limit, - 
total, - } + PaginationView { results, offset: self.offset, limit: self.limit, total } } } impl PaginationView { pub fn new(offset: usize, limit: usize, total: usize, results: Vec) -> Self { - Self { - offset, - limit, - results, - total, - } + Self { offset, limit, results, total } } } @@ -181,10 +193,7 @@ pub struct EnqueuedUpdateResult { pub update_type: UpdateType, #[serde(with = "time::serde::rfc3339")] pub enqueued_at: OffsetDateTime, - #[serde( - skip_serializing_if = "Option::is_none", - with = "time::serde::rfc3339::option" - )] + #[serde(skip_serializing_if = "Option::is_none", with = "time::serde::rfc3339::option")] pub started_processing_at: Option, } @@ -231,21 +240,63 @@ pub async fn running() -> HttpResponse { HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" })) } +#[derive(Serialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Stats { + pub database_size: u64, + #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] + pub last_update: Option, + pub indexes: BTreeMap, +} + async fn get_stats( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, req: HttpRequest, analytics: web::Data, ) -> Result { - analytics.publish( - "Stats Seen".to_string(), - json!({ "per_index_uid": false }), - Some(&req), - ); - let search_rules = &meilisearch.filters().search_rules; - let response = meilisearch.get_all_stats(search_rules).await?; + analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req)); + let search_rules = &index_scheduler.filters().search_rules; - debug!("returns: {:?}", response); - Ok(HttpResponse::Ok().json(response)) + let stats = create_all_stats((*index_scheduler).clone(), search_rules)?; + + debug!("returns: {:?}", stats); + Ok(HttpResponse::Ok().json(stats)) +} + +pub fn create_all_stats( + index_scheduler: Data, + search_rules: &meilisearch_auth::SearchRules, +) -> Result { + let mut last_task: Option = None; + let mut indexes = BTreeMap::new(); + let mut database_size = 0; + let processing_task = index_scheduler.get_tasks(Query { + status: Some(vec![Status::Processing]), + limit: Some(1), + ..Query::default() + })?; + let processing_index = processing_task.first().and_then(|task| task.index_uid()); + for (name, index) in index_scheduler.indexes()? 
{ + if !search_rules.is_index_authorized(&name) { + continue; + } + + database_size += index.on_disk_size()?; + + let rtxn = index.read_txn()?; + let stats = IndexStats { + number_of_documents: index.number_of_documents(&rtxn)?, + is_indexing: processing_index.map_or(false, |index_name| name == index_name), + field_distribution: index.field_distribution(&rtxn)?, + }; + + let updated_at = index.updated_at(&rtxn)?; + last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at))); + + indexes.insert(name, stats); + } + let stats = Stats { database_size, last_update: last_task, indexes }; + Ok(stats) } #[derive(Serialize)] @@ -257,7 +308,7 @@ struct VersionResponse { } async fn get_version( - _meilisearch: GuardedData, MeiliSearch>, + _index_scheduler: GuardedData, Data>, ) -> HttpResponse { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); diff --git a/meilisearch-http/src/routes/swap_indexes.rs b/meilisearch-http/src/routes/swap_indexes.rs new file mode 100644 index 000000000..6389bad58 --- /dev/null +++ b/meilisearch-http/src/routes/swap_indexes.rs @@ -0,0 +1,77 @@ +use std::collections::BTreeSet; + +use actix_web::web::Data; +use actix_web::{web, HttpResponse}; +use index_scheduler::IndexScheduler; +use meilisearch_types::error::ResponseError; +use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use serde::Deserialize; + +use super::SummarizedTaskView; +use crate::error::MeilisearchHttpError; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::{AuthenticationError, GuardedData}; +use crate::extractors::sequential_extractor::SeqHandler; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes)))); +} +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct SwapIndexesPayload { + indexes: Vec, +} + +pub async fn swap_indexes( + index_scheduler: GuardedData, Data>, + params: web::Json>, +) -> Result { + let search_rules = &index_scheduler.filters().search_rules; + + let mut swaps = vec![]; + let mut indexes_set = BTreeSet::::default(); + let mut unauthorized_indexes = BTreeSet::new(); + let mut duplicate_indexes = BTreeSet::new(); + for SwapIndexesPayload { indexes } in params.into_inner().into_iter() { + let (lhs, rhs) = match indexes.as_slice() { + [lhs, rhs] => (lhs, rhs), + _ => { + return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into()); + } + }; + if !search_rules.is_index_authorized(lhs) { + unauthorized_indexes.insert(lhs.clone()); + } + if !search_rules.is_index_authorized(rhs) { + unauthorized_indexes.insert(rhs.clone()); + } + + swaps.push(IndexSwap { indexes: (lhs.clone(), rhs.clone()) }); + + let is_unique_index_lhs = indexes_set.insert(lhs.clone()); + if !is_unique_index_lhs { + duplicate_indexes.insert(lhs.clone()); + } + let is_unique_index_rhs = indexes_set.insert(rhs.clone()); + if !is_unique_index_rhs { + duplicate_indexes.insert(rhs.clone()); + } + } + if !duplicate_indexes.is_empty() { + let duplicate_indexes: Vec<_> = duplicate_indexes.into_iter().collect(); + if let [index] = duplicate_indexes.as_slice() { + return Err(MeilisearchHttpError::SwapDuplicateIndexFound(index.clone()).into()); + } else { + return Err(MeilisearchHttpError::SwapDuplicateIndexesFound(duplicate_indexes).into()); + } + } + if !unauthorized_indexes.is_empty() { + return 
Err(AuthenticationError::InvalidToken.into()); + } + + let task = KindWithContent::IndexSwap { swaps }; + + let task = index_scheduler.register(task)?; + let task: SummarizedTaskView = task.into(); + Ok(HttpResponse::Accepted().json(task)) +} diff --git a/meilisearch-http/src/routes/tasks.rs b/meilisearch-http/src/routes/tasks.rs index c630bae90..59f1a1f68 100644 --- a/meilisearch-http/src/routes/tasks.rs +++ b/meilisearch-http/src/routes/tasks.rs @@ -1,84 +1,400 @@ +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; -use meilisearch_lib::tasks::task::{TaskContent, TaskEvent, TaskId}; -use meilisearch_lib::tasks::TaskFilter; -use meilisearch_lib::MeiliSearch; +use index_scheduler::{IndexScheduler, Query, TaskId}; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::star_or::StarOr; -use serde::Deserialize; +use meilisearch_types::tasks::{ + serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task, +}; +use serde::{Deserialize, Serialize}; use serde_cs::vec::CS; use serde_json::json; +use time::{Duration, OffsetDateTime}; +use tokio::task; +use super::{fold_star_or, SummarizedTaskView}; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::task::{TaskListView, TaskStatus, TaskType, TaskView}; -use super::fold_star_or; - -const DEFAULT_LIMIT: fn() -> usize = || 20; +const DEFAULT_LIMIT: fn() -> u32 = || 20; pub fn configure(cfg: &mut web::ServiceConfig) { - cfg.service(web::resource("").route(web::get().to(SeqHandler(get_tasks)))) - .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); + cfg.service( + web::resource("") + .route(web::get().to(SeqHandler(get_tasks))) + .route(web::delete().to(SeqHandler(delete_tasks))), + ) + .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks)))) + .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct TaskView { + pub uid: TaskId, + #[serde(default)] + pub index_uid: Option, + pub status: Status, + #[serde(rename = "type")] + pub kind: Kind, + + #[serde(skip_serializing_if = "Option::is_none")] + pub canceled_by: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub details: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + + #[serde(serialize_with = "serialize_duration", default)] + pub duration: Option, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339::option", default)] + pub started_at: Option, + #[serde(with = "time::serde::rfc3339::option", default)] + pub finished_at: Option, +} + +impl TaskView { + pub fn from_task(task: &Task) -> TaskView { + TaskView { + uid: task.uid, + index_uid: task.index_uid().map(ToOwned::to_owned), + status: task.status, + kind: task.kind.as_kind(), + canceled_by: task.canceled_by, + details: task.details.clone().map(DetailsView::from), + error: task.error.clone(), + duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start), + enqueued_at: task.enqueued_at, + started_at: task.started_at, + finished_at: task.finished_at, + } + } +} + 
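// ---- Editorial sketch (annotation, not part of the patch) ----
// `TaskView::from_task` above derives `duration` with
// `task.started_at.zip(task.finished_at).map(|(start, end)| end - start)`:
// a duration exists only once both timestamps exist. The standalone version
// below uses plain `u64` epoch seconds instead of `time::OffsetDateTime` so it
// runs without extra crates; the zip-then-map shape is the same.
fn duration_secs(started_at: Option<u64>, finished_at: Option<u64>) -> Option<u64> {
    // `Option::zip` yields `Some` only when both sides are `Some`, which spares
    // an explicit match over the four combinations.
    started_at.zip(finished_at).map(|(start, end)| end.saturating_sub(start))
}
// duration_secs(Some(10), Some(25)) == Some(15)
// duration_secs(Some(10), None)     == None   (still running: no duration yet)
// ---- end editorial sketch ----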
+#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DetailsView { + #[serde(skip_serializing_if = "Option::is_none")] + pub received_documents: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub indexed_documents: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub primary_key: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub matched_documents: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub deleted_documents: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub matched_tasks: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub canceled_tasks: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub deleted_tasks: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub original_query: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub dump_uid: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(flatten)] + pub settings: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub swaps: Option>, +} + +impl From
for DetailsView { + fn from(details: Details) -> Self { + match details { + Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => { + DetailsView { + received_documents: Some(received_documents), + indexed_documents, + ..DetailsView::default() + } + } + Details::SettingsUpdate { settings } => { + DetailsView { settings: Some(settings), ..DetailsView::default() } + } + Details::IndexInfo { primary_key } => { + DetailsView { primary_key: Some(primary_key), ..DetailsView::default() } + } + Details::DocumentDeletion { + matched_documents: received_document_ids, + deleted_documents, + } => DetailsView { + matched_documents: Some(received_document_ids), + deleted_documents: Some(deleted_documents), + ..DetailsView::default() + }, + Details::ClearAll { deleted_documents } => { + DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() } + } + Details::TaskCancelation { matched_tasks, canceled_tasks, original_query } => { + DetailsView { + matched_tasks: Some(matched_tasks), + canceled_tasks: Some(canceled_tasks), + original_query: Some(original_query), + ..DetailsView::default() + } + } + Details::TaskDeletion { matched_tasks, deleted_tasks, original_query } => DetailsView { + matched_tasks: Some(matched_tasks), + deleted_tasks: Some(deleted_tasks), + original_query: Some(original_query), + ..DetailsView::default() + }, + Details::Dump { dump_uid } => { + DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() } + } + Details::IndexSwap { swaps } => { + DetailsView { swaps: Some(swaps), ..Default::default() } + } + } + } +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct TaskDateQuery { + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize", + deserialize_with = "date_deserializer::after::deserialize" + )] + after_enqueued_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize", + deserialize_with = "date_deserializer::before::deserialize" + )] + before_enqueued_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize", + deserialize_with = "date_deserializer::after::deserialize" + )] + after_started_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize", + deserialize_with = "date_deserializer::before::deserialize" + )] + before_started_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize", + deserialize_with = "date_deserializer::after::deserialize" + )] + after_finished_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize", + deserialize_with = "date_deserializer::before::deserialize" + )] + before_finished_at: Option, } #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct TasksFilterQuery { #[serde(rename = "type")] - type_: Option>>, - status: Option>>, - index_uid: Option>>, + kind: Option>>, + uid: Option>, + status: Option>>, + index_uid: Option>>, #[serde(default = "DEFAULT_LIMIT")] - limit: usize, + limit: u32, from: Option, + #[serde(flatten)] + dates: TaskDateQuery, } -#[rustfmt::skip] -fn task_type_matches_content(type_: &TaskType, content: 
&TaskContent) -> bool { - matches!((type_, content), - (TaskType::IndexCreation, TaskContent::IndexCreation { .. }) - | (TaskType::IndexUpdate, TaskContent::IndexUpdate { .. }) - | (TaskType::IndexDeletion, TaskContent::IndexDeletion { .. }) - | (TaskType::DocumentAdditionOrUpdate, TaskContent::DocumentAddition { .. }) - | (TaskType::DocumentDeletion, TaskContent::DocumentDeletion{ .. }) - | (TaskType::SettingsUpdate, TaskContent::SettingsUpdate { .. }) - | (TaskType::DumpCreation, TaskContent::Dump { .. }) - ) +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct TaskDeletionQuery { + #[serde(rename = "type")] + kind: Option>, + uid: Option>, + status: Option>, + index_uid: Option>, + #[serde(flatten)] + dates: TaskDateQuery, } -#[rustfmt::skip] -fn task_status_matches_events(status: &TaskStatus, events: &[TaskEvent]) -> bool { - events.last().map_or(false, |event| { - matches!((status, event), - (TaskStatus::Enqueued, TaskEvent::Created(_)) - | (TaskStatus::Processing, TaskEvent::Processing(_) | TaskEvent::Batched { .. }) - | (TaskStatus::Succeeded, TaskEvent::Succeeded { .. }) - | (TaskStatus::Failed, TaskEvent::Failed { .. }), - ) - }) +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct TaskCancelationQuery { + #[serde(rename = "type")] + type_: Option>, + uid: Option>, + status: Option>, + index_uid: Option>, + #[serde(flatten)] + dates: TaskDateQuery, +} + +async fn cancel_tasks( + index_scheduler: GuardedData, Data>, + req: HttpRequest, + params: web::Query, +) -> Result { + let TaskCancelationQuery { + type_, + uid, + status, + index_uid, + dates: + TaskDateQuery { + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + }, + } = params.into_inner(); + + let kind: Option> = type_.map(|x| x.into_iter().collect()); + let uid: Option> = uid.map(|x| x.into_iter().collect()); + let status: Option> = status.map(|x| x.into_iter().collect()); + let index_uid: Option> = + index_uid.map(|x| x.into_iter().map(|x| x.to_string()).collect()); + + let query = Query { + limit: None, + from: None, + status, + kind, + index_uid, + uid, + before_enqueued_at, + after_enqueued_at, + before_started_at, + after_started_at, + before_finished_at, + after_finished_at, + }; + + if query.is_empty() { + return Err(index_scheduler::Error::TaskCancelationWithEmptyQuery.into()); + } + + let filtered_query = filter_out_inaccessible_indexes_from_query(&index_scheduler, &query); + let tasks = index_scheduler.get_task_ids(&filtered_query)?; + let task_cancelation = + KindWithContent::TaskCancelation { query: req.query_string().to_string(), tasks }; + + let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??; + let task: SummarizedTaskView = task.into(); + + Ok(HttpResponse::Ok().json(task)) +} + +async fn delete_tasks( + index_scheduler: GuardedData, Data>, + req: HttpRequest, + params: web::Query, +) -> Result { + let TaskDeletionQuery { + kind: type_, + uid, + status, + index_uid, + dates: + TaskDateQuery { + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + }, + } = params.into_inner(); + + let kind: Option> = type_.map(|x| x.into_iter().collect()); + let uid: Option> = uid.map(|x| x.into_iter().collect()); + let status: Option> = status.map(|x| x.into_iter().collect()); + let index_uid: Option> = + index_uid.map(|x| x.into_iter().map(|x| 
x.to_string()).collect()); + + let query = Query { + limit: None, + from: None, + status, + kind, + index_uid, + uid, + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + }; + + if query.is_empty() { + return Err(index_scheduler::Error::TaskDeletionWithEmptyQuery.into()); + } + + let filtered_query = filter_out_inaccessible_indexes_from_query(&index_scheduler, &query); + let tasks = index_scheduler.get_task_ids(&filtered_query)?; + let task_deletion = + KindWithContent::TaskDeletion { query: req.query_string().to_string(), tasks }; + + let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??; + let task: SummarizedTaskView = task.into(); + + Ok(HttpResponse::Ok().json(task)) +} + +#[derive(Debug, Serialize)] +pub struct AllTasks { + results: Vec, + limit: u32, + from: Option, + next: Option, } async fn get_tasks( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, params: web::Query, req: HttpRequest, analytics: web::Data, ) -> Result { let TasksFilterQuery { - type_, + kind, + uid, status, index_uid, limit, from, + dates: + TaskDateQuery { + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + }, } = params.into_inner(); - let search_rules = &meilisearch.filters().search_rules; - // We first transform a potential indexUid=* into a "not specified indexUid filter" // for every one of the filters: type, status, and indexUid. - let type_: Option> = type_.and_then(fold_star_or); + let kind: Option> = kind.and_then(fold_star_or); + let uid: Option> = uid.map(|x| x.into_iter().collect()); let status: Option> = status.and_then(fold_star_or); let index_uid: Option> = index_uid.and_then(fold_star_or); @@ -86,75 +402,37 @@ async fn get_tasks( "Tasks Seen".to_string(), json!({ "filtered_by_index_uid": index_uid.as_ref().map_or(false, |v| !v.is_empty()), - "filtered_by_type": type_.as_ref().map_or(false, |v| !v.is_empty()), + "filtered_by_type": kind.as_ref().map_or(false, |v| !v.is_empty()), "filtered_by_status": status.as_ref().map_or(false, |v| !v.is_empty()), }), Some(&req), ); - // Then we filter on potential indexes and make sure that the search filter - // restrictions are also applied. - let indexes_filters = match index_uid { - Some(indexes) => { - let mut filters = TaskFilter::default(); - for name in indexes { - if search_rules.is_index_authorized(&name) { - filters.filter_index(name.to_string()); - } - } - Some(filters) - } - None => { - if search_rules.is_index_authorized("*") { - None - } else { - let mut filters = TaskFilter::default(); - for (index, _policy) in search_rules.clone() { - filters.filter_index(index); - } - Some(filters) - } - } - }; - - // Then we complete the task filter with other potential status and types filters. - let filters = if type_.is_some() || status.is_some() { - let mut filters = indexes_filters.unwrap_or_default(); - filters.filter_fn(Box::new(move |task| { - let matches_type = match &type_ { - Some(types) => types - .iter() - .any(|t| task_type_matches_content(t, &task.content)), - None => true, - }; - - let matches_status = match &status { - Some(statuses) => statuses - .iter() - .any(|t| task_status_matches_events(t, &task.events)), - None => true, - }; - - matches_type && matches_status - })); - Some(filters) - } else { - indexes_filters - }; - // We +1 just to know if there is more after this "page" or not. 
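The `get_tasks` handler below uses the classic limit-plus-one trick to detect whether another page exists. A minimal, self-contained sketch of that idea (the function and values here are hypothetical, not part of the handler):

// Ask the store for `limit + 1` items; if they all come back, the extra one
// proves there is a next page and its uid becomes the `next` cursor.
fn paginate(mut results: Vec<u32>, limit: usize) -> (Vec<u32>, Option<u32>) {
    // `results` is assumed to have been fetched with a real limit of `limit + 1`.
    let next = if results.len() == limit + 1 { results.pop() } else { None };
    (results, next)
}

fn main() {
    let fetched = vec![10, 9, 8, 7]; // fetched with limit = 3, so up to 4 items
    let (page, next) = paginate(fetched, 3);
    assert_eq!(page, vec![10, 9, 8]);
    assert_eq!(next, Some(7)); // uid of the first task of the next page
}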
let limit = limit.saturating_add(1); - let mut tasks_results: Vec<_> = meilisearch - .list_tasks(filters, Some(limit), from) - .await? - .into_iter() - .map(TaskView::from) - .collect(); + let query = index_scheduler::Query { + limit: Some(limit), + from, + status, + kind, + index_uid, + uid, + before_enqueued_at, + after_enqueued_at, + before_started_at, + after_started_at, + before_finished_at, + after_finished_at, + }; + let query = filter_out_inaccessible_indexes_from_query(&index_scheduler, &query); + + let mut tasks_results: Vec = + index_scheduler.get_tasks(query)?.into_iter().map(|t| TaskView::from_task(&t)).collect(); // If we were able to fetch the number +1 tasks we asked // it means that there is more to come. - let next = if tasks_results.len() == limit { + let next = if tasks_results.len() == limit as usize { tasks_results.pop().map(|t| t.uid) } else { None @@ -162,43 +440,285 @@ async fn get_tasks( let from = tasks_results.first().map(|t| t.uid); - let tasks = TaskListView { - results: tasks_results, - limit: limit.saturating_sub(1), - from, - next, - }; - + let tasks = AllTasks { results: tasks_results, limit: limit.saturating_sub(1), from, next }; Ok(HttpResponse::Ok().json(tasks)) } async fn get_task( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, task_id: web::Path, req: HttpRequest, analytics: web::Data, ) -> Result { - analytics.publish( - "Tasks Seen".to_string(), - json!({ "per_task_uid": true }), - Some(&req), - ); + let task_id = task_id.into_inner(); - let search_rules = &meilisearch.filters().search_rules; - let filters = if search_rules.is_index_authorized("*") { - None - } else { - let mut filters = TaskFilter::default(); + analytics.publish("Tasks Seen".to_string(), json!({ "per_task_uid": true }), Some(&req)); + + let search_rules = &index_scheduler.filters().search_rules; + let mut filters = index_scheduler::Query::default(); + if !search_rules.is_index_authorized("*") { for (index, _policy) in search_rules.clone() { - filters.filter_index(index); + filters = filters.with_index(index); + } + } + + filters.uid = Some(vec![task_id]); + + if let Some(task) = index_scheduler.get_tasks(filters)?.first() { + let task_view = TaskView::from_task(task); + Ok(HttpResponse::Ok().json(task_view)) + } else { + Err(index_scheduler::Error::TaskNotFound(task_id).into()) + } +} + +fn filter_out_inaccessible_indexes_from_query( + index_scheduler: &GuardedData, Data>, + query: &Query, +) -> Query { + let mut query = query.clone(); + + // First remove all indexes from the query, we will add them back later + let indexes = query.index_uid.take(); + + let search_rules = &index_scheduler.filters().search_rules; + + // We filter on potential indexes and make sure that the search filter + // restrictions are also applied. + match indexes { + Some(indexes) => { + for name in indexes.iter() { + if search_rules.is_index_authorized(name) { + query = query.with_index(name.to_string()); + } + } + } + None => { + if !search_rules.is_index_authorized("*") { + for (index, _policy) in search_rules.clone() { + query = query.with_index(index.to_string()); + } + } } - Some(filters) }; - let task: TaskView = meilisearch - .get_task(task_id.into_inner(), filters) - .await? 
- .into(); - - Ok(HttpResponse::Ok().json(task)) + query +} + +pub(crate) mod date_deserializer { + use time::format_description::well_known::Rfc3339; + use time::macros::format_description; + use time::{Date, Duration, OffsetDateTime, Time}; + + enum DeserializeDateOption { + Before, + After, + } + + fn deserialize_date( + value: &str, + option: DeserializeDateOption, + ) -> std::result::Result { + // We can't parse using time's rfc3339 format, since then we won't know what part of the + // datetime was not explicitly specified, and thus we won't be able to increment it to the + // next step. + if let Ok(datetime) = OffsetDateTime::parse(value, &Rfc3339) { + // fully specified up to the second + // we assume that the subseconds are 0 if not specified, and we don't increment to the next second + Ok(datetime) + } else if let Ok(datetime) = Date::parse( + value, + format_description!("[year repr:full base:calendar]-[month repr:numerical]-[day]"), + ) { + let datetime = datetime.with_time(Time::MIDNIGHT).assume_utc(); + // add one day since the time was not specified + match option { + DeserializeDateOption::Before => Ok(datetime), + DeserializeDateOption::After => { + let datetime = datetime + .checked_add(Duration::days(1)) + .ok_or_else(|| serde::de::Error::custom("date overflow"))?; + Ok(datetime) + } + } + } else { + Err(serde::de::Error::custom( + "could not parse a date with the RFC3339 or YYYY-MM-DD format", + )) + } + } + + /// Deserialize an upper bound datetime with RFC3339 or YYYY-MM-DD. + pub(crate) mod before { + use serde::Deserializer; + use time::OffsetDateTime; + + use super::{deserialize_date, DeserializeDateOption}; + + /// Deserialize an [`Option`] from its ISO 8601 representation. + pub fn deserialize<'a, D: Deserializer<'a>>( + deserializer: D, + ) -> Result, D::Error> { + deserializer.deserialize_option(Visitor) + } + + struct Visitor; + + #[derive(Debug)] + struct DeserializeError; + + impl<'a> serde::de::Visitor<'a> for Visitor { + type Value = Option; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str( + "an optional date written as a string with the RFC3339 or YYYY-MM-DD format", + ) + } + + fn visit_str( + self, + value: &str, + ) -> Result, E> { + deserialize_date(value, DeserializeDateOption::Before).map(Some) + } + + fn visit_some>( + self, + deserializer: D, + ) -> Result, D::Error> { + deserializer.deserialize_str(Visitor) + } + + fn visit_none(self) -> Result, E> { + Ok(None) + } + + fn visit_unit(self) -> Result { + Ok(None) + } + } + } + /// Deserialize a lower bound datetime with RFC3339 or YYYY-MM-DD. + /// + /// If YYYY-MM-DD is used, the day is incremented by one. + pub(crate) mod after { + use serde::Deserializer; + use time::OffsetDateTime; + + use super::{deserialize_date, DeserializeDateOption}; + + /// Deserialize an [`Option`] from its ISO 8601 representation. 
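A self-contained sketch of the date rule implemented by `date_deserializer`, assuming the `time` crate with its `macros` and `parsing` features: an RFC 3339 value is kept as-is, a bare `YYYY-MM-DD` is read as midnight UTC, and for the `after` bound only the day is incremented so the bound lands on the following midnight (the real code returns serde errors where this sketch returns `None`):

use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};

fn parse_task_date(value: &str, is_after_bound: bool) -> Option<OffsetDateTime> {
    if let Ok(datetime) = OffsetDateTime::parse(value, &Rfc3339) {
        // Fully specified timestamp: used unchanged.
        return Some(datetime);
    }
    let date = Date::parse(
        value,
        format_description!("[year repr:full base:calendar]-[month repr:numerical]-[day]"),
    )
    .ok()?;
    let midnight = date.with_time(Time::MIDNIGHT).assume_utc();
    if is_after_bound {
        // "afterEnqueuedAt=2021-12-03" becomes 2021-12-04T00:00:00Z,
        // matching the snapshot tests further down.
        midnight.checked_add(Duration::days(1))
    } else {
        // "beforeEnqueuedAt=2021-12-03" stays at 2021-12-03T00:00:00Z.
        Some(midnight)
    }
}

fn main() {
    assert!(parse_task_date("2021-12-03T23:45:23Z", true).is_some());
    assert!(parse_task_date("2021-12", true).is_none()); // neither RFC 3339 nor YYYY-MM-DD
}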
+ pub fn deserialize<'a, D: Deserializer<'a>>( + deserializer: D, + ) -> Result, D::Error> { + deserializer.deserialize_option(Visitor) + } + + struct Visitor; + + #[derive(Debug)] + struct DeserializeError; + + impl<'a> serde::de::Visitor<'a> for Visitor { + type Value = Option; + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str( + "an optional date written as a string with the RFC3339 or YYYY-MM-DD format", + ) + } + + fn visit_str( + self, + value: &str, + ) -> Result, E> { + deserialize_date(value, DeserializeDateOption::After).map(Some) + } + + fn visit_some>( + self, + deserializer: D, + ) -> Result, D::Error> { + deserializer.deserialize_str(Visitor) + } + + fn visit_none(self) -> Result, E> { + Ok(None) + } + + fn visit_unit(self) -> Result { + Ok(None) + } + } + } +} + +#[cfg(test)] +mod tests { + use meili_snap::snapshot; + + use crate::routes::tasks::TaskDeletionQuery; + + #[test] + fn deserialize_task_deletion_query_datetime() { + { + let json = r#" { + "afterEnqueuedAt": "2021-12-03", + "beforeEnqueuedAt": "2021-12-03", + "afterStartedAt": "2021-12-03", + "beforeStartedAt": "2021-12-03", + "afterFinishedAt": "2021-12-03", + "beforeFinishedAt": "2021-12-03" + } "#; + let query = serde_json::from_str::(json).unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"2021-12-04 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_enqueued_at.unwrap()), @"2021-12-03 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.after_started_at.unwrap()), @"2021-12-04 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_started_at.unwrap()), @"2021-12-03 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.after_finished_at.unwrap()), @"2021-12-04 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_finished_at.unwrap()), @"2021-12-03 0:00:00.0 +00:00:00"); + } + { + let json = r#" { "afterEnqueuedAt": "2021-12-03T23:45:23Z", "beforeEnqueuedAt": "2021-12-03T23:45:23Z" } "#; + let query = serde_json::from_str::(json).unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"2021-12-03 23:45:23.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_enqueued_at.unwrap()), @"2021-12-03 23:45:23.0 +00:00:00"); + } + { + let json = r#" { "afterEnqueuedAt": "1997-11-12T09:55:06-06:20" } "#; + let query = serde_json::from_str::(json).unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"1997-11-12 9:55:06.0 -06:20:00"); + } + { + let json = r#" { "afterEnqueuedAt": "1997-11-12T09:55:06+00:00" } "#; + let query = serde_json::from_str::(json).unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"1997-11-12 9:55:06.0 +00:00:00"); + } + { + let json = r#" { "afterEnqueuedAt": "1997-11-12T09:55:06.200000300Z" } "#; + let query = serde_json::from_str::(json).unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"1997-11-12 9:55:06.2000003 +00:00:00"); + } + { + let json = r#" { "afterEnqueuedAt": "2021" } "#; + let err = serde_json::from_str::(json).unwrap_err(); + snapshot!(format!("{err}"), @"could not parse a date with the RFC3339 or YYYY-MM-DD format at line 1 column 30"); + } + { + let json = r#" { "afterEnqueuedAt": "2021-12" } "#; + let err = serde_json::from_str::(json).unwrap_err(); + snapshot!(format!("{err}"), @"could not parse a date with the RFC3339 or YYYY-MM-DD format at line 1 column 33"); + } + + { + let json = r#" { 
"afterEnqueuedAt": "2021-12-03T23" } "#; + let err = serde_json::from_str::(json).unwrap_err(); + snapshot!(format!("{err}"), @"could not parse a date with the RFC3339 or YYYY-MM-DD format at line 1 column 39"); + } + { + let json = r#" { "afterEnqueuedAt": "2021-12-03T23:45" } "#; + let err = serde_json::from_str::(json).unwrap_err(); + snapshot!(format!("{err}"), @"could not parse a date with the RFC3339 or YYYY-MM-DD format at line 1 column 42"); + } + } } diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-http/src/search.rs similarity index 60% rename from meilisearch-lib/src/index/search.rs rename to meilisearch-http/src/search.rs index 558a530c0..7310e7914 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-http/src/search.rs @@ -4,21 +4,19 @@ use std::str::FromStr; use std::time::Instant; use either::Either; +use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; +use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; use milli::{ - AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError, - TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, + AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder, + SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use crate::index::error::FacetError; +use crate::error::MeilisearchHttpError; -use super::error::{IndexError, Result}; -use super::index::Index; - -pub type Document = serde_json::Map; type MatchesPosition = BTreeMap>; pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; @@ -28,10 +26,6 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); -/// The maximum number of results that the engine -/// will be able to return in one search call. 
-pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; - #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQuery { @@ -93,7 +87,7 @@ impl From for TermsMatchingStrategy { } } -#[derive(Debug, Clone, Serialize, PartialEq)] +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] pub struct SearchHit { #[serde(flatten)] pub document: Document, @@ -103,7 +97,7 @@ pub struct SearchHit { pub matches_position: Option, } -#[derive(Serialize, Debug, Clone, PartialEq)] +#[derive(Serialize, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase")] pub struct SearchResult { pub hits: Vec, @@ -119,229 +113,210 @@ pub struct SearchResult { #[serde(untagged)] pub enum HitsInfo { #[serde(rename_all = "camelCase")] - Pagination { - hits_per_page: usize, - page: usize, - total_pages: usize, - total_hits: usize, - }, + Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize }, #[serde(rename_all = "camelCase")] - OffsetLimit { - limit: usize, - offset: usize, - estimated_total_hits: usize, - }, + OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, } -impl Index { - pub fn perform_search(&self, query: SearchQuery) -> Result { - let before_search = Instant::now(); - let rtxn = self.read_txn()?; +pub fn perform_search( + index: &Index, + query: SearchQuery, +) -> Result { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; - let mut search = self.search(&rtxn); + let mut search = index.search(&rtxn); - if let Some(ref query) = query.q { - search.query(query); - } - - let is_finite_pagination = query.is_finite_pagination(); - search.terms_matching_strategy(query.matching_strategy.into()); - - let max_total_hits = self - .pagination_max_total_hits(&rtxn)? - .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); - - search.exhaustive_number_hits(is_finite_pagination); - - // compute the offset on the limit depending on the pagination mode. - let (offset, limit) = if is_finite_pagination { - let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); - let page = query.page.unwrap_or(1); - - // page 0 gives a limit of 0 forcing Meilisearch to return no document. - page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit)) - } else { - (query.offset, query.limit) - }; - - // Make sure that a user can't get more documents than the hard limit, - // we align that on the offset too. - let offset = min(offset, max_total_hits); - let limit = min(limit, max_total_hits.saturating_sub(offset)); - - search.offset(offset); - search.limit(limit); - - if let Some(ref filter) = query.filter { - if let Some(facets) = parse_filter(filter)? { - search.filter(facets); - } - } - - if let Some(ref sort) = query.sort { - let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { - Ok(sorts) => sorts, - Err(asc_desc_error) => { - return Err(IndexError::Milli(SortError::from(asc_desc_error).into())) - } - }; - - search.sort_criteria(sort); - } - - let milli::SearchResult { - documents_ids, - matching_words, - candidates, - .. - } = search.execute()?; - - let fields_ids_map = self.fields_ids_map(&rtxn).unwrap(); - - let displayed_ids = self - .displayed_fields_ids(&rtxn)? 
- .map(|fields| fields.into_iter().collect::>()) - .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); - - let fids = |attrs: &BTreeSet| { - let mut ids = BTreeSet::new(); - for attr in attrs { - if attr == "*" { - ids = displayed_ids.clone(); - break; - } - - if let Some(id) = fields_ids_map.id(attr) { - ids.insert(id); - } - } - ids - }; - - // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), - // but these attributes must be also be present - // - in the fields_ids_map - // - in the the displayed attributes - let to_retrieve_ids: BTreeSet<_> = query - .attributes_to_retrieve - .as_ref() - .map(fids) - .unwrap_or_else(|| displayed_ids.clone()) - .intersection(&displayed_ids) - .cloned() - .collect(); - - let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default(); - - let attr_to_crop = query.attributes_to_crop.unwrap_or_default(); - - // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted` - // These attributes are: - // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) - // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped - // But these attributes must be also present in displayed attributes - let formatted_options = compute_formatted_options( - &attr_to_highlight, - &attr_to_crop, - query.crop_length, - &to_retrieve_ids, - &fields_ids_map, - &displayed_ids, - ); - - let tokenizer = TokenizerBuilder::default().build(); - - let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer); - formatter_builder.crop_marker(query.crop_marker); - formatter_builder.highlight_prefix(query.highlight_pre_tag); - formatter_builder.highlight_suffix(query.highlight_post_tag); - - let mut documents = Vec::new(); - - let documents_iter = self.documents(&rtxn, documents_ids)?; - - for (_id, obkv) in documents_iter { - // First generate a document with all the displayed fields - let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; - - // select the attributes to retrieve - let attributes_to_retrieve = to_retrieve_ids - .iter() - .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); - let mut document = - permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); - - let (matches_position, formatted) = format_fields( - &displayed_document, - &fields_ids_map, - &formatter_builder, - &formatted_options, - query.show_matches_position, - &displayed_ids, - )?; - - if let Some(sort) = query.sort.as_ref() { - insert_geo_distance(sort, &mut document); - } - - let hit = SearchHit { - document, - formatted, - matches_position, - }; - documents.push(hit); - } - - let number_of_hits = min(candidates.len() as usize, max_total_hits); - let hits_info = if is_finite_pagination { - let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); - // If hit_per_page is 0, then pages can't be computed and so we respond 0. 
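The `fids` closure and the `to_retrieve_ids` intersection above (kept verbatim in the relocated version further down) encode a small rule: `*` expands to every displayed field, and anything requested must itself be displayed. A hedged sketch of that rule with hypothetical types (field ids as `u16`, the name-to-id lookup as a closure):

use std::collections::BTreeSet;

fn resolve_attributes(
    requested: Option<&BTreeSet<String>>,
    displayed_ids: &BTreeSet<u16>,
    id_of: impl Fn(&str) -> Option<u16>,
) -> BTreeSet<u16> {
    let requested_ids = match requested {
        // Nothing requested: retrieve every displayed field.
        None => displayed_ids.clone(),
        Some(attrs) => {
            let mut ids = BTreeSet::new();
            for attr in attrs {
                if attr == "*" {
                    // "*" expands to every displayed field.
                    ids = displayed_ids.clone();
                    break;
                }
                if let Some(id) = id_of(attr.as_str()) {
                    ids.insert(id);
                }
            }
            ids
        }
    };
    // Whatever was requested must also be present in the displayed attributes.
    requested_ids.intersection(displayed_ids).cloned().collect()
}

fn main() {
    let displayed: BTreeSet<u16> = [0, 1, 2].into_iter().collect();
    let wanted: BTreeSet<String> =
        ["title".to_string(), "secret".to_string()].into_iter().collect();
    let id_of = |name: &str| match name {
        "title" => Some(0u16),
        "overview" => Some(1),
        _ => None, // "secret" is not even in the fields id map
    };
    let expected: BTreeSet<u16> = [0u16].into_iter().collect();
    assert_eq!(resolve_attributes(Some(&wanted), &displayed, id_of), expected);
}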
- let total_pages = (number_of_hits + hits_per_page.saturating_sub(1)) - .checked_div(hits_per_page) - .unwrap_or(0); - - HitsInfo::Pagination { - hits_per_page, - page: query.page.unwrap_or(1), - total_pages, - total_hits: number_of_hits, - } - } else { - HitsInfo::OffsetLimit { - limit: query.limit, - offset, - estimated_total_hits: number_of_hits, - } - }; - - let facet_distribution = match query.facets { - Some(ref fields) => { - let mut facet_distribution = self.facets_distribution(&rtxn); - - let max_values_by_facet = self - .max_values_per_facet(&rtxn)? - .unwrap_or(DEFAULT_VALUES_PER_FACET); - facet_distribution.max_values_per_facet(max_values_by_facet); - - if fields.iter().all(|f| f != "*") { - facet_distribution.facets(fields); - } - let distribution = facet_distribution.candidates(candidates).execute()?; - - Some(distribution) - } - None => None, - }; - - let result = SearchResult { - hits: documents, - hits_info, - query: query.q.clone().unwrap_or_default(), - processing_time_ms: before_search.elapsed().as_millis(), - facet_distribution, - }; - Ok(result) + if let Some(ref query) = query.q { + search.query(query); } + + let is_finite_pagination = query.is_finite_pagination(); + search.terms_matching_strategy(query.matching_strategy.into()); + + let max_total_hits = index + .pagination_max_total_hits(&rtxn) + .map_err(milli::Error::from)? + .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); + + search.exhaustive_number_hits(is_finite_pagination); + + // compute the offset on the limit depending on the pagination mode. + let (offset, limit) = if is_finite_pagination { + let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + let page = query.page.unwrap_or(1); + + // page 0 gives a limit of 0 forcing Meilisearch to return no document. + page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit)) + } else { + (query.offset, query.limit) + }; + + // Make sure that a user can't get more documents than the hard limit, + // we align that on the offset too. + let offset = min(offset, max_total_hits); + let limit = min(limit, max_total_hits.saturating_sub(offset)); + + search.offset(offset); + search.limit(limit); + + if let Some(ref filter) = query.filter { + if let Some(facets) = parse_filter(filter)? { + search.filter(facets); + } + } + + if let Some(ref sort) = query.sort { + let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { + Ok(sorts) => sorts, + Err(asc_desc_error) => { + return Err(milli::Error::from(SortError::from(asc_desc_error)).into()) + } + }; + + search.sort_criteria(sort); + } + + let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?; + + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + + let displayed_ids = index + .displayed_fields_ids(&rtxn)? 
+ .map(|fields| fields.into_iter().collect::>()) + .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); + + let fids = |attrs: &BTreeSet| { + let mut ids = BTreeSet::new(); + for attr in attrs { + if attr == "*" { + ids = displayed_ids.clone(); + break; + } + + if let Some(id) = fields_ids_map.id(attr) { + ids.insert(id); + } + } + ids + }; + + // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), + // but these attributes must be also be present + // - in the fields_ids_map + // - in the the displayed attributes + let to_retrieve_ids: BTreeSet<_> = query + .attributes_to_retrieve + .as_ref() + .map(fids) + .unwrap_or_else(|| displayed_ids.clone()) + .intersection(&displayed_ids) + .cloned() + .collect(); + + let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default(); + + let attr_to_crop = query.attributes_to_crop.unwrap_or_default(); + + // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted` + // These attributes are: + // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) + // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped + // But these attributes must be also present in displayed attributes + let formatted_options = compute_formatted_options( + &attr_to_highlight, + &attr_to_crop, + query.crop_length, + &to_retrieve_ids, + &fields_ids_map, + &displayed_ids, + ); + + let tokenizer = TokenizerBuilder::default().build(); + + let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer); + formatter_builder.crop_marker(query.crop_marker); + formatter_builder.highlight_prefix(query.highlight_pre_tag); + formatter_builder.highlight_suffix(query.highlight_post_tag); + + let mut documents = Vec::new(); + + let documents_iter = index.documents(&rtxn, documents_ids)?; + + for (_id, obkv) in documents_iter { + // First generate a document with all the displayed fields + let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; + + // select the attributes to retrieve + let attributes_to_retrieve = to_retrieve_ids + .iter() + .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); + let mut document = + permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); + + let (matches_position, formatted) = format_fields( + &displayed_document, + &fields_ids_map, + &formatter_builder, + &formatted_options, + query.show_matches_position, + &displayed_ids, + )?; + + if let Some(sort) = query.sort.as_ref() { + insert_geo_distance(sort, &mut document); + } + + let hit = SearchHit { document, formatted, matches_position }; + documents.push(hit); + } + + let number_of_hits = min(candidates.len() as usize, max_total_hits); + let hits_info = if is_finite_pagination { + let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + // If hit_per_page is 0, then pages can't be computed and so we respond 0. 
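A small sketch of the finite-pagination arithmetic used just above and below in `perform_search`: `page` and `hitsPerPage` become an offset/limit pair (page 0 yields nothing) and `totalPages` is a ceiling division guarded against `hitsPerPage == 0`; the clamping against `paginationMaxTotalHits` is left out here:

fn finite_pagination(page: usize, hits_per_page: usize, number_of_hits: usize) -> (usize, usize, usize) {
    // Page 0 gives a limit of 0, forcing the engine to return no document.
    let (offset, limit) =
        page.checked_sub(1).map_or((0, 0), |p| (hits_per_page * p, hits_per_page));
    // Ceiling division; if hits_per_page is 0, pages can't be computed, so answer 0.
    let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
        .checked_div(hits_per_page)
        .unwrap_or(0);
    (offset, limit, total_pages)
}

fn main() {
    assert_eq!(finite_pagination(3, 20, 45), (40, 20, 3)); // page 3 of 45 hits, 20 per page
    assert_eq!(finite_pagination(0, 20, 45), (0, 0, 3));   // page 0 returns nothing
    assert_eq!(finite_pagination(1, 0, 45), (0, 0, 0));    // hitsPerPage of 0 means zero pages
}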
+ let total_pages = (number_of_hits + hits_per_page.saturating_sub(1)) + .checked_div(hits_per_page) + .unwrap_or(0); + + HitsInfo::Pagination { + hits_per_page, + page: query.page.unwrap_or(1), + total_pages, + total_hits: number_of_hits, + } + } else { + HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits } + }; + + let facet_distribution = match query.facets { + Some(ref fields) => { + let mut facet_distribution = index.facets_distribution(&rtxn); + + let max_values_by_facet = index + .max_values_per_facet(&rtxn) + .map_err(milli::Error::from)? + .unwrap_or(DEFAULT_VALUES_PER_FACET); + facet_distribution.max_values_per_facet(max_values_by_facet); + + if fields.iter().all(|f| f != "*") { + facet_distribution.facets(fields); + } + let distribution = facet_distribution.candidates(candidates).execute()?; + + Some(distribution) + } + None => None, + }; + + let result = SearchResult { + hits: documents, + hits_info, + query: query.q.clone().unwrap_or_default(), + processing_time_ms: before_search.elapsed().as_millis(), + facet_distribution, + }; + Ok(result) } fn insert_geo_distance(sorts: &[String], document: &mut Document) { @@ -351,10 +326,7 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) { }; if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) { // TODO: TAMO: milli encountered an internal error, what do we want to do? - let base = [ - capture_group[1].parse().unwrap(), - capture_group[2].parse().unwrap(), - ]; + let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()]; let geo_point = &document.get("_geo").unwrap_or(&json!(null)); if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) { let distance = milli::distance_between_two_points(&base, &[lat, lng]); @@ -403,10 +375,7 @@ fn add_highlight_to_formatted_options( displayed_ids: &BTreeSet, ) { for attr in attr_to_highlight { - let new_format = FormatOptions { - highlight: true, - crop: None, - }; + let new_format = FormatOptions { highlight: true, crop: None }; if attr == "*" { for id in displayed_ids { @@ -445,10 +414,7 @@ fn add_crop_to_formatted_options( formatted_options .entry(*id) .and_modify(|f| f.crop = Some(attr_len)) - .or_insert(FormatOptions { - highlight: false, - crop: Some(attr_len), - }); + .or_insert(FormatOptions { highlight: false, crop: Some(attr_len) }); } } @@ -457,10 +423,7 @@ fn add_crop_to_formatted_options( formatted_options .entry(id) .and_modify(|f| f.crop = Some(attr_len)) - .or_insert(FormatOptions { - highlight: false, - crop: Some(attr_len), - }); + .or_insert(FormatOptions { highlight: false, crop: Some(attr_len) }); } } } @@ -471,10 +434,7 @@ fn add_non_formatted_ids_to_formatted_options( to_retrieve_ids: &BTreeSet, ) { for id in to_retrieve_ids { - formatted_options.entry(*id).or_insert(FormatOptions { - highlight: false, - crop: None, - }); + formatted_options.entry(*id).or_insert(FormatOptions { highlight: false, crop: None }); } } @@ -482,16 +442,13 @@ fn make_document( displayed_attributes: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReaderU16, -) -> Result { +) -> Result { let mut document = serde_json::Map::new(); // recreate the original json for (key, value) in obkv.iter() { let value = serde_json::from_slice(value)?; - let key = field_ids_map - .name(key) - .expect("Missing field name") - .to_string(); + let key = field_ids_map.name(key).expect("Missing field name").to_string(); document.insert(key, value); } @@ -512,14 +469,13 @@ fn 
format_fields<'a, A: AsRef<[u8]>>( formatted_options: &BTreeMap, compute_matches: bool, displayable_ids: &BTreeSet, -) -> Result<(Option, Document)> { +) -> Result<(Option, Document), MeilisearchHttpError> { let mut matches_position = compute_matches.then(BTreeMap::new); let mut document = document.clone(); // select the attributes to retrieve - let displayable_names = displayable_ids - .iter() - .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); + let displayable_names = + displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name")); permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| { // To get the formatting option of each key we need to see all the rules that applies // to the value and merge them together. eg. If a user said he wanted to highlight `doggo` @@ -535,13 +491,7 @@ fn format_fields<'a, A: AsRef<[u8]>>( .reduce(|acc, option| acc.merge(option)); let mut infos = Vec::new(); - *value = format_value( - std::mem::take(value), - builder, - format, - &mut infos, - compute_matches, - ); + *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches); if let Some(matches) = matches_position.as_mut() { if !infos.is_empty() { @@ -641,18 +591,18 @@ fn format_value<'a, A: AsRef<[u8]>>( } } -fn parse_filter(facets: &Value) -> Result> { +fn parse_filter(facets: &Value) -> Result, MeilisearchHttpError> { match facets { Value::String(expr) => { let condition = Filter::from_str(expr)?; Ok(condition) } Value::Array(arr) => parse_filter_array(arr), - v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()), + v => Err(MeilisearchHttpError::InvalidExpression(&["Array"], v.clone())), } } -fn parse_filter_array(arr: &[Value]) -> Result> { +fn parse_filter_array(arr: &[Value]) -> Result, MeilisearchHttpError> { let mut ands = Vec::new(); for value in arr { match value { @@ -663,16 +613,20 @@ fn parse_filter_array(arr: &[Value]) -> Result> { match value { Value::String(s) => ors.push(s.as_str()), v => { - return Err(FacetError::InvalidExpression(&["String"], v.clone()).into()) + return Err(MeilisearchHttpError::InvalidExpression( + &["String"], + v.clone(), + )) } } } ands.push(Either::Left(ors)); } v => { - return Err( - FacetError::InvalidExpression(&["String", "[String]"], v.clone()).into(), - ) + return Err(MeilisearchHttpError::InvalidExpression( + &["String", "[String]"], + v.clone(), + )) } } } diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs deleted file mode 100644 index 786d318f8..000000000 --- a/meilisearch-http/src/task.rs +++ /dev/null @@ -1,434 +0,0 @@ -use std::error::Error; -use std::fmt::{self, Write}; -use std::str::FromStr; -use std::write; - -use meilisearch_lib::index::{Settings, Unchecked}; -use meilisearch_lib::tasks::task::{ - DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult, -}; -use meilisearch_types::error::ResponseError; -use serde::{Deserialize, Serialize, Serializer}; -use time::{Duration, OffsetDateTime}; - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub enum TaskType { - IndexCreation, - IndexUpdate, - IndexDeletion, - DocumentAdditionOrUpdate, - DocumentDeletion, - SettingsUpdate, - DumpCreation, -} - -impl From for TaskType { - fn from(other: TaskContent) -> Self { - match other { - TaskContent::IndexCreation { .. } => TaskType::IndexCreation, - TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate, - TaskContent::IndexDeletion { .. 
} => TaskType::IndexDeletion, - TaskContent::DocumentAddition { .. } => TaskType::DocumentAdditionOrUpdate, - TaskContent::DocumentDeletion { .. } => TaskType::DocumentDeletion, - TaskContent::SettingsUpdate { .. } => TaskType::SettingsUpdate, - TaskContent::Dump { .. } => TaskType::DumpCreation, - } - } -} - -#[derive(Debug)] -pub struct TaskTypeError { - invalid_type: String, -} - -impl fmt::Display for TaskTypeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "invalid task type `{}`, expecting one of: \ - indexCreation, indexUpdate, indexDeletion, documentAdditionOrUpdate, \ - documentDeletion, settingsUpdate, dumpCreation", - self.invalid_type - ) - } -} - -impl Error for TaskTypeError {} - -impl FromStr for TaskType { - type Err = TaskTypeError; - - fn from_str(type_: &str) -> Result { - if type_.eq_ignore_ascii_case("indexCreation") { - Ok(TaskType::IndexCreation) - } else if type_.eq_ignore_ascii_case("indexUpdate") { - Ok(TaskType::IndexUpdate) - } else if type_.eq_ignore_ascii_case("indexDeletion") { - Ok(TaskType::IndexDeletion) - } else if type_.eq_ignore_ascii_case("documentAdditionOrUpdate") { - Ok(TaskType::DocumentAdditionOrUpdate) - } else if type_.eq_ignore_ascii_case("documentDeletion") { - Ok(TaskType::DocumentDeletion) - } else if type_.eq_ignore_ascii_case("settingsUpdate") { - Ok(TaskType::SettingsUpdate) - } else if type_.eq_ignore_ascii_case("dumpCreation") { - Ok(TaskType::DumpCreation) - } else { - Err(TaskTypeError { - invalid_type: type_.to_string(), - }) - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub enum TaskStatus { - Enqueued, - Processing, - Succeeded, - Failed, -} - -#[derive(Debug)] -pub struct TaskStatusError { - invalid_status: String, -} - -impl fmt::Display for TaskStatusError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "invalid task status `{}`, expecting one of: \ - enqueued, processing, succeeded, or failed", - self.invalid_status, - ) - } -} - -impl Error for TaskStatusError {} - -impl FromStr for TaskStatus { - type Err = TaskStatusError; - - fn from_str(status: &str) -> Result { - if status.eq_ignore_ascii_case("enqueued") { - Ok(TaskStatus::Enqueued) - } else if status.eq_ignore_ascii_case("processing") { - Ok(TaskStatus::Processing) - } else if status.eq_ignore_ascii_case("succeeded") { - Ok(TaskStatus::Succeeded) - } else if status.eq_ignore_ascii_case("failed") { - Ok(TaskStatus::Failed) - } else { - Err(TaskStatusError { - invalid_status: status.to_string(), - }) - } - } -} - -#[derive(Debug, Serialize)] -#[serde(untagged)] -#[allow(clippy::large_enum_variant)] -enum TaskDetails { - #[serde(rename_all = "camelCase")] - DocumentAddition { - received_documents: usize, - indexed_documents: Option, - }, - #[serde(rename_all = "camelCase")] - Settings { - #[serde(flatten)] - settings: Settings, - }, - #[serde(rename_all = "camelCase")] - IndexInfo { primary_key: Option }, - #[serde(rename_all = "camelCase")] - DocumentDeletion { - matched_documents: usize, - deleted_documents: Option, - }, - #[serde(rename_all = "camelCase")] - ClearAll { deleted_documents: Option }, - #[serde(rename_all = "camelCase")] - Dump { dump_uid: String }, -} - -/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for -/// https://github.com/time-rs/time/issues/378. -/// This code is a port of the old code of time that was removed in 0.2. 
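The removed `serialize_duration` that follows rendered a `time::Duration` as a best-effort subset of ISO 8601 (a days component plus a seconds component, with negative durations dropped). A standalone sketch of the same rule, without the milli- and microsecond special cases of the original:

use std::fmt::Write;
use time::Duration;

fn iso8601(duration: Duration) -> Option<String> {
    if duration.is_negative() {
        return None; // negative durations are not valid ISO 8601
    }
    const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds();
    let secs = duration.whole_seconds();
    let days = secs / SECS_PER_DAY;
    let secs = secs - days * SECS_PER_DAY;
    let nanos = duration.subsec_nanoseconds();
    let mut res = String::from("P");
    if days != 0 {
        write!(&mut res, "{}D", days).unwrap();
    }
    if secs != 0 || nanos != 0 || days == 0 {
        if nanos == 0 {
            write!(&mut res, "T{}S", secs).unwrap();
        } else {
            // The removed code also shortened the fraction for milli/microsecond precision.
            write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap();
        }
    }
    Some(res)
}

fn main() {
    assert_eq!(iso8601(Duration::seconds(86_400 + 3_661)).unwrap(), "P1DT3661S");
    assert_eq!(iso8601(Duration::milliseconds(1_500)).unwrap(), "PT1.500000000S");
    assert!(iso8601(Duration::seconds(-1)).is_none());
}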
-fn serialize_duration( - duration: &Option, - serializer: S, -) -> Result { - match duration { - Some(duration) => { - // technically speaking, negative duration is not valid ISO 8601 - if duration.is_negative() { - return serializer.serialize_none(); - } - - const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds(); - let secs = duration.whole_seconds(); - let days = secs / SECS_PER_DAY; - let secs = secs - days * SECS_PER_DAY; - let hasdate = days != 0; - let nanos = duration.subsec_nanoseconds(); - let hastime = (secs != 0 || nanos != 0) || !hasdate; - - // all the following unwrap can't fail - let mut res = String::new(); - write!(&mut res, "P").unwrap(); - - if hasdate { - write!(&mut res, "{}D", days).unwrap(); - } - - const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds(); - const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds(); - - if hastime { - if nanos == 0 { - write!(&mut res, "T{}S", secs).unwrap(); - } else if nanos % NANOS_PER_MILLI == 0 { - write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap(); - } else if nanos % NANOS_PER_MICRO == 0 { - write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap(); - } else { - write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap(); - } - } - - serializer.serialize_str(&res) - } - None => serializer.serialize_none(), - } -} - -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct TaskView { - pub uid: TaskId, - index_uid: Option, - status: TaskStatus, - #[serde(rename = "type")] - task_type: TaskType, - #[serde(skip_serializing_if = "Option::is_none")] - details: Option, - #[serde(skip_serializing_if = "Option::is_none")] - error: Option, - #[serde(serialize_with = "serialize_duration")] - duration: Option, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - enqueued_at: OffsetDateTime, - #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] - started_at: Option, - #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] - finished_at: Option, -} - -impl From for TaskView { - fn from(task: Task) -> Self { - let index_uid = task.index_uid().map(String::from); - let Task { - id, - content, - events, - } = task; - - let (task_type, mut details) = match content { - TaskContent::DocumentAddition { - documents_count, .. - } => { - let details = TaskDetails::DocumentAddition { - received_documents: documents_count, - indexed_documents: None, - }; - - (TaskType::DocumentAdditionOrUpdate, Some(details)) - } - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Ids(ids), - .. - } => ( - TaskType::DocumentDeletion, - Some(TaskDetails::DocumentDeletion { - matched_documents: ids.len(), - deleted_documents: None, - }), - ), - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Clear, - .. - } => ( - TaskType::DocumentDeletion, - Some(TaskDetails::ClearAll { - deleted_documents: None, - }), - ), - TaskContent::IndexDeletion { .. } => ( - TaskType::IndexDeletion, - Some(TaskDetails::ClearAll { - deleted_documents: None, - }), - ), - TaskContent::SettingsUpdate { settings, .. } => ( - TaskType::SettingsUpdate, - Some(TaskDetails::Settings { settings }), - ), - TaskContent::IndexCreation { primary_key, .. } => ( - TaskType::IndexCreation, - Some(TaskDetails::IndexInfo { primary_key }), - ), - TaskContent::IndexUpdate { primary_key, .. 
} => ( - TaskType::IndexUpdate, - Some(TaskDetails::IndexInfo { primary_key }), - ), - TaskContent::Dump { uid } => ( - TaskType::DumpCreation, - Some(TaskDetails::Dump { dump_uid: uid }), - ), - }; - - // An event always has at least one event: "Created" - let (status, error, finished_at) = match events.last().unwrap() { - TaskEvent::Created(_) => (TaskStatus::Enqueued, None, None), - TaskEvent::Batched { .. } => (TaskStatus::Enqueued, None, None), - TaskEvent::Processing(_) => (TaskStatus::Processing, None, None), - TaskEvent::Succeeded { timestamp, result } => { - match (result, &mut details) { - ( - TaskResult::DocumentAddition { - indexed_documents: num, - .. - }, - Some(TaskDetails::DocumentAddition { - ref mut indexed_documents, - .. - }), - ) => { - indexed_documents.replace(*num); - } - ( - TaskResult::DocumentDeletion { - deleted_documents: docs, - .. - }, - Some(TaskDetails::DocumentDeletion { - ref mut deleted_documents, - .. - }), - ) => { - deleted_documents.replace(*docs); - } - ( - TaskResult::ClearAll { - deleted_documents: docs, - }, - Some(TaskDetails::ClearAll { - ref mut deleted_documents, - }), - ) => { - deleted_documents.replace(*docs); - } - _ => (), - } - (TaskStatus::Succeeded, None, Some(*timestamp)) - } - TaskEvent::Failed { timestamp, error } => { - match details { - Some(TaskDetails::DocumentDeletion { - ref mut deleted_documents, - .. - }) => { - deleted_documents.replace(0); - } - Some(TaskDetails::ClearAll { - ref mut deleted_documents, - .. - }) => { - deleted_documents.replace(0); - } - Some(TaskDetails::DocumentAddition { - ref mut indexed_documents, - .. - }) => { - indexed_documents.replace(0); - } - _ => (), - } - (TaskStatus::Failed, Some(error.clone()), Some(*timestamp)) - } - }; - - let enqueued_at = match events.first() { - Some(TaskEvent::Created(ts)) => *ts, - _ => unreachable!("A task must always have a creation event."), - }; - - let started_at = events.iter().find_map(|e| match e { - TaskEvent::Processing(ts) => Some(*ts), - _ => None, - }); - - let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts)); - - Self { - uid: id, - index_uid, - status, - task_type, - details, - error, - duration, - enqueued_at, - started_at, - finished_at, - } - } -} - -#[derive(Debug, Serialize)] -pub struct TaskListView { - pub results: Vec, - pub limit: usize, - pub from: Option, - pub next: Option, -} - -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct SummarizedTaskView { - task_uid: TaskId, - index_uid: Option, - status: TaskStatus, - #[serde(rename = "type")] - task_type: TaskType, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - enqueued_at: OffsetDateTime, -} - -impl From for SummarizedTaskView { - fn from(mut other: Task) -> Self { - let created_event = other - .events - .drain(..1) - .next() - .expect("Task must have an enqueued event."); - - let enqueued_at = match created_event { - TaskEvent::Created(ts) => ts, - _ => unreachable!("The first event of a task must always be 'Created'"), - }; - - Self { - task_uid: other.id, - index_uid: other.index_uid().map(String::from), - status: TaskStatus::Enqueued, - task_type: other.content.into(), - enqueued_at, - } - } -} diff --git a/meilisearch-http/tests/auth/api_keys.rs b/meilisearch-http/tests/auth/api_keys.rs index 4e1908257..bcea51d3f 100644 --- a/meilisearch-http/tests/auth/api_keys.rs +++ b/meilisearch-http/tests/auth/api_keys.rs @@ -1,7 +1,9 @@ -use crate::common::Server; +use std::{thread, time}; + use assert_json_diff::assert_json_include; use 
serde_json::{json, Value}; -use std::{thread, time}; + +use crate::common::Server; #[actix_rt::test] async fn add_valid_api_key() { diff --git a/meilisearch-http/tests/auth/authorization.rs b/meilisearch-http/tests/auth/authorization.rs index 5b23749c5..fae6ee7e1 100644 --- a/meilisearch-http/tests/auth/authorization.rs +++ b/meilisearch-http/tests/auth/authorization.rs @@ -1,11 +1,13 @@ -use crate::common::Server; +use std::collections::{HashMap, HashSet}; + use ::time::format_description::well_known::Rfc3339; use maplit::{hashmap, hashset}; use once_cell::sync::Lazy; use serde_json::{json, Value}; -use std::collections::{HashMap, HashSet}; use time::{Duration, OffsetDateTime}; +use crate::common::Server; + pub static AUTHORIZATIONS: Lazy>> = Lazy::new(|| { let mut authorizations = hashmap! { @@ -16,6 +18,7 @@ pub static AUTHORIZATIONS: Lazy hashset!{"documents.get", "documents.*", "*"}, ("DELETE", "/indexes/products/documents/0") => hashset!{"documents.delete", "documents.*", "*"}, ("GET", "/tasks") => hashset!{"tasks.get", "tasks.*", "*"}, + ("DELETE", "/tasks") => hashset!{"tasks.delete", "tasks.*", "*"}, ("GET", "/tasks?indexUid=products") => hashset!{"tasks.get", "tasks.*", "*"}, ("GET", "/tasks/0") => hashset!{"tasks.get", "tasks.*", "*"}, ("PATCH", "/indexes/products/") => hashset!{"indexes.update", "indexes.*", "*"}, @@ -23,6 +26,7 @@ pub static AUTHORIZATIONS: Lazy hashset!{"indexes.delete", "indexes.*", "*"}, ("POST", "/indexes") => hashset!{"indexes.create", "indexes.*", "*"}, ("GET", "/indexes") => hashset!{"indexes.get", "indexes.*", "*"}, + ("POST", "/swap-indexes") => hashset!{"indexes.swap", "indexes.*", "*"}, ("GET", "/indexes/products/settings") => hashset!{"settings.get", "settings.*", "*"}, ("GET", "/indexes/products/settings/displayed-attributes") => hashset!{"settings.get", "settings.*", "*"}, ("GET", "/indexes/products/settings/distinct-attribute") => hashset!{"settings.get", "settings.*", "*"}, @@ -55,21 +59,14 @@ pub static AUTHORIZATIONS: Lazy> = Lazy::new(|| { - AUTHORIZATIONS - .values() - .cloned() - .reduce(|l, r| l.union(&r).cloned().collect()) - .unwrap() + AUTHORIZATIONS.values().cloned().reduce(|l, r| l.union(&r).cloned().collect()).unwrap() }); static INVALID_RESPONSE: Lazy = Lazy::new(|| { @@ -81,7 +78,6 @@ static INVALID_RESPONSE: Lazy = Lazy::new(|| { }); #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_expired_key() { use std::{thread, time}; @@ -99,7 +95,7 @@ async fn error_access_expired_key() { assert!(response["key"].is_string()); let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); // wait until the key is expired. 
thread::sleep(time::Duration::new(1, 0)); @@ -107,19 +103,12 @@ async fn error_access_expired_key() { for (method, route) in AUTHORIZATIONS.keys() { let (response, code) = server.dummy_request(method, route).await; - assert_eq!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_unauthorized_index() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); @@ -135,7 +124,7 @@ async fn error_access_unauthorized_index() { assert!(response["key"].is_string()); let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); for (method, route) in AUTHORIZATIONS .keys() @@ -144,19 +133,12 @@ async fn error_access_unauthorized_index() { { let (response, code) = server.dummy_request(method, route).await; - assert_eq!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_unauthorized_action() { let mut server = Server::new_auth().await; @@ -175,22 +157,15 @@ async fn error_access_unauthorized_action() { assert!(response["key"].is_string()); let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.dummy_request(method, route).await; - assert_eq!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_master_key() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); @@ -199,19 +174,12 @@ async fn access_authorized_master_key() { for ((method, route), _) in AUTHORIZATIONS.iter() { let (response, code) = server.dummy_request(method, route).await; - assert_ne!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_ne!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_ne!(code, 403); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_restricted_index() { let mut server = Server::new_auth().await; for ((method, route), actions) in AUTHORIZATIONS.iter() { @@ -230,7 +198,7 @@ async fn access_authorized_restricted_index() { assert!(response["key"].is_string()); let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.dummy_request(method, route).await; @@ -248,7 +216,6 @@ async fn access_authorized_restricted_index() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_no_index_restriction() { let mut server = Server::new_auth().await; @@ -268,7 +235,7 @@ async fn access_authorized_no_index_restriction() { assert!(response["key"].is_string()); let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.dummy_request(method, route).await; @@ -286,7 +253,6 @@ async fn 
access_authorized_no_index_restriction() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_stats_restricted_index() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -299,7 +265,8 @@ async fn access_authorized_stats_restricted_index() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on `products` index only. let content = json!({ @@ -313,7 +280,7 @@ async fn access_authorized_stats_restricted_index() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.stats().await; assert_eq!(200, code, "{:?}", &response); @@ -326,7 +293,6 @@ async fn access_authorized_stats_restricted_index() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_stats_no_index_restriction() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -339,7 +305,8 @@ async fn access_authorized_stats_no_index_restriction() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on all indexes. let content = json!({ @@ -353,7 +320,7 @@ async fn access_authorized_stats_no_index_restriction() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.stats().await; assert_eq!(200, code, "{:?}", &response); @@ -366,7 +333,6 @@ async fn access_authorized_stats_no_index_restriction() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn list_authorized_indexes_restricted_index() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -379,7 +345,8 @@ async fn list_authorized_indexes_restricted_index() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on `products` index only. let content = json!({ @@ -393,7 +360,7 @@ async fn list_authorized_indexes_restricted_index() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.list_indexes(None, None).await; assert_eq!(200, code, "{:?}", &response); @@ -407,7 +374,6 @@ async fn list_authorized_indexes_restricted_index() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn list_authorized_indexes_no_index_restriction() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -420,7 +386,8 @@ async fn list_authorized_indexes_no_index_restriction() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on all indexes. 
let content = json!({ @@ -434,7 +401,7 @@ async fn list_authorized_indexes_no_index_restriction() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.list_indexes(None, None).await; assert_eq!(200, code, "{:?}", &response); @@ -460,7 +427,8 @@ async fn list_authorized_tasks_restricted_index() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on `products` index only. let content = json!({ @@ -474,7 +442,7 @@ async fn list_authorized_tasks_restricted_index() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.service.get("/tasks").await; assert_eq!(200, code, "{:?}", &response); @@ -500,7 +468,8 @@ async fn list_authorized_tasks_no_index_restriction() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on all indexes. let content = json!({ @@ -514,7 +483,7 @@ async fn list_authorized_tasks_no_index_restriction() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let (response, code) = server.service.get("/tasks").await; assert_eq!(200, code, "{:?}", &response); @@ -545,7 +514,7 @@ async fn error_creating_index_without_action() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); let expected_error = json!({ "message": "Index `test` not found.", @@ -625,7 +594,7 @@ async fn lazy_create_index() { // use created key. let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); // try to create a index via add documents route let index = server.index("test"); @@ -692,7 +661,7 @@ async fn error_creating_index_without_index() { // use created key. 
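Several of these tests switch from hard-coding task id 0 to reading the `taskUid` out of the 202 response and waiting on that. A hedged sketch of the extraction step, with a hypothetical response body shaped like the summarized task view the routes return:

use serde_json::{json, Value};

fn task_uid(response: &Value) -> u64 {
    response["taskUid"].as_u64().expect("task-producing routes return a taskUid")
}

fn main() {
    let response = json!({
        "taskUid": 4,
        "indexUid": "products",
        "status": "enqueued",
        "type": "indexCreation",
        "enqueuedAt": "2022-10-01T12:00:00Z"
    });
    assert_eq!(task_uid(&response), 4);
    // In the tests this uid is then handed to `index.wait_task(task_id).await`.
}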
let key = response["key"].as_str().unwrap(); - server.use_api_key(&key); + server.use_api_key(key); // try to create a index via add documents route let index = server.index("test"); diff --git a/meilisearch-http/tests/auth/mod.rs b/meilisearch-http/tests/auth/mod.rs index 03c24dd6d..dec02cf1f 100644 --- a/meilisearch-http/tests/auth/mod.rs +++ b/meilisearch-http/tests/auth/mod.rs @@ -3,11 +3,11 @@ mod authorization; mod payload; mod tenant_token; -use crate::common::Server; use actix_web::http::StatusCode; - use serde_json::{json, Value}; +use crate::common::Server; + impl Server { pub fn use_api_key(&mut self, api_key: impl AsRef) { self.service.api_key = Some(api_key.as_ref().to_string()); diff --git a/meilisearch-http/tests/auth/payload.rs b/meilisearch-http/tests/auth/payload.rs index 4437cd5f7..78eec3eb2 100644 --- a/meilisearch-http/tests/auth/payload.rs +++ b/meilisearch-http/tests/auth/payload.rs @@ -1,8 +1,8 @@ -use crate::common::Server; use actix_web::test; -use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; +use crate::common::Server; + #[actix_rt::test] async fn error_api_key_bad_content_types() { let content = json!({ @@ -15,14 +15,7 @@ async fn error_api_key_bad_content_types() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -44,10 +37,7 @@ async fn error_api_key_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); // patch let req = test::TestRequest::patch() @@ -69,10 +59,7 @@ async fn error_api_key_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); } #[actix_rt::test] @@ -87,14 +74,7 @@ async fn error_api_key_empty_content_types() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -116,10 +96,7 @@ async fn error_api_key_empty_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); // patch let req = test::TestRequest::patch() @@ -141,10 +118,7 @@ async fn error_api_key_empty_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], 
"https://docs.meilisearch.com/errors#invalid_content_type"); } #[actix_rt::test] @@ -159,14 +133,7 @@ async fn error_api_key_missing_content_types() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -187,10 +154,7 @@ async fn error_api_key_missing_content_types() { ); assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); // patch let req = test::TestRequest::patch() @@ -211,10 +175,7 @@ async fn error_api_key_missing_content_types() { ); assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); } #[actix_rt::test] @@ -223,14 +184,7 @@ async fn error_api_key_empty_payload() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -246,10 +200,7 @@ async fn error_api_key_empty_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); assert_eq!(response["message"], json!(r#"A json payload is missing."#)); // patch @@ -266,10 +217,7 @@ async fn error_api_key_empty_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); assert_eq!(response["message"], json!(r#"A json payload is missing."#)); } @@ -279,14 +227,7 @@ async fn error_api_key_malformed_payload() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -302,10 +243,7 @@ async fn error_api_key_malformed_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); 
assert_eq!( response["message"], json!( @@ -327,10 +265,7 @@ async fn error_api_key_malformed_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); assert_eq!( response["message"], json!( diff --git a/meilisearch-http/tests/auth/tenant_token.rs b/meilisearch-http/tests/auth/tenant_token.rs index 6e2127aeb..3206a6553 100644 --- a/meilisearch-http/tests/auth/tenant_token.rs +++ b/meilisearch-http/tests/auth/tenant_token.rs @@ -1,12 +1,13 @@ -use crate::common::Server; +use std::collections::HashMap; + use ::time::format_description::well_known::Rfc3339; use maplit::hashmap; use once_cell::sync::Lazy; use serde_json::{json, Value}; -use std::collections::HashMap; use time::{Duration, OffsetDateTime}; use super::authorization::{ALL_ACTIONS, AUTHORIZATIONS}; +use crate::common::Server; fn generate_tenant_token( parent_uid: impl AsRef, @@ -17,12 +18,8 @@ fn generate_tenant_token( let parent_uid = parent_uid.as_ref(); body.insert("apiKeyUid", json!(parent_uid)); - encode( - &Header::default(), - &body, - &EncodingKey::from_secret(parent_key.as_ref().as_bytes()), - ) - .unwrap() + encode(&Header::default(), &body, &EncodingKey::from_secret(parent_key.as_ref().as_bytes())) + .unwrap() } static DOCUMENTS: Lazy = Lazy::new(|| { @@ -473,7 +470,7 @@ async fn error_access_forbidden_routes() { "searchRules" => json!(["*"]), "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp()) }; - let web_token = generate_tenant_token(&uid, &key, tenant_token); + let web_token = generate_tenant_token(uid, key, tenant_token); server.use_api_key(&web_token); for ((method, route), actions) in AUTHORIZATIONS.iter() { @@ -509,22 +506,18 @@ async fn error_access_expired_parent_key() { "searchRules" => json!(["*"]), "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp()) }; - let web_token = generate_tenant_token(&uid, &key, tenant_token); + let web_token = generate_tenant_token(uid, key, tenant_token); server.use_api_key(&web_token); // test search request while parent_key is not expired - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_ne!(response, INVALID_RESPONSE.clone()); assert_ne!(code, 403); // wait until the key is expired. 
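`generate_tenant_token` above signs a small set of claims (`apiKeyUid`, `searchRules`, `exp`) with the parent API key's bytes through `Header`, `EncodingKey` and `encode`, which points at the `jsonwebtoken` crate (its import is outside these hunks, so treat that as an assumption). A self-contained sketch of the same idea; the uid, secret and rules below are placeholders, not values from the suite:

    use std::collections::HashMap;

    use jsonwebtoken::{encode, EncodingKey, Header};
    use serde_json::{json, Value};

    fn generate_tenant_token(
        parent_uid: &str,
        parent_key: &str,
        mut body: HashMap<&'static str, Value>,
    ) -> String {
        // The parent key's uid travels in the claims; the parent key itself signs the token.
        body.insert("apiKeyUid", json!(parent_uid));
        encode(&Header::default(), &body, &EncodingKey::from_secret(parent_key.as_bytes())).unwrap()
    }

    fn main() {
        let rules = HashMap::from([("searchRules", json!(["*"]))]);
        let token = generate_tenant_token("a-key-uid", "a-parent-key", rules);
        println!("{token}");
    }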
thread::sleep(time::Duration::new(1, 0)); - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } @@ -552,13 +545,11 @@ async fn error_access_modified_token() { "searchRules" => json!(["products"]), "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp()) }; - let web_token = generate_tenant_token(&uid, &key, tenant_token); + let web_token = generate_tenant_token(uid, key, tenant_token); server.use_api_key(&web_token); // test search request while web_token is valid - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_ne!(response, INVALID_RESPONSE.clone()); assert_ne!(code, 403); @@ -567,7 +558,7 @@ async fn error_access_modified_token() { "exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp()) }; - let alt = generate_tenant_token(&uid, &key, tenant_token); + let alt = generate_tenant_token(uid, key, tenant_token); let altered_token = [ web_token.split('.').next().unwrap(), alt.split('.').nth(1).unwrap(), @@ -576,9 +567,7 @@ async fn error_access_modified_token() { .join("."); server.use_api_key(&altered_token); - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } diff --git a/meilisearch-http/tests/common/encoder.rs b/meilisearch-http/tests/common/encoder.rs index 2363ec4f9..b6a60f73e 100644 --- a/meilisearch-http/tests/common/encoder.rs +++ b/meilisearch-http/tests/common/encoder.rs @@ -1,9 +1,10 @@ +use std::io::{Read, Write}; + use actix_http::header::TryIntoHeaderPair; use bytes::Bytes; use flate2::read::{GzDecoder, ZlibDecoder}; use flate2::write::{GzEncoder, ZlibEncoder}; use flate2::Compression; -use std::io::{Read, Write}; #[derive(Clone, Copy)] pub enum Encoder { @@ -18,24 +19,18 @@ impl Encoder { match self { Self::Gzip => { let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); - encoder - .write_all(&body.into()) - .expect("Failed to encode request body"); + encoder.write_all(&body.into()).expect("Failed to encode request body"); encoder.finish().expect("Failed to encode request body") } Self::Deflate => { let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default()); - encoder - .write_all(&body.into()) - .expect("Failed to encode request body"); + encoder.write_all(&body.into()).expect("Failed to encode request body"); encoder.finish().unwrap() } Self::Plain => Vec::from(body.into()), Self::Brotli => { let mut encoder = brotli::CompressorWriter::new(Vec::new(), 32 * 1024, 3, 22); - encoder - .write_all(&body.into()) - .expect("Failed to encode request body"); + encoder.write_all(&body.into()).expect("Failed to encode request body"); encoder.flush().expect("Failed to encode request body"); encoder.into_inner() } @@ -57,9 +52,7 @@ impl Encoder { .expect("Invalid zlib stream"); } Self::Plain => { - buffer - .write_all(input.as_ref()) - .expect("Unexpected memory copying issue"); + buffer.write_all(input.as_ref()).expect("Unexpected memory copying issue"); } Self::Brotli => { brotli::Decompressor::new(input.as_ref(), 4096) @@ -80,8 +73,6 @@ impl Encoder { } pub fn 
iterator() -> impl Iterator { - [Self::Plain, Self::Gzip, Self::Deflate, Self::Brotli] - .iter() - .copied() + [Self::Plain, Self::Gzip, Self::Deflate, Self::Brotli].iter().copied() } } diff --git a/meilisearch-http/tests/common/index.rs b/meilisearch-http/tests/common/index.rs index 43534074d..1ac56d9ad 100644 --- a/meilisearch-http/tests/common/index.rs +++ b/meilisearch-http/tests/common/index.rs @@ -1,17 +1,14 @@ -use std::{ - fmt::Write, - panic::{catch_unwind, resume_unwind, UnwindSafe}, - time::Duration, -}; +use std::fmt::Write; +use std::panic::{catch_unwind, resume_unwind, UnwindSafe}; +use std::time::Duration; use actix_web::http::StatusCode; use serde_json::{json, Value}; use tokio::time::sleep; use urlencoding::encode as urlencode; -use super::service::Service; - use super::encoder::Encoder; +use super::service::Service; pub struct Index<'a> { pub uid: String, @@ -28,10 +25,8 @@ impl Index<'_> { pub async fn load_test_set(&self) -> u64 { let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref())); - let (response, code) = self - .service - .post_str(url, include_str!("../assets/test_set.json")) - .await; + let (response, code) = + self.service.post_str(url, include_str!("../assets/test_set.json")).await; assert_eq!(code, 202); let update_id = response["taskUid"].as_i64().unwrap(); self.wait_task(update_id as u64).await; @@ -43,9 +38,7 @@ impl Index<'_> { "uid": self.uid, "primaryKey": primary_key, }); - self.service - .post_encoded("/indexes", body, self.encoder) - .await + self.service.post_encoded("/indexes", body, self.encoder).await } pub async fn update(&self, primary_key: Option<&str>) -> (Value, StatusCode) { @@ -68,16 +61,12 @@ impl Index<'_> { primary_key: Option<&str>, ) -> (Value, StatusCode) { let url = match primary_key { - Some(key) => format!( - "/indexes/{}/documents?primaryKey={}", - urlencode(self.uid.as_ref()), - key - ), + Some(key) => { + format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key) + } None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())), }; - self.service - .post_encoded(url, documents, self.encoder) - .await + self.service.post_encoded(url, documents, self.encoder).await } pub async fn update_documents( @@ -86,11 +75,9 @@ impl Index<'_> { primary_key: Option<&str>, ) -> (Value, StatusCode) { let url = match primary_key { - Some(key) => format!( - "/indexes/{}/documents?primaryKey={}", - urlencode(self.uid.as_ref()), - key - ), + Some(key) => { + format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key) + } None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())), }; self.service.put_encoded(url, documents, self.encoder).await @@ -174,13 +161,8 @@ impl Index<'_> { } pub async fn delete_batch(&self, ids: Vec) -> (Value, StatusCode) { - let url = format!( - "/indexes/{}/documents/delete-batch", - urlencode(self.uid.as_ref()) - ); - self.service - .post_encoded(url, serde_json::to_value(&ids).unwrap(), self.encoder) - .await + let url = format!("/indexes/{}/documents/delete-batch", urlencode(self.uid.as_ref())); + self.service.post_encoded(url, serde_json::to_value(&ids).unwrap(), self.encoder).await } pub async fn settings(&self) -> (Value, StatusCode) { @@ -190,9 +172,7 @@ impl Index<'_> { pub async fn update_settings(&self, settings: Value) -> (Value, StatusCode) { let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); - self.service - .patch_encoded(url, settings, self.encoder) - .await + self.service.patch_encoded(url, settings, 
self.encoder).await } pub async fn delete_settings(&self) -> (Value, StatusCode) { @@ -232,29 +212,19 @@ impl Index<'_> { pub async fn search_get(&self, query: Value) -> (Value, StatusCode) { let params = yaup::to_string(&query).unwrap(); - let url = format!( - "/indexes/{}/search?{}", - urlencode(self.uid.as_ref()), - params - ); + let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), params); self.service.get(url).await } pub async fn update_distinct_attribute(&self, value: Value) -> (Value, StatusCode) { - let url = format!( - "/indexes/{}/settings/{}", - urlencode(self.uid.as_ref()), - "distinct-attribute" - ); + let url = + format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute"); self.service.put_encoded(url, value, self.encoder).await } pub async fn get_distinct_attribute(&self) -> (Value, StatusCode) { - let url = format!( - "/indexes/{}/settings/{}", - urlencode(self.uid.as_ref()), - "distinct-attribute" - ); + let url = + format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute"); self.service.get(url).await } } diff --git a/meilisearch-http/tests/common/mod.rs b/meilisearch-http/tests/common/mod.rs index c4793a0cb..9c6d572d9 100644 --- a/meilisearch-http/tests/common/mod.rs +++ b/meilisearch-http/tests/common/mod.rs @@ -15,18 +15,10 @@ macro_rules! test_post_get_search { let get_query: meilisearch_http::routes::search::SearchQuery = post_query.into(); let get_query = ::serde_url_params::to_string(&get_query).unwrap(); let ($response, $status_code) = $server.search_get(&get_query).await; - let _ = ::std::panic::catch_unwind(|| $block).map_err(|e| { - panic!( - "panic in get route: {:?}", - e.downcast_ref::<&str>().unwrap() - ) - }); + let _ = ::std::panic::catch_unwind(|| $block) + .map_err(|e| panic!("panic in get route: {:?}", e.downcast_ref::<&str>().unwrap())); let ($response, $status_code) = $server.search_post($query).await; - let _ = ::std::panic::catch_unwind(|| $block).map_err(|e| { - panic!( - "panic in post route: {:?}", - e.downcast_ref::<&str>().unwrap() - ) - }); + let _ = ::std::panic::catch_unwind(|| $block) + .map_err(|e| panic!("panic in post route: {:?}", e.downcast_ref::<&str>().unwrap())); }; } diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index f243a11b9..b7ddc772c 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -1,22 +1,23 @@ #![allow(dead_code)] -use clap::Parser; use std::path::Path; +use std::time::Duration; +use actix_http::body::MessageBody; +use actix_web::dev::ServiceResponse; use actix_web::http::StatusCode; use byte_unit::{Byte, ByteUnit}; -use meilisearch_auth::AuthController; -use meilisearch_http::setup_meilisearch; -use meilisearch_lib::options::{IndexerOpts, MaxMemory}; +use clap::Parser; +use meilisearch_http::option::{IndexerOpts, MaxMemory, Opt}; +use meilisearch_http::{analytics, create_app, setup_meilisearch}; use once_cell::sync::Lazy; -use serde_json::Value; +use serde_json::{json, Value}; use tempfile::TempDir; - -use crate::common::encoder::Encoder; -use meilisearch_http::option::Opt; +use tokio::time::sleep; use super::index::Index; use super::service::Service; +use crate::common::encoder::Encoder; pub struct Server { pub service: Service, @@ -38,19 +39,10 @@ impl Server { let options = default_settings(dir.path()); - let meilisearch = setup_meilisearch(&options).unwrap(); - let auth = AuthController::new(&options.db_path, &options.master_key).unwrap(); - 
let service = Service { - meilisearch, - auth, - options, - api_key: None, - }; + let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); + let service = Service { index_scheduler, auth, options, api_key: None }; - Server { - service, - _dir: Some(dir), - } + Server { service, _dir: Some(dir) } } pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self { @@ -62,19 +54,10 @@ impl Server { options.master_key = Some("MASTER_KEY".to_string()); - let meilisearch = setup_meilisearch(&options).unwrap(); - let auth = AuthController::new(&options.db_path, &options.master_key).unwrap(); - let service = Service { - meilisearch, - auth, - options, - api_key: None, - }; + let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); + let service = Service { index_scheduler, auth, options, api_key: None }; - Server { - service, - _dir: Some(dir), - } + Server { service, _dir: Some(dir) } } pub async fn new_auth() -> Self { @@ -84,19 +67,27 @@ impl Server { } pub async fn new_with_options(options: Opt) -> Result { - let meilisearch = setup_meilisearch(&options)?; - let auth = AuthController::new(&options.db_path, &options.master_key)?; - let service = Service { - meilisearch, - auth, - options, - api_key: None, - }; + let (index_scheduler, auth) = setup_meilisearch(&options)?; + let service = Service { index_scheduler, auth, options, api_key: None }; - Ok(Server { - service, - _dir: None, - }) + Ok(Server { service, _dir: None }) + } + + pub async fn init_web_app( + &self, + ) -> impl actix_web::dev::Service< + actix_http::Request, + Response = ServiceResponse, + Error = actix_web::Error, + > { + actix_web::test::init_service(create_app( + self.service.index_scheduler.clone().into(), + self.service.auth.clone(), + self.service.options.clone(), + analytics::MockAnalytics::new(&self.service.options), + true, + )) + .await } /// Returns a view to an index. There is no guarantee that the index exists. 
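The `init_web_app` method added above is the centrepiece of this part of the refactor: it wraps the `create_app(...)` call (index scheduler, auth controller, options, mock analytics) so individual tests stop copying that block. A minimal usage sketch inside this test crate, assuming the suite's `Server` type and a documents route exercised elsewhere in the diff; the index name and payload are illustrative:

    use actix_web::http::header::ContentType;
    use actix_web::test;

    // Inside an #[actix_rt::test].
    let server = Server::new().await;
    let app = server.init_web_app().await;

    let req = test::TestRequest::post()
        .uri("/indexes/dog/documents")
        .set_payload(r#"[{ "id": 1, "content": "Bouvier Bernois" }]"#)
        .insert_header(ContentType::json())
        .to_request();
    let res = test::call_service(&app, req).await;

    // Document additions are only enqueued, so the route answers 202 Accepted.
    assert_eq!(res.status(), 202);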
@@ -105,11 +96,7 @@ impl Server { } pub fn index_with_encoder(&self, uid: impl AsRef, encoder: Encoder) -> Index<'_> { - Index { - uid: uid.as_ref().to_string(), - service: &self.service, - encoder, - } + Index { uid: uid.as_ref().to_string(), service: &self.service, encoder } } pub async fn list_indexes( @@ -127,9 +114,7 @@ impl Server { .map(|(offset, limit)| format!("{offset}&{limit}")) .or_else(|| offset.xor(limit)); if let Some(query_parameter) = query_parameter { - self.service - .get(format!("/indexes?{query_parameter}")) - .await + self.service.get(format!("/indexes?{query_parameter}")).await } else { self.service.get("/indexes").await } @@ -150,6 +135,46 @@ impl Server { pub async fn get_dump_status(&self, uid: &str) -> (Value, StatusCode) { self.service.get(format!("/dumps/{}/status", uid)).await } + + pub async fn create_dump(&self) -> (Value, StatusCode) { + self.service.post("/dumps", json!(null)).await + } + + pub async fn index_swap(&self, value: Value) -> (Value, StatusCode) { + self.service.post("/swap-indexes", value).await + } + + pub async fn cancel_task(&self, value: Value) -> (Value, StatusCode) { + self.service + .post(format!("/tasks/cancel?{}", yaup::to_string(&value).unwrap()), json!(null)) + .await + } + + pub async fn delete_task(&self, value: Value) -> (Value, StatusCode) { + self.service.delete(format!("/tasks?{}", yaup::to_string(&value).unwrap())).await + } + + pub async fn wait_task(&self, update_id: u64) -> Value { + // try several times to get status, or panic to not wait forever + let url = format!("/tasks/{}", update_id); + for _ in 0..100 { + let (response, status_code) = self.service.get(&url).await; + assert_eq!(200, status_code, "response: {}", response); + + if response["status"] == "succeeded" || response["status"] == "failed" { + return response; + } + + // wait 0.5 second. 
+ sleep(Duration::from_millis(500)).await; + } + panic!("Timeout waiting for update id"); + } + + pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) { + let url = format!("/tasks/{}", update_id); + self.service.get(url).await + } } pub fn default_settings(dir: impl AsRef) -> Opt { diff --git a/meilisearch-http/tests/common/service.rs b/meilisearch-http/tests/common/service.rs index 3a3f6021c..945ff4c13 100644 --- a/meilisearch-http/tests/common/service.rs +++ b/meilisearch-http/tests/common/service.rs @@ -1,15 +1,18 @@ +use std::sync::Arc; + use actix_web::http::header::ContentType; +use actix_web::http::StatusCode; +use actix_web::test; use actix_web::test::TestRequest; -use actix_web::{http::StatusCode, test}; +use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; -use meilisearch_lib::MeiliSearch; +use meilisearch_http::{analytics, create_app, Opt}; use serde_json::Value; use crate::common::encoder::Encoder; -use meilisearch_http::{analytics, create_app, Opt}; pub struct Service { - pub meilisearch: MeiliSearch, + pub index_scheduler: Arc, pub auth: AuthController, pub options: Opt, pub api_key: Option, @@ -85,12 +88,12 @@ impl Service { } pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { - let app = test::init_service(create_app!( - &self.meilisearch, - &self.auth, + let app = test::init_service(create_app( + self.index_scheduler.clone().into(), + self.auth.clone(), + self.options.clone(), + analytics::MockAnalytics::new(&self.options), true, - self.options, - analytics::MockAnalytics::new(&self.options).0 )) .await; diff --git a/meilisearch-http/tests/content_type.rs b/meilisearch-http/tests/content_type.rs index 47e224bd1..e16a83c06 100644 --- a/meilisearch-http/tests/content_type.rs +++ b/meilisearch-http/tests/content_type.rs @@ -2,11 +2,11 @@ mod common; -use crate::common::Server; use actix_web::test; -use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; +use crate::common::Server; + enum HttpVerb { Put, Patch, @@ -59,14 +59,8 @@ async fn error_json_bad_content_type() { let document = "{}"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + for (verb, route) in routes { // Good content-type, we probably have an error since we didn't send anything in the json // so we only ensure we didn't get a bad media type error. @@ -82,11 +76,7 @@ async fn error_json_bad_content_type() { "calling the route `{}` with a content-type of json isn't supposed to throw a bad media type error", route); // No content-type. 
- let req = verb - .test_request() - .uri(route) - .set_payload(document) - .to_request(); + let req = verb.test_request().uri(route).set_payload(document).to_request(); let res = test::call_service(&app, req).await; let status_code = res.status(); let body = test::read_body(res).await; @@ -142,14 +132,7 @@ async fn extract_actual_content_type() { let route = "/indexes/doggo/documents"; let documents = "[{}]"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // Good content-type, we probably have an error since we didn't send anything in the json // so we only ensure we didn't get a bad media type error. diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index 48ef6276b..8dd3ba39a 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -1,10 +1,10 @@ -use crate::common::{GetAllDocumentsOptions, Server}; use actix_web::test; +use serde_json::{json, Value}; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; use crate::common::encoder::Encoder; -use meilisearch_http::{analytics, create_app}; -use serde_json::{json, Value}; -use time::{format_description::well_known::Rfc3339, OffsetDateTime}; +use crate::common::{GetAllDocumentsOptions, Server}; /// This is the basic usage of our API and every other tests uses the content-type application/json #[actix_rt::test] @@ -18,14 +18,8 @@ async fn add_documents_test_json_content_types() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -63,14 +57,8 @@ async fn add_single_document_test_json_content_types() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -108,14 +96,7 @@ async fn add_single_document_gzip_encoded() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let document = serde_json::to_string(&document).unwrap(); let encoder = Encoder::Gzip; @@ -157,14 +138,7 @@ async fn add_single_document_with_every_encoding() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let document = 
serde_json::to_string(&document).unwrap(); @@ -198,14 +172,8 @@ async fn error_add_documents_test_bad_content_types() { ]); let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -225,10 +193,7 @@ async fn error_add_documents_test_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); // put let req = test::TestRequest::put() @@ -249,10 +214,7 @@ async fn error_add_documents_test_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); } /// missing content-type must be refused @@ -266,14 +228,8 @@ async fn error_add_documents_test_no_content_type() { ]); let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -292,10 +248,7 @@ async fn error_add_documents_test_no_content_type() { ); assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); // put let req = test::TestRequest::put() @@ -315,10 +268,7 @@ async fn error_add_documents_test_no_content_type() { ); assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); } #[actix_rt::test] @@ -326,14 +276,8 @@ async fn error_add_malformed_csv_documents() { let document = "id, content\n1234, hello, world\n12, hello world"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -353,10 +297,7 @@ async fn error_add_malformed_csv_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // put let req = test::TestRequest::put() @@ -377,10 +318,7 @@ async fn error_add_malformed_csv_documents() { 
); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); } #[actix_rt::test] @@ -388,14 +326,8 @@ async fn error_add_malformed_json_documents() { let document = r#"[{"id": 1}, {id: 2}]"#; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -415,10 +347,7 @@ async fn error_add_malformed_json_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // put let req = test::TestRequest::put() @@ -439,10 +368,7 @@ async fn error_add_malformed_json_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // truncate @@ -467,10 +393,7 @@ async fn error_add_malformed_json_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // add one more char to the long string to test if the truncating works. 
let document = format!("\"{}m\"", long); @@ -489,10 +412,7 @@ async fn error_add_malformed_json_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); } #[actix_rt::test] @@ -500,14 +420,8 @@ async fn error_add_malformed_ndjson_documents() { let document = "{\"id\": 1}\n{id: 2}"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -527,10 +441,7 @@ async fn error_add_malformed_ndjson_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // put let req = test::TestRequest::put() @@ -549,10 +460,7 @@ async fn error_add_malformed_ndjson_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); } #[actix_rt::test] @@ -560,14 +468,8 @@ async fn error_add_missing_payload_csv_documents() { let document = ""; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -582,10 +484,7 @@ async fn error_add_missing_payload_csv_documents() { assert_eq!(response["message"], json!(r#"A csv payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); // put let req = test::TestRequest::put() @@ -601,10 +500,7 @@ async fn error_add_missing_payload_csv_documents() { assert_eq!(response["message"], json!(r#"A csv payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); } #[actix_rt::test] @@ -612,14 +508,8 @@ async fn error_add_missing_payload_json_documents() { let document = ""; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + 
// post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -634,10 +524,7 @@ async fn error_add_missing_payload_json_documents() { assert_eq!(response["message"], json!(r#"A json payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); // put let req = test::TestRequest::put() @@ -653,10 +540,7 @@ async fn error_add_missing_payload_json_documents() { assert_eq!(response["message"], json!(r#"A json payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); } #[actix_rt::test] @@ -664,14 +548,8 @@ async fn error_add_missing_payload_ndjson_documents() { let document = ""; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -683,16 +561,10 @@ async fn error_add_missing_payload_ndjson_documents() { let body = test::read_body(res).await; let response: Value = serde_json::from_slice(&body).unwrap_or_default(); assert_eq!(status_code, 400); - assert_eq!( - response["message"], - json!(r#"A ndjson payload is missing."#) - ); + assert_eq!(response["message"], json!(r#"A ndjson payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); // put let req = test::TestRequest::put() @@ -705,16 +577,10 @@ async fn error_add_missing_payload_ndjson_documents() { let body = test::read_body(res).await; let response: Value = serde_json::from_slice(&body).unwrap_or_default(); assert_eq!(status_code, 400); - assert_eq!( - response["message"], - json!(r#"A ndjson payload is missing."#) - ); + assert_eq!(response["message"], json!(r#"A ndjson payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); } #[actix_rt::test] @@ -867,10 +733,7 @@ async fn add_larger_dataset() { assert_eq!(response["details"]["indexedDocuments"], 77); assert_eq!(response["details"]["receivedDocuments"], 77); let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(1000), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { limit: Some(1000), ..Default::default() }) .await; assert_eq!(code, 200, "failed with `{}`", response); assert_eq!(response["results"].as_array().unwrap().len(), 77); @@ -975,9 +838,7 @@ async fn add_documents_invalid_geo_field() { let server = Server::new().await; let 
index = server.index("test"); index.create(Some("id")).await; - index - .update_settings(json!({"sortableAttributes": ["_geo"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["_geo"]})).await; let documents = json!([ { @@ -1120,10 +981,7 @@ async fn batch_several_documents_addition() { // Check if there are exactly 120 documents (150 - 30) in the index; let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(200), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { limit: Some(200), ..Default::default() }) .await; assert_eq!(code, 200, "failed with `{}`", response); assert_eq!(response["results"].as_array().unwrap().len(), 120); diff --git a/meilisearch-http/tests/documents/delete_documents.rs b/meilisearch-http/tests/documents/delete_documents.rs index 8c7ddaa7b..e36e2f033 100644 --- a/meilisearch-http/tests/documents/delete_documents.rs +++ b/meilisearch-http/tests/documents/delete_documents.rs @@ -29,9 +29,7 @@ async fn delete_one_unexisting_document() { async fn delete_one_document() { let server = Server::new().await; let index = server.index("test"); - index - .add_documents(json!([{ "id": 0, "content": "foobar" }]), None) - .await; + index.add_documents(json!([{ "id": 0, "content": "foobar" }]), None).await; index.wait_task(0).await; let (_response, code) = server.index("test").delete_document(0).await; assert_eq!(code, 202); @@ -68,9 +66,7 @@ async fn clear_all_documents() { assert_eq!(code, 202); let _update = index.wait_task(1).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); } @@ -85,9 +81,7 @@ async fn clear_all_documents_empty_index() { assert_eq!(code, 202); let _update = index.wait_task(0).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); } @@ -121,9 +115,7 @@ async fn delete_batch() { assert_eq!(code, 202); let _update = index.wait_task(1).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 1); assert_eq!(response["results"][0]["id"], json!(3)); @@ -139,9 +131,7 @@ async fn delete_no_document_batch() { assert_eq!(code, 202, "{}", _response); let _update = index.wait_task(1).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 3); } diff --git a/meilisearch-http/tests/documents/get_documents.rs b/meilisearch-http/tests/documents/get_documents.rs index f3a25e720..9bc54973e 100644 --- a/meilisearch-http/tests/documents/get_documents.rs +++ b/meilisearch-http/tests/documents/get_documents.rs @@ -1,12 +1,11 @@ -use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server}; use actix_web::test; use http::header::ACCEPT_ENCODING; - -use crate::common::encoder::Encoder; -use meilisearch_http::{analytics, 
create_app}; use serde_json::{json, Value}; use urlencoding::encode as urlencode; +use crate::common::encoder::Encoder; +use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server}; + // TODO: partial test since we are testing error, amd error is not yet fully implemented in // transplant #[actix_rt::test] @@ -59,14 +58,8 @@ async fn get_document() { }) ); - let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["id"]), - }), - ) - .await; + let (response, code) = + index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["id"]) })).await; assert_eq!(code, 200); assert_eq!( response, @@ -76,12 +69,7 @@ async fn get_document() { ); let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["nested.content"]), - }), - ) + .get_document(0, Some(GetDocumentOptions { fields: Some(vec!["nested.content"]) })) .await; assert_eq!(code, 200); assert_eq!( @@ -95,10 +83,8 @@ async fn get_document() { #[actix_rt::test] async fn error_get_unexisting_index_all_documents() { let server = Server::new().await; - let (response, code) = server - .index("test") - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = + server.index("test").get_all_documents(GetAllDocumentsOptions::default()).await; let expected_response = json!({ "message": "Index `test` not found.", @@ -120,9 +106,7 @@ async fn get_no_document() { index.wait_task(0).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); } @@ -133,9 +117,7 @@ async fn get_all_documents_no_options() { let index = server.index("test"); index.load_test_set().await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); let arr = response["results"].as_array().unwrap(); assert_eq!(arr.len(), 20); @@ -167,15 +149,7 @@ async fn get_all_documents_no_options_with_response_compression() { let index = server.index(index_uid); index.load_test_set().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; - + let app = server.init_web_app().await; let req = test::TestRequest::get() .uri(&format!("/indexes/{}/documents?", urlencode(index_uid))) .insert_header((ACCEPT_ENCODING, "gzip")) @@ -201,10 +175,7 @@ async fn test_get_all_documents_limit() { index.load_test_set().await; let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(5), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { limit: Some(5), ..Default::default() }) .await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 5); @@ -221,10 +192,7 @@ async fn test_get_all_documents_offset() { index.load_test_set().await; let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - offset: Some(5), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { offset: Some(5), ..Default::default() }) .await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 20); @@ -347,24 +315,12 @@ async fn 
get_document_s_nested_attributes_to_retrieve() { assert_eq!(code, 202); index.wait_task(1).await; - let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["content"]), - }), - ) - .await; + let (response, code) = + index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await; assert_eq!(code, 200); assert_eq!(response, json!({})); - let (response, code) = index - .get_document( - 1, - Some(GetDocumentOptions { - fields: Some(vec!["content"]), - }), - ) - .await; + let (response, code) = + index.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await; assert_eq!(code, 200); assert_eq!( response, @@ -377,12 +333,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { ); let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["content.truc"]), - }), - ) + .get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) })) .await; assert_eq!(code, 200); assert_eq!( @@ -392,12 +343,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { }) ); let (response, code) = index - .get_document( - 1, - Some(GetDocumentOptions { - fields: Some(vec!["content.truc"]), - }), - ) + .get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) })) .await; assert_eq!(code, 200); assert_eq!( @@ -414,20 +360,13 @@ async fn get_document_s_nested_attributes_to_retrieve() { async fn get_documents_displayed_attributes_is_ignored() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"displayedAttributes": ["gender"]})) - .await; + index.update_settings(json!({"displayedAttributes": ["gender"]})).await; index.load_test_set().await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 20); - assert_eq!( - response["results"][0].as_object().unwrap().keys().count(), - 16 - ); + assert_eq!(response["results"][0].as_object().unwrap().keys().count(), 16); assert!(response["results"][0]["gender"] != json!(null)); assert_eq!(response["offset"], json!(0)); diff --git a/meilisearch-http/tests/documents/update_documents.rs b/meilisearch-http/tests/documents/update_documents.rs index 99d700f9f..1cc66a0c2 100644 --- a/meilisearch-http/tests/documents/update_documents.rs +++ b/meilisearch-http/tests/documents/update_documents.rs @@ -1,7 +1,7 @@ -use crate::common::{GetAllDocumentsOptions, Server}; +use serde_json::json; use crate::common::encoder::Encoder; -use serde_json::json; +use crate::common::{GetAllDocumentsOptions, Server}; #[actix_rt::test] async fn error_document_update_create_index_bad_uid() { @@ -84,10 +84,7 @@ async fn update_document() { let (response, code) = index.get_document(1, None).await; assert_eq!(code, 200); - assert_eq!( - response.to_string(), - r##"{"doc_id":1,"content":"foo","other":"bar"}"## - ); + assert_eq!(response.to_string(), r##"{"doc_id":1,"content":"foo","other":"bar"}"##); } #[actix_rt::test] @@ -125,10 +122,7 @@ async fn update_document_gzip_encoded() { let (response, code) = index.get_document(1, None).await; assert_eq!(code, 200); - assert_eq!( - response.to_string(), - r##"{"doc_id":1,"content":"foo","other":"bar"}"## - ); + assert_eq!(response.to_string(), r##"{"doc_id":1,"content":"foo","other":"bar"}"##); } #[actix_rt::test] @@ -143,10 +137,7 
@@ async fn update_larger_dataset() { assert_eq!(response["type"], "documentAdditionOrUpdate"); assert_eq!(response["details"]["indexedDocuments"], 77); let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(1000), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { limit: Some(1000), ..Default::default() }) .await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 77); diff --git a/meilisearch-http/tests/dumps/mod.rs b/meilisearch-http/tests/dumps/mod.rs index 389f6b480..fa0b929a3 100644 --- a/meilisearch-http/tests/dumps/mod.rs +++ b/meilisearch-http/tests/dumps/mod.rs @@ -1,10 +1,10 @@ mod data; -use crate::common::{default_settings, GetAllDocumentsOptions, Server}; use meilisearch_http::Opt; use serde_json::json; use self::data::GetDump; +use crate::common::{default_settings, GetAllDocumentsOptions, Server}; // all the following test are ignored on windows. See #2364 #[actix_rt::test] @@ -17,14 +17,8 @@ async fn import_dump_v1() { GetDump::MoviesWithSettingsV1.path(), GetDump::RubyGemsWithSettingsV1.path(), ] { - let options = Opt { - import_dump: Some(path), - ..default_settings(temp.path()) - }; - let error = Server::new_with_options(options) - .await - .map(|_| ()) - .unwrap_err(); + let options = Opt { import_dump: Some(path), ..default_settings(temp.path()) }; + let error = Server::new_with_options(options).await.map(drop).unwrap_err(); assert_eq!(error.to_string(), "The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards."); } @@ -35,10 +29,8 @@ async fn import_dump_v1() { async fn import_dump_v2_movie_raw() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::MoviesRawV2.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::MoviesRawV2.path()), ..default_settings(temp.path()) }; let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; @@ -227,10 +219,8 @@ async fn import_dump_v2_rubygems_with_settings() { async fn import_dump_v3_movie_raw() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::MoviesRawV3.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::MoviesRawV3.path()), ..default_settings(temp.path()) }; let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; @@ -419,10 +409,8 @@ async fn import_dump_v3_rubygems_with_settings() { async fn import_dump_v4_movie_raw() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::MoviesRawV4.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::MoviesRawV4.path()), ..default_settings(temp.path()) }; let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; @@ -611,10 +599,8 @@ async fn import_dump_v4_rubygems_with_settings() { async fn import_dump_v5() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::TestV5.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::TestV5.path()), ..default_settings(temp.path()) }; let mut server = Server::new_auth_with_options(options, temp).await; 
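Two small idioms recur in the dump tests above: the `Opt` for each case is built with struct-update syntax on top of `default_settings`, and the version-1 failure path uses `.map(drop)` so that only the error is kept for `unwrap_err()`. A sketch of both, assuming the `Opt`, `default_settings`, `GetDump` and `Server` helpers shown in these hunks:

    let temp = tempfile::tempdir().unwrap();

    // Happy path: start from the default test settings, only override the dump to import.
    let options =
        Opt { import_dump: Some(GetDump::MoviesRawV2.path()), ..default_settings(temp.path()) };
    let _server = Server::new_with_options(options).await.unwrap();

    // Failure path: a v1 dump must be rejected at startup. `.map(drop)` throws the
    // would-be server away so only the error remains for `unwrap_err()`.
    let options = Opt {
        import_dump: Some(GetDump::MoviesWithSettingsV1.path()),
        ..default_settings(temp.path())
    };
    let error = Server::new_with_options(options).await.map(drop).unwrap_err();
    assert!(error.to_string().contains("version 1 of the dumps is not supported"));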
server.use_api_key("MASTER_KEY"); @@ -654,14 +640,10 @@ async fn import_dump_v5() { assert_eq!(code, 200); assert_eq!(stats, expected_stats); - let (docs, code) = index2 - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(docs["results"].as_array().unwrap().len(), 10); - let (docs, code) = index1 - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (docs, code) = index1.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(docs["results"].as_array().unwrap().len(), 10); diff --git a/meilisearch-http/tests/index/create_index.rs b/meilisearch-http/tests/index/create_index.rs index 30040abfe..0d4b01278 100644 --- a/meilisearch-http/tests/index/create_index.rs +++ b/meilisearch-http/tests/index/create_index.rs @@ -1,11 +1,11 @@ -use crate::common::encoder::Encoder; -use crate::common::Server; use actix_web::http::header::ContentType; use actix_web::test; use http::header::ACCEPT_ENCODING; -use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; +use crate::common::encoder::Encoder; +use crate::common::Server; + #[actix_rt::test] async fn create_index_no_primary_key() { let server = Server::new().await; @@ -43,14 +43,7 @@ async fn create_index_with_gzip_encoded_request() { #[actix_rt::test] async fn create_index_with_gzip_encoded_request_and_receiving_brotli_encoded_response() { let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; let body = serde_json::to_string(&json!({ "uid": "test", diff --git a/meilisearch-http/tests/index/get_index.rs b/meilisearch-http/tests/index/get_index.rs index 91cb1a6d5..3d3ba4b44 100644 --- a/meilisearch-http/tests/index/get_index.rs +++ b/meilisearch-http/tests/index/get_index.rs @@ -1,6 +1,6 @@ +use serde_json::{json, Value}; + use crate::common::Server; -use serde_json::json; -use serde_json::Value; #[actix_rt::test] async fn create_and_get_index() { @@ -63,12 +63,8 @@ async fn list_multiple_indexes() { assert!(response["results"].is_array()); let arr = response["results"].as_array().unwrap(); assert_eq!(arr.len(), 2); - assert!(arr - .iter() - .any(|entry| entry["uid"] == "test" && entry["primaryKey"] == Value::Null)); - assert!(arr - .iter() - .any(|entry| entry["uid"] == "test1" && entry["primaryKey"] == "key")); + assert!(arr.iter().any(|entry| entry["uid"] == "test" && entry["primaryKey"] == Value::Null)); + assert!(arr.iter().any(|entry| entry["uid"] == "test1" && entry["primaryKey"] == "key")); } #[actix_rt::test] @@ -77,10 +73,7 @@ async fn get_and_paginate_indexes() { const NB_INDEXES: usize = 50; for i in 0..NB_INDEXES { server.index(&format!("test_{i:02}")).create(None).await; - server - .index(&format!("test_{i:02}")) - .wait_task(i as u64) - .await; + server.index(&format!("test_{i:02}")).wait_task(i as u64).await; } // basic diff --git a/meilisearch-http/tests/index/stats.rs b/meilisearch-http/tests/index/stats.rs index f55998998..813f05b4a 100644 --- a/meilisearch-http/tests/index/stats.rs +++ b/meilisearch-http/tests/index/stats.rs @@ -17,10 +17,7 @@ async fn stats() { assert_eq!(code, 200); assert_eq!(response["numberOfDocuments"], 0); assert!(response["isIndexing"] == false); - 
assert!(response["fieldDistribution"] - .as_object() - .unwrap() - .is_empty()); + assert!(response["fieldDistribution"].as_object().unwrap().is_empty()); let documents = json!([ { diff --git a/meilisearch-http/tests/index/update_index.rs b/meilisearch-http/tests/index/update_index.rs index 97eecbf83..3c283407c 100644 --- a/meilisearch-http/tests/index/update_index.rs +++ b/meilisearch-http/tests/index/update_index.rs @@ -1,7 +1,9 @@ +use serde_json::json; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; + use crate::common::encoder::Encoder; use crate::common::Server; -use serde_json::json; -use time::{format_description::well_known::Rfc3339, OffsetDateTime}; #[actix_rt::test] async fn update_primary_key() { diff --git a/meilisearch-http/tests/search/errors.rs b/meilisearch-http/tests/search/errors.rs index 6b5569b58..76e63eeb7 100644 --- a/meilisearch-http/tests/search/errors.rs +++ b/meilisearch-http/tests/search/errors.rs @@ -1,7 +1,7 @@ -use crate::common::Server; use serde_json::json; use super::DOCUMENTS; +use crate::common::Server; #[actix_rt::test] async fn search_unexisting_index() { @@ -45,16 +45,14 @@ async fn search_invalid_highlight_and_crop_tags() { for field in fields { // object - let (response, code) = index - .search_post(json!({field.to_string(): {"marker": ""}})) - .await; + let (response, code) = + index.search_post(json!({field.to_string(): {"marker": ""}})).await; assert_eq!(code, 400, "field {} passing object: {}", &field, response); assert_eq!(response["code"], "bad_request"); // array - let (response, code) = index - .search_post(json!({field.to_string(): ["marker", ""]})) - .await; + let (response, code) = + index.search_post(json!({field.to_string(): ["marker", ""]})).await; assert_eq!(code, 400, "field {} passing array: {}", &field, response); assert_eq!(response["code"], "bad_request"); } @@ -65,9 +63,7 @@ async fn filter_invalid_syntax_object() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -92,9 +88,7 @@ async fn filter_invalid_syntax_array() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -119,9 +113,7 @@ async fn filter_invalid_syntax_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -134,13 +126,10 @@ async fn filter_invalid_syntax_string() { "link": "https://docs.meilisearch.com/errors#invalid_filter" }); index - .search( - json!({"filter": "title = Glass XOR title = Glass"}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) + .search(json!({"filter": "title = Glass XOR title = Glass"}), |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }) .await; } @@ -149,9 +138,7 @@ async fn filter_invalid_attribute_array() { let server = Server::new().await; let index = 
server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -176,9 +163,7 @@ async fn filter_invalid_attribute_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -203,9 +188,7 @@ async fn filter_reserved_geo_attribute_array() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -230,9 +213,7 @@ async fn filter_reserved_geo_attribute_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -257,9 +238,7 @@ async fn filter_reserved_attribute_array() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -272,13 +251,10 @@ async fn filter_reserved_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_filter" }); index - .search( - json!({"filter": ["_geoDistance = Glass"]}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) + .search(json!({"filter": ["_geoDistance = Glass"]}), |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }) .await; } @@ -287,9 +263,7 @@ async fn filter_reserved_attribute_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -302,13 +276,10 @@ async fn filter_reserved_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_filter" }); index - .search( - json!({"filter": "_geoDistance = Glass"}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) + .search(json!({"filter": "_geoDistance = Glass"}), |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }) .await; } @@ -317,9 +288,7 @@ async fn sort_geo_reserved_attribute() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -349,9 +318,7 @@ async fn sort_reserved_attribute() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - 
.await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -381,9 +348,7 @@ async fn sort_unsortable_attribute() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -413,9 +378,7 @@ async fn sort_invalid_syntax() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; diff --git a/meilisearch-http/tests/search/formatted.rs b/meilisearch-http/tests/search/formatted.rs index 7303a7154..191720602 100644 --- a/meilisearch-http/tests/search/formatted.rs +++ b/meilisearch-http/tests/search/formatted.rs @@ -1,15 +1,14 @@ +use serde_json::json; + use super::*; use crate::common::Server; -use serde_json::json; #[actix_rt::test] async fn formatted_contain_wildcard() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({ "displayedAttributes": ["id", "cattos"] })) - .await; + index.update_settings(json!({ "displayedAttributes": ["id", "cattos"] })).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -34,19 +33,16 @@ async fn formatted_contain_wildcard() { .await; index - .search( - json!({ "q": "pesti", "attributesToRetrieve": ["*"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "cattos": "pesti", - }) - ); - }, - ) + .search(json!({ "q": "pesti", "attributesToRetrieve": ["*"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + }) + ); + }) .await; index @@ -91,23 +87,20 @@ async fn formatted_contain_wildcard() { .await; index - .search( - json!({ "q": "pesti", "attributesToCrop": ["*"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, + .search(json!({ "q": "pesti", "attributesToCrop": ["*"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", "cattos": "pesti", - "_formatted": { - "id": "852", - "cattos": "pesti", - } - }) - ); - }, - ) + } + }) + ); + }) .await; } @@ -121,27 +114,24 @@ async fn format_nested() { index.wait_task(0).await; index - .search( - json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "doggos": [ - { - "name": "bobby", - "age": 2, - }, - { - "name": "buddy", - "age": 4, - }, - ], - }) - ); - }, - ) + .search(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + "age": 2, + }, + { + "name": "buddy", + "age": 4, + }, + ], + }) + ); + }) .await; index @@ -297,9 +287,7 @@ async fn displayedattr_2_smol() { let index = server.index("test"); // not enough displayed for the 
other settings - index - .update_settings(json!({ "displayedAttributes": ["id"] })) - .await; + index.update_settings(json!({ "displayedAttributes": ["id"] })).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -319,36 +307,30 @@ async fn displayedattr_2_smol() { .await; index - .search( - json!({ "attributesToRetrieve": ["id"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); - }, - ) + .search(json!({ "attributesToRetrieve": ["id"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + }) .await; index - .search( - json!({ "attributesToHighlight": ["id"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "_formatted": { - "id": "852", - } - }) - ); - }, - ) + .search(json!({ "attributesToHighlight": ["id"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + }) .await; index @@ -385,43 +367,34 @@ async fn displayedattr_2_smol() { .await; index - .search( - json!({ "attributesToHighlight": ["cattos"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); - }, - ) + .search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + }) .await; index - .search( - json!({ "attributesToCrop": ["cattos"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); - }, - ) + .search(json!({ "attributesToCrop": ["cattos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + }) .await; index - .search( - json!({ "attributesToRetrieve": ["cattos"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"][0], json!({})); - }, - ) + .search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"][0], json!({})); + }) .await; index diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index da31a3cdf..44a4702d0 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -5,10 +5,11 @@ mod errors; mod formatted; mod pagination; -use crate::common::Server; use once_cell::sync::Lazy; use serde_json::{json, Value}; +use crate::common::Server; + pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { json!([ { @@ -199,9 +200,7 @@ async fn search_with_filter_string_notation() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -221,9 +220,7 @@ async fn search_with_filter_string_notation() { let index = server.index("nested"); - index - .update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await; let documents 
= NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -262,9 +259,7 @@ async fn search_with_filter_array_notation() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -292,9 +287,7 @@ async fn search_with_sort_on_numbers() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -314,9 +307,7 @@ async fn search_with_sort_on_numbers() { let index = server.index("nested"); - index - .update_settings(json!({"sortableAttributes": ["doggos.age"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["doggos.age"]})).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -340,9 +331,7 @@ async fn search_with_sort_on_strings() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["title"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -362,9 +351,7 @@ async fn search_with_sort_on_strings() { let index = server.index("nested"); - index - .update_settings(json!({"sortableAttributes": ["doggos.name"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["doggos.name"]})).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -388,9 +375,7 @@ async fn search_with_multiple_sort() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id", "title"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id", "title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -410,9 +395,7 @@ async fn search_facet_distribution() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -434,9 +417,7 @@ async fn search_facet_distribution() { let index = server.index("nested"); - index - .update_settings(json!({"filterableAttributes": ["father", "doggos.name"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["father", "doggos.name"]})).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -467,9 +448,7 @@ async fn search_facet_distribution() { ) .await; - index - .update_settings(json!({"filterableAttributes": ["doggos"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["doggos"]})).await; index.wait_task(4).await; index @@ -502,10 +481,7 @@ async fn search_facet_distribution() { dist["doggos.name"], json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) ); - assert_eq!( - dist["doggos.age"], - json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1}) - ); + assert_eq!(dist["doggos.age"], json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1})); }, ) .await; 
@@ -516,17 +492,14 @@ async fn displayed_attributes() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({ "displayedAttributes": ["title"] })) - .await; + index.update_settings(json!({ "displayedAttributes": ["title"] })).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; index.wait_task(1).await; - let (response, code) = index - .search_post(json!({ "attributesToRetrieve": ["title", "id"] })) - .await; + let (response, code) = + index.search_post(json!({ "attributesToRetrieve": ["title", "id"] })).await; assert_eq!(code, 200, "{}", response); assert!(response["hits"][0].get("title").is_some()); } @@ -536,9 +509,7 @@ async fn placeholder_search_is_hard_limited() { let server = Server::new().await; let index = server.index("test"); - let documents: Vec<_> = (0..1200) - .map(|i| json!({ "id": i, "text": "I am unique!" })) - .collect(); + let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" })).collect(); index.add_documents(documents.into(), None).await; index.wait_task(0).await; @@ -567,9 +538,7 @@ async fn placeholder_search_is_hard_limited() { ) .await; - index - .update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })) - .await; + index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; index.wait_task(1).await; index @@ -603,9 +572,7 @@ async fn search_is_hard_limited() { let server = Server::new().await; let index = server.index("test"); - let documents: Vec<_> = (0..1200) - .map(|i| json!({ "id": i, "text": "I am unique!" })) - .collect(); + let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" })).collect(); index.add_documents(documents.into(), None).await; index.wait_task(0).await; @@ -636,9 +603,7 @@ async fn search_is_hard_limited() { ) .await; - index - .update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })) - .await; + index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; index.wait_task(1).await; index @@ -674,13 +639,9 @@ async fn faceting_max_values_per_facet() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({ "filterableAttributes": ["number"] })) - .await; + index.update_settings(json!({ "filterableAttributes": ["number"] })).await; - let documents: Vec<_> = (0..10_000) - .map(|id| json!({ "id": id, "number": id * 10 })) - .collect(); + let documents: Vec<_> = (0..10_000).map(|id| json!({ "id": id, "number": id * 10 })).collect(); index.add_documents(json!(documents), None).await; index.wait_task(1).await; @@ -697,9 +658,7 @@ async fn faceting_max_values_per_facet() { ) .await; - index - .update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } })) - .await; + index.update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } })).await; index.wait_task(2).await; index diff --git a/meilisearch-http/tests/search/pagination.rs b/meilisearch-http/tests/search/pagination.rs index 41c4f31a4..1099200b8 100644 --- a/meilisearch-http/tests/search/pagination.rs +++ b/meilisearch-http/tests/search/pagination.rs @@ -1,6 +1,7 @@ +use serde_json::json; + use crate::common::Server; use crate::search::DOCUMENTS; -use serde_json::json; #[actix_rt::test] async fn default_search_should_return_estimated_total_hit() { diff --git a/meilisearch-http/tests/settings/distinct.rs b/meilisearch-http/tests/settings/distinct.rs index d2dd0f74f..a60792329 100644 --- a/meilisearch-http/tests/settings/distinct.rs 
+++ b/meilisearch-http/tests/settings/distinct.rs @@ -1,23 +1,20 @@ -use crate::common::Server; use serde_json::json; +use crate::common::Server; + #[actix_rt::test] async fn set_and_reset_distinct_attribute() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index - .update_settings(json!({ "distinctAttribute": "test"})) - .await; + let (_response, _code) = index.update_settings(json!({ "distinctAttribute": "test"})).await; index.wait_task(0).await; let (response, _) = index.settings().await; assert_eq!(response["distinctAttribute"], "test"); - index - .update_settings(json!({ "distinctAttribute": null })) - .await; + index.update_settings(json!({ "distinctAttribute": null })).await; index.wait_task(1).await; diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index 9d10b7820..fa45ad55e 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -13,14 +13,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| map.insert("distinct_attribute", json!(Value::Null)); map.insert( "ranking_rules", - json!([ - "words", - "typo", - "proximity", - "attribute", - "sort", - "exactness" - ]), + json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]), ); map.insert("stop_words", json!([])); map.insert("synonyms", json!({})); @@ -63,14 +56,7 @@ async fn get_settings() { assert_eq!(settings["distinctAttribute"], json!(null)); assert_eq!( settings["rankingRules"], - json!([ - "words", - "typo", - "proximity", - "attribute", - "sort", - "exactness" - ]) + json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]) ); assert_eq!(settings["stopWords"], json!([])); assert_eq!( @@ -99,18 +85,14 @@ async fn error_update_settings_unknown_field() { async fn test_partial_update() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index - .update_settings(json!({"displayedAttributes": ["foo"]})) - .await; + let (_response, _code) = index.update_settings(json!({"displayedAttributes": ["foo"]})).await; index.wait_task(0).await; let (response, code) = index.settings().await; assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["foo"])); assert_eq!(response["searchableAttributes"], json!(["*"])); - let (_response, _) = index - .update_settings(json!({"searchableAttributes": ["bar"]})) - .await; + let (_response, _) = index.update_settings(json!({"searchableAttributes": ["bar"]})).await; index.wait_task(1).await; let (response, code) = index.settings().await; @@ -158,10 +140,7 @@ async fn reset_all_settings() { assert_eq!(response["displayedAttributes"], json!(["name", "age"])); assert_eq!(response["searchableAttributes"], json!(["name"])); assert_eq!(response["stopWords"], json!(["the"])); - assert_eq!( - response["synonyms"], - json!({"puppy": ["dog", "doggo", "potat"] }) - ); + assert_eq!(response["synonyms"], json!({"puppy": ["dog", "doggo", "potat"] })); assert_eq!(response["filterableAttributes"], json!(["age"])); index.delete_settings().await; @@ -299,9 +278,8 @@ async fn error_set_invalid_ranking_rules() { let index = server.index("test"); index.create(None).await; - let (_response, _code) = index - .update_settings(json!({ "rankingRules": [ "manyTheFish"]})) - .await; + let (_response, _code) = + index.update_settings(json!({ "rankingRules": [ "manyTheFish"]})).await; index.wait_task(1).await; let (response, code) = index.get_task(1).await; diff --git 
a/meilisearch-http/tests/snapshot/mod.rs b/meilisearch-http/tests/snapshot/mod.rs index 27ff838e1..1c2e33534 100644 --- a/meilisearch-http/tests/snapshot/mod.rs +++ b/meilisearch-http/tests/snapshot/mod.rs @@ -1,11 +1,10 @@ use std::time::Duration; -use crate::common::server::default_settings; -use crate::common::GetAllDocumentsOptions; -use crate::common::Server; +use meilisearch_http::Opt; use tokio::time::sleep; -use meilisearch_http::Opt; +use crate::common::server::default_settings; +use crate::common::{GetAllDocumentsOptions, Server}; macro_rules! verify_snapshot { ( @@ -30,6 +29,7 @@ macro_rules! verify_snapshot { } #[actix_rt::test] +#[ignore] // TODO: unignore async fn perform_snapshot() { let temp = tempfile::tempdir().unwrap(); let snapshot_dir = tempfile::tempdir().unwrap(); @@ -62,10 +62,7 @@ async fn perform_snapshot() { let snapshot_path = snapshot_dir.path().to_owned().join("db.snapshot"); - let options = Opt { - import_snapshot: Some(snapshot_path), - ..default_settings(temp.path()) - }; + let options = Opt { import_snapshot: Some(snapshot_path), ..default_settings(temp.path()) }; let snapshot_server = Server::new_with_options(options).await.unwrap(); diff --git a/meilisearch-http/tests/stats/mod.rs b/meilisearch-http/tests/stats/mod.rs index 0629c2e29..152e4f625 100644 --- a/meilisearch-http/tests/stats/mod.rs +++ b/meilisearch-http/tests/stats/mod.rs @@ -1,5 +1,6 @@ use serde_json::json; -use time::{format_description::well_known::Rfc3339, OffsetDateTime}; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; use crate::common::Server; diff --git a/meilisearch-http/tests/tasks/mod.rs b/meilisearch-http/tests/tasks/mod.rs index 785e0284e..bbbb59d97 100644 --- a/meilisearch-http/tests/tasks/mod.rs +++ b/meilisearch-http/tests/tasks/mod.rs @@ -1,8 +1,10 @@ -use crate::common::Server; +use meili_snap::insta::assert_json_snapshot; use serde_json::json; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; +use crate::common::Server; + #[actix_rt::test] async fn error_get_unexisting_task_status() { let server = Server::new().await; @@ -49,10 +51,7 @@ async fn list_tasks() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.list_tasks().await; assert_eq!(code, 200); @@ -66,10 +65,7 @@ async fn list_tasks_with_star_filters() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.service.get("/tasks?indexUid=test").await; assert_eq!(code, 200); @@ -87,10 +83,8 @@ async fn list_tasks_with_star_filters() { assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 2); - let (response, code) = index - .service - .get("/tasks?type=*,documentAdditionOrUpdate&status=*") - .await; + let (response, code) = + index.service.get("/tasks?type=*,documentAdditionOrUpdate&status=*").await; assert_eq!(code, 200, "{:?}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); @@ -116,10 +110,7 @@ async fn list_tasks_status_filtered() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - 
serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.filtered_tasks(&[], &["succeeded"]).await; @@ -145,19 +136,15 @@ async fn list_tasks_type_filtered() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.filtered_tasks(&["indexCreation"], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 1); - let (response, code) = index - .filtered_tasks(&["indexCreation", "documentAdditionOrUpdate"], &[]) - .await; + let (response, code) = + index.filtered_tasks(&["indexCreation", "documentAdditionOrUpdate"], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); } @@ -169,10 +156,7 @@ async fn list_tasks_status_and_type_filtered() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.filtered_tasks(&["indexCreation"], &["failed"]).await; @@ -231,3 +215,623 @@ async fn test_summarized_task_view() { let (response, _) = index.delete().await; assert_valid_summarized_task!(response, "indexDeletion", "test"); } + +#[actix_web::test] +async fn test_summarized_document_addition_or_update() { + let server = Server::new().await; + let index = server.index("test"); + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), None).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_delete_batch() { + let server = Server::new().await; + let index = server.index("test"); + index.delete_batch(vec![1, 2, 3]).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "details": { + 
"matchedDocuments": 3, + "deletedDocuments": null + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(None).await; + index.delete_batch(vec![42]).await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "documentDeletion", + "details": { + "matchedDocuments": 1, + "deletedDocuments": 0 + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_delete_document() { + let server = Server::new().await; + let index = server.index("test"); + index.delete_document(1).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "details": { + "matchedDocuments": 1, + "deletedDocuments": null + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(None).await; + index.delete_document(42).await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "documentDeletion", + "details": { + "matchedDocuments": 1, + "deletedDocuments": 0 + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_settings_update() { + let server = Server::new().await; + let index = server.index("test"); + // here we should find my payload even in the failed task. + index.update_settings(json!({ "rankingRules": ["custom"] })).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + dbg!(&task); + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "details": { + "rankingRules": [ + "custom" + ] + }, + "error": { + "message": "`custom` ranking rule is invalid. 
Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.", + "code": "invalid_ranking_rule", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_ranking_rule" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update_settings(json!({ "displayedAttributes": ["doggos", "name"], "filterableAttributes": ["age", "nb_paw_pads"], "sortableAttributes": ["iq"] })).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "details": { + "displayedAttributes": [ + "doggos", + "name" + ], + "filterableAttributes": [ + "age", + "nb_paw_pads" + ], + "sortableAttributes": [ + "iq" + ] + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_creation() { + let server = Server::new().await; + let index = server.index("test"); + index.create(None).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "succeeded", + "type": "indexCreation", + "details": { + "primaryKey": null + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(Some("doggos")).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "failed", + "type": "indexCreation", + "details": { + "primaryKey": "doggos" + }, + "error": { + "message": "Index `test` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_deletion() { + let server = Server::new().await; + let index = server.index("test"); + index.delete().await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "indexDeletion", + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // Check that the details are correctly set when documents are actually deleted.
+ index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + index.delete().await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "indexDeletion", + "details": { + "deletedDocuments": 1 + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // What happens when you delete an index that doesn't exists. + index.delete().await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "indexDeletion", + "details": { + "deletedDocuments": 1 + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_update() { + let server = Server::new().await; + let index = server.index("test"); + // If the index doesn't exist yet, we should get errors with or without the primary key. + index.update(None).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "indexUpdate", + "details": { + "primaryKey": null + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update(Some("bones")).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "failed", + "type": "indexUpdate", + "details": { + "primaryKey": "bones" + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // And run the same two tests once the index do exists. 
+ index.create(None).await; + + index.update(None).await; + index.wait_task(3).await; + let (task, _) = index.get_task(3).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 3, + "indexUid": "test", + "status": "succeeded", + "type": "indexUpdate", + "details": { + "primaryKey": null + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update(Some("bones")).await; + index.wait_task(4).await; + let (task, _) = index.get_task(4).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 4, + "indexUid": "test", + "status": "succeeded", + "type": "indexUpdate", + "details": { + "primaryKey": "bones" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_swap() { + let server = Server::new().await; + server + .index_swap(json!([ + { "indexes": ["doggos", "cattos"] } + ])) + .await; + server.wait_task(0).await; + let (task, _) = server.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": null, + "status": "failed", + "type": "indexSwap", + "details": { + "swaps": [ + { + "indexes": [ + "doggos", + "cattos" + ] + } + ] + }, + "error": { + "message": "Index `doggos` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + server.index("doggos").create(None).await; + server.index("cattos").create(None).await; + server + .index_swap(json!([ + { "indexes": ["doggos", "cattos"] } + ])) + .await; + server.wait_task(3).await; + let (task, _) = server.get_task(3).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 3, + "indexUid": null, + "status": "succeeded", + "type": "indexSwap", + "details": { + "swaps": [ + { + "indexes": [ + "doggos", + "cattos" + ] + } + ] + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_task_cancelation() { + let server = Server::new().await; + let index = server.index("doggos"); + // to avoid being flaky we're only going to cancel an already finished task :( + index.create(None).await; + index.wait_task(0).await; + server.cancel_task(json!({ "uid": [0] })).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": null, + "status": "succeeded", + "type": "taskCancelation", + "details": { + "matchedTasks": 1, + "canceledTasks": 0, + "originalQuery": "uid=0" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_task_deletion() { + let server = 
Server::new().await; + let index = server.index("doggos"); + // to avoid being flaky we're only going to delete an already finished task :( + index.create(None).await; + index.wait_task(0).await; + server.delete_task(json!({ "uid": [0] })).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": null, + "status": "succeeded", + "type": "taskDeletion", + "details": { + "matchedTasks": 1, + "deletedTasks": 1, + "originalQuery": "uid=0" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_dump_creation() { + let server = Server::new().await; + server.create_dump().await; + server.wait_task(0).await; + let (task, _) = server.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": null, + "status": "succeeded", + "type": "dumpCreation", + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml deleted file mode 100644 index dbaf8faa2..000000000 --- a/meilisearch-lib/Cargo.toml +++ /dev/null @@ -1,82 +0,0 @@ -[package] -name = "meilisearch-lib" -version = "0.29.1" -edition = "2021" - -[dependencies] -actix-web = { version = "4.2.1", default-features = false } -anyhow = { version = "1.0.65", features = ["backtrace"] } -async-stream = "0.3.3" -async-trait = "0.1.57" -atomic_refcell = "0.1.8" -byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] } -bytes = "1.2.1" -clap = { version = "4.0.9", features = ["derive", "env"] } -crossbeam-channel = "0.5.6" -csv = "1.1.6" -derivative = "2.2.0" -either = { version = "1.8.0", features = ["serde"] } -flate2 = "1.0.24" -fs_extra = "1.2.0" -fst = "0.4.7" -futures = "0.3.24" -futures-util = "0.3.24" -http = "0.2.8" -indexmap = { version = "1.9.1", features = ["serde-1"] } -itertools = "0.10.5" -lazy_static = "1.4.0" -log = "0.4.17" -meilisearch-auth = { path = "../meilisearch-auth" } -meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false } -mime = "0.3.16" -num_cpus = "1.13.1" -obkv = "0.2.0" -once_cell = "1.15.0" -page_size = "0.4.2" -parking_lot = "0.12.1" -permissive-json-pointer = { path = "../permissive-json-pointer" } -rand = "0.8.5" -rayon = "1.5.3" -regex = "1.6.0" -reqwest = { version = "0.11.12", features = ["json", "rustls-tls"], default-features = false, optional = true } -roaring = "0.10.1" -rustls = "0.20.6" -serde = { version = "1.0.145", features = ["derive"] } -serde_json = { version = "1.0.85", features = ["preserve_order"] } -siphasher = "0.3.10" -slice-group-by = "0.3.0" -sysinfo = "0.26.4" -tar = "0.4.38" -tempfile = "3.3.0" -thiserror = "1.0.37" -time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] } -tokio = { version = "1.21.2", features = ["full"] } -uuid = { version = "1.1.2", features = ["serde", "v4"] } -walkdir = "2.3.2" -whoami = { version = "1.2.3", optional = true } - -[dev-dependencies] -actix-rt = "2.7.0" -meilisearch-types = { path = "../meilisearch-types", 
features = ["test-traits"] } -mockall = "0.11.2" -nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} -paste = "1.0.9" -proptest = "1.0.0" -proptest-derive = "0.3.0" - -[features] -# all specialized tokenizations -default = ["milli/default"] - -# chinese specialized tokenization -chinese = ["milli/chinese"] - -# hebrew specialized tokenization -hebrew = ["milli/hebrew"] - -# japanese specialized tokenization -japanese = ["milli/japanese"] - -# thai specialized tokenization -thai = ["milli/thai"] diff --git a/meilisearch-lib/proptest-regressions/index_resolver/mod.txt b/meilisearch-lib/proptest-regressions/index_resolver/mod.txt deleted file mode 100644 index 553b8f1d5..000000000 --- a/meilisearch-lib/proptest-regressions/index_resolver/mod.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc 6f3ae3cba934ba3e328e2306218c32f27a46ce2d54a1258b05fef65663208662 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentAddition { content_uuid: 37bc137d-2038-47f0-819f-b133233daadc, merge_strategy: ReplaceDocuments, primary_key: None, documents_count: 0 }, events: [] } -cc b726f7d9f44a9216aad302ddba0f04e7108817e741d656a4759aea8562de4d63 # shrinks to task = Task { id: 0, index_uid: IndexUid("_"), content: IndexDeletion, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc 427ec2dde3260b1ab334207bdc22adef28a5b8532b9902c84b55fd2c017ea7e1 # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = false, any_int = 0 -cc c24f3d42f0f36fbdbf4e9d4327e75529b163ac580d63a5934ca05e9b5bd23a65 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = true, any_int = 0 -cc 8084e2410801b997533b0bcbad75cd212873cfc2677f26847f68c568ead1604c # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: false }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc 330085e0200a9a2ddfdd764a03d768aa95c431bcaafbd530c8c949425beed18b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0 -cc c70e901576ef2fb9622e814bdecd11e4747cd70d71a9a6ce771b5b7256a187c0 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: true }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc 3fe2c38cbc2cca34ecde321472141d386056f0cd332cbf700773657715a382b5 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: None }, events: [] }, 
index_exists = false, index_op_fails = false, any_int = 0 -cc c31cf86692968483f1ab08a6a9d4667ccb9635c306998551bf1eb1f135ef0d4b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: Some("") }, events: [] }, index_exists = true, index_op_fails = false, any_int = 0 -cc 3a01c78db082434b8a4f8914abf0d1059d39f4426d16df20d72e1bd7ebb94a6a # shrinks to task = Task { id: 0, index_uid: IndexUid("0"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = true, index_op_fails = true, any_int = 0 -cc c450806df3921d1e6fe9b6af93d999e8196d0175b69b64f1810802582421e94a # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc fb6b98947cbdbdee05ed3c0bf2923aad2c311edc276253642eb43a0c0ec4888a # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0 -cc 1aa59d8e22484e9915efbb5818e1e1ab684aa61b166dc82130d6221663ba00bf # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentDeletion(Clear), events: [] }, index_exists = true, index_op_fails = false, any_int = 0 diff --git a/meilisearch-lib/proptest-regressions/tasks/task_store/store.txt b/meilisearch-lib/proptest-regressions/tasks/task_store/store.txt deleted file mode 100644 index a857bfbe4..000000000 --- a/meilisearch-lib/proptest-regressions/tasks/task_store/store.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc 8cbd6c45ce8c5611ec3f2f94fd485f6a8eeccc470fa426e59bdfd4d9e7fce0e1 # shrinks to bytes = [] diff --git a/meilisearch-lib/src/analytics.rs b/meilisearch-lib/src/analytics.rs deleted file mode 100644 index adfddf998..000000000 --- a/meilisearch-lib/src/analytics.rs +++ /dev/null @@ -1,8 +0,0 @@ -use std::{fs, path::Path}; - -/// Copy the `instance-uid` contained in one db to another. Ignore all errors. -pub fn copy_user_id(src: &Path, dst: &Path) { - if let Ok(user_id) = fs::read_to_string(src.join("instance-uid")) { - let _ = fs::write(dst.join("instance-uid"), &user_id); - } -} diff --git a/meilisearch-lib/src/dump/compat/mod.rs b/meilisearch-lib/src/dump/compat/mod.rs deleted file mode 100644 index 9abac24c7..000000000 --- a/meilisearch-lib/src/dump/compat/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -pub mod v2; -pub mod v3; -pub mod v4; - -/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name. -pub fn asc_ranking_rule(text: &str) -> Option<&str> { - text.split_once("asc(") - .and_then(|(_, tail)| tail.rsplit_once(')')) - .map(|(field, _)| field) -} - -/// Parses the v1 version of the Desc ranking rules `desc(price)`and returns the field name. 
-pub fn desc_ranking_rule(text: &str) -> Option<&str> { - text.split_once("desc(") - .and_then(|(_, tail)| tail.rsplit_once(')')) - .map(|(field, _)| field) -} diff --git a/meilisearch-lib/src/dump/compat/v2.rs b/meilisearch-lib/src/dump/compat/v2.rs deleted file mode 100644 index ba3b8e3a6..000000000 --- a/meilisearch-lib/src/dump/compat/v2.rs +++ /dev/null @@ -1,152 +0,0 @@ -use anyhow::bail; -use meilisearch_types::error::Code; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::index::{Settings, Unchecked}; - -#[derive(Serialize, Deserialize)] -pub struct UpdateEntry { - pub uuid: Uuid, - pub update: UpdateStatus, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum UpdateFormat { - Json, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct DocumentAdditionResult { - pub nb_documents: usize, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum UpdateResult { - DocumentsAddition(DocumentAdditionResult), - DocumentDeletion { deleted: u64 }, - Other, -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type")] -pub enum UpdateMeta { - DocumentsAddition { - method: IndexDocumentsMethod, - format: UpdateFormat, - primary_key: Option<String>, - }, - ClearDocuments, - DeleteDocuments { - ids: Vec<String>, - }, - Settings(Settings<Unchecked>), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Enqueued { - pub update_id: u64, - pub meta: UpdateMeta, - #[serde(with = "time::serde::rfc3339")] - pub enqueued_at: OffsetDateTime, - pub content: Option<Uuid>, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processed { - pub success: UpdateResult, - #[serde(with = "time::serde::rfc3339")] - pub processed_at: OffsetDateTime, - #[serde(flatten)] - pub from: Processing, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processing { - #[serde(flatten)] - pub from: Enqueued, - #[serde(with = "time::serde::rfc3339")] - pub started_processing_at: OffsetDateTime, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Aborted { - #[serde(flatten)] - pub from: Enqueued, - #[serde(with = "time::serde::rfc3339")] - pub aborted_at: OffsetDateTime, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Failed { - #[serde(flatten)] - pub from: Processing, - pub error: ResponseError, - #[serde(with = "time::serde::rfc3339")] - pub failed_at: OffsetDateTime, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "status", rename_all = "camelCase")] -pub enum UpdateStatus { - Processing(Processing), - Enqueued(Enqueued), - Processed(Processed), - Aborted(Aborted), - Failed(Failed), -} - -type StatusCode = (); - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct ResponseError { - #[serde(skip)] - pub code: StatusCode, - pub message: String, - pub error_code: String, - pub error_type: String, - pub error_link: String, -} - -pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> { - let code = match s { - "index_creation_failed" => Code::CreateIndex, - "index_already_exists" => Code::IndexAlreadyExists, - "index_not_found" => Code::IndexNotFound, - "invalid_index_uid" => Code::InvalidIndexUid, - "invalid_state" => Code::InvalidState, - "missing_primary_key" => Code::MissingPrimaryKey, - 
"primary_key_already_present" => Code::PrimaryKeyAlreadyPresent, - "invalid_request" => Code::InvalidRankingRule, - "max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded, - "missing_document_id" => Code::MissingDocumentId, - "invalid_facet" => Code::Filter, - "invalid_filter" => Code::Filter, - "invalid_sort" => Code::Sort, - "bad_parameter" => Code::BadParameter, - "bad_request" => Code::BadRequest, - "document_not_found" => Code::DocumentNotFound, - "internal" => Code::Internal, - "invalid_geo_field" => Code::InvalidGeoField, - "invalid_token" => Code::InvalidToken, - "missing_authorization_header" => Code::MissingAuthorizationHeader, - "payload_too_large" => Code::PayloadTooLarge, - "unretrievable_document" => Code::RetrieveDocument, - "search_error" => Code::SearchDocuments, - "unsupported_media_type" => Code::UnsupportedMediaType, - "dump_already_in_progress" => Code::DumpAlreadyInProgress, - "dump_process_failed" => Code::DumpProcessFailed, - _ => bail!("unknown error code."), - }; - - Ok(code) -} diff --git a/meilisearch-lib/src/dump/compat/v3.rs b/meilisearch-lib/src/dump/compat/v3.rs deleted file mode 100644 index 61e31eccd..000000000 --- a/meilisearch-lib/src/dump/compat/v3.rs +++ /dev/null @@ -1,205 +0,0 @@ -use meilisearch_types::error::{Code, ResponseError}; -use meilisearch_types::index_uid::IndexUid; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use super::v4::{Task, TaskContent, TaskEvent}; -use crate::index::{Settings, Unchecked}; -use crate::tasks::task::{DocumentDeletion, TaskId, TaskResult}; - -use super::v2; - -#[derive(Serialize, Deserialize)] -pub struct DumpEntry { - pub uuid: Uuid, - pub uid: String, -} - -#[derive(Serialize, Deserialize)] -pub struct UpdateEntry { - pub uuid: Uuid, - pub update: UpdateStatus, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "status", rename_all = "camelCase")] -pub enum UpdateStatus { - Processing(Processing), - Enqueued(Enqueued), - Processed(Processed), - Failed(Failed), -} - -impl From for TaskResult { - fn from(other: v2::UpdateResult) -> Self { - match other { - v2::UpdateResult::DocumentsAddition(result) => TaskResult::DocumentAddition { - indexed_documents: result.nb_documents as u64, - }, - v2::UpdateResult::DocumentDeletion { deleted } => TaskResult::DocumentDeletion { - deleted_documents: deleted, - }, - v2::UpdateResult::Other => TaskResult::Other, - } - } -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum Update { - DeleteDocuments(Vec), - DocumentAddition { - primary_key: Option, - method: IndexDocumentsMethod, - content_uuid: Uuid, - }, - Settings(Settings), - ClearDocuments, -} - -impl From for super::v4::TaskContent { - fn from(update: Update) -> Self { - match update { - Update::DeleteDocuments(ids) => { - TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) - } - Update::DocumentAddition { - primary_key, - method, - .. 
- } => TaskContent::DocumentAddition { - content_uuid: Uuid::default(), - merge_strategy: method, - primary_key, - // document count is unknown for legacy updates - documents_count: 0, - allow_index_creation: true, - }, - Update::Settings(settings) => TaskContent::SettingsUpdate { - settings, - // There is no way to know now, so we assume it isn't - is_deletion: false, - allow_index_creation: true, - }, - Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear), - } - } -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type")] -pub enum UpdateMeta { - DocumentsAddition { - method: IndexDocumentsMethod, - primary_key: Option, - }, - ClearDocuments, - DeleteDocuments { - ids: Vec, - }, - Settings(Settings), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Enqueued { - pub update_id: u64, - pub meta: Update, - #[serde(with = "time::serde::rfc3339")] - pub enqueued_at: OffsetDateTime, -} - -impl Enqueued { - fn update_task(self, task: &mut Task) { - // we do not erase the `TaskId` that was given to us. - task.content = self.meta.into(); - task.events.push(TaskEvent::Created(self.enqueued_at)); - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processed { - pub success: v2::UpdateResult, - #[serde(with = "time::serde::rfc3339")] - pub processed_at: OffsetDateTime, - #[serde(flatten)] - pub from: Processing, -} - -impl Processed { - fn update_task(self, task: &mut Task) { - self.from.update_task(task); - - let event = TaskEvent::Succeded { - result: TaskResult::from(self.success), - timestamp: self.processed_at, - }; - task.events.push(event); - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processing { - #[serde(flatten)] - pub from: Enqueued, - #[serde(with = "time::serde::rfc3339")] - pub started_processing_at: OffsetDateTime, -} - -impl Processing { - fn update_task(self, task: &mut Task) { - self.from.update_task(task); - - let event = TaskEvent::Processing(self.started_processing_at); - task.events.push(event); - } -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Failed { - #[serde(flatten)] - pub from: Processing, - pub msg: String, - pub code: Code, - #[serde(with = "time::serde::rfc3339")] - pub failed_at: OffsetDateTime, -} - -impl Failed { - fn update_task(self, task: &mut Task) { - self.from.update_task(task); - - let event = TaskEvent::Failed { - error: ResponseError::from_msg(self.msg, self.code), - timestamp: self.failed_at, - }; - task.events.push(event); - } -} - -impl From<(UpdateStatus, String, TaskId)> for Task { - fn from((update, uid, task_id): (UpdateStatus, String, TaskId)) -> Self { - // Dummy task - let mut task = super::v4::Task { - id: task_id, - index_uid: IndexUid::new_unchecked(uid), - content: super::v4::TaskContent::IndexDeletion, - events: Vec::new(), - }; - - match update { - UpdateStatus::Processing(u) => u.update_task(&mut task), - UpdateStatus::Enqueued(u) => u.update_task(&mut task), - UpdateStatus::Processed(u) => u.update_task(&mut task), - UpdateStatus::Failed(u) => u.update_task(&mut task), - } - - task - } -} diff --git a/meilisearch-lib/src/dump/compat/v4.rs b/meilisearch-lib/src/dump/compat/v4.rs deleted file mode 100644 index 89e9ee1ab..000000000 --- a/meilisearch-lib/src/dump/compat/v4.rs +++ /dev/null @@ -1,145 +0,0 @@ -use 
meilisearch_types::error::ResponseError; -use meilisearch_types::index_uid::IndexUid; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::index::{Settings, Unchecked}; -use crate::tasks::batch::BatchId; -use crate::tasks::task::{ - DocumentDeletion, TaskContent as NewTaskContent, TaskEvent as NewTaskEvent, TaskId, TaskResult, -}; - -#[derive(Debug, Serialize, Deserialize)] -pub struct Task { - pub id: TaskId, - pub index_uid: IndexUid, - pub content: TaskContent, - pub events: Vec<TaskEvent>, -} - -impl From<Task> for crate::tasks::task::Task { - fn from(other: Task) -> Self { - Self { - id: other.id, - content: NewTaskContent::from((other.index_uid, other.content)), - events: other.events.into_iter().map(Into::into).collect(), - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub enum TaskEvent { - Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), - Batched { - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - batch_id: BatchId, - }, - Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), - Succeded { - result: TaskResult, - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, - Failed { - error: ResponseError, - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, -} - -impl From<TaskEvent> for NewTaskEvent { - fn from(other: TaskEvent) -> Self { - match other { - TaskEvent::Created(x) => NewTaskEvent::Created(x), - TaskEvent::Batched { - timestamp, - batch_id, - } => NewTaskEvent::Batched { - timestamp, - batch_id, - }, - TaskEvent::Processing(x) => NewTaskEvent::Processing(x), - TaskEvent::Succeded { result, timestamp } => { - NewTaskEvent::Succeeded { result, timestamp } - } - TaskEvent::Failed { error, timestamp } => NewTaskEvent::Failed { error, timestamp }, - } - } -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[allow(clippy::large_enum_variant)] -pub enum TaskContent { - DocumentAddition { - content_uuid: Uuid, - merge_strategy: IndexDocumentsMethod, - primary_key: Option<String>, - documents_count: usize, - allow_index_creation: bool, - }, - DocumentDeletion(DocumentDeletion), - SettingsUpdate { - settings: Settings<Unchecked>, - /// Indicates whether the task was a deletion - is_deletion: bool, - allow_index_creation: bool, - }, - IndexDeletion, - IndexCreation { - primary_key: Option<String>, - }, - IndexUpdate { - primary_key: Option<String>, - }, - Dump { - uid: String, - }, -} - -impl From<(IndexUid, TaskContent)> for NewTaskContent { - fn from((index_uid, content): (IndexUid, TaskContent)) -> Self { - match content { - TaskContent::DocumentAddition { - content_uuid, - merge_strategy, - primary_key, - documents_count, - allow_index_creation, - } => NewTaskContent::DocumentAddition { - index_uid, - content_uuid, - merge_strategy, - primary_key, - documents_count, - allow_index_creation, - }, - TaskContent::DocumentDeletion(deletion) => NewTaskContent::DocumentDeletion { - index_uid, - deletion, - }, - TaskContent::SettingsUpdate { - settings, - is_deletion, - allow_index_creation, - } => NewTaskContent::SettingsUpdate { - index_uid, - settings, - is_deletion, - allow_index_creation, - }, - TaskContent::IndexDeletion => NewTaskContent::IndexDeletion { index_uid }, - TaskContent::IndexCreation { primary_key } => NewTaskContent::IndexCreation { - index_uid, - primary_key, - }, - TaskContent::IndexUpdate { primary_key } => NewTaskContent::IndexUpdate { - index_uid, - primary_key, - }, - TaskContent::Dump { uid } => NewTaskContent::Dump {
uid }, - } - } -} diff --git a/meilisearch-lib/src/dump/handler.rs b/meilisearch-lib/src/dump/handler.rs deleted file mode 100644 index 069196451..000000000 --- a/meilisearch-lib/src/dump/handler.rs +++ /dev/null @@ -1,188 +0,0 @@ -#[cfg(not(test))] -pub use real::DumpHandler; - -#[cfg(test)] -pub use test::MockDumpHandler as DumpHandler; - -use time::{macros::format_description, OffsetDateTime}; - -/// Generate uid from creation date -pub fn generate_uid() -> String { - OffsetDateTime::now_utc() - .format(format_description!( - "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - )) - .unwrap() -} - -mod real { - use std::path::PathBuf; - use std::sync::Arc; - - use log::{info, trace}; - use meilisearch_auth::AuthController; - use milli::heed::Env; - use tokio::fs::create_dir_all; - use tokio::io::AsyncWriteExt; - - use crate::analytics; - use crate::compression::to_tar_gz; - use crate::dump::error::{DumpError, Result}; - use crate::dump::{MetadataVersion, META_FILE_NAME}; - use crate::index_resolver::{ - index_store::IndexStore, meta_store::IndexMetaStore, IndexResolver, - }; - use crate::tasks::TaskStore; - use crate::update_file_store::UpdateFileStore; - - pub struct DumpHandler { - dump_path: PathBuf, - db_path: PathBuf, - update_file_store: UpdateFileStore, - task_store_size: usize, - index_db_size: usize, - env: Arc, - index_resolver: Arc>, - } - - impl DumpHandler - where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, - { - pub fn new( - dump_path: PathBuf, - db_path: PathBuf, - update_file_store: UpdateFileStore, - task_store_size: usize, - index_db_size: usize, - env: Arc, - index_resolver: Arc>, - ) -> Self { - Self { - dump_path, - db_path, - update_file_store, - task_store_size, - index_db_size, - env, - index_resolver, - } - } - - pub async fn run(&self, uid: String) -> Result<()> { - trace!("Performing dump."); - - create_dir_all(&self.dump_path).await?; - - let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size); - let meta_path = temp_dump_path.join(META_FILE_NAME); - - let meta_bytes = serde_json::to_vec(&meta)?; - let mut meta_file = tokio::fs::File::create(&meta_path).await?; - meta_file.write_all(&meta_bytes).await?; - - analytics::copy_user_id(&self.db_path, &temp_dump_path); - - create_dir_all(&temp_dump_path.join("indexes")).await?; - - let db_path = self.db_path.clone(); - let temp_dump_path_clone = temp_dump_path.clone(); - tokio::task::spawn_blocking(move || -> Result<()> { - AuthController::dump(db_path, temp_dump_path_clone)?; - Ok(()) - }) - .await??; - TaskStore::dump( - self.env.clone(), - &temp_dump_path, - self.update_file_store.clone(), - ) - .await?; - self.index_resolver.dump(&temp_dump_path).await?; - - let dump_path = self.dump_path.clone(); - let dump_path = tokio::task::spawn_blocking(move || -> Result { - // for now we simply copy the updates/updates_files - // FIXME: We may copy more files than necessary, if new files are added while we are - // performing the dump. We need a way to filter them out. 
- - let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; - to_tar_gz(temp_dump_path, temp_dump_file.path()) - .map_err(|e| DumpError::Internal(e.into()))?; - - let dump_path = dump_path.join(uid).with_extension("dump"); - temp_dump_file.persist(&dump_path)?; - - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - - Ok(()) - } - } -} - -#[cfg(test)] -mod test { - use std::path::PathBuf; - use std::sync::Arc; - - use milli::heed::Env; - use nelson::Mocker; - - use crate::dump::error::Result; - use crate::index_resolver::IndexResolver; - use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore}; - use crate::update_file_store::UpdateFileStore; - - use super::*; - - pub enum MockDumpHandler { - Real(super::real::DumpHandler), - Mock(Mocker), - } - - impl MockDumpHandler { - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(mocker) - } - } - - impl MockDumpHandler - where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, - { - pub fn new( - dump_path: PathBuf, - db_path: PathBuf, - update_file_store: UpdateFileStore, - task_store_size: usize, - index_db_size: usize, - env: Arc, - index_resolver: Arc>, - ) -> Self { - Self::Real(super::real::DumpHandler::new( - dump_path, - db_path, - update_file_store, - task_store_size, - index_db_size, - env, - index_resolver, - )) - } - pub async fn run(&self, uid: String) -> Result<()> { - match self { - DumpHandler::Real(real) => real.run(uid).await, - DumpHandler::Mock(mocker) => unsafe { mocker.get("run").call(uid) }, - } - } - } -} diff --git a/meilisearch-lib/src/dump/loaders/mod.rs b/meilisearch-lib/src/dump/loaders/mod.rs deleted file mode 100644 index 199b20c02..000000000 --- a/meilisearch-lib/src/dump/loaders/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod v2; -pub mod v3; -pub mod v4; -pub mod v5; diff --git a/meilisearch-lib/src/dump/loaders/v2.rs b/meilisearch-lib/src/dump/loaders/v2.rs deleted file mode 100644 index 5926de931..000000000 --- a/meilisearch-lib/src/dump/loaders/v2.rs +++ /dev/null @@ -1,216 +0,0 @@ -use std::fs::{File, OpenOptions}; -use std::io::Write; -use std::path::{Path, PathBuf}; - -use serde_json::{Deserializer, Value}; -use tempfile::NamedTempFile; - -use crate::dump::compat::{self, v2, v3}; -use crate::dump::Metadata; -use crate::options::IndexerOpts; - -/// The dump v2 reads the dump folder and patches all the needed file to make it compatible with a -/// dump v3, then calls the dump v3 to actually handle the dump. 
-pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - update_db_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - log::info!("Patching dump V2 to dump V3..."); - let indexes_path = src.as_ref().join("indexes"); - - let dir_entries = std::fs::read_dir(indexes_path)?; - for entry in dir_entries { - let entry = entry?; - - // rename the index folder - let path = entry.path(); - let new_path = patch_index_uuid_path(&path).expect("invalid index folder."); - - std::fs::rename(path, &new_path)?; - - let settings_path = new_path.join("meta.json"); - - patch_settings(settings_path)?; - } - - let update_dir = src.as_ref().join("updates"); - let update_path = update_dir.join("data.jsonl"); - patch_updates(update_dir, update_path)?; - - super::v3::load_dump( - meta, - src, - dst, - index_db_size, - update_db_size, - indexing_options, - ) -} - -fn patch_index_uuid_path(path: &Path) -> Option { - let uuid = path.file_name()?.to_str()?.trim_start_matches("index-"); - let new_path = path.parent()?.join(uuid); - Some(new_path) -} - -fn patch_settings(path: impl AsRef) -> anyhow::Result<()> { - let mut meta_file = File::open(&path)?; - let mut meta: Value = serde_json::from_reader(&mut meta_file)?; - - // We first deserialize the dump meta into a serde_json::Value and change - // the custom ranking rules settings from the old format to the new format. - if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { - patch_custom_ranking_rules(ranking_rules); - } - - let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?; - - serde_json::to_writer(&mut meta_file, &meta)?; - - Ok(()) -} - -fn patch_updates(dir: impl AsRef, path: impl AsRef) -> anyhow::Result<()> { - let mut output_update_file = NamedTempFile::new_in(&dir)?; - let update_file = File::open(&path)?; - - let stream = Deserializer::from_reader(update_file).into_iter::(); - - for update in stream { - let update_entry = update?; - - let update_entry = v3::UpdateEntry::from(update_entry); - - serde_json::to_writer(&mut output_update_file, &update_entry)?; - output_update_file.write_all(b"\n")?; - } - - output_update_file.flush()?; - output_update_file.persist(path)?; - - Ok(()) -} - -/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. -/// -/// This is done for compatibility reasons, and to avoid a new dump version, -/// since the new syntax was introduced soon after the new dump version. 
-fn patch_custom_ranking_rules(ranking_rules: &mut Value) { - *ranking_rules = match ranking_rules.take() { - Value::Array(values) => values - .into_iter() - .filter_map(|value| match value { - Value::String(s) if s.starts_with("asc") => compat::asc_ranking_rule(&s) - .map(|f| format!("{}:asc", f)) - .map(Value::String), - Value::String(s) if s.starts_with("desc") => compat::desc_ranking_rule(&s) - .map(|f| format!("{}:desc", f)) - .map(Value::String), - otherwise => Some(otherwise), - }) - .collect(), - otherwise => otherwise, - } -} - -impl From<v2::UpdateEntry> for v3::UpdateEntry { - fn from(v2::UpdateEntry { uuid, update }: v2::UpdateEntry) -> Self { - let update = match update { - v2::UpdateStatus::Processing(meta) => v3::UpdateStatus::Processing(meta.into()), - v2::UpdateStatus::Enqueued(meta) => v3::UpdateStatus::Enqueued(meta.into()), - v2::UpdateStatus::Processed(meta) => v3::UpdateStatus::Processed(meta.into()), - v2::UpdateStatus::Aborted(_) => unreachable!("Updates could never be aborted."), - v2::UpdateStatus::Failed(meta) => v3::UpdateStatus::Failed(meta.into()), - }; - - Self { uuid, update } - } -} - -impl From<v2::Failed> for v3::Failed { - fn from(other: v2::Failed) -> Self { - let v2::Failed { - from, - error, - failed_at, - } = other; - - Self { - from: from.into(), - msg: error.message, - code: v2::error_code_from_str(&error.error_code) - .expect("Invalid update: Invalid error code"), - failed_at, - } - } -} - -impl From<v2::Processing> for v3::Processing { - fn from(other: v2::Processing) -> Self { - let v2::Processing { - from, - started_processing_at, - } = other; - - Self { - from: from.into(), - started_processing_at, - } - } -} - -impl From<v2::Enqueued> for v3::Enqueued { - fn from(other: v2::Enqueued) -> Self { - let v2::Enqueued { - update_id, - meta, - enqueued_at, - content, - } = other; - - let meta = match meta { - v2::UpdateMeta::DocumentsAddition { - method, - primary_key, - .. - } => { - v3::Update::DocumentAddition { - primary_key, - method, - // Just ignore if the uuid is no present. If it is needed later, an error will - // be thrown. - content_uuid: content.unwrap_or_default(), - } - } - v2::UpdateMeta::ClearDocuments => v3::Update::ClearDocuments, - v2::UpdateMeta::DeleteDocuments { ids } => v3::Update::DeleteDocuments(ids), - v2::UpdateMeta::Settings(settings) => v3::Update::Settings(settings), - }; - - Self { - update_id, - meta, - enqueued_at, - } - } -} - -impl From<v2::Processed> for v3::Processed { - fn from(other: v2::Processed) -> Self { - let v2::Processed { - from, - success, - processed_at, - } = other; - - Self { - success, - processed_at, - from: from.into(), - } - } -} diff --git a/meilisearch-lib/src/dump/loaders/v3.rs b/meilisearch-lib/src/dump/loaders/v3.rs deleted file mode 100644 index 44984c946..000000000 --- a/meilisearch-lib/src/dump/loaders/v3.rs +++ /dev/null @@ -1,136 +0,0 @@ -use std::collections::HashMap; -use std::fs::{self, File}; -use std::io::{BufReader, BufWriter, Write}; -use std::path::Path; - -use anyhow::Context; -use fs_extra::dir::{self, CopyOptions}; -use log::info; -use tempfile::tempdir; -use uuid::Uuid; - -use crate::dump::compat::{self, v3}; -use crate::dump::Metadata; -use crate::index_resolver::meta_store::{DumpEntry, IndexMeta}; -use crate::options::IndexerOpts; -use crate::tasks::task::TaskId; - -/// dump structure for V3: -/// . 
-/// ├── indexes -/// │   └── 25f10bb8-6ea8-42f0-bd48-ad5857f77648 -/// │   ├── documents.jsonl -/// │   └── meta.json -/// ├── index_uuids -/// │   └── data.jsonl -/// ├── metadata.json -/// └── updates -/// └── data.jsonl - -pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - info!("Patching dump V3 to dump V4..."); - - let patched_dir = tempdir()?; - - let options = CopyOptions::default(); - dir::copy(src.as_ref().join("indexes"), patched_dir.path(), &options)?; - dir::copy( - src.as_ref().join("index_uuids"), - patched_dir.path(), - &options, - )?; - - let uuid_map = patch_index_meta( - src.as_ref().join("index_uuids/data.jsonl"), - patched_dir.path(), - )?; - - fs::copy( - src.as_ref().join("metadata.json"), - patched_dir.path().join("metadata.json"), - )?; - - patch_updates(&src, patched_dir.path(), uuid_map)?; - - super::v4::load_dump( - meta, - patched_dir.path(), - dst, - index_db_size, - meta_env_size, - indexing_options, - ) -} - -fn patch_index_meta( - path: impl AsRef, - dst: impl AsRef, -) -> anyhow::Result> { - let file = BufReader::new(File::open(path)?); - let dst = dst.as_ref().join("index_uuids"); - fs::create_dir_all(&dst)?; - let mut dst_file = File::create(dst.join("data.jsonl"))?; - - let map = serde_json::Deserializer::from_reader(file) - .into_iter::() - .try_fold(HashMap::new(), |mut map, entry| -> anyhow::Result<_> { - let entry = entry?; - map.insert(entry.uuid, entry.uid.clone()); - let meta = IndexMeta { - uuid: entry.uuid, - // This is lost information, we patch it to 0; - creation_task_id: 0, - }; - let entry = DumpEntry { - uid: entry.uid, - index_meta: meta, - }; - serde_json::to_writer(&mut dst_file, &entry)?; - dst_file.write_all(b"\n")?; - Ok(map) - })?; - - dst_file.flush()?; - - Ok(map) -} - -fn patch_updates( - src: impl AsRef, - dst: impl AsRef, - uuid_map: HashMap, -) -> anyhow::Result<()> { - let dst = dst.as_ref().join("updates"); - fs::create_dir_all(&dst)?; - - let mut dst_file = BufWriter::new(File::create(dst.join("data.jsonl"))?); - let src_file = BufReader::new(File::open(src.as_ref().join("updates/data.jsonl"))?); - - serde_json::Deserializer::from_reader(src_file) - .into_iter::() - .enumerate() - .try_for_each(|(task_id, entry)| -> anyhow::Result<()> { - let entry = entry?; - let name = uuid_map - .get(&entry.uuid) - .with_context(|| format!("Unknown index uuid: {}", entry.uuid))? 
- .clone(); - serde_json::to_writer( - &mut dst_file, - &compat::v4::Task::from((entry.update, name, task_id as TaskId)), - )?; - dst_file.write_all(b"\n")?; - Ok(()) - })?; - - dst_file.flush()?; - - Ok(()) -} diff --git a/meilisearch-lib/src/dump/loaders/v4.rs b/meilisearch-lib/src/dump/loaders/v4.rs deleted file mode 100644 index 44ec23517..000000000 --- a/meilisearch-lib/src/dump/loaders/v4.rs +++ /dev/null @@ -1,103 +0,0 @@ -use std::fs::{self, create_dir_all, File}; -use std::io::{BufReader, Write}; -use std::path::Path; - -use fs_extra::dir::{self, CopyOptions}; -use log::info; -use serde_json::{Deserializer, Map, Value}; -use tempfile::tempdir; -use uuid::Uuid; - -use crate::dump::{compat, Metadata}; -use crate::options::IndexerOpts; -use crate::tasks::task::Task; - -pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - info!("Patching dump V4 to dump V5..."); - - let patched_dir = tempdir()?; - let options = CopyOptions::default(); - - // Indexes - dir::copy(src.as_ref().join("indexes"), &patched_dir, &options)?; - - // Index uuids - dir::copy(src.as_ref().join("index_uuids"), &patched_dir, &options)?; - - // Metadata - fs::copy( - src.as_ref().join("metadata.json"), - patched_dir.path().join("metadata.json"), - )?; - - // Updates - patch_updates(&src, &patched_dir)?; - - // Keys - patch_keys(&src, &patched_dir)?; - - super::v5::load_dump( - meta, - &patched_dir, - dst, - index_db_size, - meta_env_size, - indexing_options, - ) -} - -fn patch_updates(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let updates_path = src.as_ref().join("updates/data.jsonl"); - let output_updates_path = dst.as_ref().join("updates/data.jsonl"); - create_dir_all(output_updates_path.parent().unwrap())?; - let updates_file = File::open(updates_path)?; - let mut output_update_file = File::create(output_updates_path)?; - - serde_json::Deserializer::from_reader(updates_file) - .into_iter::() - .try_for_each(|task| -> anyhow::Result<()> { - let task: Task = task?.into(); - - serde_json::to_writer(&mut output_update_file, &task)?; - output_update_file.write_all(b"\n")?; - - Ok(()) - })?; - - output_update_file.flush()?; - - Ok(()) -} - -fn patch_keys(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let keys_file_src = src.as_ref().join("keys"); - - if !keys_file_src.exists() { - return Ok(()); - } - - fs::create_dir_all(&dst)?; - let keys_file_dst = dst.as_ref().join("keys"); - let mut writer = File::create(&keys_file_dst)?; - - let reader = BufReader::new(File::open(&keys_file_src)?); - for key in Deserializer::from_reader(reader).into_iter() { - let mut key: Map = key?; - - // generate a new uuid v4 and insert it in the key. 
- let uid = serde_json::to_value(Uuid::new_v4()).unwrap(); - key.insert("uid".to_string(), uid); - - serde_json::to_writer(&mut writer, &key)?; - writer.write_all(b"\n")?; - } - - Ok(()) -} diff --git a/meilisearch-lib/src/dump/loaders/v5.rs b/meilisearch-lib/src/dump/loaders/v5.rs deleted file mode 100644 index fcb4224bb..000000000 --- a/meilisearch-lib/src/dump/loaders/v5.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::{path::Path, sync::Arc}; - -use log::info; -use meilisearch_auth::AuthController; -use milli::heed::EnvOpenOptions; - -use crate::analytics; -use crate::dump::Metadata; -use crate::index_resolver::IndexResolver; -use crate::options::IndexerOpts; -use crate::tasks::TaskStore; -use crate::update_file_store::UpdateFileStore; - -pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - info!( - "Loading dump from {}, dump database version: {}, dump version: V5", - meta.dump_date, meta.db_version - ); - - let mut options = EnvOpenOptions::new(); - options.map_size(meta_env_size); - options.max_dbs(100); - let env = Arc::new(options.open(&dst)?); - - IndexResolver::load_dump( - src.as_ref(), - &dst, - index_db_size, - env.clone(), - indexing_options, - )?; - UpdateFileStore::load_dump(src.as_ref(), &dst)?; - TaskStore::load_dump(&src, env)?; - AuthController::load_dump(&src, &dst)?; - analytics::copy_user_id(src.as_ref(), dst.as_ref()); - - info!("Loading indexes."); - - Ok(()) -} diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs deleted file mode 100644 index 10a3216f2..000000000 --- a/meilisearch-lib/src/dump/mod.rs +++ /dev/null @@ -1,262 +0,0 @@ -use std::fs::File; -use std::path::Path; - -use anyhow::bail; -use log::info; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; - -use tempfile::TempDir; - -use crate::compression::from_tar_gz; -use crate::options::IndexerOpts; - -use self::loaders::{v2, v3, v4, v5}; - -pub use handler::{generate_uid, DumpHandler}; - -mod compat; -pub mod error; -mod handler; -mod loaders; - -const META_FILE_NAME: &str = "metadata.json"; - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct Metadata { - db_version: String, - index_db_size: usize, - update_db_size: usize, - #[serde(with = "time::serde::rfc3339")] - dump_date: OffsetDateTime, -} - -impl Metadata { - pub fn new(index_db_size: usize, update_db_size: usize) -> Self { - Self { - db_version: env!("CARGO_PKG_VERSION").to_string(), - index_db_size, - update_db_size, - dump_date: OffsetDateTime::now_utc(), - } - } -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct MetadataV1 { - pub db_version: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "dumpVersion")] -pub enum MetadataVersion { - V1(MetadataV1), - V2(Metadata), - V3(Metadata), - V4(Metadata), - // V5 is forward compatible with V4 but not backward compatible. - V5(Metadata), -} - -impl MetadataVersion { - pub fn load_dump( - self, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, - ) -> anyhow::Result<()> { - match self { - MetadataVersion::V1(_meta) => { - anyhow::bail!("The version 1 of the dumps is not supported anymore. 
You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") - } - MetadataVersion::V2(meta) => v2::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - MetadataVersion::V3(meta) => v3::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - MetadataVersion::V4(meta) => v4::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - MetadataVersion::V5(meta) => v5::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - } - - Ok(()) - } - - pub fn new_v5(index_db_size: usize, update_db_size: usize) -> Self { - let meta = Metadata::new(index_db_size, update_db_size); - Self::V5(meta) - } - - pub fn db_version(&self) -> &str { - match self { - Self::V1(meta) => &meta.db_version, - Self::V2(meta) | Self::V3(meta) | Self::V4(meta) | Self::V5(meta) => &meta.db_version, - } - } - - pub fn version(&self) -> &'static str { - match self { - MetadataVersion::V1(_) => "V1", - MetadataVersion::V2(_) => "V2", - MetadataVersion::V3(_) => "V3", - MetadataVersion::V4(_) => "V4", - MetadataVersion::V5(_) => "V5", - } - } - - pub fn dump_date(&self) -> Option<&OffsetDateTime> { - match self { - MetadataVersion::V1(_) => None, - MetadataVersion::V2(meta) - | MetadataVersion::V3(meta) - | MetadataVersion::V4(meta) - | MetadataVersion::V5(meta) => Some(&meta.dump_date), - } - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] -#[serde(rename_all = "snake_case")] -pub enum DumpStatus { - Done, - InProgress, - Failed, -} - -pub fn load_dump( - dst_path: impl AsRef, - src_path: impl AsRef, - ignore_dump_if_db_exists: bool, - ignore_missing_dump: bool, - index_db_size: usize, - update_db_size: usize, - indexer_opts: &IndexerOpts, -) -> anyhow::Result<()> { - let empty_db = crate::is_empty_db(&dst_path); - let src_path_exists = src_path.as_ref().exists(); - - if empty_db && src_path_exists { - let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?; - meta.load_dump( - tmp_src.path(), - tmp_dst.path(), - index_db_size, - update_db_size, - indexer_opts, - )?; - persist_dump(&dst_path, tmp_dst)?; - Ok(()) - } else if !empty_db && !ignore_dump_if_db_exists { - bail!( - "database already exists at {:?}, try to delete it or rename it", - dst_path - .as_ref() - .canonicalize() - .unwrap_or_else(|_| dst_path.as_ref().to_owned()) - ) - } else if !src_path_exists && !ignore_missing_dump { - bail!("dump doesn't exist at {:?}", src_path.as_ref()) - } else { - // there is nothing to do - Ok(()) - } -} - -fn extract_dump( - dst_path: impl AsRef, - src_path: impl AsRef, -) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> { - // Setup a temp directory path in the same path as the database, to prevent cross devices - // references. 
- let temp_path = dst_path - .as_ref() - .parent() - .map(ToOwned::to_owned) - .unwrap_or_else(|| ".".into()); - - let tmp_src = tempfile::tempdir_in(temp_path)?; - let tmp_src_path = tmp_src.path(); - - from_tar_gz(&src_path, tmp_src_path)?; - - let meta_path = tmp_src_path.join(META_FILE_NAME); - let mut meta_file = File::open(&meta_path)?; - let meta: MetadataVersion = serde_json::from_reader(&mut meta_file)?; - - if !dst_path.as_ref().exists() { - std::fs::create_dir_all(dst_path.as_ref())?; - } - - let tmp_dst = tempfile::tempdir_in(dst_path.as_ref())?; - - info!( - "Loading dump {}, dump database version: {}, dump version: {}", - meta.dump_date() - .map(|t| format!("from {}", t)) - .unwrap_or_else(String::new), - meta.db_version(), - meta.version() - ); - - Ok((tmp_src, tmp_dst, meta)) -} - -fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result<()> { - let persisted_dump = tmp_dst.into_path(); - - // Delete everything in the `data.ms` except the tempdir. - if dst_path.as_ref().exists() { - for file in dst_path.as_ref().read_dir().unwrap() { - let file = file.unwrap().path(); - if file.file_name() == persisted_dump.file_name() { - continue; - } - - if file.is_file() { - std::fs::remove_file(&file)?; - } else { - std::fs::remove_dir_all(&file)?; - } - } - } - - // Move the whole content of the tempdir into the `data.ms`. - for file in persisted_dump.read_dir().unwrap() { - let file = file.unwrap().path(); - - std::fs::rename(&file, &dst_path.as_ref().join(file.file_name().unwrap()))?; - } - - // Delete the empty tempdir. - std::fs::remove_dir_all(&persisted_dump)?; - - Ok(()) -} diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs deleted file mode 100644 index 16111a191..000000000 --- a/meilisearch-lib/src/error.rs +++ /dev/null @@ -1,55 +0,0 @@ -use std::error::Error; -use std::fmt; - -use meilisearch_types::error::{Code, ErrorCode}; -use milli::UserError; - -#[derive(Debug)] -pub struct MilliError<'a>(pub &'a milli::Error); - -impl Error for MilliError<'_> {} - -impl fmt::Display for MilliError<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl ErrorCode for MilliError<'_> { - fn error_code(&self) -> Code { - match self.0 { - milli::Error::InternalError(_) => Code::Internal, - milli::Error::IoError(_) => Code::Internal, - milli::Error::UserError(ref error) => { - match error { - // TODO: wait for spec for new error codes. - UserError::SerdeJson(_) - | UserError::InvalidLmdbOpenOptions - | UserError::DocumentLimitReached - | UserError::AccessingSoftDeletedDocument { .. } - | UserError::UnknownInternalDocumentId { .. } => Code::Internal, - UserError::InvalidStoreFile => Code::InvalidStore, - UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, - UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached, - UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, - UserError::InvalidFilter(_) => Code::Filter, - UserError::MissingDocumentId { .. } => Code::MissingDocumentId, - UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { - Code::InvalidDocumentId - } - UserError::MissingPrimaryKey => Code::MissingPrimaryKey, - UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent, - UserError::SortRankingRuleMissing => Code::Sort, - UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, - UserError::InvalidSortableAttribute { .. 
} => Code::Sort, - UserError::CriterionError(_) => Code::InvalidRankingRule, - UserError::InvalidGeoField { .. } => Code::InvalidGeoField, - UserError::SortError(_) => Code::Sort, - UserError::InvalidMinTypoWordLenSetting(_, _) => { - Code::InvalidMinWordLengthForTypo - } - } - } - } - } -} diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs deleted file mode 100644 index 9cc3c033f..000000000 --- a/meilisearch-lib/src/index/dump.rs +++ /dev/null @@ -1,161 +0,0 @@ -use std::fs::{create_dir_all, File}; -use std::io::{BufReader, Seek, SeekFrom, Write}; -use std::path::Path; - -use anyhow::Context; -use indexmap::IndexMap; -use milli::documents::DocumentsBatchReader; -use milli::heed::{EnvOpenOptions, RoTxn}; -use milli::update::{IndexDocumentsConfig, IndexerConfig}; -use serde::{Deserialize, Serialize}; - -use crate::document_formats::read_ndjson; -use crate::index::updates::apply_settings_to_builder; - -use super::error::Result; -use super::{index::Index, Settings, Unchecked}; - -#[derive(Serialize, Deserialize)] -struct DumpMeta { - settings: Settings, - primary_key: Option, -} - -const META_FILE_NAME: &str = "meta.json"; -const DATA_FILE_NAME: &str = "documents.jsonl"; - -impl Index { - pub fn dump(&self, path: impl AsRef) -> Result<()> { - // acquire write txn make sure any ongoing write is finished before we start. - let txn = self.write_txn()?; - let path = path.as_ref().join(format!("indexes/{}", self.uuid)); - - create_dir_all(&path)?; - - self.dump_documents(&txn, &path)?; - self.dump_meta(&txn, &path)?; - - Ok(()) - } - - fn dump_documents(&self, txn: &RoTxn, path: impl AsRef) -> Result<()> { - let document_file_path = path.as_ref().join(DATA_FILE_NAME); - let mut document_file = File::create(&document_file_path)?; - - let documents = self.all_documents(txn)?; - let fields_ids_map = self.fields_ids_map(txn)?; - - // dump documents - let mut json_map = IndexMap::new(); - for document in documents { - let (_, reader) = document?; - - for (fid, bytes) in reader.iter() { - if let Some(name) = fields_ids_map.name(fid) { - json_map.insert(name, serde_json::from_slice::(bytes)?); - } - } - - serde_json::to_writer(&mut document_file, &json_map)?; - document_file.write_all(b"\n")?; - - json_map.clear(); - } - - Ok(()) - } - - fn dump_meta(&self, txn: &RoTxn, path: impl AsRef) -> Result<()> { - let meta_file_path = path.as_ref().join(META_FILE_NAME); - let mut meta_file = File::create(&meta_file_path)?; - - let settings = self.settings_txn(txn)?.into_unchecked(); - let primary_key = self.primary_key(txn)?.map(String::from); - let meta = DumpMeta { - settings, - primary_key, - }; - - serde_json::to_writer(&mut meta_file, &meta)?; - - Ok(()) - } - - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - size: usize, - indexer_config: &IndexerConfig, - ) -> anyhow::Result<()> { - let dir_name = src - .as_ref() - .file_name() - .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; - - let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); - create_dir_all(&dst_dir_path)?; - - let meta_path = src.as_ref().join(META_FILE_NAME); - let meta_file = File::open(meta_path)?; - let DumpMeta { - settings, - primary_key, - } = serde_json::from_reader(meta_file)?; - let settings = settings.check(); - - let mut options = EnvOpenOptions::new(); - options.map_size(size); - options.max_readers(1024); - let index = milli::Index::new(options, &dst_dir_path)?; - - let mut txn = index.write_txn()?; - - // Apply settings first - let mut builder = 
milli::update::Settings::new(&mut txn, &index, indexer_config); - - if let Some(primary_key) = primary_key { - builder.set_primary_key(primary_key); - } - - apply_settings_to_builder(&settings, &mut builder); - - builder.execute(|_| ())?; - - let document_file_path = src.as_ref().join(DATA_FILE_NAME); - let reader = BufReader::new(File::open(&document_file_path)?); - - let mut tmp_doc_file = tempfile::tempfile()?; - - let empty = match read_ndjson(reader, &mut tmp_doc_file) { - // if there was no document in the file it's because the index was empty - Ok(0) => true, - Ok(_) => false, - Err(e) => return Err(e.into()), - }; - - if !empty { - tmp_doc_file.seek(SeekFrom::Start(0))?; - - let documents_reader = DocumentsBatchReader::from_reader(tmp_doc_file)?; - - //If the document file is empty, we don't perform the document addition, to prevent - //a primary key error to be thrown. - let config = IndexDocumentsConfig::default(); - let builder = milli::update::IndexDocuments::new( - &mut txn, - &index, - indexer_config, - config, - |_| (), - )?; - let (builder, user_error) = builder.add_documents(documents_reader)?; - user_error?; - builder.execute()?; - } - - txn.commit()?; - index.prepare_for_closing().wait(); - - Ok(()) - } -} diff --git a/meilisearch-lib/src/index/error.rs b/meilisearch-lib/src/index/error.rs deleted file mode 100644 index f795ceaa4..000000000 --- a/meilisearch-lib/src/index/error.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::error::Error; - -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; -use serde_json::Value; - -use crate::{error::MilliError, update_file_store}; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum IndexError { - #[error("An internal error has occurred. 
`{0}`.")] - Internal(Box), - #[error("Document `{0}` not found.")] - DocumentNotFound(String), - #[error("{0}")] - Facet(#[from] FacetError), - #[error("{0}")] - Milli(#[from] milli::Error), -} - -internal_error!( - IndexError: std::io::Error, - milli::heed::Error, - fst::Error, - serde_json::Error, - update_file_store::UpdateFileStoreError, - milli::documents::Error -); - -impl ErrorCode for IndexError { - fn error_code(&self) -> Code { - match self { - IndexError::Internal(_) => Code::Internal, - IndexError::DocumentNotFound(_) => Code::DocumentNotFound, - IndexError::Facet(e) => e.error_code(), - IndexError::Milli(e) => MilliError(e).error_code(), - } - } -} - -impl From for IndexError { - fn from(error: milli::UserError) -> IndexError { - IndexError::Milli(error.into()) - } -} - -#[derive(Debug, thiserror::Error)] -pub enum FacetError { - #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] - InvalidExpression(&'static [&'static str], Value), -} - -impl ErrorCode for FacetError { - fn error_code(&self) -> Code { - match self { - FacetError::InvalidExpression(_, _) => Code::Filter, - } - } -} diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs deleted file mode 100644 index 3d6c47949..000000000 --- a/meilisearch-lib/src/index/index.rs +++ /dev/null @@ -1,333 +0,0 @@ -use std::collections::BTreeSet; -use std::fs::create_dir_all; -use std::marker::PhantomData; -use std::ops::Deref; -use std::path::Path; -use std::sync::Arc; - -use fst::IntoStreamer; -use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn}; -use milli::update::{IndexerConfig, Setting}; -use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET}; -use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value}; -use time::OffsetDateTime; -use uuid::Uuid; -use walkdir::WalkDir; - -use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS; - -use super::error::IndexError; -use super::error::Result; -use super::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings}; -use super::{Checked, Settings}; - -pub type Document = Map; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct IndexMeta { - #[serde(with = "time::serde::rfc3339")] - pub created_at: OffsetDateTime, - #[serde(with = "time::serde::rfc3339")] - pub updated_at: OffsetDateTime, - pub primary_key: Option, -} - -impl IndexMeta { - pub fn new(index: &Index) -> Result { - let txn = index.read_txn()?; - Self::new_txn(index, &txn) - } - - pub fn new_txn(index: &Index, txn: &milli::heed::RoTxn) -> Result { - let created_at = index.created_at(txn)?; - let updated_at = index.updated_at(txn)?; - let primary_key = index.primary_key(txn)?.map(String::from); - Ok(Self { - created_at, - updated_at, - primary_key, - }) - } -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct IndexStats { - #[serde(skip)] - pub size: u64, - pub number_of_documents: u64, - /// Whether the current index is performing an update. It is initially `None` when the - /// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. 
It is - /// later set to either true or false, we we retrieve the information from the `UpdateStore` - pub is_indexing: Option, - pub field_distribution: FieldDistribution, -} - -#[derive(Clone, derivative::Derivative)] -#[derivative(Debug)] -pub struct Index { - pub uuid: Uuid, - #[derivative(Debug = "ignore")] - pub inner: Arc, - #[derivative(Debug = "ignore")] - pub indexer_config: Arc, -} - -impl Deref for Index { - type Target = milli::Index; - - fn deref(&self) -> &Self::Target { - self.inner.as_ref() - } -} - -impl Index { - pub fn open( - path: impl AsRef, - size: usize, - uuid: Uuid, - update_handler: Arc, - ) -> Result { - log::debug!("opening index in {}", path.as_ref().display()); - create_dir_all(&path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - options.max_readers(1024); - let inner = Arc::new(milli::Index::new(options, &path)?); - Ok(Index { - inner, - uuid, - indexer_config: update_handler, - }) - } - - /// Asynchronously close the underlying index - pub fn close(self) { - self.inner.as_ref().clone().prepare_for_closing(); - } - - pub fn stats(&self) -> Result { - let rtxn = self.read_txn()?; - - Ok(IndexStats { - size: self.size(), - number_of_documents: self.number_of_documents(&rtxn)?, - is_indexing: None, - field_distribution: self.field_distribution(&rtxn)?, - }) - } - - pub fn meta(&self) -> Result { - IndexMeta::new(self) - } - pub fn settings(&self) -> Result> { - let txn = self.read_txn()?; - self.settings_txn(&txn) - } - - pub fn uuid(&self) -> Uuid { - self.uuid - } - - pub fn settings_txn(&self, txn: &RoTxn) -> Result> { - let displayed_attributes = self - .displayed_fields(txn)? - .map(|fields| fields.into_iter().map(String::from).collect()); - - let searchable_attributes = self - .user_defined_searchable_fields(txn)? - .map(|fields| fields.into_iter().map(String::from).collect()); - - let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect(); - - let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect(); - - let criteria = self - .criteria(txn)? - .into_iter() - .map(|c| c.to_string()) - .collect(); - - let stop_words = self - .stop_words(txn)? - .map(|stop_words| -> Result> { - Ok(stop_words.stream().into_strs()?.into_iter().collect()) - }) - .transpose()? - .unwrap_or_default(); - let distinct_field = self.distinct_field(txn)?.map(String::from); - - // in milli each word in the synonyms map were split on their separator. Since we lost - // this information we are going to put space between words. - let synonyms = self - .synonyms(txn)? - .iter() - .map(|(key, values)| { - ( - key.join(" "), - values.iter().map(|value| value.join(" ")).collect(), - ) - }) - .collect(); - - let min_typo_word_len = MinWordSizeTyposSetting { - one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), - two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), - }; - - let disabled_words = match self.exact_words(txn)? { - Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), - None => BTreeSet::new(), - }; - - let disabled_attributes = self - .exact_attributes(txn)? - .into_iter() - .map(String::from) - .collect(); - - let typo_tolerance = TypoSettings { - enabled: Setting::Set(self.authorize_typos(txn)?), - min_word_size_for_typos: Setting::Set(min_typo_word_len), - disable_on_words: Setting::Set(disabled_words), - disable_on_attributes: Setting::Set(disabled_attributes), - }; - - let faceting = FacetingSettings { - max_values_per_facet: Setting::Set( - self.max_values_per_facet(txn)? 
- .unwrap_or(DEFAULT_VALUES_PER_FACET), - ), - }; - - let pagination = PaginationSettings { - max_total_hits: Setting::Set( - self.pagination_max_total_hits(txn)? - .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), - ), - }; - - Ok(Settings { - displayed_attributes: match displayed_attributes { - Some(attrs) => Setting::Set(attrs), - None => Setting::Reset, - }, - searchable_attributes: match searchable_attributes { - Some(attrs) => Setting::Set(attrs), - None => Setting::Reset, - }, - filterable_attributes: Setting::Set(filterable_attributes), - sortable_attributes: Setting::Set(sortable_attributes), - ranking_rules: Setting::Set(criteria), - stop_words: Setting::Set(stop_words), - distinct_attribute: match distinct_field { - Some(field) => Setting::Set(field), - None => Setting::Reset, - }, - synonyms: Setting::Set(synonyms), - typo_tolerance: Setting::Set(typo_tolerance), - faceting: Setting::Set(faceting), - pagination: Setting::Set(pagination), - _kind: PhantomData, - }) - } - - /// Return the total number of documents contained in the index + the selected documents. - pub fn retrieve_documents>( - &self, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result<(u64, Vec)> { - let txn = self.read_txn()?; - - let fields_ids_map = self.fields_ids_map(&txn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - - let mut documents = Vec::new(); - for entry in self.all_documents(&txn)?.skip(offset).take(limit) { - let (_id, obkv) = entry?; - let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?; - let document = match &attributes_to_retrieve { - Some(attributes_to_retrieve) => permissive_json_pointer::select_values( - &document, - attributes_to_retrieve.iter().map(|s| s.as_ref()), - ), - None => document, - }; - documents.push(document); - } - - let number_of_documents = self.number_of_documents(&txn)?; - - Ok((number_of_documents, documents)) - } - - pub fn retrieve_document>( - &self, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - let txn = self.read_txn()?; - - let fields_ids_map = self.fields_ids_map(&txn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - - let internal_id = self - .external_documents_ids(&txn)? - .get(doc_id.as_bytes()) - .ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?; - - let document = self - .documents(&txn, std::iter::once(internal_id))? - .into_iter() - .next() - .map(|(_, d)| d) - .ok_or(IndexError::DocumentNotFound(doc_id))?; - - let document = obkv_to_json(&all_fields, &fields_ids_map, document)?; - let document = match &attributes_to_retrieve { - Some(attributes_to_retrieve) => permissive_json_pointer::select_values( - &document, - attributes_to_retrieve.iter().map(|s| s.as_ref()), - ), - None => document, - }; - - Ok(document) - } - - pub fn size(&self) -> u64 { - WalkDir::new(self.path()) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len()) - } - - pub fn snapshot(&self, path: impl AsRef) -> Result<()> { - let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid)); - create_dir_all(&dst)?; - dst.push("data.mdb"); - let _txn = self.write_txn()?; - self.inner.copy_to_path(dst, CompactionOption::Enabled)?; - Ok(()) - } -} - -/// When running tests, when a server instance is dropped, the environment is not actually closed, -/// leaving a lot of open file descriptors. 
-impl Drop for Index { - fn drop(&mut self) { - // When dropping the last instance of an index, we want to close the index - // Note that the close is actually performed only if all the instances a effectively - // dropped - - if Arc::strong_count(&self.inner) == 1 { - self.inner.as_ref().clone().prepare_for_closing(); - } - } -} diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs deleted file mode 100644 index 0aeaba14e..000000000 --- a/meilisearch-lib/src/index/mod.rs +++ /dev/null @@ -1,250 +0,0 @@ -pub use search::{ - HitsInfo, MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, - DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, -}; -pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked}; - -mod dump; -pub mod error; -mod search; -pub mod updates; - -#[allow(clippy::module_inception)] -mod index; - -pub use index::{Document, IndexMeta, IndexStats}; - -#[cfg(not(test))] -pub use index::Index; - -#[cfg(test)] -pub use test::MockIndex as Index; - -/// The index::test module provides means of mocking an index instance. I can be used throughout the -/// code for unit testing, in places where an index would normally be used. -#[cfg(test)] -pub mod test { - use std::path::{Path, PathBuf}; - use std::sync::Arc; - - use milli::update::{ - DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig, - }; - use nelson::Mocker; - use uuid::Uuid; - - use super::error::Result; - use super::index::Index; - use super::Document; - use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings}; - use crate::update_file_store::UpdateFileStore; - - #[derive(Clone)] - pub enum MockIndex { - Real(Index), - Mock(Arc), - } - - impl MockIndex { - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(Arc::new(mocker)) - } - - pub fn open( - path: impl AsRef, - size: usize, - uuid: Uuid, - update_handler: Arc, - ) -> Result { - let index = Index::open(path, size, uuid, update_handler)?; - Ok(Self::Real(index)) - } - - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - size: usize, - update_handler: &IndexerConfig, - ) -> anyhow::Result<()> { - Index::load_dump(src, dst, size, update_handler) - } - - pub fn uuid(&self) -> Uuid { - match self { - MockIndex::Real(index) => index.uuid(), - MockIndex::Mock(m) => unsafe { m.get("uuid").call(()) }, - } - } - - pub fn stats(&self) -> Result { - match self { - MockIndex::Real(index) => index.stats(), - MockIndex::Mock(m) => unsafe { m.get("stats").call(()) }, - } - } - - pub fn meta(&self) -> Result { - match self { - MockIndex::Real(index) => index.meta(), - MockIndex::Mock(_) => todo!(), - } - } - pub fn settings(&self) -> Result> { - match self { - MockIndex::Real(index) => index.settings(), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn retrieve_documents>( - &self, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result<(u64, Vec)> { - match self { - MockIndex::Real(index) => { - index.retrieve_documents(offset, limit, attributes_to_retrieve) - } - MockIndex::Mock(_) => todo!(), - } - } - - pub fn retrieve_document>( - &self, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - match self { - MockIndex::Real(index) => index.retrieve_document(doc_id, attributes_to_retrieve), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn size(&self) -> u64 { - match self { - MockIndex::Real(index) => index.size(), - MockIndex::Mock(_) => 
todo!(), - } - } - - pub fn snapshot(&self, path: impl AsRef) -> Result<()> { - match self { - MockIndex::Real(index) => index.snapshot(path), - MockIndex::Mock(m) => unsafe { m.get("snapshot").call(path.as_ref()) }, - } - } - - pub fn close(self) { - match self { - MockIndex::Real(index) => index.close(), - MockIndex::Mock(m) => unsafe { m.get("close").call(()) }, - } - } - - pub fn perform_search(&self, query: SearchQuery) -> Result { - match self { - MockIndex::Real(index) => index.perform_search(query), - MockIndex::Mock(m) => unsafe { m.get("perform_search").call(query) }, - } - } - - pub fn dump(&self, path: impl AsRef) -> Result<()> { - match self { - MockIndex::Real(index) => index.dump(path), - MockIndex::Mock(m) => unsafe { m.get("dump").call(path.as_ref()) }, - } - } - - pub fn update_documents( - &self, - method: IndexDocumentsMethod, - primary_key: Option, - file_store: UpdateFileStore, - contents: impl Iterator, - ) -> Result>> { - match self { - MockIndex::Real(index) => { - index.update_documents(method, primary_key, file_store, contents) - } - MockIndex::Mock(mocker) => unsafe { - mocker - .get("update_documents") - .call((method, primary_key, file_store, contents)) - }, - } - } - - pub fn update_settings(&self, settings: &Settings) -> Result<()> { - match self { - MockIndex::Real(index) => index.update_settings(settings), - MockIndex::Mock(m) => unsafe { m.get("update_settings").call(settings) }, - } - } - - pub fn update_primary_key(&self, primary_key: String) -> Result { - match self { - MockIndex::Real(index) => index.update_primary_key(primary_key), - MockIndex::Mock(m) => unsafe { m.get("update_primary_key").call(primary_key) }, - } - } - - pub fn delete_documents(&self, ids: &[String]) -> Result { - match self { - MockIndex::Real(index) => index.delete_documents(ids), - MockIndex::Mock(m) => unsafe { m.get("delete_documents").call(ids) }, - } - } - - pub fn clear_documents(&self) -> Result<()> { - match self { - MockIndex::Real(index) => index.clear_documents(), - MockIndex::Mock(m) => unsafe { m.get("clear_documents").call(()) }, - } - } - } - - #[test] - fn test_faux_index() { - let faux = Mocker::default(); - faux.when("snapshot") - .times(2) - .then(|_: &Path| -> Result<()> { Ok(()) }); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } - - #[test] - #[should_panic] - fn test_faux_unexisting_method_stub() { - let faux = Mocker::default(); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } - - #[test] - #[should_panic] - fn test_faux_panic() { - let faux = Mocker::default(); - faux.when("snapshot") - .times(2) - .then(|_: &Path| -> Result<()> { - panic!(); - }); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } -} diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs deleted file mode 100644 index ab2dd142d..000000000 --- a/meilisearch-lib/src/index_controller/error.rs +++ /dev/null @@ -1,72 +0,0 @@ -use std::error::Error; - -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::index_uid::IndexUidFormatError; -use meilisearch_types::internal_error; -use tokio::task::JoinError; - -use super::DocumentAdditionFormat; -use crate::document_formats::DocumentFormatError; -use crate::dump::error::DumpError; 
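The MockIndex wrapper removed above relies on a cfg(test) re-export swap: production builds see the real index under the common name, while test builds see an enum that can either delegate to the real implementation or replay a scripted answer. A minimal sketch of that pattern, with a hypothetical RealStore/Store pair and a plain closure in place of the nelson Mocker:

use std::sync::Arc;

// The real implementation, always compiled.
pub struct RealStore;

impl RealStore {
    pub fn size(&self) -> u64 {
        42
    }
}

// Production code sees the real type under the common name...
#[cfg(not(test))]
pub use self::RealStore as Store;

// ...while tests see a wrapper that either delegates or replays a canned answer.
#[cfg(test)]
pub enum Store {
    Real(RealStore),
    Mock(Arc<dyn Fn() -> u64 + Send + Sync>),
}

#[cfg(test)]
impl Store {
    pub fn size(&self) -> u64 {
        match self {
            Store::Real(inner) => inner.size(),
            Store::Mock(answer) => answer(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn mocked_size_is_used() {
        let store = Store::Mock(Arc::new(|| 7));
        assert_eq!(store.size(), 7);
    }
}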
-use crate::index::error::IndexError; -use crate::tasks::error::TaskError; -use crate::update_file_store::UpdateFileStoreError; - -use crate::index_resolver::error::IndexResolverError; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum IndexControllerError { - #[error("Index creation must have an uid")] - MissingUid, - #[error("{0}")] - IndexResolver(#[from] IndexResolverError), - #[error("{0}")] - IndexError(#[from] IndexError), - #[error("An internal error has occurred. `{0}`.")] - Internal(Box), - #[error("{0}")] - TaskError(#[from] TaskError), - #[error("{0}")] - DumpError(#[from] DumpError), - #[error("{0}")] - DocumentFormatError(#[from] DocumentFormatError), - #[error("A {0} payload is missing.")] - MissingPayload(DocumentAdditionFormat), - #[error("The provided payload reached the size limit.")] - PayloadTooLarge, -} - -internal_error!(IndexControllerError: JoinError, UpdateFileStoreError); - -impl From for IndexControllerError { - fn from(other: actix_web::error::PayloadError) -> Self { - match other { - actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge, - _ => Self::Internal(Box::new(other)), - } - } -} - -impl ErrorCode for IndexControllerError { - fn error_code(&self) -> Code { - match self { - IndexControllerError::MissingUid => Code::BadRequest, - IndexControllerError::IndexResolver(e) => e.error_code(), - IndexControllerError::IndexError(e) => e.error_code(), - IndexControllerError::Internal(_) => Code::Internal, - IndexControllerError::TaskError(e) => e.error_code(), - IndexControllerError::DocumentFormatError(e) => e.error_code(), - IndexControllerError::MissingPayload(_) => Code::MissingPayload, - IndexControllerError::PayloadTooLarge => Code::PayloadTooLarge, - IndexControllerError::DumpError(e) => e.error_code(), - } - } -} - -impl From for IndexControllerError { - fn from(err: IndexUidFormatError) -> Self { - IndexResolverError::from(err).into() - } -} diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs deleted file mode 100644 index 87644a44a..000000000 --- a/meilisearch-lib/src/index_controller/mod.rs +++ /dev/null @@ -1,783 +0,0 @@ -use meilisearch_auth::SearchRules; -use std::collections::BTreeMap; -use std::fmt; -use std::io::Cursor; -use std::path::{Path, PathBuf}; -use std::str::FromStr; -use std::sync::Arc; -use std::time::Duration; - -use actix_web::error::PayloadError; -use bytes::Bytes; -use futures::Stream; -use futures::StreamExt; -use meilisearch_types::index_uid::IndexUid; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use tokio::sync::RwLock; -use tokio::task::spawn_blocking; -use tokio::time::sleep; -use uuid::Uuid; - -use crate::document_formats::{read_csv, read_json, read_ndjson}; -use crate::dump::{self, load_dump, DumpHandler}; -use crate::index::{ - Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, -}; -use crate::index_resolver::error::IndexResolverError; -use crate::options::{IndexerOpts, SchedulerConfig}; -use crate::snapshot::{load_snapshot, SnapshotService}; -use crate::tasks::error::TaskError; -use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId}; -use crate::tasks::{ - BatchHandler, EmptyBatchHandler, Scheduler, SnapshotHandler, TaskFilter, TaskStore, -}; -use error::Result; - -use self::error::IndexControllerError; -use crate::index_resolver::index_store::{IndexStore, MapIndexStore}; -use 
crate::index_resolver::meta_store::{HeedMetaStore, IndexMetaStore}; -use crate::index_resolver::{create_index_resolver, IndexResolver}; -use crate::update_file_store::UpdateFileStore; - -pub mod error; -pub mod versioning; - -/// Concrete implementation of the IndexController, exposed by meilisearch-lib -pub type MeiliSearch = IndexController; - -pub type Payload = Box< - dyn Stream> + Send + Sync + 'static + Unpin, ->; - -pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result { - let mut options = milli::heed::EnvOpenOptions::new(); - options.map_size(size); - options.max_dbs(20); - options.open(path) -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct IndexMetadata { - #[serde(skip)] - pub uuid: Uuid, - pub uid: String, - #[serde(flatten)] - pub meta: IndexMeta, -} - -#[derive(Clone, Debug)] -pub struct IndexSettings { - pub uid: Option, - pub primary_key: Option, -} - -pub struct IndexController { - pub index_resolver: Arc>, - scheduler: Arc>, - task_store: TaskStore, - pub update_file_store: UpdateFileStore, -} - -/// Need a custom implementation for clone because deriving require that U and I are clone. -impl Clone for IndexController { - fn clone(&self) -> Self { - Self { - index_resolver: self.index_resolver.clone(), - scheduler: self.scheduler.clone(), - update_file_store: self.update_file_store.clone(), - task_store: self.task_store.clone(), - } - } -} - -#[derive(Debug)] -pub enum DocumentAdditionFormat { - Json, - Csv, - Ndjson, -} - -impl fmt::Display for DocumentAdditionFormat { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - DocumentAdditionFormat::Json => write!(f, "json"), - DocumentAdditionFormat::Ndjson => write!(f, "ndjson"), - DocumentAdditionFormat::Csv => write!(f, "csv"), - } - } -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct Stats { - pub database_size: u64, - #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] - pub last_update: Option, - pub indexes: BTreeMap, -} - -#[allow(clippy::large_enum_variant)] -#[derive(derivative::Derivative)] -#[derivative(Debug)] -pub enum Update { - DeleteDocuments(Vec), - ClearDocuments, - Settings { - settings: Settings, - /// Indicates whether the update was a deletion - is_deletion: bool, - allow_index_creation: bool, - }, - DocumentAddition { - #[derivative(Debug = "ignore")] - payload: Payload, - primary_key: Option, - method: IndexDocumentsMethod, - format: DocumentAdditionFormat, - allow_index_creation: bool, - }, - DeleteIndex, - CreateIndex { - primary_key: Option, - }, - UpdateIndex { - primary_key: Option, - }, -} - -#[derive(Default, Debug)] -pub struct IndexControllerBuilder { - max_index_size: Option, - max_task_store_size: Option, - snapshot_dir: Option, - import_snapshot: Option, - snapshot_interval: Option, - ignore_snapshot_if_db_exists: bool, - ignore_missing_snapshot: bool, - schedule_snapshot: bool, - dump_src: Option, - dump_dst: Option, - ignore_dump_if_db_exists: bool, - ignore_missing_dump: bool, -} - -impl IndexControllerBuilder { - pub fn build( - self, - db_path: impl AsRef, - indexer_options: IndexerOpts, - scheduler_config: SchedulerConfig, - ) -> anyhow::Result { - let index_size = self - .max_index_size - .ok_or_else(|| anyhow::anyhow!("Missing index size"))?; - let task_store_size = self - .max_task_store_size - .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; - - if let Some(ref path) = self.import_snapshot { - log::info!("Loading from 
snapshot {:?}", path); - load_snapshot( - db_path.as_ref(), - path, - self.ignore_snapshot_if_db_exists, - self.ignore_missing_snapshot, - )?; - } else if let Some(ref src_path) = self.dump_src { - load_dump( - db_path.as_ref(), - src_path, - self.ignore_dump_if_db_exists, - self.ignore_missing_dump, - index_size, - task_store_size, - &indexer_options, - )?; - } else if db_path.as_ref().exists() { - // Directory could be pre-created without any database in. - let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); - if !db_is_empty { - versioning::check_version_file(db_path.as_ref())?; - } - } - - std::fs::create_dir_all(db_path.as_ref())?; - - let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?); - - let update_file_store = UpdateFileStore::new(&db_path)?; - // Create or overwrite the version file for this DB - versioning::create_version_file(db_path.as_ref())?; - - let index_resolver = Arc::new(create_index_resolver( - &db_path, - index_size, - &indexer_options, - meta_env.clone(), - update_file_store.clone(), - )?); - - let dump_path = self - .dump_dst - .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; - - let dump_handler = Arc::new(DumpHandler::new( - dump_path, - db_path.as_ref().into(), - update_file_store.clone(), - task_store_size, - index_size, - meta_env.clone(), - index_resolver.clone(), - )); - let task_store = TaskStore::new(meta_env)?; - - // register all the batch handlers for use with the scheduler. - let handlers: Vec> = vec![ - index_resolver.clone(), - dump_handler, - Arc::new(SnapshotHandler), - // dummy handler to catch all empty batches - Arc::new(EmptyBatchHandler), - ]; - let scheduler = Scheduler::new(task_store.clone(), handlers, scheduler_config)?; - - if self.schedule_snapshot { - let snapshot_period = self - .snapshot_interval - .ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?; - let snapshot_path = self - .snapshot_dir - .ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?; - - let snapshot_service = SnapshotService { - db_path: db_path.as_ref().to_path_buf(), - snapshot_period, - snapshot_path, - index_size, - meta_env_size: task_store_size, - scheduler: scheduler.clone(), - }; - - tokio::task::spawn_local(snapshot_service.run()); - } - - Ok(IndexController { - index_resolver, - scheduler, - update_file_store, - task_store, - }) - } - - /// Set the index controller builder's max update store size. - pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self { - let max_update_store_size = clamp_to_page_size(max_update_store_size); - self.max_task_store_size.replace(max_update_store_size); - self - } - - pub fn set_max_index_size(&mut self, size: usize) -> &mut Self { - let size = clamp_to_page_size(size); - self.max_index_size.replace(size); - self - } - - /// Set the index controller builder's snapshot path. - pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self { - self.snapshot_dir.replace(snapshot_dir); - self - } - - /// Set the index controller builder's ignore snapshot if db exists. - pub fn set_ignore_snapshot_if_db_exists( - &mut self, - ignore_snapshot_if_db_exists: bool, - ) -> &mut Self { - self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists; - self - } - - /// Set the index controller builder's ignore missing snapshot. 
- pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self { - self.ignore_missing_snapshot = ignore_missing_snapshot; - self - } - - /// Set the index controller builder's import snapshot. - pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { - self.import_snapshot.replace(import_snapshot); - self - } - - /// Set the index controller builder's snapshot interval sec. - pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self { - self.snapshot_interval = Some(snapshot_interval); - self - } - - /// Set the index controller builder's schedule snapshot. - pub fn set_schedule_snapshot(&mut self) -> &mut Self { - self.schedule_snapshot = true; - self - } - - /// Set the index controller builder's dump src. - pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { - self.dump_src.replace(dump_src); - self - } - - /// Set the index controller builder's dump dst. - pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { - self.dump_dst.replace(dump_dst); - self - } - - /// Set the index controller builder's ignore dump if db exists. - pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self { - self.ignore_dump_if_db_exists = ignore_dump_if_db_exists; - self - } - - /// Set the index controller builder's ignore missing dump. - pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self { - self.ignore_missing_dump = ignore_missing_dump; - self - } -} - -impl IndexController -where - U: IndexMetaStore, - I: IndexStore, -{ - pub fn builder() -> IndexControllerBuilder { - IndexControllerBuilder::default() - } - - pub async fn register_update(&self, uid: String, update: Update) -> Result { - let index_uid = IndexUid::from_str(&uid).map_err(IndexResolverError::from)?; - let content = match update { - Update::DeleteDocuments(ids) => TaskContent::DocumentDeletion { - index_uid, - deletion: DocumentDeletion::Ids(ids), - }, - Update::ClearDocuments => TaskContent::DocumentDeletion { - index_uid, - deletion: DocumentDeletion::Clear, - }, - Update::Settings { - settings, - is_deletion, - allow_index_creation, - } => TaskContent::SettingsUpdate { - settings, - is_deletion, - allow_index_creation, - index_uid, - }, - Update::DocumentAddition { - mut payload, - primary_key, - format, - method, - allow_index_creation, - } => { - let mut buffer = Vec::new(); - while let Some(bytes) = payload.next().await { - let bytes = bytes?; - buffer.extend_from_slice(&bytes); - } - let (content_uuid, mut update_file) = self.update_file_store.new_update()?; - let documents_count = tokio::task::spawn_blocking(move || -> Result<_> { - // check if the payload is empty, and return an error - if buffer.is_empty() { - return Err(IndexControllerError::MissingPayload(format)); - } - - let reader = Cursor::new(buffer); - let count = match format { - DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?, - DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?, - DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?, - }; - - update_file.persist()?; - - Ok(count) - }) - .await??; - - TaskContent::DocumentAddition { - content_uuid, - merge_strategy: method, - primary_key, - documents_count, - allow_index_creation, - index_uid, - } - } - Update::DeleteIndex => TaskContent::IndexDeletion { index_uid }, - Update::CreateIndex { primary_key } => TaskContent::IndexCreation { - primary_key, - index_uid, - }, - Update::UpdateIndex { 
primary_key } => TaskContent::IndexUpdate { - primary_key, - index_uid, - }, - }; - - let task = self.task_store.register(content).await?; - self.scheduler.read().await.notify(); - - Ok(task) - } - - pub async fn register_dump_task(&self) -> Result { - let uid = dump::generate_uid(); - let content = TaskContent::Dump { uid }; - let task = self.task_store.register(content).await?; - self.scheduler.read().await.notify(); - Ok(task) - } - - pub async fn get_task(&self, id: TaskId, filter: Option) -> Result { - let task = self.scheduler.read().await.get_task(id, filter).await?; - Ok(task) - } - - pub async fn get_index_task(&self, index_uid: String, task_id: TaskId) -> Result { - let creation_task_id = self - .index_resolver - .get_index_creation_task_id(index_uid.clone()) - .await?; - if task_id < creation_task_id { - return Err(TaskError::UnexistingTask(task_id).into()); - } - - let mut filter = TaskFilter::default(); - filter.filter_index(index_uid); - let task = self - .scheduler - .read() - .await - .get_task(task_id, Some(filter)) - .await?; - - Ok(task) - } - - pub async fn list_tasks( - &self, - filter: Option, - limit: Option, - offset: Option, - ) -> Result> { - let tasks = self - .scheduler - .read() - .await - .list_tasks(offset, filter, limit) - .await?; - - Ok(tasks) - } - - pub async fn list_index_task( - &self, - index_uid: String, - limit: Option, - offset: Option, - ) -> Result> { - let task_id = self - .index_resolver - .get_index_creation_task_id(index_uid.clone()) - .await?; - - let mut filter = TaskFilter::default(); - filter.filter_index(index_uid); - - let tasks = self - .scheduler - .read() - .await - .list_tasks( - Some(offset.unwrap_or_default() + task_id), - Some(filter), - limit, - ) - .await?; - - Ok(tasks) - } - - pub async fn list_indexes(&self) -> Result> { - let indexes = self.index_resolver.list().await?; - let mut ret = Vec::new(); - for (uid, index) in indexes { - let meta = index.meta()?; - let meta = IndexMetadata { - uuid: index.uuid(), - uid, - meta, - }; - ret.push(meta); - } - - Ok(ret) - } - - pub async fn settings(&self, uid: String) -> Result> { - let index = self.index_resolver.get_index(uid).await?; - let settings = spawn_blocking(move || index.settings()).await??; - Ok(settings) - } - - /// Return the total number of documents contained in the index + the selected documents. 
- pub async fn documents( - &self, - uid: String, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result<(u64, Vec)> { - let index = self.index_resolver.get_index(uid).await?; - let result = - spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)) - .await??; - Ok(result) - } - - pub async fn document( - &self, - uid: String, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - let index = self.index_resolver.get_index(uid).await?; - let document = - spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)) - .await??; - Ok(document) - } - - pub async fn search(&self, uid: String, query: SearchQuery) -> Result { - let index = self.index_resolver.get_index(uid).await?; - let result = spawn_blocking(move || index.perform_search(query)).await??; - Ok(result) - } - - pub async fn get_index(&self, uid: String) -> Result { - let index = self.index_resolver.get_index(uid.clone()).await?; - let uuid = index.uuid(); - let meta = spawn_blocking(move || index.meta()).await??; - let meta = IndexMetadata { uuid, uid, meta }; - Ok(meta) - } - - pub async fn get_index_stats(&self, uid: String) -> Result { - let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?; - // Check if the currently indexing update is from our index. - let is_indexing = processing_tasks - .first() - .map_or(false, |task| task.index_uid().map_or(false, |u| u == uid)); - - let index = self.index_resolver.get_index(uid).await?; - let mut stats = spawn_blocking(move || index.stats()).await??; - stats.is_indexing = Some(is_indexing); - - Ok(stats) - } - - pub async fn get_all_stats(&self, search_rules: &SearchRules) -> Result { - let mut last_task: Option = None; - let mut indexes = BTreeMap::new(); - let mut database_size = 0; - let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?; - - for (index_uid, index) in self.index_resolver.list().await? { - if !search_rules.is_index_authorized(&index_uid) { - continue; - } - - let (mut stats, meta) = - spawn_blocking::<_, Result<(IndexStats, IndexMeta)>>(move || { - Ok((index.stats()?, index.meta()?)) - }) - .await??; - - database_size += stats.size; - - last_task = last_task.map_or(Some(meta.updated_at), |last| { - Some(last.max(meta.updated_at)) - }); - - // Check if the currently indexing update is from our index. - stats.is_indexing = processing_tasks - .first() - .and_then(|p| p.index_uid().map(|u| u == index_uid)) - .or(Some(false)); - - indexes.insert(index_uid, stats); - } - - Ok(Stats { - database_size, - last_update: last_task, - indexes, - }) - } -} - -pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { - loop { - match Arc::try_unwrap(item) { - Ok(item) => return item, - Err(item_arc) => { - item = item_arc; - sleep(Duration::from_millis(100)).await; - continue; - } - } - } -} - -// Clamp the provided value to be a multiple of system page size. 
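Because the division in the helper below is integer division, it rounds down to a whole number of pages; with a hypothetical 4096-byte page, for instance, a requested 10_000 bytes becomes 8_192 and anything smaller than one page collapses to zero:

// Illustration only: same rounding as the helper below, with the page size passed in
// explicitly instead of queried through the page_size crate.
fn clamp(size: usize, page: usize) -> usize {
    size / page * page
}

fn main() {
    assert_eq!(clamp(10_000, 4096), 8_192);
    assert_eq!(clamp(4_096, 4096), 4_096);
    assert_eq!(clamp(1_000, 4096), 0); // below one page, the clamp yields zero
}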
-fn clamp_to_page_size(size: usize) -> usize { - size / page_size::get() * page_size::get() -} - -#[cfg(test)] -mod test { - use futures::future::ok; - use mockall::predicate::eq; - use nelson::Mocker; - - use crate::index::error::Result as IndexResult; - use crate::index::{HitsInfo, Index}; - use crate::index::{ - DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - }; - use crate::index_resolver::index_store::MockIndexStore; - use crate::index_resolver::meta_store::MockIndexMetaStore; - use crate::index_resolver::IndexResolver; - - use super::*; - - impl IndexController { - pub fn mock( - index_resolver: Arc>, - task_store: TaskStore, - update_file_store: UpdateFileStore, - scheduler: Arc>, - ) -> Self { - IndexController { - index_resolver, - task_store, - update_file_store, - scheduler, - } - } - } - - #[actix_rt::test] - async fn test_search_simple() { - let index_uid = "test"; - let index_uuid = Uuid::new_v4(); - let query = SearchQuery { - q: Some(String::from("hello world")), - offset: 10, - limit: 0, - page: Some(1), - hits_per_page: Some(10), - attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()), - attributes_to_crop: None, - crop_length: 18, - attributes_to_highlight: None, - show_matches_position: true, - filter: None, - sort: None, - facets: None, - highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), - highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), - crop_marker: DEFAULT_CROP_MARKER(), - matching_strategy: Default::default(), - }; - - let result = SearchResult { - hits: vec![], - query: "hello world".to_string(), - hits_info: HitsInfo::OffsetLimit { - limit: 24, - offset: 0, - estimated_total_hits: 29, - }, - processing_time_ms: 50, - facet_distribution: None, - }; - - let mut uuid_store = MockIndexMetaStore::new(); - uuid_store - .expect_get() - .with(eq(index_uid.to_owned())) - .returning(move |s| { - Box::pin(ok(( - s, - Some(crate::index_resolver::meta_store::IndexMeta { - uuid: index_uuid, - creation_task_id: 0, - }), - ))) - }); - - let mut index_store = MockIndexStore::new(); - let result_clone = result.clone(); - let query_clone = query.clone(); - index_store - .expect_get() - .with(eq(index_uuid)) - .returning(move |_uuid| { - let result = result_clone.clone(); - let query = query_clone.clone(); - let mocker = Mocker::default(); - mocker - .when::>("perform_search") - .once() - .then(move |q| { - assert_eq!(&q, &query); - Ok(result.clone()) - }); - let index = Index::mock(mocker); - Box::pin(ok(Some(index))) - }); - - let task_store_mocker = nelson::Mocker::default(); - let mocker = Mocker::default(); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = Arc::new(IndexResolver::new( - uuid_store, - index_store, - update_file_store.clone(), - )); - let task_store = TaskStore::mock(task_store_mocker); - let scheduler = Scheduler::new( - task_store.clone(), - vec![index_resolver.clone()], - SchedulerConfig::default(), - ) - .unwrap(); - let index_controller = - IndexController::mock(index_resolver, task_store, update_file_store, scheduler); - - let r = index_controller - .search(index_uid.to_owned(), query.clone()) - .await - .unwrap(); - assert_eq!(r, result); - } -} diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs deleted file mode 100644 index 7ecaa45c5..000000000 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::error::Error; -use std::fmt; - -use 
meilisearch_types::{internal_error, Code, ErrorCode}; - -use crate::{ - document_formats::DocumentFormatError, - index::error::IndexError, - index_controller::{update_file_store::UpdateFileStoreError, DocumentAdditionFormat}, -}; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -#[allow(clippy::large_enum_variant)] -pub enum UpdateLoopError { - #[error("Task `{0}` not found.")] - UnexistingUpdate(u64), - #[error("An internal error has occurred. `{0}`.")] - Internal(Box), - #[error( - "update store was shut down due to a fatal error, please check your logs for more info." - )] - FatalUpdateStoreError, - #[error("{0}")] - DocumentFormatError(#[from] DocumentFormatError), - #[error("The provided payload reached the size limit.")] - PayloadTooLarge, - #[error("A {0} payload is missing.")] - MissingPayload(DocumentAdditionFormat), - #[error("{0}")] - IndexError(#[from] IndexError), -} - -impl From> for UpdateLoopError -where - T: Sync + Send + 'static + fmt::Debug, -{ - fn from(other: tokio::sync::mpsc::error::SendError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for UpdateLoopError { - fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for UpdateLoopError { - fn from(other: actix_web::error::PayloadError) -> Self { - match other { - actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge, - _ => Self::Internal(Box::new(other)), - } - } -} - -internal_error!( - UpdateLoopError: heed::Error, - std::io::Error, - serde_json::Error, - tokio::task::JoinError, - UpdateFileStoreError -); - -impl ErrorCode for UpdateLoopError { - fn error_code(&self) -> Code { - match self { - Self::UnexistingUpdate(_) => Code::TaskNotFound, - Self::Internal(_) => Code::Internal, - Self::FatalUpdateStoreError => Code::Internal, - Self::DocumentFormatError(error) => error.error_code(), - Self::PayloadTooLarge => Code::PayloadTooLarge, - Self::MissingPayload(_) => Code::MissingPayload, - Self::IndexError(e) => e.error_code(), - } - } -} diff --git a/meilisearch-lib/src/index_controller/versioning/error.rs b/meilisearch-lib/src/index_controller/versioning/error.rs deleted file mode 100644 index ba284ec91..000000000 --- a/meilisearch-lib/src/index_controller/versioning/error.rs +++ /dev/null @@ -1,19 +0,0 @@ -#[derive(thiserror::Error, Debug)] -pub enum VersionFileError { - #[error( - "Meilisearch (v{}) failed to infer the version of the database. Please consider using a dump to load your data.", - env!("CARGO_PKG_VERSION").to_string() - )] - MissingVersionFile, - #[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")] - MalformedVersionFile, - #[error( - "Expected Meilisearch engine version: {major}.{minor}.{patch}, current engine version: {}. 
To update Meilisearch use a dump.", - env!("CARGO_PKG_VERSION").to_string() - )] - VersionMismatch { - major: String, - minor: String, - patch: String, - }, -} diff --git a/meilisearch-lib/src/index_controller/versioning/mod.rs b/meilisearch-lib/src/index_controller/versioning/mod.rs deleted file mode 100644 index f2c83bdad..000000000 --- a/meilisearch-lib/src/index_controller/versioning/mod.rs +++ /dev/null @@ -1,56 +0,0 @@ -use std::fs; -use std::io::ErrorKind; -use std::path::Path; - -use self::error::VersionFileError; - -mod error; - -pub const VERSION_FILE_NAME: &str = "VERSION"; - -static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR"); -static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR"); -static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); - -// Persists the version of the current Meilisearch binary to a VERSION file -pub fn create_version_file(db_path: &Path) -> anyhow::Result<()> { - let version_path = db_path.join(VERSION_FILE_NAME); - fs::write( - version_path, - format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH), - )?; - - Ok(()) -} - -// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch. -pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> { - let version_path = db_path.join(VERSION_FILE_NAME); - - match fs::read_to_string(&version_path) { - Ok(version) => { - let version_components = version.split('.').collect::>(); - let (major, minor, patch) = match &version_components[..] { - [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()), - _ => return Err(VersionFileError::MalformedVersionFile.into()), - }; - - if major != VERSION_MAJOR || minor != VERSION_MINOR { - return Err(VersionFileError::VersionMismatch { - major, - minor, - patch, - } - .into()); - } - } - Err(error) => { - return match error.kind() { - ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()), - _ => Err(error.into()), - } - } - } - - Ok(()) -} diff --git a/meilisearch-lib/src/index_resolver/error.rs b/meilisearch-lib/src/index_resolver/error.rs deleted file mode 100644 index d973d2229..000000000 --- a/meilisearch-lib/src/index_resolver/error.rs +++ /dev/null @@ -1,71 +0,0 @@ -use std::fmt; - -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::index_uid::IndexUidFormatError; -use meilisearch_types::internal_error; -use tokio::sync::mpsc::error::SendError as MpscSendError; -use tokio::sync::oneshot::error::RecvError as OneshotRecvError; -use uuid::Uuid; - -use crate::{error::MilliError, index::error::IndexError, update_file_store::UpdateFileStoreError}; - -pub type Result = std::result::Result; - -#[derive(thiserror::Error, Debug)] -pub enum IndexResolverError { - #[error("{0}")] - IndexError(#[from] IndexError), - #[error("Index `{0}` already exists.")] - IndexAlreadyExists(String), - #[error("Index `{0}` not found.")] - UnexistingIndex(String), - #[error("A primary key is already present. It's impossible to update it")] - ExistingPrimaryKey, - #[error("An internal error has occurred. 
`{0}`.")] - Internal(Box), - #[error("The creation of the `{0}` index has failed due to `Index uuid is already assigned`.")] - UuidAlreadyExists(Uuid), - #[error("{0}")] - Milli(#[from] milli::Error), - #[error("{0}")] - BadlyFormatted(#[from] IndexUidFormatError), -} - -impl From> for IndexResolverError -where - T: Send + Sync + 'static + fmt::Debug, -{ - fn from(other: tokio::sync::mpsc::error::SendError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for IndexResolverError { - fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { - Self::Internal(Box::new(other)) - } -} - -internal_error!( - IndexResolverError: milli::heed::Error, - uuid::Error, - std::io::Error, - tokio::task::JoinError, - serde_json::Error, - UpdateFileStoreError -); - -impl ErrorCode for IndexResolverError { - fn error_code(&self) -> Code { - match self { - IndexResolverError::IndexError(e) => e.error_code(), - IndexResolverError::IndexAlreadyExists(_) => Code::IndexAlreadyExists, - IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound, - IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent, - IndexResolverError::Internal(_) => Code::Internal, - IndexResolverError::UuidAlreadyExists(_) => Code::CreateIndex, - IndexResolverError::Milli(e) => MilliError(e).error_code(), - IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid, - } - } -} diff --git a/meilisearch-lib/src/index_resolver/index_store.rs b/meilisearch-lib/src/index_resolver/index_store.rs deleted file mode 100644 index ea3c7125a..000000000 --- a/meilisearch-lib/src/index_resolver/index_store.rs +++ /dev/null @@ -1,108 +0,0 @@ -use std::collections::HashMap; -use std::convert::TryFrom; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use milli::update::IndexerConfig; -use tokio::fs; -use tokio::sync::RwLock; -use tokio::task::spawn_blocking; -use uuid::Uuid; - -use super::error::{IndexResolverError, Result}; -use crate::index::Index; -use crate::options::IndexerOpts; - -type AsyncMap = Arc>>; - -#[async_trait::async_trait] -#[cfg_attr(test, mockall::automock)] -pub trait IndexStore { - async fn create(&self, uuid: Uuid) -> Result; - async fn get(&self, uuid: Uuid) -> Result>; - async fn delete(&self, uuid: Uuid) -> Result>; -} - -pub struct MapIndexStore { - index_store: AsyncMap, - path: PathBuf, - index_size: usize, - indexer_config: Arc, -} - -impl MapIndexStore { - pub fn new( - path: impl AsRef, - index_size: usize, - indexer_opts: &IndexerOpts, - ) -> anyhow::Result { - let indexer_config = Arc::new(IndexerConfig::try_from(indexer_opts)?); - let path = path.as_ref().join("indexes/"); - let index_store = Arc::new(RwLock::new(HashMap::new())); - Ok(Self { - index_store, - path, - index_size, - indexer_config, - }) - } -} - -#[async_trait::async_trait] -impl IndexStore for MapIndexStore { - async fn create(&self, uuid: Uuid) -> Result { - // We need to keep the lock until we are sure the db file has been opened correctly, to - // ensure that another db is not created at the same time. 
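Both `create` here and `get` further down follow the same tokio RwLock discipline: try a cheap read-locked fast path first, drop the read guard before taking the write lock so the two acquisitions cannot deadlock, and re-check under the write lock before inserting. A self-contained sketch of that get-or-create pattern, with a hypothetical Cache type (assuming tokio with the rt and macros features):

use std::collections::HashMap;
use std::sync::Arc;

use tokio::sync::RwLock;

struct Cache {
    inner: Arc<RwLock<HashMap<u32, String>>>,
}

impl Cache {
    async fn get_or_create(&self, key: u32) -> String {
        {
            let guard = self.inner.read().await;
            if let Some(value) = guard.get(&key) {
                return value.clone();
            }
        } // the read guard is dropped here, otherwise `write().await` below would deadlock

        let mut guard = self.inner.write().await;
        // Another task may have created the entry while we waited for the write lock,
        // so check again under the lock before inserting.
        guard.entry(key).or_insert_with(|| format!("index-{key}")).clone()
    }
}

#[tokio::main]
async fn main() {
    let cache = Cache { inner: Arc::new(RwLock::new(HashMap::new())) };
    assert_eq!(cache.get_or_create(1).await, "index-1");
    assert_eq!(cache.get_or_create(1).await, "index-1"); // second call hits the read-locked fast path
}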
- let mut lock = self.index_store.write().await; - - if let Some(index) = lock.get(&uuid) { - return Ok(index.clone()); - } - let path = self.path.join(format!("{}", uuid)); - if path.exists() { - return Err(IndexResolverError::UuidAlreadyExists(uuid)); - } - - let index_size = self.index_size; - let update_handler = self.indexer_config.clone(); - let index = spawn_blocking(move || -> Result { - let index = Index::open(path, index_size, uuid, update_handler)?; - Ok(index) - }) - .await??; - - lock.insert(uuid, index.clone()); - - Ok(index) - } - - async fn get(&self, uuid: Uuid) -> Result> { - let guard = self.index_store.read().await; - match guard.get(&uuid) { - Some(index) => Ok(Some(index.clone())), - None => { - // drop the guard here so we can perform the write after without deadlocking; - drop(guard); - let path = self.path.join(format!("{}", uuid)); - if !path.exists() { - return Ok(None); - } - - let index_size = self.index_size; - let update_handler = self.indexer_config.clone(); - let index = - spawn_blocking(move || Index::open(path, index_size, uuid, update_handler)) - .await??; - self.index_store.write().await.insert(uuid, index.clone()); - Ok(Some(index)) - } - } - } - - async fn delete(&self, uuid: Uuid) -> Result> { - let db_path = self.path.join(format!("{}", uuid)); - fs::remove_dir_all(db_path).await?; - let index = self.index_store.write().await.remove(&uuid); - Ok(index) - } -} diff --git a/meilisearch-lib/src/index_resolver/meta_store.rs b/meilisearch-lib/src/index_resolver/meta_store.rs deleted file mode 100644 index f335d9923..000000000 --- a/meilisearch-lib/src/index_resolver/meta_store.rs +++ /dev/null @@ -1,223 +0,0 @@ -use std::collections::HashSet; -use std::fs::{create_dir_all, File}; -use std::io::{BufRead, BufReader, Write}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use walkdir::WalkDir; - -use milli::heed::types::{SerdeBincode, Str}; -use milli::heed::{CompactionOption, Database, Env}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::error::{IndexResolverError, Result}; -use crate::tasks::task::TaskId; - -#[derive(Serialize, Deserialize)] -pub struct DumpEntry { - pub uid: String, - pub index_meta: IndexMeta, -} - -const UUIDS_DB_PATH: &str = "index_uuids"; - -#[async_trait::async_trait] -#[cfg_attr(test, mockall::automock)] -pub trait IndexMetaStore: Sized { - // Create a new entry for `name`. Return an error if `err` and the entry already exists, return - // the uuid otherwise. - async fn get(&self, uid: String) -> Result<(String, Option)>; - async fn delete(&self, uid: String) -> Result>; - async fn list(&self) -> Result>; - async fn insert(&self, name: String, meta: IndexMeta) -> Result<()>; - async fn snapshot(&self, path: PathBuf) -> Result>; - async fn get_size(&self) -> Result; - async fn dump(&self, path: PathBuf) -> Result<()>; -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct IndexMeta { - pub uuid: Uuid, - pub creation_task_id: TaskId, -} - -#[derive(Clone)] -pub struct HeedMetaStore { - env: Arc, - db: Database>, -} - -impl Drop for HeedMetaStore { - fn drop(&mut self) { - if Arc::strong_count(&self.env) == 1 { - self.env.as_ref().clone().prepare_for_closing(); - } - } -} - -impl HeedMetaStore { - pub fn new(env: Arc) -> Result { - let db = env.create_database(Some("uuids"))?; - Ok(Self { env, db }) - } - - fn get(&self, name: &str) -> Result> { - let env = self.env.clone(); - let db = self.db; - let txn = env.read_txn()?; - match db.get(&txn, name)? 
{ - Some(meta) => Ok(Some(meta)), - None => Ok(None), - } - } - - fn delete(&self, uid: String) -> Result> { - let env = self.env.clone(); - let db = self.db; - let mut txn = env.write_txn()?; - match db.get(&txn, &uid)? { - Some(meta) => { - db.delete(&mut txn, &uid)?; - txn.commit()?; - Ok(Some(meta)) - } - None => Ok(None), - } - } - - fn list(&self) -> Result> { - let env = self.env.clone(); - let db = self.db; - let txn = env.read_txn()?; - let mut entries = Vec::new(); - for entry in db.iter(&txn)? { - let (name, meta) = entry?; - entries.push((name.to_string(), meta)) - } - Ok(entries) - } - - pub(crate) fn insert(&self, name: String, meta: IndexMeta) -> Result<()> { - let env = self.env.clone(); - let db = self.db; - let mut txn = env.write_txn()?; - - if db.get(&txn, &name)?.is_some() { - return Err(IndexResolverError::IndexAlreadyExists(name)); - } - - db.put(&mut txn, &name, &meta)?; - txn.commit()?; - Ok(()) - } - - fn snapshot(&self, mut path: PathBuf) -> Result> { - // Write transaction to acquire a lock on the database. - let txn = self.env.write_txn()?; - let mut entries = HashSet::new(); - for entry in self.db.iter(&txn)? { - let (_, IndexMeta { uuid, .. }) = entry?; - entries.insert(uuid); - } - - // only perform snapshot if there are indexes - if !entries.is_empty() { - path.push(UUIDS_DB_PATH); - create_dir_all(&path).unwrap(); - path.push("data.mdb"); - self.env.copy_to_path(path, CompactionOption::Enabled)?; - } - Ok(entries) - } - - fn get_size(&self) -> Result { - Ok(WalkDir::new(self.env.path()) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len())) - } - - pub fn dump(&self, path: PathBuf) -> Result<()> { - let dump_path = path.join(UUIDS_DB_PATH); - create_dir_all(&dump_path)?; - let dump_file_path = dump_path.join("data.jsonl"); - let mut dump_file = File::create(&dump_file_path)?; - - let txn = self.env.read_txn()?; - for entry in self.db.iter(&txn)? { - let (uid, index_meta) = entry?; - let uid = uid.to_string(); - - let entry = DumpEntry { uid, index_meta }; - serde_json::to_writer(&mut dump_file, &entry)?; - dump_file.write_all(b"\n").unwrap(); - } - - Ok(()) - } - - pub fn load_dump(src: impl AsRef, env: Arc) -> Result<()> { - let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); - let indexes = File::open(&src_indexes)?; - let mut indexes = BufReader::new(indexes); - let mut line = String::new(); - - let db = Self::new(env)?; - let mut txn = db.env.write_txn()?; - - loop { - match indexes.read_line(&mut line) { - Ok(0) => break, - Ok(_) => { - let DumpEntry { uid, index_meta } = serde_json::from_str(&line)?; - db.db.put(&mut txn, &uid, &index_meta)?; - } - Err(e) => return Err(e.into()), - } - - line.clear(); - } - txn.commit()?; - - Ok(()) - } -} - -#[async_trait::async_trait] -impl IndexMetaStore for HeedMetaStore { - async fn get(&self, name: String) -> Result<(String, Option)> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.get(&name).map(|res| (name, res))).await? - } - - async fn delete(&self, uid: String) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.delete(uid)).await? - } - - async fn list(&self) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.list()).await? 
- } - - async fn insert(&self, name: String, meta: IndexMeta) -> Result<()> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.insert(name, meta)).await? - } - - async fn snapshot(&self, path: PathBuf) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.snapshot(path)).await? - } - - async fn get_size(&self) -> Result { - self.get_size() - } - - async fn dump(&self, path: PathBuf) -> Result<()> { - let this = self.clone(); - Ok(tokio::task::spawn_blocking(move || this.dump(path)).await??) - } -} diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs deleted file mode 100644 index 284f64942..000000000 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ /dev/null @@ -1,685 +0,0 @@ -pub mod error; -pub mod index_store; -pub mod meta_store; - -use std::convert::TryFrom; -use std::path::Path; -use std::sync::Arc; - -use error::{IndexResolverError, Result}; -use index_store::{IndexStore, MapIndexStore}; -use meilisearch_types::error::ResponseError; -use meilisearch_types::index_uid::IndexUid; -use meta_store::{HeedMetaStore, IndexMetaStore}; -use milli::heed::Env; -use milli::update::{DocumentDeletionResult, IndexerConfig}; -use time::OffsetDateTime; -use tokio::task::spawn_blocking; -use uuid::Uuid; - -use crate::index::{error::Result as IndexResult, Index}; -use crate::options::IndexerOpts; -use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult}; -use crate::update_file_store::UpdateFileStore; - -use self::meta_store::IndexMeta; - -pub type HardStateIndexResolver = IndexResolver; - -#[cfg(not(test))] -pub use real::IndexResolver; - -#[cfg(test)] -pub use test::MockIndexResolver as IndexResolver; - -pub fn create_index_resolver( - path: impl AsRef, - index_size: usize, - indexer_opts: &IndexerOpts, - meta_env: Arc, - file_store: UpdateFileStore, -) -> anyhow::Result { - let uuid_store = HeedMetaStore::new(meta_env)?; - let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?; - Ok(IndexResolver::new(uuid_store, index_store, file_store)) -} - -mod real { - use super::*; - - pub struct IndexResolver { - pub(super) index_uuid_store: U, - pub(super) index_store: I, - pub(super) file_store: UpdateFileStore, - } - - impl IndexResolver { - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - env: Arc, - indexer_opts: &IndexerOpts, - ) -> anyhow::Result<()> { - HeedMetaStore::load_dump(&src, env)?; - let indexes_path = src.as_ref().join("indexes"); - let indexes = indexes_path.read_dir()?; - let indexer_config = IndexerConfig::try_from(indexer_opts)?; - for index in indexes { - Index::load_dump(&index?.path(), &dst, index_db_size, &indexer_config)?; - } - - Ok(()) - } - } - - impl IndexResolver - where - U: IndexMetaStore, - I: IndexStore, - { - pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self { - Self { - index_uuid_store, - index_store, - file_store, - } - } - - pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) { - fn get_content_uuid(task: &Task) -> Uuid { - match task { - Task { - content: TaskContent::DocumentAddition { content_uuid, .. }, - .. 
- } => *content_uuid, - _ => panic!("unexpected task in the document addition batch"), - } - } - - let content_uuids = tasks.iter().map(get_content_uuid).collect::>(); - - match tasks.first() { - Some(Task { - id, - content: - TaskContent::DocumentAddition { - merge_strategy, - primary_key, - allow_index_creation, - index_uid, - .. - }, - .. - }) => { - let primary_key = primary_key.clone(); - let method = *merge_strategy; - - let index = if *allow_index_creation { - self.get_or_create_index(index_uid.clone(), *id).await - } else { - self.get_index(index_uid.as_str().to_string()).await - }; - - // If the index doesn't exist and we are not allowed to create it with the first - // task, we must fails the whole batch. - let now = OffsetDateTime::now_utc(); - let index = match index { - Ok(index) => index, - Err(e) => { - let error = ResponseError::from(e); - for task in tasks.iter_mut() { - task.events.push(TaskEvent::Failed { - error: error.clone(), - timestamp: now, - }); - } - - return; - } - }; - - let file_store = self.file_store.clone(); - let result = spawn_blocking(move || { - index.update_documents( - method, - primary_key, - file_store, - content_uuids.into_iter(), - ) - }) - .await; - - match result { - Ok(Ok(results)) => { - for (task, result) in tasks.iter_mut().zip(results) { - let event = match result { - Ok(addition) => { - TaskEvent::succeeded(TaskResult::DocumentAddition { - indexed_documents: addition.indexed_documents, - }) - } - Err(error) => { - TaskEvent::failed(IndexResolverError::from(error)) - } - }; - task.events.push(event); - } - } - Ok(Err(e)) => { - let event = TaskEvent::failed(e); - for task in tasks.iter_mut() { - task.events.push(event.clone()); - } - } - Err(e) => { - let event = TaskEvent::failed(IndexResolverError::from(e)); - for task in tasks.iter_mut() { - task.events.push(event.clone()); - } - } - } - } - _ => panic!("invalid batch!"), - } - } - - pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> { - self.file_store.delete(content_uuid).await?; - Ok(()) - } - - async fn process_task_inner(&self, task: &Task) -> Result { - match &task.content { - TaskContent::DocumentAddition { .. } => { - panic!("updates should be handled by batch") - } - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Ids(ids), - index_uid, - } => { - let ids = ids.clone(); - let index = self.get_index(index_uid.clone().into_inner()).await?; - - let DocumentDeletionResult { - deleted_documents, .. - } = spawn_blocking(move || index.delete_documents(&ids)).await??; - - Ok(TaskResult::DocumentDeletion { deleted_documents }) - } - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Clear, - index_uid, - } => { - let index = self.get_index(index_uid.clone().into_inner()).await?; - let deleted_documents = spawn_blocking(move || -> IndexResult { - let number_documents = index.stats()?.number_of_documents; - index.clear_documents()?; - Ok(number_documents) - }) - .await??; - - Ok(TaskResult::ClearAll { deleted_documents }) - } - TaskContent::SettingsUpdate { - settings, - is_deletion, - allow_index_creation, - index_uid, - } => { - let index = if *is_deletion || !*allow_index_creation { - self.get_index(index_uid.clone().into_inner()).await? - } else { - self.get_or_create_index(index_uid.clone(), task.id).await? 
- }; - - let settings = settings.clone(); - spawn_blocking(move || index.update_settings(&settings.check())).await??; - - Ok(TaskResult::Other) - } - TaskContent::IndexDeletion { index_uid } => { - let index = self.delete_index(index_uid.clone().into_inner()).await?; - - let deleted_documents = spawn_blocking(move || -> IndexResult { - Ok(index.stats()?.number_of_documents) - }) - .await??; - - Ok(TaskResult::ClearAll { deleted_documents }) - } - TaskContent::IndexCreation { - primary_key, - index_uid, - } => { - let index = self.create_index(index_uid.clone(), task.id).await?; - - if let Some(primary_key) = primary_key { - let primary_key = primary_key.clone(); - spawn_blocking(move || index.update_primary_key(primary_key)).await??; - } - - Ok(TaskResult::Other) - } - TaskContent::IndexUpdate { - primary_key, - index_uid, - } => { - let index = self.get_index(index_uid.clone().into_inner()).await?; - - if let Some(primary_key) = primary_key { - let primary_key = primary_key.clone(); - spawn_blocking(move || index.update_primary_key(primary_key)).await??; - } - - Ok(TaskResult::Other) - } - _ => unreachable!("Invalid task for index resolver"), - } - } - - pub async fn process_task(&self, task: &mut Task) { - match self.process_task_inner(task).await { - Ok(res) => task.events.push(TaskEvent::succeeded(res)), - Err(e) => task.events.push(TaskEvent::failed(e)), - } - } - - pub async fn dump(&self, path: impl AsRef) -> Result<()> { - for (_, index) in self.list().await? { - index.dump(&path)?; - } - self.index_uuid_store.dump(path.as_ref().to_owned()).await?; - Ok(()) - } - - async fn create_index(&self, uid: IndexUid, creation_task_id: TaskId) -> Result { - match self.index_uuid_store.get(uid.into_inner()).await? { - (uid, Some(_)) => Err(IndexResolverError::IndexAlreadyExists(uid)), - (uid, None) => { - let uuid = Uuid::new_v4(); - let index = self.index_store.create(uuid).await?; - match self - .index_uuid_store - .insert( - uid, - IndexMeta { - uuid, - creation_task_id, - }, - ) - .await - { - Err(e) => { - match self.index_store.delete(uuid).await { - Ok(Some(index)) => { - index.close(); - } - Ok(None) => (), - Err(e) => log::error!("Error while deleting index: {:?}", e), - } - Err(e) - } - Ok(()) => Ok(index), - } - } - } - } - - /// Get or create an index with name `uid`. - pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result { - match self.create_index(uid, task_id).await { - Ok(index) => Ok(index), - Err(IndexResolverError::IndexAlreadyExists(uid)) => self.get_index(uid).await, - Err(e) => Err(e), - } - } - - pub async fn list(&self) -> Result> { - let uuids = self.index_uuid_store.list().await?; - let mut indexes = Vec::new(); - for (name, IndexMeta { uuid, .. }) in uuids { - match self.index_store.get(uuid).await? { - Some(index) => indexes.push((name, index)), - None => { - // we found an unexisting index, we remove it from the uuid store - let _ = self.index_uuid_store.delete(name).await; - } - } - } - - Ok(indexes) - } - - pub async fn delete_index(&self, uid: String) -> Result { - match self.index_uuid_store.delete(uid.clone()).await? { - Some(IndexMeta { uuid, .. }) => match self.index_store.delete(uuid).await? { - Some(index) => { - index.clone().close(); - Ok(index) - } - None => Err(IndexResolverError::UnexistingIndex(uid)), - }, - None => Err(IndexResolverError::UnexistingIndex(uid)), - } - } - - pub async fn get_index(&self, uid: String) -> Result { - match self.index_uuid_store.get(uid).await? { - (name, Some(IndexMeta { uuid, .. 
})) => { - match self.index_store.get(uuid).await? { - Some(index) => Ok(index), - None => { - // For some reason we got a uuid to an unexisting index, we return an error, - // and remove the uuid from the uuid store. - let _ = self.index_uuid_store.delete(name.clone()).await; - Err(IndexResolverError::UnexistingIndex(name)) - } - } - } - (name, _) => Err(IndexResolverError::UnexistingIndex(name)), - } - } - - pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result { - let (uid, meta) = self.index_uuid_store.get(index_uid).await?; - meta.map( - |IndexMeta { - creation_task_id, .. - }| creation_task_id, - ) - .ok_or(IndexResolverError::UnexistingIndex(uid)) - } - } -} - -#[cfg(test)] -mod test { - use crate::index::IndexStats; - - use super::index_store::MockIndexStore; - use super::meta_store::MockIndexMetaStore; - use super::*; - - use futures::future::ok; - use milli::FieldDistribution; - use nelson::Mocker; - - pub enum MockIndexResolver { - Real(super::real::IndexResolver), - Mock(Mocker), - } - - impl MockIndexResolver { - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - env: Arc, - indexer_opts: &IndexerOpts, - ) -> anyhow::Result<()> { - super::real::IndexResolver::load_dump(src, dst, index_db_size, env, indexer_opts) - } - } - - impl MockIndexResolver - where - U: IndexMetaStore, - I: IndexStore, - { - pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self { - Self::Real(super::real::IndexResolver { - index_uuid_store, - index_store, - file_store, - }) - } - - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(mocker) - } - - pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) { - match self { - IndexResolver::Real(r) => r.process_document_addition_batch(tasks).await, - IndexResolver::Mock(m) => unsafe { - m.get("process_document_addition_batch").call(tasks) - }, - } - } - - pub async fn process_task(&self, task: &mut Task) { - match self { - IndexResolver::Real(r) => r.process_task(task).await, - IndexResolver::Mock(m) => unsafe { m.get("process_task").call(task) }, - } - } - - pub async fn dump(&self, path: impl AsRef) -> Result<()> { - match self { - IndexResolver::Real(r) => r.dump(path).await, - IndexResolver::Mock(_) => todo!(), - } - } - - /// Get or create an index with name `uid`. 
- pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result { - match self { - IndexResolver::Real(r) => r.get_or_create_index(uid, task_id).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn list(&self) -> Result> { - match self { - IndexResolver::Real(r) => r.list().await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn delete_index(&self, uid: String) -> Result { - match self { - IndexResolver::Real(r) => r.delete_index(uid).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn get_index(&self, uid: String) -> Result { - match self { - IndexResolver::Real(r) => r.get_index(uid).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result { - match self { - IndexResolver::Real(r) => r.get_index_creation_task_id(index_uid).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> { - match self { - IndexResolver::Real(r) => r.delete_content_file(content_uuid).await, - IndexResolver::Mock(m) => unsafe { - m.get("delete_content_file").call(content_uuid) - }, - } - } - } - - #[actix_rt::test] - async fn test_remove_unknown_index() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store - .expect_delete() - .once() - .returning(|_| Box::pin(ok(None))); - - let index_store = MockIndexStore::new(); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Failed { .. })); - } - - #[actix_rt::test] - async fn test_remove_index() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store.expect_delete().once().returning(|_| { - Box::pin(ok(Some(IndexMeta { - uuid: Uuid::new_v4(), - creation_task_id: 1, - }))) - }); - - let mut index_store = MockIndexStore::new(); - index_store.expect_delete().once().returning(|_| { - let mocker = Mocker::default(); - mocker.when::<(), ()>("close").then(|_| ()); - mocker - .when::<(), IndexResult>("stats") - .then(|_| { - Ok(IndexStats { - size: 10, - number_of_documents: 10, - is_indexing: None, - field_distribution: FieldDistribution::default(), - }) - }); - Box::pin(ok(Some(Index::mock(mocker)))) - }); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Succeeded { .. 
})); - } - - #[actix_rt::test] - async fn test_delete_documents() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store.expect_get().once().returning(|_| { - Box::pin(ok(( - "test".to_string(), - Some(IndexMeta { - uuid: Uuid::new_v4(), - creation_task_id: 1, - }), - ))) - }); - - let mut index_store = MockIndexStore::new(); - index_store.expect_get().once().returning(|_| { - let mocker = Mocker::default(); - mocker - .when::<(), IndexResult<()>>("clear_documents") - .once() - .then(|_| Ok(())); - mocker - .when::<(), IndexResult>("stats") - .once() - .then(|_| { - Ok(IndexStats { - size: 10, - number_of_documents: 10, - is_indexing: None, - field_distribution: FieldDistribution::default(), - }) - }); - Box::pin(ok(Some(Index::mock(mocker)))) - }); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Clear, - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Succeeded { .. })); - } - - #[actix_rt::test] - async fn test_index_update() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store.expect_get().once().returning(|_| { - Box::pin(ok(( - "test".to_string(), - Some(IndexMeta { - uuid: Uuid::new_v4(), - creation_task_id: 1, - }), - ))) - }); - - let mut index_store = MockIndexStore::new(); - index_store.expect_get().once().returning(|_| { - let mocker = Mocker::default(); - - mocker - .when::>("update_primary_key") - .once() - .then(|_| { - Ok(crate::index::IndexMeta { - created_at: OffsetDateTime::now_utc(), - updated_at: OffsetDateTime::now_utc(), - primary_key: Some("key".to_string()), - }) - }); - Box::pin(ok(Some(Index::mock(mocker)))) - }); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::IndexUpdate { - primary_key: Some("key".to_string()), - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Succeeded { .. })); - } -} diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs deleted file mode 100644 index 7fe0984dc..000000000 --- a/meilisearch-lib/src/lib.rs +++ /dev/null @@ -1,50 +0,0 @@ -#[macro_use] -pub mod error; -pub mod options; - -mod analytics; -mod dump; -pub mod index; -pub mod index_controller; -mod index_resolver; -mod snapshot; -pub mod tasks; -mod update_file_store; - -use std::env::VarError; -use std::ffi::OsStr; -use std::path::Path; - -pub use index_controller::MeiliSearch; -pub use milli; -pub use milli::heed; - -mod compression; -pub mod document_formats; - -/// Check if a db is empty. It does not provide any information on the -/// validity of the data in it. -/// We consider a database as non empty when it's a non empty directory. -pub fn is_empty_db(db_path: impl AsRef) -> bool { - let db_path = db_path.as_ref(); - - if !db_path.exists() { - true - // if we encounter an error or if the db is a file we consider the db non empty - } else if let Ok(dir) = db_path.read_dir() { - dir.count() == 0 - } else { - true - } -} - -/// Checks if the key is defined in the environment variables. 
-/// If not, inserts it with the given value. -pub fn export_to_env_if_not_present(key: &str, value: T) -where - T: AsRef, -{ - if let Err(VarError::NotPresent) = std::env::var(key) { - std::env::set_var(key, value); - } -} diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs deleted file mode 100644 index b84dd94a2..000000000 --- a/meilisearch-lib/src/options.rs +++ /dev/null @@ -1,205 +0,0 @@ -use crate::export_to_env_if_not_present; - -use core::fmt; -use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; - -use byte_unit::{Byte, ByteError}; -use clap::Parser; -use milli::update::IndexerConfig; -use serde::{Deserialize, Serialize}; -use sysinfo::{RefreshKind, System, SystemExt}; - -const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; -const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; -const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; -const DEFAULT_LOG_EVERY_N: usize = 100000; - -#[derive(Debug, Clone, Parser, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", deny_unknown_fields)] -pub struct IndexerOpts { - /// Sets the amount of documents to skip before printing - /// a log regarding the indexing advancement. - #[serde(skip_serializing, default = "default_log_every_n")] - #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k - pub log_every_n: usize, - - /// Grenad max number of chunks in bytes. - #[serde(skip_serializing)] - #[clap(long, hide = true)] - pub max_nb_chunks: Option, - - /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch - /// uses no more than two thirds of available memory. - #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] - #[serde(default)] - pub max_indexing_memory: MaxMemory, - - /// Sets the maximum number of threads Meilisearch can use during indexation. By default, the - /// indexer avoids using more than half of a machine's total processing units. This ensures - /// Meilisearch is always ready to perform searches, even while you are updating an index. - #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] - #[serde(default)] - pub max_indexing_threads: MaxThreads, -} - -#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", deny_unknown_fields)] -pub struct SchedulerConfig { - /// Deactivates auto-batching when provided. - #[clap(long, env = DISABLE_AUTO_BATCHING)] - #[serde(default)] - pub disable_auto_batching: bool, -} - -impl IndexerOpts { - /// Exports the values to their corresponding env vars if they are not set. 
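// --- Editor's sketch (illustrative, not part of the removed file) ---
// The `export_to_env_if_not_present` helper deleted above only writes an
// environment variable when it is not already set, presumably so values that
// were set explicitly in the environment keep precedence over exported option
// values. A minimal, self-contained illustration of that behaviour; the
// function and variable names below are hypothetical stand-ins:
fn set_env_if_absent(key: &str, value: impl AsRef<std::ffi::OsStr>) {
    // Only export the value when the variable is currently missing.
    if let Err(std::env::VarError::NotPresent) = std::env::var(key) {
        std::env::set_var(key, value);
    }
}

fn main() {
    std::env::set_var("MEILI_MAX_INDEXING_THREADS", "8");
    set_env_if_absent("MEILI_MAX_INDEXING_THREADS", "4"); // already set: kept as "8"
    set_env_if_absent("DISABLE_AUTO_BATCHING", "true");   // absent: gets exported
    assert_eq!(std::env::var("MEILI_MAX_INDEXING_THREADS").unwrap(), "8");
    assert_eq!(std::env::var("DISABLE_AUTO_BATCHING").unwrap(), "true");
}
// --- end of editor's sketch ---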
- pub fn export_to_env(self) { - let IndexerOpts { - max_indexing_memory, - max_indexing_threads, - log_every_n: _, - max_nb_chunks: _, - } = self; - if let Some(max_indexing_memory) = max_indexing_memory.0 { - export_to_env_if_not_present( - MEILI_MAX_INDEXING_MEMORY, - max_indexing_memory.to_string(), - ); - } - export_to_env_if_not_present( - MEILI_MAX_INDEXING_THREADS, - max_indexing_threads.0.to_string(), - ); - } -} - -impl TryFrom<&IndexerOpts> for IndexerConfig { - type Error = anyhow::Error; - - fn try_from(other: &IndexerOpts) -> Result { - let thread_pool = rayon::ThreadPoolBuilder::new() - .num_threads(*other.max_indexing_threads) - .build()?; - - Ok(Self { - log_every_n: Some(other.log_every_n), - max_nb_chunks: other.max_nb_chunks, - max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), - thread_pool: Some(thread_pool), - max_positions_per_attributes: None, - ..Default::default() - }) - } -} - -impl Default for IndexerOpts { - fn default() -> Self { - Self { - log_every_n: 100_000, - max_nb_chunks: None, - max_indexing_memory: MaxMemory::default(), - max_indexing_threads: MaxThreads::default(), - } - } -} - -impl SchedulerConfig { - pub fn export_to_env(self) { - let SchedulerConfig { - disable_auto_batching, - } = self; - export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); - } -} - -/// A type used to detect the max memory available and use 2/3 of it. -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct MaxMemory(Option); - -impl FromStr for MaxMemory { - type Err = ByteError; - - fn from_str(s: &str) -> Result { - Byte::from_str(s).map(Some).map(MaxMemory) - } -} - -impl Default for MaxMemory { - fn default() -> MaxMemory { - MaxMemory( - total_memory_bytes() - .map(|bytes| bytes * 2 / 3) - .map(Byte::from_bytes), - ) - } -} - -impl fmt::Display for MaxMemory { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.0 { - Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), - None => f.write_str("unknown"), - } - } -} - -impl Deref for MaxMemory { - type Target = Option; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl MaxMemory { - pub fn unlimited() -> Self { - Self(None) - } -} - -/// Returns the total amount of bytes available or `None` if this system isn't supported. 
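// --- Editor's sketch (illustrative, not part of the removed file) ---
// `MaxMemory::default()` above budgets two thirds of the machine's total RAM,
// after converting the KiB figure reported by sysinfo into bytes. A
// plain-arithmetic illustration on a hypothetical 12 GiB machine:
fn main() {
    let total_kib: u64 = 12 * 1024 * 1024;  // total RAM as reported, in KiB
    let total_bytes = total_kib * 1024;     // KiB into bytes
    let budget_bytes = total_bytes * 2 / 3; // two thirds of total memory
    assert_eq!(budget_bytes, 8 * 1024 * 1024 * 1024); // exactly 8 GiB
}
// --- end of editor's sketch ---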
-fn total_memory_bytes() -> Option { - if System::IS_SUPPORTED { - let memory_kind = RefreshKind::new().with_memory(); - let mut system = System::new_with_specifics(memory_kind); - system.refresh_memory(); - Some(system.total_memory() * 1024) // KiB into bytes - } else { - None - } -} - -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct MaxThreads(usize); - -impl FromStr for MaxThreads { - type Err = ParseIntError; - - fn from_str(s: &str) -> Result { - usize::from_str(s).map(Self) - } -} - -impl Default for MaxThreads { - fn default() -> Self { - MaxThreads(num_cpus::get() / 2) - } -} - -impl fmt::Display for MaxThreads { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl Deref for MaxThreads { - type Target = usize; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -fn default_log_every_n() -> usize { - DEFAULT_LOG_EVERY_N -} diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs deleted file mode 100644 index 4566a627e..000000000 --- a/meilisearch-lib/src/snapshot.rs +++ /dev/null @@ -1,203 +0,0 @@ -use std::fs; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::Duration; - -use anyhow::bail; -use fs_extra::dir::{self, CopyOptions}; -use log::{info, trace}; -use meilisearch_auth::open_auth_store_env; -use milli::heed::CompactionOption; -use tokio::sync::RwLock; -use tokio::time::sleep; -use walkdir::WalkDir; - -use crate::compression::from_tar_gz; -use crate::index_controller::open_meta_env; -use crate::index_controller::versioning::VERSION_FILE_NAME; -use crate::tasks::Scheduler; - -pub struct SnapshotService { - pub(crate) db_path: PathBuf, - pub(crate) snapshot_period: Duration, - pub(crate) snapshot_path: PathBuf, - pub(crate) index_size: usize, - pub(crate) meta_env_size: usize, - pub(crate) scheduler: Arc>, -} - -impl SnapshotService { - pub async fn run(self) { - info!( - "Snapshot scheduled every {}s.", - self.snapshot_period.as_secs() - ); - loop { - let snapshot_job = SnapshotJob { - dest_path: self.snapshot_path.clone(), - src_path: self.db_path.clone(), - meta_env_size: self.meta_env_size, - index_size: self.index_size, - }; - self.scheduler.write().await.schedule_snapshot(snapshot_job); - sleep(self.snapshot_period).await; - } - } -} - -pub fn load_snapshot( - db_path: impl AsRef, - snapshot_path: impl AsRef, - ignore_snapshot_if_db_exists: bool, - ignore_missing_snapshot: bool, -) -> anyhow::Result<()> { - let empty_db = crate::is_empty_db(&db_path); - let snapshot_path_exists = snapshot_path.as_ref().exists(); - - if empty_db && snapshot_path_exists { - match from_tar_gz(snapshot_path, &db_path) { - Ok(()) => Ok(()), - Err(e) => { - //clean created db folder - std::fs::remove_dir_all(&db_path)?; - Err(e) - } - } - } else if !empty_db && !ignore_snapshot_if_db_exists { - bail!( - "database already exists at {:?}, try to delete it or rename it", - db_path - .as_ref() - .canonicalize() - .unwrap_or_else(|_| db_path.as_ref().to_owned()) - ) - } else if !snapshot_path_exists && !ignore_missing_snapshot { - bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref()) - } else { - Ok(()) - } -} - -#[derive(Debug)] -pub struct SnapshotJob { - dest_path: PathBuf, - src_path: PathBuf, - - meta_env_size: usize, - index_size: usize, -} - -impl SnapshotJob { - pub async fn run(self) -> anyhow::Result<()> { - tokio::task::spawn_blocking(|| self.run_sync()).await??; - - Ok(()) - } - - fn run_sync(self) -> anyhow::Result<()> { - trace!("Performing snapshot."); - - let 
snapshot_dir = self.dest_path.clone(); - std::fs::create_dir_all(&snapshot_dir)?; - let temp_snapshot_dir = tempfile::tempdir()?; - let temp_snapshot_path = temp_snapshot_dir.path(); - - self.snapshot_version_file(temp_snapshot_path)?; - self.snapshot_meta_env(temp_snapshot_path)?; - self.snapshot_file_store(temp_snapshot_path)?; - self.snapshot_indexes(temp_snapshot_path)?; - self.snapshot_auth(temp_snapshot_path)?; - - let db_name = self - .src_path - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or("data.ms") - .to_string(); - - let snapshot_path = self.dest_path.join(format!("{}.snapshot", db_name)); - let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?; - let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); - crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; - let _file = temp_snapshot_file.persist(&snapshot_path)?; - - #[cfg(unix)] - { - use std::fs::Permissions; - use std::os::unix::fs::PermissionsExt; - - let perm = Permissions::from_mode(0o644); - _file.set_permissions(perm)?; - } - - trace!("Created snapshot in {:?}.", snapshot_path); - - Ok(()) - } - - fn snapshot_version_file(&self, path: &Path) -> anyhow::Result<()> { - let dst = path.join(VERSION_FILE_NAME); - let src = self.src_path.join(VERSION_FILE_NAME); - - fs::copy(src, dst)?; - - Ok(()) - } - - fn snapshot_meta_env(&self, path: &Path) -> anyhow::Result<()> { - let env = open_meta_env(&self.src_path, self.meta_env_size)?; - - let dst = path.join("data.mdb"); - env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; - - Ok(()) - } - - fn snapshot_file_store(&self, path: &Path) -> anyhow::Result<()> { - // for now we simply copy the updates/updates_files - // FIXME(marin): We may copy more files than necessary, if new files are added while we are - // performing the snapshop. We need a way to filter them out. 
- - let dst = path.join("updates"); - fs::create_dir_all(&dst)?; - let options = CopyOptions::default(); - dir::copy(self.src_path.join("updates/updates_files"), dst, &options)?; - - Ok(()) - } - - fn snapshot_indexes(&self, path: &Path) -> anyhow::Result<()> { - let indexes_path = self.src_path.join("indexes/"); - let dst = path.join("indexes/"); - - for entry in WalkDir::new(indexes_path).max_depth(1).into_iter().skip(1) { - let entry = entry?; - let name = entry.file_name(); - let dst = dst.join(name); - - std::fs::create_dir_all(&dst)?; - - let dst = dst.join("data.mdb"); - - let mut options = milli::heed::EnvOpenOptions::new(); - options.map_size(self.index_size); - options.max_readers(1024); - let index = milli::Index::new(options, entry.path())?; - index.copy_to_path(dst, CompactionOption::Enabled)?; - } - - Ok(()) - } - - fn snapshot_auth(&self, path: &Path) -> anyhow::Result<()> { - let auth_path = self.src_path.join("auth"); - let dst = path.join("auth"); - std::fs::create_dir_all(&dst)?; - let dst = dst.join("data.mdb"); - - let env = open_auth_store_env(&auth_path)?; - env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; - - Ok(()) - } -} diff --git a/meilisearch-lib/src/tasks/batch.rs b/meilisearch-lib/src/tasks/batch.rs deleted file mode 100644 index 5fa2e224a..000000000 --- a/meilisearch-lib/src/tasks/batch.rs +++ /dev/null @@ -1,75 +0,0 @@ -use time::OffsetDateTime; - -use crate::snapshot::SnapshotJob; - -use super::task::{Task, TaskEvent}; - -pub type BatchId = u32; - -#[derive(Debug)] -pub enum BatchContent { - DocumentsAdditionBatch(Vec), - IndexUpdate(Task), - Dump(Task), - Snapshot(SnapshotJob), - // Symbolizes a empty batch. This can occur when we were woken, but there wasn't any work to do. - Empty, -} - -impl BatchContent { - pub fn first(&self) -> Option<&Task> { - match self { - BatchContent::DocumentsAdditionBatch(ts) => ts.first(), - BatchContent::Dump(t) | BatchContent::IndexUpdate(t) => Some(t), - BatchContent::Snapshot(_) | BatchContent::Empty => None, - } - } - - pub fn push_event(&mut self, event: TaskEvent) { - match self { - BatchContent::DocumentsAdditionBatch(ts) => { - ts.iter_mut().for_each(|t| t.events.push(event.clone())) - } - BatchContent::IndexUpdate(t) | BatchContent::Dump(t) => t.events.push(event), - BatchContent::Snapshot(_) | BatchContent::Empty => (), - } - } -} - -#[derive(Debug)] -pub struct Batch { - // Only batches that contains a persistent tasks are given an id. Snapshot batches don't have - // an id. 
- pub id: Option, - pub created_at: OffsetDateTime, - pub content: BatchContent, -} - -impl Batch { - pub fn new(id: Option, content: BatchContent) -> Self { - Self { - id, - created_at: OffsetDateTime::now_utc(), - content, - } - } - pub fn len(&self) -> usize { - match self.content { - BatchContent::DocumentsAdditionBatch(ref ts) => ts.len(), - BatchContent::IndexUpdate(_) | BatchContent::Dump(_) | BatchContent::Snapshot(_) => 1, - BatchContent::Empty => 0, - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn empty() -> Self { - Self { - id: None, - created_at: OffsetDateTime::now_utc(), - content: BatchContent::Empty, - } - } -} diff --git a/meilisearch-lib/src/tasks/error.rs b/meilisearch-lib/src/tasks/error.rs deleted file mode 100644 index 75fd7a591..000000000 --- a/meilisearch-lib/src/tasks/error.rs +++ /dev/null @@ -1,34 +0,0 @@ -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; -use tokio::task::JoinError; - -use crate::update_file_store::UpdateFileStoreError; - -use super::task::TaskId; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum TaskError { - #[error("Task `{0}` not found.")] - UnexistingTask(TaskId), - #[error("Internal error: {0}")] - Internal(Box), -} - -internal_error!( - TaskError: milli::heed::Error, - JoinError, - std::io::Error, - serde_json::Error, - UpdateFileStoreError -); - -impl ErrorCode for TaskError { - fn error_code(&self) -> Code { - match self { - TaskError::UnexistingTask(_) => Code::TaskNotFound, - TaskError::Internal(_) => Code::Internal, - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/dump_handler.rs b/meilisearch-lib/src/tasks/handlers/dump_handler.rs deleted file mode 100644 index c0833e4c7..000000000 --- a/meilisearch-lib/src/tasks/handlers/dump_handler.rs +++ /dev/null @@ -1,132 +0,0 @@ -use crate::dump::DumpHandler; -use crate::index_resolver::index_store::IndexStore; -use crate::index_resolver::meta_store::IndexMetaStore; -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::task::{Task, TaskContent, TaskEvent, TaskResult}; -use crate::tasks::BatchHandler; - -#[async_trait::async_trait] -impl BatchHandler for DumpHandler -where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, -{ - fn accept(&self, batch: &Batch) -> bool { - matches!(batch.content, BatchContent::Dump { .. }) - } - - async fn process_batch(&self, mut batch: Batch) -> Batch { - match &batch.content { - BatchContent::Dump(Task { - content: TaskContent::Dump { uid }, - .. - }) => { - match self.run(uid.clone()).await { - Ok(_) => { - batch - .content - .push_event(TaskEvent::succeeded(TaskResult::Other)); - } - Err(e) => batch.content.push_event(TaskEvent::failed(e)), - } - batch - } - _ => unreachable!("invalid batch content for dump"), - } - } - - async fn finish(&self, _: &Batch) {} -} - -#[cfg(test)] -mod test { - use crate::dump::error::{DumpError, Result as DumpResult}; - use crate::index_resolver::{index_store::MockIndexStore, meta_store::MockIndexMetaStore}; - use crate::tasks::handlers::test::task_to_batch; - - use super::*; - - use nelson::Mocker; - use proptest::prelude::*; - - proptest! 
{ - #[test] - fn finish_does_nothing( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let batch = task_to_batch(task); - - let mocker = Mocker::default(); - let dump_handler = DumpHandler::::mock(mocker); - - dump_handler.finish(&batch).await; - }); - - rt.block_on(handle).unwrap(); - } - - #[test] - fn test_handle_dump_success( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let batch = task_to_batch(task); - let should_accept = matches!(batch.content, BatchContent::Dump { .. }); - - let mocker = Mocker::default(); - if should_accept { - mocker.when::>("run") - .once() - .then(|_| Ok(())); - } - - let dump_handler = DumpHandler::::mock(mocker); - - let accept = dump_handler.accept(&batch); - assert_eq!(accept, should_accept); - - if accept { - let batch = dump_handler.process_batch(batch).await; - let last_event = batch.content.first().unwrap().events.last().unwrap(); - assert!(matches!(last_event, TaskEvent::Succeeded { .. })); - } - }); - - rt.block_on(handle).unwrap(); - } - - #[test] - fn test_handle_dump_error( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let batch = task_to_batch(task); - let should_accept = matches!(batch.content, BatchContent::Dump { .. }); - - let mocker = Mocker::default(); - if should_accept { - mocker.when::>("run") - .once() - .then(|_| Err(DumpError::Internal("error".into()))); - } - - let dump_handler = DumpHandler::::mock(mocker); - - let accept = dump_handler.accept(&batch); - assert_eq!(accept, should_accept); - - if accept { - let batch = dump_handler.process_batch(batch).await; - let last_event = batch.content.first().unwrap().events.last().unwrap(); - assert!(matches!(last_event, TaskEvent::Failed { .. })); - } - }); - - rt.block_on(handle).unwrap(); - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/empty_handler.rs b/meilisearch-lib/src/tasks/handlers/empty_handler.rs deleted file mode 100644 index d800e1965..000000000 --- a/meilisearch-lib/src/tasks/handlers/empty_handler.rs +++ /dev/null @@ -1,18 +0,0 @@ -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::BatchHandler; - -/// A sink handler for empty tasks. 
-pub struct EmptyBatchHandler; - -#[async_trait::async_trait] -impl BatchHandler for EmptyBatchHandler { - fn accept(&self, batch: &Batch) -> bool { - matches!(batch.content, BatchContent::Empty) - } - - async fn process_batch(&self, batch: Batch) -> Batch { - batch - } - - async fn finish(&self, _: &Batch) {} -} diff --git a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs deleted file mode 100644 index 22c57e2fd..000000000 --- a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs +++ /dev/null @@ -1,199 +0,0 @@ -use crate::index_resolver::IndexResolver; -use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore}; -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::BatchHandler; - -#[async_trait::async_trait] -impl BatchHandler for IndexResolver -where - U: IndexMetaStore + Send + Sync + 'static, - I: IndexStore + Send + Sync + 'static, -{ - fn accept(&self, batch: &Batch) -> bool { - matches!( - batch.content, - BatchContent::DocumentsAdditionBatch(_) | BatchContent::IndexUpdate(_) - ) - } - - async fn process_batch(&self, mut batch: Batch) -> Batch { - match batch.content { - BatchContent::DocumentsAdditionBatch(ref mut tasks) => { - self.process_document_addition_batch(tasks).await; - } - BatchContent::IndexUpdate(ref mut task) => { - self.process_task(task).await; - } - _ => unreachable!(), - } - - batch - } - - async fn finish(&self, batch: &Batch) { - if let BatchContent::DocumentsAdditionBatch(ref tasks) = batch.content { - for task in tasks { - if let Some(content_uuid) = task.get_content_uuid() { - if let Err(e) = self.delete_content_file(content_uuid).await { - log::error!("error deleting update file: {}", e); - } - } - } - } - } -} - -#[cfg(test)] -mod test { - use crate::index_resolver::index_store::MapIndexStore; - use crate::index_resolver::meta_store::HeedMetaStore; - use crate::index_resolver::{ - error::Result as IndexResult, index_store::MockIndexStore, meta_store::MockIndexMetaStore, - }; - use crate::tasks::{ - handlers::test::task_to_batch, - task::{Task, TaskContent}, - }; - use crate::update_file_store::{Result as FileStoreResult, UpdateFileStore}; - - use super::*; - use meilisearch_types::index_uid::IndexUid; - use milli::update::IndexDocumentsMethod; - use nelson::Mocker; - use proptest::prelude::*; - use uuid::Uuid; - - proptest! 
{ - #[test] - fn test_accept_task( - task in any::(), - ) { - let batch = task_to_batch(task); - - let index_store = MockIndexStore::new(); - let meta_store = MockIndexMetaStore::new(); - let mocker = Mocker::default(); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); - - match batch.content { - BatchContent::DocumentsAdditionBatch(_) - | BatchContent::IndexUpdate(_) => assert!(index_resolver.accept(&batch)), - BatchContent::Dump(_) - | BatchContent::Snapshot(_) - | BatchContent::Empty => assert!(!index_resolver.accept(&batch)), - } - } - } - - #[actix_rt::test] - async fn finisher_called_on_document_update() { - let index_store = MockIndexStore::new(); - let meta_store = MockIndexMetaStore::new(); - let mocker = Mocker::default(); - let content_uuid = Uuid::new_v4(); - mocker - .when::>("delete") - .once() - .then(move |uuid| { - assert_eq!(uuid, content_uuid); - Ok(()) - }); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); - - let task = Task { - id: 1, - content: TaskContent::DocumentAddition { - content_uuid, - merge_strategy: IndexDocumentsMethod::ReplaceDocuments, - primary_key: None, - documents_count: 100, - allow_index_creation: true, - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - let batch = task_to_batch(task); - - index_resolver.finish(&batch).await; - } - - #[actix_rt::test] - #[should_panic] - async fn panic_when_passed_unsupported_batch() { - let index_store = MockIndexStore::new(); - let meta_store = MockIndexMetaStore::new(); - let mocker = Mocker::default(); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); - - let task = Task { - id: 1, - content: TaskContent::Dump { - uid: String::from("hello"), - }, - events: Vec::new(), - }; - - let batch = task_to_batch(task); - - index_resolver.process_batch(batch).await; - } - - proptest! { - #[test] - fn index_document_task_deletes_update_file( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let mocker = Mocker::default(); - - if let TaskContent::DocumentAddition{ .. } = task.content { - mocker.when::>("delete_content_file").then(|_| Ok(())); - } - - let index_resolver: IndexResolver = IndexResolver::mock(mocker); - - let batch = task_to_batch(task); - - index_resolver.finish(&batch).await; - }); - - rt.block_on(handle).unwrap(); - } - - #[test] - fn test_handle_batch(task in any::()) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let mocker = Mocker::default(); - match task.content { - TaskContent::DocumentAddition { .. } => { - mocker.when::<&mut [Task], ()>("process_document_addition_batch").then(|_| ()); - } - TaskContent::Dump { .. 
} => (), - _ => { - mocker.when::<&mut Task, ()>("process_task").then(|_| ()); - } - } - let index_resolver: IndexResolver = IndexResolver::mock(mocker); - - - let batch = task_to_batch(task); - - if index_resolver.accept(&batch) { - index_resolver.process_batch(batch).await; - } - }); - - if let Err(e) = rt.block_on(handle) { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); - } - } - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/mod.rs b/meilisearch-lib/src/tasks/handlers/mod.rs deleted file mode 100644 index 8f02de8b9..000000000 --- a/meilisearch-lib/src/tasks/handlers/mod.rs +++ /dev/null @@ -1,34 +0,0 @@ -pub mod dump_handler; -pub mod empty_handler; -mod index_resolver_handler; -pub mod snapshot_handler; - -#[cfg(test)] -mod test { - use time::OffsetDateTime; - - use crate::tasks::{ - batch::{Batch, BatchContent}, - task::{Task, TaskContent}, - }; - - pub fn task_to_batch(task: Task) -> Batch { - let content = match task.content { - TaskContent::DocumentAddition { .. } => { - BatchContent::DocumentsAdditionBatch(vec![task]) - } - TaskContent::DocumentDeletion { .. } - | TaskContent::SettingsUpdate { .. } - | TaskContent::IndexDeletion { .. } - | TaskContent::IndexCreation { .. } - | TaskContent::IndexUpdate { .. } => BatchContent::IndexUpdate(task), - TaskContent::Dump { .. } => BatchContent::Dump(task), - }; - - Batch { - id: Some(1), - created_at: OffsetDateTime::now_utc(), - content, - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs b/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs deleted file mode 100644 index 32fe6d746..000000000 --- a/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs +++ /dev/null @@ -1,26 +0,0 @@ -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::BatchHandler; - -pub struct SnapshotHandler; - -#[async_trait::async_trait] -impl BatchHandler for SnapshotHandler { - fn accept(&self, batch: &Batch) -> bool { - matches!(batch.content, BatchContent::Snapshot(_)) - } - - async fn process_batch(&self, batch: Batch) -> Batch { - match batch.content { - BatchContent::Snapshot(job) => { - if let Err(e) = job.run().await { - log::error!("snapshot error: {e}"); - } - } - _ => unreachable!(), - } - - Batch::empty() - } - - async fn finish(&self, _: &Batch) {} -} diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs deleted file mode 100644 index fe722a987..000000000 --- a/meilisearch-lib/src/tasks/mod.rs +++ /dev/null @@ -1,56 +0,0 @@ -use async_trait::async_trait; - -pub use handlers::empty_handler::EmptyBatchHandler; -pub use handlers::snapshot_handler::SnapshotHandler; -pub use scheduler::Scheduler; -pub use task_store::TaskFilter; - -#[cfg(test)] -pub use task_store::test::MockTaskStore as TaskStore; -#[cfg(not(test))] -pub use task_store::TaskStore; - -use batch::Batch; -use error::Result; - -pub mod batch; -pub mod error; -mod handlers; -mod scheduler; -pub mod task; -mod task_store; -pub mod update_loop; - -#[cfg_attr(test, mockall::automock(type Error=test::DebugError;))] -#[async_trait] -pub trait BatchHandler: Sync + Send + 'static { - /// return whether this handler can accept this batch - fn accept(&self, batch: &Batch) -> bool; - - /// Processes the `Task` batch returning the batch with the `Task` updated. - /// - /// It is ok for this function to panic if a batch is handed that hasn't been verified by - /// `accept` beforehand. 
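// --- Editor's sketch (illustrative, not part of the removed file) ---
// The `BatchHandler` trait above appears to be used as a chain of
// responsibility: a prepared batch is offered to the registered handlers and
// processed by the first one whose `accept` returns true, while
// `EmptyBatchHandler` takes care of the `Empty` batches produced when the
// scheduler wakes up with nothing to do. A simplified, synchronous sketch of
// that dispatch; all types and names below are hypothetical:
#[derive(Debug, Clone, PartialEq)]
enum Content {
    Documents(Vec<&'static str>),
    Empty,
}

trait Handler {
    fn accept(&self, content: &Content) -> bool;
    fn process(&self, content: Content) -> Content;
}

struct DocumentsHandler;
impl Handler for DocumentsHandler {
    fn accept(&self, content: &Content) -> bool {
        matches!(content, Content::Documents(_))
    }
    fn process(&self, content: Content) -> Content {
        // Pretend to index the documents, then hand the batch back.
        content
    }
}

struct EmptyHandler;
impl Handler for EmptyHandler {
    fn accept(&self, content: &Content) -> bool {
        matches!(content, Content::Empty)
    }
    fn process(&self, content: Content) -> Content {
        content
    }
}

fn dispatch(handlers: &[&dyn Handler], content: Content) -> Content {
    // The first handler that accepts the batch processes it.
    for handler in handlers {
        if handler.accept(&content) {
            return handler.process(content);
        }
    }
    content
}

fn main() {
    let handlers: [&dyn Handler; 2] = [&DocumentsHandler, &EmptyHandler];
    let processed = dispatch(&handlers, Content::Documents(vec!["doc1"]));
    assert_eq!(processed, Content::Documents(vec!["doc1"]));
    assert_eq!(dispatch(&handlers, Content::Empty), Content::Empty);
}
// --- end of editor's sketch ---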
- async fn process_batch(&self, batch: Batch) -> Batch; - - /// `finish` is called when the result of `process` has been committed to the task store. This - /// method can be used to perform cleanup after the update has been completed for example. - async fn finish(&self, batch: &Batch); -} - -#[cfg(test)] -mod test { - use serde::{Deserialize, Serialize}; - use std::fmt::Display; - - #[derive(Debug, Serialize, Deserialize)] - pub struct DebugError; - - impl Display for DebugError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("an error") - } - } - - impl std::error::Error for DebugError {} -} diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs deleted file mode 100644 index c592b71fa..000000000 --- a/meilisearch-lib/src/tasks/scheduler.rs +++ /dev/null @@ -1,609 +0,0 @@ -use std::cmp::Ordering; -use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque}; -use std::ops::{Deref, DerefMut}; -use std::slice; -use std::sync::Arc; - -use atomic_refcell::AtomicRefCell; -use milli::update::IndexDocumentsMethod; -use time::OffsetDateTime; -use tokio::sync::{watch, RwLock}; - -use crate::options::SchedulerConfig; -use crate::snapshot::SnapshotJob; - -use super::batch::{Batch, BatchContent}; -use super::error::Result; -use super::task::{Task, TaskContent, TaskEvent, TaskId}; -use super::update_loop::UpdateLoop; -use super::{BatchHandler, TaskFilter, TaskStore}; - -#[derive(Eq, Debug, Clone, Copy)] -enum TaskType { - DocumentAddition { number: usize }, - DocumentUpdate { number: usize }, - IndexUpdate, - Dump, -} - -/// Two tasks are equal if they have the same type. -impl PartialEq for TaskType { - fn eq(&self, other: &Self) -> bool { - matches!( - (self, other), - (Self::DocumentAddition { .. }, Self::DocumentAddition { .. }) - | (Self::DocumentUpdate { .. }, Self::DocumentUpdate { .. 
}) - ) - } -} - -#[derive(Eq, Debug, Clone, Copy)] -struct PendingTask { - kind: TaskType, - id: TaskId, -} - -impl PartialEq for PendingTask { - fn eq(&self, other: &Self) -> bool { - self.id.eq(&other.id) - } -} - -impl PartialOrd for PendingTask { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for PendingTask { - fn cmp(&self, other: &Self) -> Ordering { - self.id.cmp(&other.id).reverse() - } -} - -#[derive(Debug)] -struct TaskList { - id: TaskListIdentifier, - tasks: BinaryHeap, -} - -impl Deref for TaskList { - type Target = BinaryHeap; - - fn deref(&self) -> &Self::Target { - &self.tasks - } -} - -impl DerefMut for TaskList { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.tasks - } -} - -impl TaskList { - fn new(id: TaskListIdentifier) -> Self { - Self { - id, - tasks: Default::default(), - } - } -} - -impl PartialEq for TaskList { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} - -impl Eq for TaskList {} - -impl Ord for TaskList { - fn cmp(&self, other: &Self) -> Ordering { - match (&self.id, &other.id) { - (TaskListIdentifier::Index(_), TaskListIdentifier::Index(_)) => { - match (self.peek(), other.peek()) { - (None, None) => Ordering::Equal, - (None, Some(_)) => Ordering::Less, - (Some(_), None) => Ordering::Greater, - (Some(lhs), Some(rhs)) => lhs.cmp(rhs), - } - } - (TaskListIdentifier::Index(_), TaskListIdentifier::Dump) => Ordering::Less, - (TaskListIdentifier::Dump, TaskListIdentifier::Index(_)) => Ordering::Greater, - (TaskListIdentifier::Dump, TaskListIdentifier::Dump) => { - unreachable!("There should be only one Dump task list") - } - } - } -} - -impl PartialOrd for TaskList { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -#[derive(PartialEq, Eq, Hash, Debug, Clone)] -enum TaskListIdentifier { - Index(String), - Dump, -} - -impl From<&Task> for TaskListIdentifier { - fn from(task: &Task) -> Self { - match &task.content { - TaskContent::DocumentAddition { index_uid, .. } - | TaskContent::DocumentDeletion { index_uid, .. } - | TaskContent::SettingsUpdate { index_uid, .. } - | TaskContent::IndexDeletion { index_uid } - | TaskContent::IndexCreation { index_uid, .. } - | TaskContent::IndexUpdate { index_uid, .. } => { - TaskListIdentifier::Index(index_uid.as_str().to_string()) - } - TaskContent::Dump { .. } => TaskListIdentifier::Dump, - } - } -} - -#[derive(Default)] -struct TaskQueue { - /// Maps index uids to their TaskList, for quick access - index_tasks: HashMap>>, - /// A queue that orders TaskList by the priority of their fist update - queue: BinaryHeap>>, -} - -impl TaskQueue { - fn insert(&mut self, task: Task) { - let id = task.id; - let uid = TaskListIdentifier::from(&task); - - let kind = match task.content { - TaskContent::DocumentAddition { - documents_count, - merge_strategy: IndexDocumentsMethod::ReplaceDocuments, - .. - } => TaskType::DocumentAddition { - number: documents_count, - }, - TaskContent::DocumentAddition { - documents_count, - merge_strategy: IndexDocumentsMethod::UpdateDocuments, - .. - } => TaskType::DocumentUpdate { - number: documents_count, - }, - TaskContent::Dump { .. } => TaskType::Dump, - TaskContent::DocumentDeletion { .. } - | TaskContent::SettingsUpdate { .. } - | TaskContent::IndexDeletion { .. } - | TaskContent::IndexCreation { .. } - | TaskContent::IndexUpdate { .. 
} => TaskType::IndexUpdate, - _ => unreachable!("unhandled task type"), - }; - let task = PendingTask { kind, id }; - - match self.index_tasks.entry(uid) { - Entry::Occupied(entry) => { - // A task list already exists for this index, all we have to to is to push the new - // update to the end of the list. This won't change the order since ids are - // monotonically increasing. - let mut list = entry.get().borrow_mut(); - - // We only need the first element to be lower than the one we want to - // insert to preserve the order in the queue. - assert!(list.peek().map(|old_id| id >= old_id.id).unwrap_or(true)); - - list.push(task); - } - Entry::Vacant(entry) => { - let mut task_list = TaskList::new(entry.key().clone()); - task_list.push(task); - let task_list = Arc::new(AtomicRefCell::new(task_list)); - entry.insert(task_list.clone()); - self.queue.push(task_list); - } - } - } - - /// Passes a context with a view to the task list of the next index to schedule. It is - /// guaranteed that the first id from task list will be the lowest pending task id. - fn head_mut(&mut self, mut f: impl FnMut(&mut TaskList) -> R) -> Option { - let head = self.queue.pop()?; - let result = { - let mut ref_head = head.borrow_mut(); - f(&mut *ref_head) - }; - if !head.borrow().tasks.is_empty() { - // After being mutated, the head is reinserted to the correct position. - self.queue.push(head); - } else { - self.index_tasks.remove(&head.borrow().id); - } - - Some(result) - } - - pub fn is_empty(&self) -> bool { - self.queue.is_empty() && self.index_tasks.is_empty() - } -} - -pub struct Scheduler { - // TODO: currently snapshots are non persistent tasks, and are treated differently. - snapshots: VecDeque, - tasks: TaskQueue, - - store: TaskStore, - processing: Processing, - next_fetched_task_id: TaskId, - config: SchedulerConfig, - /// Notifies the update loop that a new task was received - notifier: watch::Sender<()>, -} - -impl Scheduler { - pub fn new( - store: TaskStore, - performers: Vec>, - config: SchedulerConfig, - ) -> Result>> { - let (notifier, rcv) = watch::channel(()); - - let this = Self { - snapshots: VecDeque::new(), - tasks: TaskQueue::default(), - - store, - processing: Processing::Nothing, - next_fetched_task_id: 0, - config, - notifier, - }; - - // Notify update loop to start processing pending updates immediately after startup. - this.notify(); - - let this = Arc::new(RwLock::new(this)); - - let update_loop = UpdateLoop::new(this.clone(), performers, rcv); - - tokio::task::spawn_local(update_loop.run()); - - Ok(this) - } - - fn register_task(&mut self, task: Task) { - assert!(!task.is_finished()); - self.tasks.insert(task); - } - - /// Clears the processing list, this method should be called when the processing of a batch is finished. 
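// --- Editor's sketch (illustrative, not part of the removed file) ---
// The scheduler above stores pending work in `BinaryHeap`s, which are
// max-heaps; `PendingTask::cmp` reverses the id comparison so the task with
// the lowest id, i.e. the oldest one, is popped first. A minimal
// demonstration of that reversed-Ord trick with a hypothetical type:
use std::cmp::Ordering;
use std::collections::BinaryHeap;

#[derive(Debug, PartialEq, Eq)]
struct Pending(u32);

impl Ord for Pending {
    fn cmp(&self, other: &Self) -> Ordering {
        // Reverse the natural order so the smallest id gets the highest priority.
        self.0.cmp(&other.0).reverse()
    }
}

impl PartialOrd for Pending {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let mut heap = BinaryHeap::new();
    heap.extend([Pending(3), Pending(1), Pending(2)]);
    // Despite being a max-heap, the reversed Ord pops the oldest task first.
    assert_eq!(heap.pop(), Some(Pending(1)));
    assert_eq!(heap.pop(), Some(Pending(2)));
    assert_eq!(heap.pop(), Some(Pending(3)));
}
// --- end of editor's sketch ---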
- pub fn finish(&mut self) { - self.processing = Processing::Nothing; - } - - pub fn notify(&self) { - let _ = self.notifier.send(()); - } - - fn notify_if_not_empty(&self) { - if !self.snapshots.is_empty() || !self.tasks.is_empty() { - self.notify(); - } - } - - pub async fn update_tasks(&self, content: BatchContent) -> Result { - match content { - BatchContent::DocumentsAdditionBatch(tasks) => { - let tasks = self.store.update_tasks(tasks).await?; - Ok(BatchContent::DocumentsAdditionBatch(tasks)) - } - BatchContent::IndexUpdate(t) => { - let mut tasks = self.store.update_tasks(vec![t]).await?; - Ok(BatchContent::IndexUpdate(tasks.remove(0))) - } - BatchContent::Dump(t) => { - let mut tasks = self.store.update_tasks(vec![t]).await?; - Ok(BatchContent::Dump(tasks.remove(0))) - } - other => Ok(other), - } - } - - pub async fn get_task(&self, id: TaskId, filter: Option) -> Result { - self.store.get_task(id, filter).await - } - - pub async fn list_tasks( - &self, - offset: Option, - filter: Option, - limit: Option, - ) -> Result> { - self.store.list_tasks(offset, filter, limit).await - } - - pub async fn get_processing_tasks(&self) -> Result> { - let mut tasks = Vec::new(); - - for id in self.processing.ids() { - let task = self.store.get_task(id, None).await?; - tasks.push(task); - } - - Ok(tasks) - } - - pub fn schedule_snapshot(&mut self, job: SnapshotJob) { - self.snapshots.push_back(job); - self.notify(); - } - - async fn fetch_pending_tasks(&mut self) -> Result<()> { - self.store - .fetch_unfinished_tasks(Some(self.next_fetched_task_id)) - .await? - .into_iter() - .for_each(|t| { - self.next_fetched_task_id = t.id + 1; - self.register_task(t); - }); - - Ok(()) - } - - /// Prepare the next batch, and set `processing` to the ids in that batch. - pub async fn prepare(&mut self) -> Result { - // If there is a job to process, do it first. - if let Some(job) = self.snapshots.pop_front() { - // There is more work to do, notify the update loop - self.notify_if_not_empty(); - let batch = Batch::new(None, BatchContent::Snapshot(job)); - return Ok(batch); - } - - // Try to fill the queue with pending tasks. - self.fetch_pending_tasks().await?; - - self.processing = make_batch(&mut self.tasks, &self.config); - - log::debug!("prepared batch with {} tasks", self.processing.len()); - - if !self.processing.is_nothing() { - let (processing, mut content) = self - .store - .get_processing_tasks(std::mem::take(&mut self.processing)) - .await?; - - // The batch id is the id of the first update it contains. At this point we must have a - // valid batch that contains at least 1 task. - let id = match content.first() { - Some(Task { id, .. }) => *id, - _ => panic!("invalid batch"), - }; - - content.push_event(TaskEvent::Batched { - batch_id: id, - timestamp: OffsetDateTime::now_utc(), - }); - - self.processing = processing; - - let batch = Batch::new(Some(id), content); - - // There is more work to do, notify the update loop - self.notify_if_not_empty(); - - Ok(batch) - } else { - Ok(Batch::empty()) - } - } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Processing { - DocumentAdditions(Vec), - IndexUpdate(TaskId), - Dump(TaskId), - /// Variant used when there is nothing to process. 
- Nothing, -} - -impl Default for Processing { - fn default() -> Self { - Self::Nothing - } -} - -enum ProcessingIter<'a> { - Many(slice::Iter<'a, TaskId>), - Single(Option), -} - -impl<'a> Iterator for ProcessingIter<'a> { - type Item = TaskId; - - fn next(&mut self) -> Option { - match self { - ProcessingIter::Many(iter) => iter.next().copied(), - ProcessingIter::Single(val) => val.take(), - } - } -} - -impl Processing { - fn is_nothing(&self) -> bool { - matches!(self, Processing::Nothing) - } - - pub fn ids(&self) -> impl Iterator + '_ { - match self { - Processing::DocumentAdditions(v) => ProcessingIter::Many(v.iter()), - Processing::IndexUpdate(id) | Processing::Dump(id) => ProcessingIter::Single(Some(*id)), - Processing::Nothing => ProcessingIter::Single(None), - } - } - - pub fn len(&self) -> usize { - match self { - Processing::DocumentAdditions(v) => v.len(), - Processing::IndexUpdate(_) | Processing::Dump(_) => 1, - Processing::Nothing => 0, - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -fn make_batch(tasks: &mut TaskQueue, config: &SchedulerConfig) -> Processing { - let mut doc_count = 0; - tasks - .head_mut(|list| match list.peek().copied() { - Some(PendingTask { - kind: TaskType::IndexUpdate, - id, - }) => { - list.pop(); - Processing::IndexUpdate(id) - } - Some(PendingTask { - kind: TaskType::Dump, - id, - }) => { - list.pop(); - Processing::Dump(id) - } - Some(PendingTask { kind, .. }) => { - let mut task_list = Vec::new(); - loop { - match list.peek() { - Some(pending) if pending.kind == kind => { - // We always need to process at least one task for the scheduler to make progress. - if config.disable_auto_batching && !task_list.is_empty() { - break; - } - let pending = list.pop().unwrap(); - task_list.push(pending.id); - - // We add the number of documents to the count if we are scheduling document additions. 
- match pending.kind { - TaskType::DocumentUpdate { number } - | TaskType::DocumentAddition { number } => { - doc_count += number; - } - _ => (), - } - } - _ => break, - } - } - Processing::DocumentAdditions(task_list) - } - None => Processing::Nothing, - }) - .unwrap_or(Processing::Nothing) -} - -#[cfg(test)] -mod test { - use meilisearch_types::index_uid::IndexUid; - use milli::update::IndexDocumentsMethod; - use uuid::Uuid; - - use crate::tasks::task::TaskContent; - - use super::*; - - fn gen_task(id: TaskId, content: TaskContent) -> Task { - Task { - id, - content, - events: vec![], - } - } - - #[test] - #[rustfmt::skip] - fn register_updates_multiples_indexes() { - let mut queue = TaskQueue::default(); - queue.insert(gen_task(0, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1") })); - queue.insert(gen_task(1, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") })); - queue.insert(gen_task(2, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") })); - queue.insert(gen_task(3, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") })); - queue.insert(gen_task(4, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1") })); - queue.insert(gen_task(5, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1") })); - queue.insert(gen_task(6, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") })); - - let test1_tasks = queue - .head_mut(|tasks| tasks.drain().map(|t| t.id).collect::>()) - .unwrap(); - - assert_eq!(test1_tasks, &[0, 4, 5]); - - let test2_tasks = queue - .head_mut(|tasks| tasks.drain().map(|t| t.id).collect::>()) - .unwrap(); - - assert_eq!(test2_tasks, &[1, 2, 3, 6]); - - assert!(queue.index_tasks.is_empty()); - assert!(queue.queue.is_empty()); - } - - fn gen_doc_addition_task_content(index_uid: &str) -> TaskContent { - TaskContent::DocumentAddition { - content_uuid: Uuid::new_v4(), - merge_strategy: IndexDocumentsMethod::ReplaceDocuments, - primary_key: Some("test".to_string()), - documents_count: 0, - allow_index_creation: true, - index_uid: IndexUid::new_unchecked(index_uid), - } - } - - #[test] - #[rustfmt::skip] - fn test_make_batch() { - let mut queue = TaskQueue::default(); - queue.insert(gen_task(0, gen_doc_addition_task_content("test1"))); - queue.insert(gen_task(1, gen_doc_addition_task_content("test2"))); - queue.insert(gen_task(2, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2")})); - queue.insert(gen_task(3, gen_doc_addition_task_content("test2"))); - queue.insert(gen_task(4, gen_doc_addition_task_content("test1"))); - queue.insert(gen_task(5, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1")})); - queue.insert(gen_task(6, gen_doc_addition_task_content("test2"))); - queue.insert(gen_task(7, gen_doc_addition_task_content("test1"))); - queue.insert(gen_task(8, TaskContent::Dump { uid: "adump".to_owned() })); - - let config = SchedulerConfig::default(); - - // Make sure that the dump is processed before everybody else. 
- let batch = make_batch(&mut queue, &config); - assert_eq!(batch, Processing::Dump(8)); - - let batch = make_batch(&mut queue, &config); - assert_eq!(batch, Processing::DocumentAdditions(vec![0, 4])); - - let batch = make_batch(&mut queue, &config); - assert_eq!(batch, Processing::DocumentAdditions(vec![1])); - - let batch = make_batch(&mut queue, &config); - assert_eq!(batch, Processing::IndexUpdate(2)); - - let batch = make_batch(&mut queue, &config); - assert_eq!(batch, Processing::DocumentAdditions(vec![3, 6])); - - let batch = make_batch(&mut queue, &config); - assert_eq!(batch, Processing::IndexUpdate(5)); - - let batch = make_batch(&mut queue, &config); - assert_eq!(batch, Processing::DocumentAdditions(vec![7])); - - assert!(queue.is_empty()); - } -} diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs deleted file mode 100644 index e0a18895b..000000000 --- a/meilisearch-lib/src/tasks/task.rs +++ /dev/null @@ -1,195 +0,0 @@ -use meilisearch_types::error::ResponseError; -use meilisearch_types::index_uid::IndexUid; -use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use super::batch::BatchId; -use crate::index::{Settings, Unchecked}; - -pub type TaskId = u32; - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub enum TaskResult { - DocumentAddition { indexed_documents: u64 }, - DocumentDeletion { deleted_documents: u64 }, - ClearAll { deleted_documents: u64 }, - Other, -} - -impl From for TaskResult { - fn from(other: DocumentAdditionResult) -> Self { - Self::DocumentAddition { - indexed_documents: other.indexed_documents, - } - } -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub enum TaskEvent { - Created( - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - OffsetDateTime, - ), - Batched { - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - batch_id: BatchId, - }, - Processing( - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - OffsetDateTime, - ), - Succeeded { - result: TaskResult, - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, - Failed { - error: ResponseError, - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, -} - -impl TaskEvent { - pub fn succeeded(result: TaskResult) -> Self { - Self::Succeeded { - result, - timestamp: OffsetDateTime::now_utc(), - } - } - - pub fn failed(error: impl Into) -> Self { - Self::Failed { - error: error.into(), - timestamp: OffsetDateTime::now_utc(), - } - } -} - -/// A task represents an operation that Meilisearch must do. -/// It's stored on disk and executed from the lowest to highest Task id. -/// Every time a new task is created it has a higher Task id than the previous one. -/// See also `Job`. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub struct Task { - pub id: TaskId, - /// The name of the index the task is targeting. 
If it isn't targeting any index (i.e Dump task) - /// then this is None - // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of - // the TaskContent. - pub content: TaskContent, - pub events: Vec, -} - -impl Task { - /// Return true when a task is finished. - /// A task is finished when its last state is either `Succeeded` or `Failed`. - pub fn is_finished(&self) -> bool { - self.events.last().map_or(false, |event| { - matches!( - event, - TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. } - ) - }) - } - - /// Return the content_uuid of the `Task` if there is one. - pub fn get_content_uuid(&self) -> Option { - match self { - Task { - content: TaskContent::DocumentAddition { content_uuid, .. }, - .. - } => Some(*content_uuid), - _ => None, - } - } - - pub fn index_uid(&self) -> Option<&str> { - match &self.content { - TaskContent::DocumentAddition { index_uid, .. } - | TaskContent::DocumentDeletion { index_uid, .. } - | TaskContent::SettingsUpdate { index_uid, .. } - | TaskContent::IndexDeletion { index_uid } - | TaskContent::IndexCreation { index_uid, .. } - | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()), - TaskContent::Dump { .. } => None, - } - } -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub enum DocumentDeletion { - Clear, - Ids(Vec), -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[allow(clippy::large_enum_variant)] -pub enum TaskContent { - DocumentAddition { - index_uid: IndexUid, - #[cfg_attr(test, proptest(value = "Uuid::new_v4()"))] - content_uuid: Uuid, - #[cfg_attr(test, proptest(strategy = "test::index_document_method_strategy()"))] - merge_strategy: IndexDocumentsMethod, - primary_key: Option, - documents_count: usize, - allow_index_creation: bool, - }, - DocumentDeletion { - index_uid: IndexUid, - deletion: DocumentDeletion, - }, - SettingsUpdate { - index_uid: IndexUid, - settings: Settings, - /// Indicates whether the task was a deletion - is_deletion: bool, - allow_index_creation: bool, - }, - IndexDeletion { - index_uid: IndexUid, - }, - IndexCreation { - index_uid: IndexUid, - primary_key: Option, - }, - IndexUpdate { - index_uid: IndexUid, - primary_key: Option, - }, - Dump { - uid: String, - }, -} - -#[cfg(test)] -mod test { - use proptest::prelude::*; - - use super::*; - - pub(super) fn index_document_method_strategy() -> impl Strategy { - prop_oneof![ - Just(IndexDocumentsMethod::ReplaceDocuments), - Just(IndexDocumentsMethod::UpdateDocuments), - ] - } - - pub(super) fn datetime_strategy() -> impl Strategy { - Just(OffsetDateTime::now_utc()) - } -} diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs deleted file mode 100644 index 55dfe17d3..000000000 --- a/meilisearch-lib/src/tasks/task_store/mod.rs +++ /dev/null @@ -1,420 +0,0 @@ -mod store; - -use std::collections::HashSet; -use std::io::{BufWriter, Write}; -use std::path::Path; -use std::sync::Arc; - -use log::debug; -use milli::heed::{Env, RwTxn}; -use time::OffsetDateTime; - -use super::batch::BatchContent; -use super::error::TaskError; -use super::scheduler::Processing; -use super::task::{Task, TaskContent, TaskId}; -use super::Result; -use crate::tasks::task::TaskEvent; -use crate::update_file_store::UpdateFileStore; - -#[cfg(test)] -pub use store::test::MockStore as Store; -#[cfg(not(test))] -pub use store::Store; - -type FilterFn 
= Box bool + Sync + Send + 'static>; - -/// Defines constraints to be applied when querying for Tasks from the store. -#[derive(Default)] -pub struct TaskFilter { - indexes: Option>, - filter_fn: Option, -} - -impl TaskFilter { - fn pass(&self, task: &Task) -> bool { - match task.index_uid() { - Some(index_uid) => self - .indexes - .as_ref() - .map_or(true, |indexes| indexes.contains(index_uid)), - None => false, - } - } - - fn filtered_indexes(&self) -> Option<&HashSet> { - self.indexes.as_ref() - } - - /// Adds an index to the filter, so the filter must match this index. - pub fn filter_index(&mut self, index: String) { - self.indexes - .get_or_insert_with(Default::default) - .insert(index); - } - - pub fn filter_fn(&mut self, f: FilterFn) { - self.filter_fn.replace(f); - } -} - -pub struct TaskStore { - store: Arc, -} - -impl Clone for TaskStore { - fn clone(&self) -> Self { - Self { - store: self.store.clone(), - } - } -} - -impl TaskStore { - pub fn new(env: Arc) -> Result { - let store = Arc::new(Store::new(env)?); - Ok(Self { store }) - } - - pub async fn register(&self, content: TaskContent) -> Result { - debug!("registering update: {:?}", content); - let store = self.store.clone(); - let task = tokio::task::spawn_blocking(move || -> Result { - let mut txn = store.wtxn()?; - let next_task_id = store.next_task_id(&mut txn)?; - let created_at = TaskEvent::Created(OffsetDateTime::now_utc()); - let task = Task { - id: next_task_id, - content, - events: vec![created_at], - }; - - store.put(&mut txn, &task)?; - txn.commit()?; - - Ok(task) - }) - .await??; - - Ok(task) - } - - pub fn register_raw_update(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { - self.store.put(wtxn, task)?; - Ok(()) - } - - pub async fn get_task(&self, id: TaskId, filter: Option) -> Result { - let store = self.store.clone(); - let task = tokio::task::spawn_blocking(move || -> Result<_> { - let txn = store.rtxn()?; - let task = store.get(&txn, id)?; - Ok(task) - }) - .await?? - .ok_or(TaskError::UnexistingTask(id))?; - - match filter { - Some(filter) => filter - .pass(&task) - .then_some(task) - .ok_or(TaskError::UnexistingTask(id)), - None => Ok(task), - } - } - - /// This methods takes a `Processing` which contains the next task ids to process, and returns - /// the corresponding tasks along with the ownership to the passed processing. - /// - /// We need get_processing_tasks to take ownership over `Processing` because we need it to be - /// valid for 'static. - pub async fn get_processing_tasks( - &self, - processing: Processing, - ) -> Result<(Processing, BatchContent)> { - let store = self.store.clone(); - let tasks = tokio::task::spawn_blocking(move || -> Result<_> { - let txn = store.rtxn()?; - - let content = match processing { - Processing::DocumentAdditions(ref ids) => { - let mut tasks = Vec::new(); - - for id in ids.iter() { - let task = store - .get(&txn, *id)? - .ok_or(TaskError::UnexistingTask(*id))?; - tasks.push(task); - } - BatchContent::DocumentsAdditionBatch(tasks) - } - Processing::IndexUpdate(id) => { - let task = store.get(&txn, id)?.ok_or(TaskError::UnexistingTask(id))?; - BatchContent::IndexUpdate(task) - } - Processing::Dump(id) => { - let task = store.get(&txn, id)?.ok_or(TaskError::UnexistingTask(id))?; - debug_assert!(matches!(task.content, TaskContent::Dump { .. 
})); - BatchContent::Dump(task) - } - Processing::Nothing => BatchContent::Empty, - }; - - Ok((processing, content)) - }) - .await??; - - Ok(tasks) - } - - pub async fn update_tasks(&self, tasks: Vec) -> Result> { - let store = self.store.clone(); - - let tasks = tokio::task::spawn_blocking(move || -> Result<_> { - let mut txn = store.wtxn()?; - - for task in &tasks { - store.put(&mut txn, task)?; - } - - txn.commit()?; - - Ok(tasks) - }) - .await??; - - Ok(tasks) - } - - pub async fn fetch_unfinished_tasks(&self, offset: Option) -> Result> { - let store = self.store.clone(); - - tokio::task::spawn_blocking(move || { - let txn = store.rtxn()?; - let tasks = store.fetch_unfinished_tasks(&txn, offset)?; - Ok(tasks) - }) - .await? - } - - pub async fn list_tasks( - &self, - offset: Option, - filter: Option, - limit: Option, - ) -> Result> { - let store = self.store.clone(); - - tokio::task::spawn_blocking(move || { - let txn = store.rtxn()?; - let tasks = store.list_tasks(&txn, offset, filter, limit)?; - Ok(tasks) - }) - .await? - } - - pub async fn dump( - env: Arc, - dir_path: impl AsRef, - update_file_store: UpdateFileStore, - ) -> Result<()> { - let store = Self::new(env)?; - let update_dir = dir_path.as_ref().join("updates"); - let updates_file = update_dir.join("data.jsonl"); - let tasks = store.list_tasks(None, None, None).await?; - - let dir_path = dir_path.as_ref().to_path_buf(); - tokio::task::spawn_blocking(move || -> Result<()> { - std::fs::create_dir(&update_dir)?; - let updates_file = std::fs::File::create(updates_file)?; - let mut updates_file = BufWriter::new(updates_file); - - for task in tasks { - serde_json::to_writer(&mut updates_file, &task)?; - updates_file.write_all(b"\n")?; - - if !task.is_finished() { - if let Some(content_uuid) = task.get_content_uuid() { - update_file_store.dump(content_uuid, &dir_path)?; - } - } - } - updates_file.flush()?; - Ok(()) - }) - .await??; - - Ok(()) - } - - pub fn load_dump(src: impl AsRef, env: Arc) -> anyhow::Result<()> { - // create a dummy update field store, since it is not needed right now. 
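The `dump` method above serializes every task as newline-delimited JSON (`serde_json::to_writer` followed by a `\n`), and the load path that follows streams the same file back with `serde_json::Deserializer::from_reader(..).into_iter()`. Below is a minimal, self-contained sketch of that round trip (not taken from the patch); a toy `Record` type stands in for `Task`, and it assumes `serde` with the `derive` feature plus `serde_json`.

```rust
// Illustrative sketch (not from the patch): the newline-delimited JSON format
// written by `dump` above and the streaming read used by `load_dump`.
// A toy `Record` type stands in for `Task`.
use std::io::{BufReader, Cursor, Write};

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Record {
    id: u32,
    kind: String,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let records = vec![
        Record { id: 0, kind: "indexCreation".into() },
        Record { id: 1, kind: "documentAddition".into() },
    ];

    // Write: one JSON object per line, like `updates/data.jsonl`.
    let mut buffer = Vec::new();
    for record in &records {
        serde_json::to_writer(&mut buffer, record)?;
        buffer.write_all(b"\n")?;
    }

    // Read: stream the values back without holding the whole file in memory.
    let reader = BufReader::new(Cursor::new(buffer));
    let loaded: Vec<Record> = serde_json::Deserializer::from_reader(reader)
        .into_iter::<Record>()
        .collect::<Result<_, _>>()?;

    assert_eq!(loaded, records);
    Ok(())
}
```

Streaming one value per line keeps memory usage flat and lets a malformed entry fail on its own line rather than invalidating the whole file.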
- let store = Self::new(env.clone())?; - - let src_update_path = src.as_ref().join("updates"); - let update_data = std::fs::File::open(&src_update_path.join("data.jsonl"))?; - let update_data = std::io::BufReader::new(update_data); - - let stream = serde_json::Deserializer::from_reader(update_data).into_iter::(); - - let mut wtxn = env.write_txn()?; - for entry in stream { - store.register_raw_update(&mut wtxn, &entry?)?; - } - wtxn.commit()?; - - Ok(()) - } -} - -#[cfg(test)] -pub mod test { - use crate::tasks::{scheduler::Processing, task_store::store::test::tmp_env}; - - use super::*; - - use meilisearch_types::index_uid::IndexUid; - use nelson::Mocker; - use proptest::{ - strategy::Strategy, - test_runner::{Config, TestRunner}, - }; - - pub enum MockTaskStore { - Real(TaskStore), - Mock(Arc), - } - - impl Clone for MockTaskStore { - fn clone(&self) -> Self { - match self { - Self::Real(x) => Self::Real(x.clone()), - Self::Mock(x) => Self::Mock(x.clone()), - } - } - } - - impl MockTaskStore { - pub fn new(env: Arc) -> Result { - Ok(Self::Real(TaskStore::new(env)?)) - } - - pub async fn dump( - env: Arc, - path: impl AsRef, - update_file_store: UpdateFileStore, - ) -> Result<()> { - TaskStore::dump(env, path, update_file_store).await - } - - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(Arc::new(mocker)) - } - - pub async fn update_tasks(&self, tasks: Vec) -> Result> { - match self { - Self::Real(s) => s.update_tasks(tasks).await, - Self::Mock(m) => unsafe { - m.get::<_, Result>>("update_tasks").call(tasks) - }, - } - } - - pub async fn get_task(&self, id: TaskId, filter: Option) -> Result { - match self { - Self::Real(s) => s.get_task(id, filter).await, - Self::Mock(m) => unsafe { m.get::<_, Result>("get_task").call((id, filter)) }, - } - } - - pub async fn get_processing_tasks( - &self, - tasks: Processing, - ) -> Result<(Processing, BatchContent)> { - match self { - Self::Real(s) => s.get_processing_tasks(tasks).await, - Self::Mock(m) => unsafe { m.get("get_pending_task").call(tasks) }, - } - } - - pub async fn fetch_unfinished_tasks(&self, from: Option) -> Result> { - match self { - Self::Real(s) => s.fetch_unfinished_tasks(from).await, - Self::Mock(m) => unsafe { m.get("fetch_unfinished_tasks").call(from) }, - } - } - - pub async fn list_tasks( - &self, - from: Option, - filter: Option, - limit: Option, - ) -> Result> { - match self { - Self::Real(s) => s.list_tasks(from, filter, limit).await, - Self::Mock(m) => unsafe { m.get("list_tasks").call((from, filter, limit)) }, - } - } - - pub async fn register(&self, content: TaskContent) -> Result { - match self { - Self::Real(s) => s.register(content).await, - Self::Mock(_m) => todo!(), - } - } - - pub fn register_raw_update(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { - match self { - Self::Real(s) => s.register_raw_update(wtxn, task), - Self::Mock(_m) => todo!(), - } - } - - pub fn load_dump(path: impl AsRef, env: Arc) -> anyhow::Result<()> { - TaskStore::load_dump(path, env) - } - } - - #[test] - fn test_increment_task_id() { - let tmp = tmp_env(); - let store = Store::new(tmp.env()).unwrap(); - - let mut txn = store.wtxn().unwrap(); - assert_eq!(store.next_task_id(&mut txn).unwrap(), 0); - txn.abort().unwrap(); - - let gen_task = |id: TaskId| Task { - id, - content: TaskContent::IndexCreation { - primary_key: None, - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - let mut runner = TestRunner::new(Config::default()); - runner - .run(&(0..100u32).prop_map(gen_task), |task| { - let mut txn 
= store.wtxn().unwrap(); - let previous_id = store.next_task_id(&mut txn).unwrap(); - - store.put(&mut txn, &task).unwrap(); - - let next_id = store.next_task_id(&mut txn).unwrap(); - - // if we put a task whose task_id is less than the next_id, then the next_id remains - // unchanged, otherwise it becomes task.id + 1 - if task.id < previous_id { - assert_eq!(next_id, previous_id) - } else { - assert_eq!(next_id, task.id + 1); - } - - txn.commit().unwrap(); - - Ok(()) - }) - .unwrap(); - } -} diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs deleted file mode 100644 index 32b20aeb8..000000000 --- a/meilisearch-lib/src/tasks/task_store/store.rs +++ /dev/null @@ -1,377 +0,0 @@ -#[allow(clippy::upper_case_acronyms)] - -type BEU32 = milli::heed::zerocopy::U32; - -const INDEX_UIDS_TASK_IDS: &str = "index-uids-task-ids"; -const TASKS: &str = "tasks"; - -use std::collections::HashSet; -use std::ops::Bound::{Excluded, Unbounded}; -use std::result::Result as StdResult; -use std::sync::Arc; - -use milli::heed::types::{OwnedType, SerdeJson, Str}; -use milli::heed::{Database, Env, RoTxn, RwTxn}; -use milli::heed_codec::RoaringBitmapCodec; -use roaring::RoaringBitmap; - -use crate::tasks::task::{Task, TaskId}; - -use super::super::Result; -use super::TaskFilter; - -pub struct Store { - env: Arc, - /// Maps an index uid to the set of tasks ids associated to it. - index_uid_task_ids: Database, - tasks: Database, SerdeJson>, -} - -impl Drop for Store { - fn drop(&mut self) { - if Arc::strong_count(&self.env) == 1 { - self.env.as_ref().clone().prepare_for_closing(); - } - } -} - -impl Store { - /// Create a new store from the specified `Path`. - /// Be really cautious when calling this function, the returned `Store` may - /// be in an invalid state, with dangling processing tasks. - /// You want to patch all un-finished tasks and put them in your pending - /// queue with the `reset_and_return_unfinished_update` method. - pub fn new(env: Arc) -> Result { - let index_uid_task_ids = env.create_database(Some(INDEX_UIDS_TASK_IDS))?; - let tasks = env.create_database(Some(TASKS))?; - - Ok(Self { - env, - index_uid_task_ids, - tasks, - }) - } - - pub fn wtxn(&self) -> Result { - Ok(self.env.write_txn()?) - } - - pub fn rtxn(&self) -> Result { - Ok(self.env.read_txn()?) - } - - /// Returns the id for the next task. - /// - /// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit - /// the task to the store in the same transaction, no one else will have this task id. - pub fn next_task_id(&self, txn: &mut RwTxn) -> Result { - let id = self - .tasks - .lazily_decode_data() - .last(txn)? - .map(|(id, _)| id.get() + 1) - .unwrap_or(0); - Ok(id) - } - - pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> { - self.tasks.put(txn, &BEU32::new(task.id), task)?; - // only add the task to the indexes index if it has an index_uid - if let Some(index_uid) = task.index_uid() { - let mut tasks_set = self - .index_uid_task_ids - .get(txn, index_uid)? - .unwrap_or_default(); - - tasks_set.insert(task.id); - - self.index_uid_task_ids.put(txn, index_uid, &tasks_set)?; - } - - Ok(()) - } - - pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result> { - let task = self.tasks.get(txn, &BEU32::new(id))?; - Ok(task) - } - - /// Returns the unfinished tasks starting from the given taskId in ascending order. 
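The `next_task_id` helper above derives the next id from the last key of the `tasks` database (`last key + 1`, or `0` when the store is empty), which is exactly the invariant that `test_increment_task_id` checks: writing a task whose id is below the current maximum never rewinds the counter. Here is a small in-memory model of that allocation rule (not taken from the patch), using a `BTreeMap` in place of the heed database.

```rust
// Illustrative sketch (not from the patch): an in-memory model of the
// "last key + 1" id allocation performed by `Store::next_task_id` above.
// The real store reads the last key of the LMDB `tasks` database inside a
// write transaction, which reserves the id until the transaction commits.
use std::collections::BTreeMap;

fn next_task_id(tasks: &BTreeMap<u32, &'static str>) -> u32 {
    tasks.keys().next_back().map(|id| id + 1).unwrap_or(0)
}

fn main() {
    let mut tasks = BTreeMap::new();
    assert_eq!(next_task_id(&tasks), 0);

    tasks.insert(0, "index creation");
    tasks.insert(1, "document addition");
    assert_eq!(next_task_id(&tasks), 2);

    // Re-writing a task whose id is below the current maximum does not move
    // the counter backwards: this is the property checked by
    // `test_increment_task_id` above.
    tasks.insert(0, "index creation (updated)");
    assert_eq!(next_task_id(&tasks), 2);
}
```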
- pub fn fetch_unfinished_tasks(&self, txn: &RoTxn, from: Option) -> Result> { - // We must NEVER re-enqueue an already processed task! It's content uuid would point to an unexisting file. - // - // TODO(marin): This may create some latency when the first batch lazy loads the pending updates. - let from = from.unwrap_or_default(); - - let result: StdResult, milli::heed::Error> = self - .tasks - .range(txn, &(BEU32::new(from)..))? - .map(|r| r.map(|(_, t)| t)) - .filter(|result| result.as_ref().map_or(true, |t| !t.is_finished())) - .collect(); - - result.map_err(Into::into) - } - - /// Returns all the tasks starting from the given taskId and going in descending order. - pub fn list_tasks( - &self, - txn: &RoTxn, - from: Option, - filter: Option, - limit: Option, - ) -> Result> { - let from = match from { - Some(from) => from, - None => self.tasks.last(txn)?.map_or(0, |(id, _)| id.get()), - }; - - let filter_fn = |task: &Task| { - filter - .as_ref() - .and_then(|f| f.filter_fn.as_ref()) - .map_or(true, |f| f(task)) - }; - - let result: Result> = match filter.as_ref().and_then(|f| f.filtered_indexes()) { - Some(indexes) => self - .compute_candidates(txn, indexes, from)? - .filter(|result| result.as_ref().map_or(true, filter_fn)) - .take(limit.unwrap_or(usize::MAX)) - .collect(), - None => self - .tasks - .rev_range(txn, &(..=BEU32::new(from)))? - .map(|r| r.map(|(_, t)| t).map_err(Into::into)) - .filter(|result| result.as_ref().map_or(true, filter_fn)) - .take(limit.unwrap_or(usize::MAX)) - .collect(), - }; - - result.map_err(Into::into) - } - - fn compute_candidates<'a>( - &'a self, - txn: &'a RoTxn, - indexes: &HashSet, - from: TaskId, - ) -> Result> + 'a> { - let mut candidates = RoaringBitmap::new(); - - for index_uid in indexes { - if let Some(tasks_set) = self.index_uid_task_ids.get(txn, index_uid)? { - candidates |= tasks_set; - } - } - - candidates.remove_range((Excluded(from), Unbounded)); - - let iter = candidates - .into_iter() - .rev() - .filter_map(|id| self.get(txn, id).transpose()); - - Ok(iter) - } -} - -#[cfg(test)] -pub mod test { - use itertools::Itertools; - use meilisearch_types::index_uid::IndexUid; - use milli::heed::EnvOpenOptions; - use nelson::Mocker; - use tempfile::TempDir; - - use crate::tasks::task::TaskContent; - - use super::*; - - /// TODO: use this mock to test the task store properly. 
- #[allow(dead_code)] - pub enum MockStore { - Real(Store), - Fake(Mocker), - } - - pub struct TmpEnv(TempDir, Arc); - - impl TmpEnv { - pub fn env(&self) -> Arc { - self.1.clone() - } - } - - pub fn tmp_env() -> TmpEnv { - let tmp = tempfile::tempdir().unwrap(); - - let mut options = EnvOpenOptions::new(); - options.map_size(4096 * 100000); - options.max_dbs(1000); - let env = Arc::new(options.open(tmp.path()).unwrap()); - - TmpEnv(tmp, env) - } - - impl MockStore { - pub fn new(env: Arc) -> Result { - Ok(Self::Real(Store::new(env)?)) - } - - pub fn wtxn(&self) -> Result { - match self { - MockStore::Real(index) => index.wtxn(), - MockStore::Fake(_) => todo!(), - } - } - - pub fn rtxn(&self) -> Result { - match self { - MockStore::Real(index) => index.rtxn(), - MockStore::Fake(_) => todo!(), - } - } - - pub fn next_task_id(&self, txn: &mut RwTxn) -> Result { - match self { - MockStore::Real(index) => index.next_task_id(txn), - MockStore::Fake(_) => todo!(), - } - } - - pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> { - match self { - MockStore::Real(index) => index.put(txn, task), - MockStore::Fake(_) => todo!(), - } - } - - pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result> { - match self { - MockStore::Real(index) => index.get(txn, id), - MockStore::Fake(_) => todo!(), - } - } - - pub fn fetch_unfinished_tasks( - &self, - txn: &RoTxn, - from: Option, - ) -> Result> { - match self { - MockStore::Real(index) => index.fetch_unfinished_tasks(txn, from), - MockStore::Fake(_) => todo!(), - } - } - - pub fn list_tasks( - &self, - txn: &RoTxn, - from: Option, - filter: Option, - limit: Option, - ) -> Result> { - match self { - MockStore::Real(index) => index.list_tasks(txn, from, filter, limit), - MockStore::Fake(_) => todo!(), - } - } - } - - #[test] - fn test_ordered_filtered_updates() { - let tmp = tmp_env(); - let store = Store::new(tmp.env()).unwrap(); - - let tasks = (0..100) - .map(|_| Task { - id: rand::random(), - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: vec![], - }) - .collect::>(); - - let mut txn = store.env.write_txn().unwrap(); - tasks - .iter() - .try_for_each(|t| store.put(&mut txn, t)) - .unwrap(); - - let mut filter = TaskFilter::default(); - filter.filter_index("test".into()); - - let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); - - assert!(tasks - .iter() - .map(|t| t.id) - .tuple_windows() - .all(|(a, b)| a > b)); - } - - #[test] - fn test_filter_same_index_prefix() { - let tmp = tmp_env(); - let store = Store::new(tmp.env()).unwrap(); - - let task_1 = Task { - id: 1, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: vec![], - }; - - let task_2 = Task { - id: 0, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test1"), - }, - events: vec![], - }; - - let mut txn = store.wtxn().unwrap(); - store.put(&mut txn, &task_1).unwrap(); - store.put(&mut txn, &task_2).unwrap(); - - let mut filter = TaskFilter::default(); - filter.filter_index("test".into()); - - let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); - - txn.abort().unwrap(); - assert_eq!(tasks.len(), 1); - assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test"); - - // same thing but invert the ids - let task_1 = Task { - id: 0, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: vec![], - }; - let task_2 = Task { - id: 1, - content: TaskContent::IndexDeletion { 
- index_uid: IndexUid::new_unchecked("test1"), - }, - events: vec![], - }; - - let mut txn = store.wtxn().unwrap(); - store.put(&mut txn, &task_1).unwrap(); - store.put(&mut txn, &task_2).unwrap(); - - let mut filter = TaskFilter::default(); - filter.filter_index("test".into()); - - let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); - - assert_eq!(tasks.len(), 1); - assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test"); - } -} diff --git a/meilisearch-lib/src/tasks/update_loop.rs b/meilisearch-lib/src/tasks/update_loop.rs deleted file mode 100644 index b6e43e319..000000000 --- a/meilisearch-lib/src/tasks/update_loop.rs +++ /dev/null @@ -1,93 +0,0 @@ -use std::sync::Arc; - -use time::OffsetDateTime; -use tokio::sync::{watch, RwLock}; - -use super::batch::Batch; -use super::error::Result; -use super::{BatchHandler, Scheduler}; -use crate::tasks::task::TaskEvent; - -/// The update loop sequentially performs batches of updates by asking the scheduler for a batch, -/// and handing it to the `TaskPerformer`. -pub struct UpdateLoop { - scheduler: Arc>, - performers: Vec>, - - notifier: Option>, -} - -impl UpdateLoop { - pub fn new( - scheduler: Arc>, - performers: Vec>, - notifier: watch::Receiver<()>, - ) -> Self { - Self { - scheduler, - performers, - notifier: Some(notifier), - } - } - - pub async fn run(mut self) { - let mut notifier = self.notifier.take().unwrap(); - - loop { - if notifier.changed().await.is_err() { - break; - } - - if let Err(e) = self.process_next_batch().await { - log::error!("an error occurred while processing an update batch: {}", e); - } - } - } - - async fn process_next_batch(&self) -> Result<()> { - let mut batch = { self.scheduler.write().await.prepare().await? }; - let performer = self - .performers - .iter() - .find(|p| p.accept(&batch)) - .expect("No performer found for batch") - .clone(); - - batch - .content - .push_event(TaskEvent::Processing(OffsetDateTime::now_utc())); - - batch.content = { - self.scheduler - .read() - .await - .update_tasks(batch.content) - .await? - }; - - let batch = performer.process_batch(batch).await; - - self.handle_batch_result(batch, performer).await?; - - Ok(()) - } - - /// Handles the result from a processed batch. - /// - /// When a task is processed, the result of the process is pushed to its event list. The - /// `handle_batch_result` make sure that the new state is saved to the store. - /// The tasks are then removed from the processing queue. 
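`UpdateLoop::run` above is driven by a `tokio::sync::watch` channel: each registration notifies the loop, one batch is processed per wake-up, and the loop stops when the sender side is dropped (`changed()` returns `Err`). Below is a minimal sketch of that shutdown pattern (not taken from the patch); it assumes the usual tokio runtime features, and the processed count is only a lower bound because `watch` coalesces notifications.

```rust
// Illustrative sketch (not from the patch): the watch-channel pattern that
// drives `UpdateLoop::run` above. One batch is processed per wake-up and the
// loop exits once every sender has been dropped.
use tokio::sync::watch;

#[tokio::main]
async fn main() {
    let (notifier, mut receiver) = watch::channel(());

    let update_loop = tokio::spawn(async move {
        let mut batches_processed = 0usize;
        // `changed()` resolves to Err once the sender side is gone, which is
        // the shutdown condition used by the real loop.
        while receiver.changed().await.is_ok() {
            // Here the real loop asks the scheduler for a batch and hands it
            // to a `BatchHandler`.
            batches_processed += 1;
        }
        batches_processed
    });

    notifier.send(()).unwrap();
    notifier.send(()).unwrap();
    drop(notifier); // closing the channel stops the loop

    // `watch` coalesces notifications, so this is a lower bound.
    assert!(update_loop.await.unwrap() >= 1);
}
```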
- async fn handle_batch_result( - &self, - mut batch: Batch, - performer: Arc, - ) -> Result<()> { - let mut scheduler = self.scheduler.write().await; - let content = scheduler.update_tasks(batch.content).await?; - scheduler.finish(); - drop(scheduler); - batch.content = content; - performer.finish(&batch).await; - Ok(()) - } -} diff --git a/meilisearch-lib/src/update_file_store.rs b/meilisearch-lib/src/update_file_store.rs deleted file mode 100644 index cb4eadf4d..000000000 --- a/meilisearch-lib/src/update_file_store.rs +++ /dev/null @@ -1,258 +0,0 @@ -use std::fs::{create_dir_all, File}; -use std::io::{self, BufReader, BufWriter, Write}; -use std::ops::{Deref, DerefMut}; -use std::path::{Path, PathBuf}; - -use milli::documents::DocumentsBatchReader; -use serde_json::Map; -use tempfile::{NamedTempFile, PersistError}; -use uuid::Uuid; - -#[cfg(not(test))] -pub use store::UpdateFileStore; -#[cfg(test)] -pub use test::MockUpdateFileStore as UpdateFileStore; - -const UPDATE_FILES_PATH: &str = "updates/updates_files"; - -use crate::document_formats::read_ndjson; - -pub struct UpdateFile { - path: PathBuf, - file: NamedTempFile, -} - -#[derive(Debug, thiserror::Error)] -#[error("Error while persisting update to disk: {0}")] -pub struct UpdateFileStoreError(Box); - -pub type Result = std::result::Result; - -macro_rules! into_update_store_error { - ($($other:path),*) => { - $( - impl From<$other> for UpdateFileStoreError { - fn from(other: $other) -> Self { - Self(Box::new(other)) - } - } - )* - }; -} - -into_update_store_error!( - PersistError, - io::Error, - serde_json::Error, - milli::documents::Error, - milli::documents::DocumentsBatchCursorError -); - -impl UpdateFile { - pub fn persist(self) -> Result<()> { - self.file.persist(&self.path)?; - Ok(()) - } -} - -impl Deref for UpdateFile { - type Target = NamedTempFile; - - fn deref(&self) -> &Self::Target { - &self.file - } -} - -impl DerefMut for UpdateFile { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.file - } -} - -mod store { - use super::*; - - #[derive(Clone, Debug)] - pub struct UpdateFileStore { - path: PathBuf, - } - - impl UpdateFileStore { - pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH); - let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH); - - // No update files to load - if !src_update_files_path.exists() { - return Ok(()); - } - - create_dir_all(&dst_update_files_path)?; - - let entries = std::fs::read_dir(src_update_files_path)?; - - for entry in entries { - let entry = entry?; - let update_file = BufReader::new(File::open(entry.path())?); - let file_uuid = entry.file_name(); - let file_uuid = file_uuid - .to_str() - .ok_or_else(|| anyhow::anyhow!("invalid update file name"))?; - let dst_path = dst_update_files_path.join(file_uuid); - let dst_file = BufWriter::new(File::create(dst_path)?); - read_ndjson(update_file, dst_file)?; - } - - Ok(()) - } - - pub fn new(path: impl AsRef) -> Result { - let path = path.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&path)?; - Ok(Self { path }) - } - - /// Creates a new temporary update file. - /// A call to `persist` is needed to persist the file in the database. 
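`UpdateFile` above pairs a `NamedTempFile` with its final, uuid-named destination path, so a payload only becomes visible once `persist` succeeds. Below is a short sketch of that write-then-persist pattern with the `tempfile` crate (not taken from the patch; the file name used as the destination is a placeholder).

```rust
// Illustrative sketch (not from the patch): the write-then-persist pattern
// used by `UpdateFile` above. The payload is written to a `NamedTempFile` in
// the target directory and only appears under its final (uuid-named) path
// once `persist` succeeds. The file name below is a placeholder.
use std::io::Write;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dir = tempfile::tempdir()?;
    let final_path = dir.path().join("update-file-placeholder-uuid");

    let mut update_file = tempfile::NamedTempFile::new_in(dir.path())?;
    update_file.write_all(b"{\"id\": 1}\n")?;

    // Nothing is visible at the destination until the temp file is persisted.
    assert!(!final_path.exists());
    update_file.persist(&final_path)?;
    assert!(final_path.exists());

    Ok(())
}
```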
- pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { - let file = NamedTempFile::new_in(&self.path)?; - let uuid = Uuid::new_v4(); - let path = self.path.join(uuid.to_string()); - let update_file = UpdateFile { file, path }; - - Ok((uuid, update_file)) - } - - /// Returns the file corresponding to the requested uuid. - pub fn get_update(&self, uuid: Uuid) -> Result { - let path = self.path.join(uuid.to_string()); - let file = File::open(path)?; - Ok(file) - } - - /// Copies the content of the update file pointed to by `uuid` to the `dst` directory. - pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { - let src = self.path.join(uuid.to_string()); - let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&dst)?; - dst.push(uuid.to_string()); - std::fs::copy(src, dst)?; - Ok(()) - } - - /// Peforms a dump of the given update file uuid into the provided dump path. - pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef) -> Result<()> { - let uuid_string = uuid.to_string(); - let update_file_path = self.path.join(&uuid_string); - let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&dst)?; - dst.push(&uuid_string); - - let update_file = File::open(update_file_path)?; - let mut dst_file = NamedTempFile::new_in(&dump_path)?; - let (mut document_cursor, index) = - DocumentsBatchReader::from_reader(update_file)?.into_cursor_and_fields_index(); - - let mut document_buffer = Map::new(); - // TODO: we need to find a way to do this more efficiently. (create a custom serializer - // for jsonl for example...) - while let Some(document) = document_cursor.next_document()? { - for (field_id, content) in document.iter() { - if let Some(field_name) = index.name(field_id) { - let content = serde_json::from_slice(content)?; - document_buffer.insert(field_name.to_string(), content); - } - } - - serde_json::to_writer(&mut dst_file, &document_buffer)?; - dst_file.write_all(b"\n")?; - document_buffer.clear(); - } - - dst_file.persist(dst)?; - - Ok(()) - } - - pub fn get_size(&self, uuid: Uuid) -> Result { - Ok(self.get_update(uuid)?.metadata()?.len()) - } - - pub async fn delete(&self, uuid: Uuid) -> Result<()> { - let path = self.path.join(uuid.to_string()); - tokio::fs::remove_file(path).await?; - Ok(()) - } - } -} - -#[cfg(test)] -mod test { - use std::sync::Arc; - - use nelson::Mocker; - - use super::*; - - #[derive(Clone)] - pub enum MockUpdateFileStore { - Real(store::UpdateFileStore), - Mock(Arc), - } - - impl MockUpdateFileStore { - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(Arc::new(mocker)) - } - - pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - store::UpdateFileStore::load_dump(src, dst) - } - - pub fn new(path: impl AsRef) -> Result { - store::UpdateFileStore::new(path).map(Self::Real) - } - - pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { - match self { - MockUpdateFileStore::Real(s) => s.new_update(), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn get_update(&self, uuid: Uuid) -> Result { - match self { - MockUpdateFileStore::Real(s) => s.get_update(uuid), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { - match self { - MockUpdateFileStore::Real(s) => s.snapshot(uuid, dst), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef) -> Result<()> { - match self { - MockUpdateFileStore::Real(s) => s.dump(uuid, dump_path), - 
MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn get_size(&self, uuid: Uuid) -> Result { - match self { - MockUpdateFileStore::Real(s) => s.get_size(uuid), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub async fn delete(&self, uuid: Uuid) -> Result<()> { - match self { - MockUpdateFileStore::Real(s) => s.delete(uuid).await, - MockUpdateFileStore::Mock(mocker) => unsafe { mocker.get("delete").call(uuid) }, - } - } - } -} diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 65a7af035..62d0e6ebb 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -6,10 +6,40 @@ edition = "2021" [dependencies] actix-web = { version = "4.2.1", default-features = false } +anyhow = "1.0.65" +csv = "1.1.6" +either = { version = "1.6.1", features = ["serde"] } +enum-iterator = "1.1.3" +flate2 = "1.0.24" +fst = "0.4.7" +milli = { git = "https://github.com/meilisearch/milli.git", version = "0.35.0", default-features = false } proptest = { version = "1.0.0", optional = true } proptest-derive = { version = "0.3.0", optional = true } +roaring = { version = "0.10.0", features = ["serde"] } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0.85" +tar = "0.4.38" +thiserror = "1.0.30" +time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +tokio = "1.0" +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +insta = "1.19.1" +meili-snap = { path = "../meili-snap" } +proptest = "1.0.0" +proptest-derive = "0.3.0" [features] +# all specialized tokenizations +default = ["milli/default"] + +# chinese specialized tokenization +chinese = ["milli/chinese"] +# hebrew specialized tokenization +hebrew = ["milli/hebrew"] +# japanese specialized tokenization +japanese = ["milli/japanese"] +# thai specialized tokenization +thai = ["milli/thai"] test-traits = ["proptest", "proptest-derive"] diff --git a/meilisearch-lib/src/compression.rs b/meilisearch-types/src/compression.rs similarity index 89% rename from meilisearch-lib/src/compression.rs rename to meilisearch-types/src/compression.rs index c4747cb21..1d364b815 100644 --- a/meilisearch-lib/src/compression.rs +++ b/meilisearch-types/src/compression.rs @@ -2,7 +2,9 @@ use std::fs::{create_dir_all, File}; use std::io::Write; use std::path::Path; -use flate2::{read::GzDecoder, write::GzEncoder, Compression}; +use flate2::read::GzDecoder; +use flate2::write::GzEncoder; +use flate2::Compression; use tar::{Archive, Builder}; pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-types/src/document_formats.rs similarity index 86% rename from meilisearch-lib/src/document_formats.rs rename to meilisearch-types/src/document_formats.rs index cfc200019..42a37eb43 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-types/src/document_formats.rs @@ -3,13 +3,14 @@ use std::fmt::{self, Debug, Display}; use std::io::{self, BufReader, Read, Seek, Write}; use either::Either; -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; use milli::documents::{DocumentsBatchBuilder, Error}; use milli::Object; use serde::Deserialize; use serde_json::error::Category; +use crate::error::{Code, ErrorCode}; +use crate::internal_error; + type Result = std::result::Result; #[derive(Debug)] @@ -67,7 +68,7 @@ impl Display for DocumentFormatError { f, "The `{}` payload provided is malformed. 
`Couldn't serialize document value: {}`.", b, message - ) + ) } _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me), }, @@ -105,10 +106,7 @@ pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result { builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?; let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; + let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; Ok(count as usize) } @@ -119,9 +117,7 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result let reader = BufReader::new(input); for result in serde_json::Deserializer::from_reader(reader).into_iter() { - let object = result - .map_err(Error::Json) - .map_err(|e| (PayloadType::Ndjson, e))?; + let object = result.map_err(Error::Json).map_err(|e| (PayloadType::Ndjson, e))?; builder .append_json_object(&object) .map_err(Into::into) @@ -129,10 +125,7 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result } let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; + let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; Ok(count as usize) } @@ -149,9 +142,8 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { inner: Either, Object>, } - let content: ArrayOrSingleObject = serde_json::from_reader(reader) - .map_err(Error::Json) - .map_err(|e| (PayloadType::Json, e))?; + let content: ArrayOrSingleObject = + serde_json::from_reader(reader).map_err(Error::Json).map_err(|e| (PayloadType::Json, e))?; for object in content.inner.map_right(|o| vec![o]).into_inner() { builder @@ -161,10 +153,7 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { } let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; + let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; Ok(count as usize) } diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 147207aec..330a6f082 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -1,6 +1,9 @@ use std::fmt; -use actix_web::{self as aweb, http::StatusCode, HttpResponseBuilder}; +use actix_web::http::StatusCode; +use actix_web::{self as aweb, HttpResponseBuilder}; +use aweb::rt::task::JoinError; +use milli::heed::{Error as HeedError, MdbError}; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] @@ -8,10 +11,7 @@ use serde::{Deserialize, Serialize}; #[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))] pub struct ResponseError { #[serde(skip)] - #[cfg_attr( - feature = "test-traits", - proptest(strategy = "strategy::status_code_strategy()") - )] + #[cfg_attr(feature = "test-traits", proptest(strategy = "strategy::status_code_strategy()"))] code: StatusCode, message: String, #[serde(rename = "code")] @@ -60,9 +60,7 @@ where impl aweb::error::ResponseError for ResponseError { fn error_response(&self) -> aweb::HttpResponse { let json = serde_json::to_vec(self).unwrap(); - HttpResponseBuilder::new(self.status_code()) - .content_type("application/json") - .body(json) + HttpResponseBuilder::new(self.status_code()).content_type("application/json").body(json) } fn status_code(&self) -> StatusCode { @@ -122,6 +120,8 @@ pub enum 
Code { InvalidIndexUid, InvalidMinWordLengthForTypo, + DuplicateIndexFound, + // invalid state error InvalidState, MissingPrimaryKey, @@ -148,6 +148,8 @@ pub enum Code { NoSpaceLeftOnDevice, DumpNotFound, TaskNotFound, + TaskDeletionWithEmptyQuery, + TaskCancelationWithEmptyQuery, PayloadTooLarge, RetrieveDocument, SearchDocuments, @@ -155,6 +157,8 @@ pub enum Code { DumpAlreadyInProgress, DumpProcessFailed, + // Only used when importing a dump + UnretrievableErrorCode, InvalidContentType, MissingContentType, @@ -221,10 +225,9 @@ impl Code { BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST), BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), - DatabaseSizeLimitReached => ErrCode::internal( - "database_size_limit_reached", - StatusCode::INTERNAL_SERVER_ERROR, - ), + DatabaseSizeLimitReached => { + ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR) + } DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND), Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR), InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST), @@ -236,6 +239,12 @@ impl Code { ErrCode::authentication("missing_master_key", StatusCode::UNAUTHORIZED) } TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND), + TaskDeletionWithEmptyQuery => { + ErrCode::invalid("missing_filters", StatusCode::BAD_REQUEST) + } + TaskCancelationWithEmptyQuery => { + ErrCode::invalid("missing_filters", StatusCode::BAD_REQUEST) + } DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND), NoSpaceLeftOnDevice => { ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR) @@ -264,6 +273,10 @@ impl Code { ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) } MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST), + // This one can only happen when importing a dump and encountering an unknown code in the task queue. 
+ UnretrievableErrorCode => { + ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST) + } // error related to keys ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND), @@ -287,6 +300,9 @@ impl Code { InvalidMinWordLengthForTypo => { ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) } + DuplicateIndexFound => { + ErrCode::invalid("duplicate_index_found", StatusCode::BAD_REQUEST) + } } } @@ -320,26 +336,77 @@ struct ErrCode { impl ErrCode { fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode { - ErrCode { - status_code, - error_name, - error_type: ErrorType::AuthenticationError, - } + ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError } } fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode { - ErrCode { - status_code, - error_name, - error_type: ErrorType::InternalError, - } + ErrCode { status_code, error_name, error_type: ErrorType::InternalError } } fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode { - ErrCode { - status_code, - error_name, - error_type: ErrorType::InvalidRequestError, + ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError } + } +} + +impl ErrorCode for JoinError { + fn error_code(&self) -> Code { + Code::Internal + } +} + +impl ErrorCode for milli::Error { + fn error_code(&self) -> Code { + use milli::{Error, UserError}; + + match self { + Error::InternalError(_) => Code::Internal, + Error::IoError(_) => Code::Internal, + Error::UserError(ref error) => { + match error { + // TODO: wait for spec for new error codes. + UserError::SerdeJson(_) + | UserError::InvalidLmdbOpenOptions + | UserError::DocumentLimitReached + | UserError::AccessingSoftDeletedDocument { .. } + | UserError::UnknownInternalDocumentId { .. } => Code::Internal, + UserError::InvalidStoreFile => Code::InvalidStore, + UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, + UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached, + UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, + UserError::InvalidFilter(_) => Code::Filter, + UserError::MissingDocumentId { .. } => Code::MissingDocumentId, + UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { + Code::InvalidDocumentId + } + UserError::MissingPrimaryKey => Code::MissingPrimaryKey, + UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent, + UserError::SortRankingRuleMissing => Code::Sort, + UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, + UserError::InvalidSortableAttribute { .. } => Code::Sort, + UserError::CriterionError(_) => Code::InvalidRankingRule, + UserError::InvalidGeoField { .. 
} => Code::InvalidGeoField, + UserError::SortError(_) => Code::Sort, + UserError::InvalidMinTypoWordLenSetting(_, _) => { + Code::InvalidMinWordLengthForTypo + } + } + } + } + } +} + +impl ErrorCode for HeedError { + fn error_code(&self) -> Code { + match self { + HeedError::Mdb(MdbError::MapFull) => Code::DatabaseSizeLimitReached, + HeedError::Mdb(MdbError::Invalid) => Code::InvalidStore, + HeedError::Io(_) + | HeedError::Mdb(_) + | HeedError::Encoding + | HeedError::Decoding + | HeedError::InvalidDatabaseTyping + | HeedError::DatabaseClosing + | HeedError::BadOpenOptions => Code::Internal, } } } diff --git a/meilisearch-types/src/index_uid.rs b/meilisearch-types/src/index_uid.rs index a8cb726af..00e94c5b9 100644 --- a/meilisearch-types/src/index_uid.rs +++ b/meilisearch-types/src/index_uid.rs @@ -1,8 +1,11 @@ -use serde::{Deserialize, Serialize}; use std::error::Error; use std::fmt; use std::str::FromStr; +use serde::{Deserialize, Serialize}; + +use crate::error::{Code, ErrorCode}; + /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] @@ -38,9 +41,7 @@ impl TryFrom for IndexUid { type Error = IndexUidFormatError; fn try_from(uid: String) -> Result { - if !uid - .chars() - .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') + if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') || uid.is_empty() || uid.len() > 400 { @@ -83,3 +84,9 @@ impl fmt::Display for IndexUidFormatError { } impl Error for IndexUidFormatError {} + +impl ErrorCode for IndexUidFormatError { + fn error_code(&self) -> Code { + Code::InvalidIndexUid + } +} diff --git a/meilisearch-types/src/keys.rs b/meilisearch-types/src/keys.rs new file mode 100644 index 000000000..cb0ec807e --- /dev/null +++ b/meilisearch-types/src/keys.rs @@ -0,0 +1,373 @@ +use std::hash::Hash; + +use enum_iterator::Sequence; +use serde::{Deserialize, Serialize}; +use serde_json::{from_value, Value}; +use time::format_description::well_known::Rfc3339; +use time::macros::{format_description, time}; +use time::{Date, OffsetDateTime, PrimitiveDateTime}; +use uuid::Uuid; + +use crate::error::{Code, ErrorCode}; +use crate::index_uid::IndexUid; +use crate::star_or::StarOr; + +type Result = std::result::Result; + +pub type KeyId = Uuid; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct Key { + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + pub uid: KeyId, + pub actions: Vec, + pub indexes: Vec>, + #[serde(with = "time::serde::rfc3339::option")] + pub expires_at: Option, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +impl Key { + pub fn create_from_value(value: Value) -> Result { + let name = match value.get("name") { + None | Some(Value::Null) => None, + Some(des) => from_value(des.clone()) + .map(Some) + .map_err(|_| Error::InvalidApiKeyName(des.clone()))?, + }; + + let description = match value.get("description") { + None | Some(Value::Null) => None, + Some(des) => from_value(des.clone()) + .map(Some) + .map_err(|_| Error::InvalidApiKeyDescription(des.clone()))?, + }; + + let uid = value.get("uid").map_or_else( + || Ok(Uuid::new_v4()), + |uid| from_value(uid.clone()).map_err(|_| Error::InvalidApiKeyUid(uid.clone())), + )?; + + let actions = value + .get("actions") + 
.map(|act| { + from_value(act.clone()).map_err(|_| Error::InvalidApiKeyActions(act.clone())) + }) + .ok_or(Error::MissingParameter("actions"))??; + + let indexes = value + .get("indexes") + .map(|ind| { + from_value(ind.clone()).map_err(|_| Error::InvalidApiKeyIndexes(ind.clone())) + }) + .ok_or(Error::MissingParameter("indexes"))??; + + let expires_at = value + .get("expiresAt") + .map(parse_expiration_date) + .ok_or(Error::MissingParameter("expiresAt"))??; + + let created_at = OffsetDateTime::now_utc(); + let updated_at = created_at; + + Ok(Self { name, description, uid, actions, indexes, expires_at, created_at, updated_at }) + } + + pub fn update_from_value(&mut self, value: Value) -> Result<()> { + if let Some(des) = value.get("description") { + let des = + from_value(des.clone()).map_err(|_| Error::InvalidApiKeyDescription(des.clone())); + self.description = des?; + } + + if let Some(des) = value.get("name") { + let des = from_value(des.clone()).map_err(|_| Error::InvalidApiKeyName(des.clone())); + self.name = des?; + } + + if value.get("uid").is_some() { + return Err(Error::ImmutableField("uid".to_string())); + } + + if value.get("actions").is_some() { + return Err(Error::ImmutableField("actions".to_string())); + } + + if value.get("indexes").is_some() { + return Err(Error::ImmutableField("indexes".to_string())); + } + + if value.get("expiresAt").is_some() { + return Err(Error::ImmutableField("expiresAt".to_string())); + } + + if value.get("createdAt").is_some() { + return Err(Error::ImmutableField("createdAt".to_string())); + } + + if value.get("updatedAt").is_some() { + return Err(Error::ImmutableField("updatedAt".to_string())); + } + + self.updated_at = OffsetDateTime::now_utc(); + + Ok(()) + } + + pub fn default_admin() -> Self { + let now = OffsetDateTime::now_utc(); + let uid = Uuid::new_v4(); + Self { + name: Some("Default Admin API Key".to_string()), + description: Some("Use it for anything that is not a search operation. Caution! Do not expose it on a public frontend".to_string()), + uid, + actions: vec![Action::All], + indexes: vec![StarOr::Star], + expires_at: None, + created_at: now, + updated_at: now, + } + } + + pub fn default_search() -> Self { + let now = OffsetDateTime::now_utc(); + let uid = Uuid::new_v4(); + Self { + name: Some("Default Search API Key".to_string()), + description: Some("Use it to search from the frontend".to_string()), + uid, + actions: vec![Action::Search], + indexes: vec![StarOr::Star], + expires_at: None, + created_at: now, + updated_at: now, + } + } +} + +fn parse_expiration_date(value: &Value) -> Result> { + match value { + Value::String(string) => OffsetDateTime::parse(string, &Rfc3339) + .or_else(|_| { + PrimitiveDateTime::parse( + string, + format_description!( + "[year repr:full base:calendar]-[month repr:numerical]-[day]T[hour]:[minute]:[second]" + ), + ).map(|datetime| datetime.assume_utc()) + }) + .or_else(|_| { + PrimitiveDateTime::parse( + string, + format_description!( + "[year repr:full base:calendar]-[month repr:numerical]-[day] [hour]:[minute]:[second]" + ), + ).map(|datetime| datetime.assume_utc()) + }) + .or_else(|_| { + Date::parse(string, format_description!( + "[year repr:full base:calendar]-[month repr:numerical]-[day]" + )).map(|date| PrimitiveDateTime::new(date, time!(00:00)).assume_utc()) + }) + .map_err(|_| Error::InvalidApiKeyExpiresAt(value.clone())) + // check if the key is already expired. 
+ .and_then(|d| { + if d > OffsetDateTime::now_utc() { + Ok(d) + } else { + Err(Error::InvalidApiKeyExpiresAt(value.clone())) + } + }) + .map(Option::Some), + Value::Null => Ok(None), + _otherwise => Err(Error::InvalidApiKeyExpiresAt(value.clone())), + } +} + +#[derive(Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence)] +#[repr(u8)] +pub enum Action { + #[serde(rename = "*")] + All = 0, + #[serde(rename = "search")] + Search, + #[serde(rename = "documents.*")] + DocumentsAll, + #[serde(rename = "documents.add")] + DocumentsAdd, + #[serde(rename = "documents.get")] + DocumentsGet, + #[serde(rename = "documents.delete")] + DocumentsDelete, + #[serde(rename = "indexes.*")] + IndexesAll, + #[serde(rename = "indexes.create")] + IndexesAdd, + #[serde(rename = "indexes.get")] + IndexesGet, + #[serde(rename = "indexes.update")] + IndexesUpdate, + #[serde(rename = "indexes.delete")] + IndexesDelete, + #[serde(rename = "indexes.swap")] + IndexesSwap, + #[serde(rename = "tasks.*")] + TasksAll, + #[serde(rename = "tasks.cancel")] + TasksCancel, + #[serde(rename = "tasks.delete")] + TasksDelete, + #[serde(rename = "tasks.get")] + TasksGet, + #[serde(rename = "settings.*")] + SettingsAll, + #[serde(rename = "settings.get")] + SettingsGet, + #[serde(rename = "settings.update")] + SettingsUpdate, + #[serde(rename = "stats.*")] + StatsAll, + #[serde(rename = "stats.get")] + StatsGet, + #[serde(rename = "metrics.*")] + MetricsAll, + #[serde(rename = "metrics.get")] + MetricsGet, + #[serde(rename = "dumps.*")] + DumpsAll, + #[serde(rename = "dumps.create")] + DumpsCreate, + #[serde(rename = "version")] + Version, + #[serde(rename = "keys.create")] + KeysAdd, + #[serde(rename = "keys.get")] + KeysGet, + #[serde(rename = "keys.update")] + KeysUpdate, + #[serde(rename = "keys.delete")] + KeysDelete, +} + +impl Action { + pub const fn from_repr(repr: u8) -> Option { + use actions::*; + match repr { + ALL => Some(Self::All), + SEARCH => Some(Self::Search), + DOCUMENTS_ALL => Some(Self::DocumentsAll), + DOCUMENTS_ADD => Some(Self::DocumentsAdd), + DOCUMENTS_GET => Some(Self::DocumentsGet), + DOCUMENTS_DELETE => Some(Self::DocumentsDelete), + INDEXES_ALL => Some(Self::IndexesAll), + INDEXES_CREATE => Some(Self::IndexesAdd), + INDEXES_GET => Some(Self::IndexesGet), + INDEXES_UPDATE => Some(Self::IndexesUpdate), + INDEXES_DELETE => Some(Self::IndexesDelete), + INDEXES_SWAP => Some(Self::IndexesSwap), + TASKS_ALL => Some(Self::TasksAll), + TASKS_CANCEL => Some(Self::TasksCancel), + TASKS_DELETE => Some(Self::TasksDelete), + TASKS_GET => Some(Self::TasksGet), + SETTINGS_ALL => Some(Self::SettingsAll), + SETTINGS_GET => Some(Self::SettingsGet), + SETTINGS_UPDATE => Some(Self::SettingsUpdate), + STATS_ALL => Some(Self::StatsAll), + STATS_GET => Some(Self::StatsGet), + METRICS_ALL => Some(Self::MetricsAll), + METRICS_GET => Some(Self::MetricsGet), + DUMPS_ALL => Some(Self::DumpsAll), + DUMPS_CREATE => Some(Self::DumpsCreate), + VERSION => Some(Self::Version), + KEYS_CREATE => Some(Self::KeysAdd), + KEYS_GET => Some(Self::KeysGet), + KEYS_UPDATE => Some(Self::KeysUpdate), + KEYS_DELETE => Some(Self::KeysDelete), + _otherwise => None, + } + } + + pub const fn repr(&self) -> u8 { + *self as u8 + } +} + +pub mod actions { + use super::Action::*; + + pub(crate) const ALL: u8 = All.repr(); + pub const SEARCH: u8 = Search.repr(); + pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr(); + pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr(); + pub const DOCUMENTS_GET: u8 = DocumentsGet.repr(); + pub const 
DOCUMENTS_DELETE: u8 = DocumentsDelete.repr(); + pub const INDEXES_ALL: u8 = IndexesAll.repr(); + pub const INDEXES_CREATE: u8 = IndexesAdd.repr(); + pub const INDEXES_GET: u8 = IndexesGet.repr(); + pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr(); + pub const INDEXES_DELETE: u8 = IndexesDelete.repr(); + pub const INDEXES_SWAP: u8 = IndexesSwap.repr(); + pub const TASKS_ALL: u8 = TasksAll.repr(); + pub const TASKS_CANCEL: u8 = TasksCancel.repr(); + pub const TASKS_DELETE: u8 = TasksDelete.repr(); + pub const TASKS_GET: u8 = TasksGet.repr(); + pub const SETTINGS_ALL: u8 = SettingsAll.repr(); + pub const SETTINGS_GET: u8 = SettingsGet.repr(); + pub const SETTINGS_UPDATE: u8 = SettingsUpdate.repr(); + pub const STATS_ALL: u8 = StatsAll.repr(); + pub const STATS_GET: u8 = StatsGet.repr(); + pub const METRICS_ALL: u8 = MetricsAll.repr(); + pub const METRICS_GET: u8 = MetricsGet.repr(); + pub const DUMPS_ALL: u8 = DumpsAll.repr(); + pub const DUMPS_CREATE: u8 = DumpsCreate.repr(); + pub const VERSION: u8 = Version.repr(); + pub const KEYS_CREATE: u8 = KeysAdd.repr(); + pub const KEYS_GET: u8 = KeysGet.repr(); + pub const KEYS_UPDATE: u8 = KeysUpdate.repr(); + pub const KEYS_DELETE: u8 = KeysDelete.repr(); +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("`{0}` field is mandatory.")] + MissingParameter(&'static str), + #[error("`actions` field value `{0}` is invalid. It should be an array of string representing action names.")] + InvalidApiKeyActions(Value), + #[error( + "`{0}` is not a valid index uid. It should be an array of string representing index names." + )] + InvalidApiKeyIndexes(Value), + #[error("`expiresAt` field value `{0}` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")] + InvalidApiKeyExpiresAt(Value), + #[error("`description` field value `{0}` is invalid. It should be a string or specified as a null value.")] + InvalidApiKeyDescription(Value), + #[error( + "`name` field value `{0}` is invalid. It should be a string or specified as a null value." + )] + InvalidApiKeyName(Value), + #[error("`uid` field value `{0}` is invalid. 
It should be a valid UUID v4 string or omitted.")] + InvalidApiKeyUid(Value), + #[error("The `{0}` field cannot be modified for the given resource.")] + ImmutableField(String), +} + +impl ErrorCode for Error { + fn error_code(&self) -> Code { + match self { + Self::MissingParameter(_) => Code::MissingParameter, + Self::InvalidApiKeyActions(_) => Code::InvalidApiKeyActions, + Self::InvalidApiKeyIndexes(_) => Code::InvalidApiKeyIndexes, + Self::InvalidApiKeyExpiresAt(_) => Code::InvalidApiKeyExpiresAt, + Self::InvalidApiKeyDescription(_) => Code::InvalidApiKeyDescription, + Self::InvalidApiKeyName(_) => Code::InvalidApiKeyName, + Self::InvalidApiKeyUid(_) => Code::InvalidApiKeyUid, + Self::ImmutableField(_) => Code::ImmutableField, + } + } +} diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs index 2d685c2dc..c7f7ca7f5 100644 --- a/meilisearch-types/src/lib.rs +++ b/meilisearch-types/src/lib.rs @@ -1,3 +1,17 @@ +pub mod compression; +pub mod document_formats; pub mod error; pub mod index_uid; +pub mod keys; +pub mod settings; pub mod star_or; +pub mod tasks; +pub mod versioning; + +pub use milli; +pub use milli::{heed, Index}; +use uuid::Uuid; +pub use versioning::VERSION_FILE_NAME; + +pub type Document = serde_json::Map; +pub type InstanceUid = Uuid; diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-types/src/settings.rs similarity index 78% rename from meilisearch-lib/src/index/updates.rs rename to meilisearch-types/src/settings.rs index 7058d65c3..3369cfdfb 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-types/src/settings.rs @@ -2,18 +2,14 @@ use std::collections::{BTreeMap, BTreeSet}; use std::marker::PhantomData; use std::num::NonZeroUsize; -use log::{debug, info, trace}; -use milli::documents::DocumentsBatchReader; -use milli::update::{ - DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, - Setting, -}; +use fst::IntoStreamer; +use milli::update::Setting; +use milli::{Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; -use uuid::Uuid; -use super::error::{IndexError, Result}; -use super::index::{Index, IndexMeta}; -use crate::update_file_store::UpdateFileStore; +/// The maximimum number of results that the engine +/// will be able to return in one search call. +pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; fn serialize_with_wildcard( field: &Setting>, @@ -246,126 +242,6 @@ pub struct Facets { pub min_level_size: Option, } -impl Index { - fn update_primary_key_txn<'a, 'b>( - &'a self, - txn: &mut milli::heed::RwTxn<'a, 'b>, - primary_key: String, - ) -> Result { - let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref()); - builder.set_primary_key(primary_key); - builder.execute(|_| ())?; - let meta = IndexMeta::new_txn(self, txn)?; - - Ok(meta) - } - - pub fn update_primary_key(&self, primary_key: String) -> Result { - let mut txn = self.write_txn()?; - let res = self.update_primary_key_txn(&mut txn, primary_key)?; - txn.commit()?; - - Ok(res) - } - - /// Deletes `ids` from the index, and returns how many documents were deleted. 
- pub fn delete_documents(&self, ids: &[String]) -> Result { - let mut txn = self.write_txn()?; - let mut builder = milli::update::DeleteDocuments::new(&mut txn, self)?; - - // We ignore unexisting document ids - ids.iter().for_each(|id| { - builder.delete_external_id(id); - }); - - let deleted = builder.execute()?; - - txn.commit()?; - - Ok(deleted) - } - - pub fn clear_documents(&self) -> Result<()> { - let mut txn = self.write_txn()?; - milli::update::ClearDocuments::new(&mut txn, self).execute()?; - txn.commit()?; - - Ok(()) - } - - pub fn update_documents( - &self, - method: IndexDocumentsMethod, - primary_key: Option, - file_store: UpdateFileStore, - contents: impl IntoIterator, - ) -> Result>> { - trace!("performing document addition"); - let mut txn = self.write_txn()?; - - if let Some(primary_key) = primary_key { - if self.primary_key(&txn)?.is_none() { - self.update_primary_key_txn(&mut txn, primary_key)?; - } - } - - let config = IndexDocumentsConfig { - update_method: method, - ..Default::default() - }; - - let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step); - let mut builder = milli::update::IndexDocuments::new( - &mut txn, - self, - self.indexer_config.as_ref(), - config, - indexing_callback, - )?; - - let mut results = Vec::new(); - for content_uuid in contents.into_iter() { - let content_file = file_store.get_update(content_uuid)?; - let reader = DocumentsBatchReader::from_reader(content_file)?; - let (new_builder, user_result) = builder.add_documents(reader)?; - builder = new_builder; - - let user_result = match user_result { - Ok(count) => Ok(DocumentAdditionResult { - indexed_documents: count, - number_of_documents: count, - }), - Err(e) => Err(IndexError::from(e)), - }; - - results.push(user_result); - } - - if results.iter().any(Result::is_ok) { - let addition = builder.execute()?; - txn.commit()?; - info!("document addition done: {:?}", addition); - } - - Ok(results) - } - - pub fn update_settings(&self, settings: &Settings) -> Result<()> { - // We must use the write transaction of the update here. - let mut txn = self.write_txn()?; - let mut builder = - milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref()); - - apply_settings_to_builder(settings, &mut builder); - - builder.execute(|indexing_step| debug!("update: {:?}", indexing_step))?; - - txn.commit()?; - - Ok(()) - } -} - pub fn apply_settings_to_builder( settings: &Settings, builder: &mut milli::update::Settings, @@ -496,6 +372,96 @@ pub fn apply_settings_to_builder( } } +pub fn settings( + index: &Index, + rtxn: &crate::heed::RoTxn, +) -> Result, milli::Error> { + let displayed_attributes = + index.displayed_fields(rtxn)?.map(|fields| fields.into_iter().map(String::from).collect()); + + let searchable_attributes = index + .user_defined_searchable_fields(rtxn)? + .map(|fields| fields.into_iter().map(String::from).collect()); + + let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); + + let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); + + let criteria = index.criteria(rtxn)?.into_iter().map(|c| c.to_string()).collect(); + + let stop_words = index + .stop_words(rtxn)? + .map(|stop_words| -> Result, milli::Error> { + Ok(stop_words.stream().into_strs()?.into_iter().collect()) + }) + .transpose()? + .unwrap_or_default(); + let distinct_field = index.distinct_field(rtxn)?.map(String::from); + + // in milli each word in the synonyms map were split on their separator. 
Since we lost + // this information we are going to put space between words. + let synonyms = index + .synonyms(rtxn)? + .iter() + .map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect())) + .collect(); + + let min_typo_word_len = MinWordSizeTyposSetting { + one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?), + two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?), + }; + + let disabled_words = match index.exact_words(rtxn)? { + Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), + None => BTreeSet::new(), + }; + + let disabled_attributes = index.exact_attributes(rtxn)?.into_iter().map(String::from).collect(); + + let typo_tolerance = TypoSettings { + enabled: Setting::Set(index.authorize_typos(rtxn)?), + min_word_size_for_typos: Setting::Set(min_typo_word_len), + disable_on_words: Setting::Set(disabled_words), + disable_on_attributes: Setting::Set(disabled_attributes), + }; + + let faceting = FacetingSettings { + max_values_per_facet: Setting::Set( + index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET), + ), + }; + + let pagination = PaginationSettings { + max_total_hits: Setting::Set( + index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), + ), + }; + + Ok(Settings { + displayed_attributes: match displayed_attributes { + Some(attrs) => Setting::Set(attrs), + None => Setting::Reset, + }, + searchable_attributes: match searchable_attributes { + Some(attrs) => Setting::Set(attrs), + None => Setting::Reset, + }, + filterable_attributes: Setting::Set(filterable_attributes), + sortable_attributes: Setting::Set(sortable_attributes), + ranking_rules: Setting::Set(criteria), + stop_words: Setting::Set(stop_words), + distinct_attribute: match distinct_field { + Some(field) => Setting::Set(field), + None => Setting::Reset, + }, + synonyms: Setting::Set(synonyms), + typo_tolerance: Setting::Set(typo_tolerance), + faceting: Setting::Set(faceting), + pagination: Setting::Set(pagination), + _kind: PhantomData, + }) +} + #[cfg(test)] pub(crate) mod test { use proptest::prelude::*; @@ -503,11 +469,7 @@ pub(crate) mod test { use super::*; pub(super) fn setting_strategy() -> impl Strategy> { - prop_oneof![ - Just(Setting::NotSet), - Just(Setting::Reset), - any::().prop_map(Setting::Set) - ] + prop_oneof![Just(Setting::NotSet), Just(Setting::Reset), any::().prop_map(Setting::Set)] } #[test] @@ -530,10 +492,7 @@ pub(crate) mod test { let checked = settings.clone().check(); assert_eq!(settings.displayed_attributes, checked.displayed_attributes); - assert_eq!( - settings.searchable_attributes, - checked.searchable_attributes - ); + assert_eq!(settings.searchable_attributes, checked.searchable_attributes); // test wildcard // test no changes diff --git a/meilisearch-types/src/star_or.rs b/meilisearch-types/src/star_or.rs index 02c9c3524..e89ba6b0e 100644 --- a/meilisearch-types/src/star_or.rs +++ b/meilisearch-types/src/star_or.rs @@ -1,13 +1,14 @@ -use serde::de::Visitor; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::{Display, Formatter}; use std::marker::PhantomData; use std::ops::Deref; use std::str::FromStr; +use serde::de::Visitor; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + /// A type that tries to match either a star (*) or /// any other thing that implements `FromStr`. 
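As the doc comment above says, `StarOr<T>` treats the literal string `"*"` as the `Star` variant and parses anything else with `T`'s `FromStr` implementation; the `star_or_serde_roundtrip` test further down exercises the serde side of the same rule. Below is a toy `FromStr`-only model of that rule (not taken from the patch, and without the serde wiring of the real type).

```rust
// Illustrative sketch (not from the patch): the "star or anything else" rule
// described in the doc comment above, modelled with plain `FromStr` on a toy
// enum instead of the serde machinery of `StarOr<T>`.
use std::str::FromStr;

#[derive(Debug, PartialEq)]
enum StarOrIndex {
    Star,
    Other(String),
}

impl FromStr for StarOrIndex {
    type Err = std::convert::Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(if s == "*" { StarOrIndex::Star } else { StarOrIndex::Other(s.to_string()) })
    }
}

fn main() {
    assert_eq!("*".parse::<StarOrIndex>().unwrap(), StarOrIndex::Star);
    assert_eq!(
        "movies".parse::<StarOrIndex>().unwrap(),
        StarOrIndex::Other("movies".to_string())
    );
}
```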
-#[derive(Debug)] +#[derive(Debug, Clone)] pub enum StarOr { Star, Other(T), @@ -121,9 +122,10 @@ where #[cfg(test)] mod tests { - use super::*; use serde_json::{json, Value}; + use super::*; + #[test] fn star_or_serde_roundtrip() { fn roundtrip(content: Value, expected: StarOr) { diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs new file mode 100644 index 000000000..a5c990a2f --- /dev/null +++ b/meilisearch-types/src/tasks.rs @@ -0,0 +1,512 @@ +use std::collections::HashSet; +use std::fmt::{Display, Write}; +use std::str::FromStr; + +use enum_iterator::Sequence; +use milli::update::IndexDocumentsMethod; +use roaring::RoaringBitmap; +use serde::{Deserialize, Serialize, Serializer}; +use time::{Duration, OffsetDateTime}; +use uuid::Uuid; + +use crate::error::{Code, ResponseError}; +use crate::keys::Key; +use crate::settings::{Settings, Unchecked}; +use crate::InstanceUid; + +pub type TaskId = u32; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Task { + pub uid: TaskId, + + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339::option")] + pub started_at: Option, + #[serde(with = "time::serde::rfc3339::option")] + pub finished_at: Option, + + pub error: Option, + pub canceled_by: Option, + pub details: Option
, + + pub status: Status, + pub kind: KindWithContent, +} + +impl Task { + pub fn index_uid(&self) -> Option<&str> { + use KindWithContent::*; + + match &self.kind { + DumpCreation { .. } + | SnapshotCreation + | TaskCancelation { .. } + | TaskDeletion { .. } + | IndexSwap { .. } => None, + DocumentAdditionOrUpdate { index_uid, .. } + | DocumentDeletion { index_uid, .. } + | DocumentClear { index_uid } + | SettingsUpdate { index_uid, .. } + | IndexCreation { index_uid, .. } + | IndexUpdate { index_uid, .. } + | IndexDeletion { index_uid } => Some(index_uid), + } + } + + /// Return the list of indexes updated by this tasks. + pub fn indexes(&self) -> Vec<&str> { + self.kind.indexes() + } + + /// Return the content-uuid if there is one + pub fn content_uuid(&self) -> Option { + match self.kind { + KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file), + KindWithContent::DocumentDeletion { .. } + | KindWithContent::DocumentClear { .. } + | KindWithContent::SettingsUpdate { .. } + | KindWithContent::IndexDeletion { .. } + | KindWithContent::IndexCreation { .. } + | KindWithContent::IndexUpdate { .. } + | KindWithContent::IndexSwap { .. } + | KindWithContent::TaskCancelation { .. } + | KindWithContent::TaskDeletion { .. } + | KindWithContent::DumpCreation { .. } + | KindWithContent::SnapshotCreation => None, + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub enum KindWithContent { + DocumentAdditionOrUpdate { + index_uid: String, + primary_key: Option, + method: IndexDocumentsMethod, + content_file: Uuid, + documents_count: u64, + allow_index_creation: bool, + }, + DocumentDeletion { + index_uid: String, + documents_ids: Vec, + }, + DocumentClear { + index_uid: String, + }, + SettingsUpdate { + index_uid: String, + new_settings: Box>, + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion { + index_uid: String, + }, + IndexCreation { + index_uid: String, + primary_key: Option, + }, + IndexUpdate { + index_uid: String, + primary_key: Option, + }, + IndexSwap { + swaps: Vec, + }, + TaskCancelation { + query: String, + tasks: RoaringBitmap, + }, + TaskDeletion { + query: String, + tasks: RoaringBitmap, + }, + DumpCreation { + dump_uid: String, + keys: Vec, + instance_uid: Option, + }, + SnapshotCreation, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IndexSwap { + pub indexes: (String, String), +} + +impl KindWithContent { + pub fn as_kind(&self) -> Kind { + match self { + KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate, + KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion, + KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion, + KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate, + KindWithContent::IndexCreation { .. } => Kind::IndexCreation, + KindWithContent::IndexDeletion { .. } => Kind::IndexDeletion, + KindWithContent::IndexUpdate { .. } => Kind::IndexUpdate, + KindWithContent::IndexSwap { .. } => Kind::IndexSwap, + KindWithContent::TaskCancelation { .. } => Kind::TaskCancelation, + KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion, + KindWithContent::DumpCreation { .. } => Kind::DumpCreation, + KindWithContent::SnapshotCreation => Kind::SnapshotCreation, + } + } + + pub fn indexes(&self) -> Vec<&str> { + use KindWithContent::*; + + match self { + DumpCreation { .. } + | SnapshotCreation + | TaskCancelation { .. 
} + | TaskDeletion { .. } => vec![], + DocumentAdditionOrUpdate { index_uid, .. } + | DocumentDeletion { index_uid, .. } + | DocumentClear { index_uid } + | SettingsUpdate { index_uid, .. } + | IndexCreation { index_uid, .. } + | IndexUpdate { index_uid, .. } + | IndexDeletion { index_uid } => vec![index_uid], + IndexSwap { swaps } => { + let mut indexes = HashSet::<&str>::default(); + for swap in swaps { + indexes.insert(swap.indexes.0.as_str()); + indexes.insert(swap.indexes.1.as_str()); + } + indexes.into_iter().collect() + } + } + } + + /// Returns the default `Details` that correspond to this `KindWithContent`, + /// `None` if it cannot be generated. + pub fn default_details(&self) -> Option
{ + match self { + KindWithContent::DocumentAdditionOrUpdate { documents_count, .. } => { + Some(Details::DocumentAdditionOrUpdate { + received_documents: *documents_count, + indexed_documents: None, + }) + } + KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { + Some(Details::DocumentDeletion { + matched_documents: documents_ids.len(), + deleted_documents: None, + }) + } + KindWithContent::DocumentClear { .. } => { + Some(Details::ClearAll { deleted_documents: None }) + } + KindWithContent::SettingsUpdate { new_settings, .. } => { + Some(Details::SettingsUpdate { settings: new_settings.clone() }) + } + KindWithContent::IndexDeletion { .. } => None, + KindWithContent::IndexCreation { primary_key, .. } + | KindWithContent::IndexUpdate { primary_key, .. } => { + Some(Details::IndexInfo { primary_key: primary_key.clone() }) + } + KindWithContent::IndexSwap { swaps } => { + Some(Details::IndexSwap { swaps: swaps.clone() }) + } + KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation { + matched_tasks: tasks.len(), + canceled_tasks: None, + original_query: query.clone(), + }), + KindWithContent::TaskDeletion { query, tasks } => Some(Details::TaskDeletion { + matched_tasks: tasks.len(), + deleted_tasks: None, + original_query: query.clone(), + }), + KindWithContent::DumpCreation { .. } => None, + KindWithContent::SnapshotCreation => None, + } + } + + pub fn default_finished_details(&self) -> Option
{ + match self { + KindWithContent::DocumentAdditionOrUpdate { documents_count, .. } => { + Some(Details::DocumentAdditionOrUpdate { + received_documents: *documents_count, + indexed_documents: Some(0), + }) + } + KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { + Some(Details::DocumentDeletion { + matched_documents: documents_ids.len(), + deleted_documents: Some(0), + }) + } + KindWithContent::DocumentClear { .. } => { + Some(Details::ClearAll { deleted_documents: None }) + } + KindWithContent::SettingsUpdate { new_settings, .. } => { + Some(Details::SettingsUpdate { settings: new_settings.clone() }) + } + KindWithContent::IndexDeletion { .. } => None, + KindWithContent::IndexCreation { primary_key, .. } + | KindWithContent::IndexUpdate { primary_key, .. } => { + Some(Details::IndexInfo { primary_key: primary_key.clone() }) + } + KindWithContent::IndexSwap { .. } => { + todo!() + } + KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation { + matched_tasks: tasks.len(), + canceled_tasks: Some(0), + original_query: query.clone(), + }), + KindWithContent::TaskDeletion { query, tasks } => Some(Details::TaskDeletion { + matched_tasks: tasks.len(), + deleted_tasks: Some(0), + original_query: query.clone(), + }), + KindWithContent::DumpCreation { .. } => None, + KindWithContent::SnapshotCreation => None, + } + } +} + +impl From<&KindWithContent> for Option
{ + fn from(kind: &KindWithContent) -> Self { + match kind { + KindWithContent::DocumentAdditionOrUpdate { documents_count, .. } => { + Some(Details::DocumentAdditionOrUpdate { + received_documents: *documents_count, + indexed_documents: None, + }) + } + KindWithContent::DocumentDeletion { .. } => None, + KindWithContent::DocumentClear { .. } => None, + KindWithContent::SettingsUpdate { new_settings, .. } => { + Some(Details::SettingsUpdate { settings: new_settings.clone() }) + } + KindWithContent::IndexDeletion { .. } => None, + KindWithContent::IndexCreation { primary_key, .. } => { + Some(Details::IndexInfo { primary_key: primary_key.clone() }) + } + KindWithContent::IndexUpdate { primary_key, .. } => { + Some(Details::IndexInfo { primary_key: primary_key.clone() }) + } + KindWithContent::IndexSwap { .. } => None, + KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation { + matched_tasks: tasks.len(), + canceled_tasks: None, + original_query: query.clone(), + }), + KindWithContent::TaskDeletion { query, tasks } => Some(Details::TaskDeletion { + matched_tasks: tasks.len(), + deleted_tasks: None, + original_query: query.clone(), + }), + KindWithContent::DumpCreation { dump_uid, .. } => { + Some(Details::Dump { dump_uid: dump_uid.clone() }) + } + KindWithContent::SnapshotCreation => None, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence)] +#[serde(rename_all = "camelCase")] +pub enum Status { + Enqueued, + Processing, + Succeeded, + Failed, + Canceled, +} + +impl Display for Status { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Status::Enqueued => write!(f, "enqueued"), + Status::Processing => write!(f, "processing"), + Status::Succeeded => write!(f, "succeeded"), + Status::Failed => write!(f, "failed"), + Status::Canceled => write!(f, "canceled"), + } + } +} + +impl FromStr for Status { + type Err = ResponseError; + + fn from_str(status: &str) -> Result { + if status.eq_ignore_ascii_case("enqueued") { + Ok(Status::Enqueued) + } else if status.eq_ignore_ascii_case("processing") { + Ok(Status::Processing) + } else if status.eq_ignore_ascii_case("succeeded") { + Ok(Status::Succeeded) + } else if status.eq_ignore_ascii_case("failed") { + Ok(Status::Failed) + } else if status.eq_ignore_ascii_case("canceled") { + Ok(Status::Canceled) + } else { + Err(ResponseError::from_msg( + format!( + "`{}` is not a status. 
Available status are {}.", + status, + enum_iterator::all::() + .map(|s| format!("`{s}`")) + .collect::>() + .join(", ") + ), + Code::BadRequest, + )) + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence)] +#[serde(rename_all = "camelCase")] +pub enum Kind { + DocumentAdditionOrUpdate, + DocumentDeletion, + SettingsUpdate, + IndexCreation, + IndexDeletion, + IndexUpdate, + IndexSwap, + TaskCancelation, + TaskDeletion, + DumpCreation, + SnapshotCreation, +} + +impl FromStr for Kind { + type Err = ResponseError; + + fn from_str(kind: &str) -> Result { + if kind.eq_ignore_ascii_case("indexCreation") { + Ok(Kind::IndexCreation) + } else if kind.eq_ignore_ascii_case("indexUpdate") { + Ok(Kind::IndexUpdate) + } else if kind.eq_ignore_ascii_case("indexSwap") { + Ok(Kind::IndexSwap) + } else if kind.eq_ignore_ascii_case("indexDeletion") { + Ok(Kind::IndexDeletion) + } else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") { + Ok(Kind::DocumentAdditionOrUpdate) + } else if kind.eq_ignore_ascii_case("documentDeletion") { + Ok(Kind::DocumentDeletion) + } else if kind.eq_ignore_ascii_case("settingsUpdate") { + Ok(Kind::SettingsUpdate) + } else if kind.eq_ignore_ascii_case("taskCancelation") { + Ok(Kind::TaskCancelation) + } else if kind.eq_ignore_ascii_case("taskDeletion") { + Ok(Kind::TaskDeletion) + } else if kind.eq_ignore_ascii_case("dumpCreation") { + Ok(Kind::DumpCreation) + } else if kind.eq_ignore_ascii_case("snapshotCreation") { + Ok(Kind::SnapshotCreation) + } else { + Err(ResponseError::from_msg( + format!( + "`{}` is not a type. Available types are {}.", + kind, + enum_iterator::all::() + .map(|k| format!( + "`{}`", + // by default serde is going to insert `"` around the value. + serde_json::to_string(&k).unwrap().trim_matches('"') + )) + .collect::>() + .join(", ") + ), + Code::BadRequest, + )) + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] +pub enum Details { + DocumentAdditionOrUpdate { received_documents: u64, indexed_documents: Option }, + SettingsUpdate { settings: Box> }, + IndexInfo { primary_key: Option }, + DocumentDeletion { matched_documents: usize, deleted_documents: Option }, + ClearAll { deleted_documents: Option }, + TaskCancelation { matched_tasks: u64, canceled_tasks: Option, original_query: String }, + TaskDeletion { matched_tasks: u64, deleted_tasks: Option, original_query: String }, + Dump { dump_uid: String }, + IndexSwap { swaps: Vec }, +} + +/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for +/// https://github.com/time-rs/time/issues/378. +/// This code is a port of the old code of time that was removed in 0.2. 
+pub fn serialize_duration( + duration: &Option, + serializer: S, +) -> Result { + match duration { + Some(duration) => { + // technically speaking, negative duration is not valid ISO 8601 + if duration.is_negative() { + return serializer.serialize_none(); + } + + const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds(); + let secs = duration.whole_seconds(); + let days = secs / SECS_PER_DAY; + let secs = secs - days * SECS_PER_DAY; + let hasdate = days != 0; + let nanos = duration.subsec_nanoseconds(); + let hastime = (secs != 0 || nanos != 0) || !hasdate; + + // all the following unwrap can't fail + let mut res = String::new(); + write!(&mut res, "P").unwrap(); + + if hasdate { + write!(&mut res, "{}D", days).unwrap(); + } + + const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds(); + const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds(); + + if hastime { + if nanos == 0 { + write!(&mut res, "T{}S", secs).unwrap(); + } else if nanos % NANOS_PER_MILLI == 0 { + write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap(); + } else if nanos % NANOS_PER_MICRO == 0 { + write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap(); + } else { + write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap(); + } + } + + serializer.serialize_str(&res) + } + None => serializer.serialize_none(), + } +} + +#[cfg(test)] +mod tests { + use super::Details; + use crate::heed::types::SerdeJson; + use crate::heed::{BytesDecode, BytesEncode}; + + #[test] + fn bad_deser() { + let details = Details::TaskDeletion { + matched_tasks: 1, + deleted_tasks: None, + original_query: "hello".to_owned(), + }; + let serialised = SerdeJson::
<Details>::bytes_encode(&details).unwrap(); + let deserialised = SerdeJson::<Details>
::bytes_decode(&serialised).unwrap(); + meili_snap::snapshot!(format!("{:?}", details), @r###"TaskDeletion { matched_tasks: 1, deleted_tasks: None, original_query: "hello" }"###); + meili_snap::snapshot!(format!("{:?}", deserialised), @r###"TaskDeletion { matched_tasks: 1, deleted_tasks: None, original_query: "hello" }"###); + } +} diff --git a/meilisearch-types/src/versioning.rs b/meilisearch-types/src/versioning.rs new file mode 100644 index 000000000..bf1efe1ad --- /dev/null +++ b/meilisearch-types/src/versioning.rs @@ -0,0 +1,61 @@ +use std::fs; +use std::io::{self, ErrorKind}; +use std::path::Path; + +/// The name of the file that contains the version of the database. +pub const VERSION_FILE_NAME: &str = "VERSION"; + +static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR"); +static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR"); +static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); + +/// Persists the version of the current Meilisearch binary to a VERSION file +pub fn create_version_file(db_path: &Path) -> io::Result<()> { + let version_path = db_path.join(VERSION_FILE_NAME); + fs::write(version_path, format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)) +} + +/// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch. +pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> { + let version_path = db_path.join(VERSION_FILE_NAME); + + match fs::read_to_string(&version_path) { + Ok(version) => { + let version_components = version.split('.').collect::>(); + let (major, minor, patch) = match &version_components[..] { + [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()), + _ => return Err(VersionFileError::MalformedVersionFile.into()), + }; + + if major != VERSION_MAJOR || minor != VERSION_MINOR { + return Err(VersionFileError::VersionMismatch { major, minor, patch }.into()); + } + } + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()), + _ => Err(error.into()), + } + } + } + + Ok(()) +} + +#[derive(thiserror::Error, Debug)] +pub enum VersionFileError { + #[error( + "Meilisearch (v{}) failed to infer the version of the database. + To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.", + env!("CARGO_PKG_VERSION").to_string() + )] + MissingVersionFile, + #[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")] + MalformedVersionFile, + #[error( + "Expected Meilisearch engine version: {major}.{minor}.{patch}, current engine version: {}. + To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.", + env!("CARGO_PKG_VERSION").to_string() + )] + VersionMismatch { major: String, minor: String, patch: String }, +} diff --git a/permissive-json-pointer/src/lib.rs b/permissive-json-pointer/src/lib.rs index 52f181980..039bd3320 100644 --- a/permissive-json-pointer/src/lib.rs +++ b/permissive-json-pointer/src/lib.rs @@ -25,11 +25,7 @@ const SPLIT_SYMBOL: char = '.'; /// ``` fn contained_in(selector: &str, key: &str) -> bool { selector.starts_with(key) - && selector[key.len()..] - .chars() - .next() - .map(|c| c == SPLIT_SYMBOL) - .unwrap_or(true) + && selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true) } /// Map the selected leaf values of a json allowing you to update only the fields that were selected. 
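For context, a minimal stand-alone sketch of the behaviour the reformatted `contained_in` above preserves (illustration only, not part of this patch; the `main` wrapper and assertions are added here for the example, while `SPLIT_SYMBOL` and the function body mirror the hunk): a selector is considered contained in a key only when the key is a whole dot-separated prefix of the selector.

const SPLIT_SYMBOL: char = '.';

// Same shape as the function in the hunk above: `key` must be a prefix of
// `selector`, and the character right after it (if any) must be the `.` separator.
fn contained_in(selector: &str, key: &str) -> bool {
    selector.starts_with(key)
        && selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true)
}

fn main() {
    assert!(contained_in("animaux", "animaux"));        // exact match counts
    assert!(contained_in("animaux.chien", "animaux"));  // "animaux" is a whole path segment
    assert!(!contained_in("animaux_chien", "animaux")); // prefix not followed by '.'
}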
@@ -244,10 +240,7 @@ mod tests {
     fn test_contained_in() {
         assert!(contained_in("animaux", "animaux"));
         assert!(contained_in("animaux.chien", "animaux"));
-        assert!(contained_in(
-            "animaux.chien.race.bouvier bernois.fourrure.couleur",
-            "animaux"
-        ));
+        assert!(contained_in("animaux.chien.race.bouvier bernois.fourrure.couleur", "animaux"));
         assert!(contained_in(
             "animaux.chien.race.bouvier bernois.fourrure.couleur",
             "animaux.chien"
@@ -726,14 +719,12 @@ mod tests {
             }
         });
 
-        map_leaf_values(
-            value.as_object_mut().unwrap(),
-            ["jean.race.name"],
-            |key, value| match (value, key) {
+        map_leaf_values(value.as_object_mut().unwrap(), ["jean.race.name"], |key, value| {
+            match (value, key) {
                 (Value::String(name), "jean.race.name") => *name = S("patou"),
                 _ => unreachable!(),
-            },
-        );
+            }
+        });
 
         assert_eq!(
             value,