Merge branch 'main' into merge-release-v1.8.1-in-main

This commit is contained in:
Many the fish
2024-05-29 11:31:03 +02:00
committed by GitHub
105 changed files with 5863 additions and 1031 deletions

View File

@@ -43,4 +43,11 @@ jobs:
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" -- ${{ steps.command.outputs.command-arguments }}
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" \
--dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" \
--reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" \
-- ${{ steps.command.outputs.command-arguments }} > benchlinks.txt
- name: Send comment in PR
run: |
gh pr comment ${{github.event.issue.number}} --body-file benchlinks.txt

View File

@@ -187,8 +187,8 @@ They are JSON files with the following structure (comments are not actually supp
},
// Core of the workload.
// A list of commands to run sequentially.
// A command is a request to the Meilisearch instance that is executed while the profiling runs.
"commands": [
// Optional: A precommand is a request to the Meilisearch instance that is executed before the profiling runs.
"precommands": [
{
// Meilisearch route to call. `http://localhost:7700/` will be prepended.
"route": "indexes/movies/settings",
@@ -224,8 +224,11 @@ They are JSON files with the following structure (comments are not actually supp
// - DontWait: run the next command without waiting the response to this one.
// - WaitForResponse: run the next command as soon as the response from the server is received.
// - WaitForTask: run the next command once **all** the Meilisearch tasks created up to now have finished processing.
"synchronous": "DontWait"
},
"synchronous": "WaitForTask"
}
],
// A command is a request to the Meilisearch instance that is executed while the profiling runs.
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",

240
Cargo.lock generated
View File

@@ -80,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb"
dependencies = [
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -378,9 +378,9 @@ dependencies = [
[[package]]
name = "arroy"
version = "0.2.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efddeb1e7c32a551cc07ef4c3e181e3cd5478fdaf4f0bd799983171c1f6efe57"
checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
dependencies = [
"bytemuck",
"byteorder",
@@ -424,7 +424,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -435,7 +435,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -486,6 +486,12 @@ version = "0.21.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "base64ct"
version = "1.6.0"
@@ -552,7 +558,7 @@ dependencies = [
"regex",
"rustc-hash",
"shlex",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -685,7 +691,7 @@ checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -845,9 +851,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.90"
version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7"
dependencies = [
"jobserver",
"libc",
@@ -992,7 +998,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -1304,7 +1310,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -1326,7 +1332,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
dependencies = [
"darling_core 0.20.3",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -1356,7 +1362,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -1460,7 +1466,7 @@ dependencies = [
"convert_case 0.6.0",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -1530,9 +1536,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "doxygen-rs"
version = "0.2.2"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505"
checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9"
dependencies = [
"phf",
]
@@ -1678,7 +1684,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -1698,7 +1704,7 @@ checksum = "03cdc46ec28bd728e67540c528013c6a10eb69a02eb31078a1bda695438cbfb8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -1781,7 +1787,7 @@ dependencies = [
"darling 0.20.3",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
"uuid",
]
@@ -1913,7 +1919,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -2117,9 +2123,9 @@ checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad"
[[package]]
name = "getrandom"
version = "0.2.12"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
dependencies = [
"cfg-if",
"js-sys",
@@ -2256,12 +2262,11 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heed"
version = "0.20.0-alpha.9"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934"
checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
dependencies = [
"bitflags 2.5.0",
"bytemuck",
"byteorder",
"heed-traits",
"heed-types",
@@ -2275,15 +2280,15 @@ dependencies = [
[[package]]
name = "heed-traits"
version = "0.20.0-alpha.9"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298"
checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff"
[[package]]
name = "heed-types"
version = "0.20.0-alpha.9"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a"
checksum = "3cb0d6ba3700c9a57e83c013693e3eddb68a6d9b6781cacafc62a0d992e8ddb3"
dependencies = [
"bincode",
"byteorder",
@@ -2413,7 +2418,7 @@ dependencies = [
"futures-util",
"http 0.2.11",
"hyper",
"rustls 0.21.10",
"rustls 0.21.12",
"tokio",
"tokio-rustls",
]
@@ -2459,7 +2464,6 @@ dependencies = [
"meilisearch-auth",
"meilisearch-types",
"page_size 0.5.0",
"puffin",
"rayon",
"roaring",
"serde",
@@ -3157,7 +3161,7 @@ checksum = "fc2fb41a9bb4257a3803154bdf7e2df7d45197d1941c9b1a90ad815231630721"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -3183,14 +3187,13 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"
[[package]]
name = "lmdb-master-sys"
version = "0.1.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd"
checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
dependencies = [
"cc",
"doxygen-rs",
"libc",
"pkg-config",
]
[[package]]
@@ -3213,9 +3216,9 @@ checksum = "e34f76eb3611940e0e7d53a9aaa4e6a3151f69541a282fd0dad5571420c53ff1"
[[package]]
name = "lock_api"
version = "0.4.10"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
"autocfg",
"scopeguard",
@@ -3227,12 +3230,6 @@ version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "lz4_flex"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
[[package]]
name = "macro_rules_attribute"
version = "0.2.0"
@@ -3258,7 +3255,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -3337,12 +3334,11 @@ dependencies = [
"pin-project-lite",
"platform-dirs",
"prometheus",
"puffin",
"rand",
"rayon",
"regex",
"reqwest",
"rustls 0.21.10",
"rustls 0.21.12",
"rustls-pemfile",
"segment",
"serde",
@@ -3505,7 +3501,6 @@ dependencies = [
"obkv",
"once_cell",
"ordered-float",
"puffin",
"rand",
"rand_pcg",
"rayon",
@@ -3598,7 +3593,7 @@ checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -3942,7 +3937,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -3996,7 +3991,7 @@ dependencies = [
"phf_shared",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -4025,7 +4020,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -4133,9 +4128,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.79"
version = "1.0.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
dependencies = [
"unicode-ident",
]
@@ -4176,23 +4171,6 @@ version = "2.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
[[package]]
name = "puffin"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"cfg-if",
"instant",
"lz4_flex",
"once_cell",
"parking_lot",
"serde",
]
[[package]]
name = "pulp"
version = "0.18.9"
@@ -4207,9 +4185,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.35"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
@@ -4391,7 +4369,7 @@ dependencies = [
"once_cell",
"percent-encoding",
"pin-project-lite",
"rustls 0.21.10",
"rustls 0.21.12",
"rustls-pemfile",
"serde",
"serde_json",
@@ -4505,9 +4483,9 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.21.10"
version = "0.21.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba"
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
dependencies = [
"log",
"ring",
@@ -4517,9 +4495,9 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.22.2"
version = "0.22.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41"
checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
dependencies = [
"log",
"ring",
@@ -4540,9 +4518,9 @@ dependencies = [
[[package]]
name = "rustls-pki-types"
version = "1.3.1"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8"
checksum = "ecd36cc4259e3e4514335c4a138c6b43171a8d61d8f5c9348f9fc7529416f247"
[[package]]
name = "rustls-webpki"
@@ -4643,9 +4621,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
version = "1.0.197"
version = "1.0.198"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc"
dependencies = [
"serde_derive",
]
@@ -4661,20 +4639,20 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.197"
version = "1.0.198"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
name = "serde_json"
version = "1.0.115"
version = "1.0.116"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd"
checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813"
dependencies = [
"indexmap",
"itoa",
@@ -4941,7 +4919,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -4963,9 +4941,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.58"
version = "2.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687"
checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3"
dependencies = [
"proc-macro2",
"quote",
@@ -4989,7 +4967,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -5100,7 +5078,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -5243,7 +5221,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -5252,7 +5230,7 @@ version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.10",
"rustls 0.21.12",
"tokio",
]
@@ -5354,7 +5332,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -5469,9 +5447,9 @@ dependencies = [
[[package]]
name = "unicode-bidi"
version = "0.3.13"
version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
[[package]]
name = "unicode-blocks"
@@ -5529,15 +5507,15 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "ureq"
version = "2.9.6"
version = "2.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35"
checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd"
dependencies = [
"base64 0.21.7",
"base64 0.22.1",
"flate2",
"log",
"once_cell",
"rustls 0.22.2",
"rustls 0.22.4",
"rustls-pki-types",
"rustls-webpki 0.102.2",
"serde",
@@ -5703,7 +5681,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
"wasm-bindgen-shared",
]
@@ -5737,7 +5715,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -5834,7 +5812,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
dependencies = [
"windows-core",
"windows-targets 0.52.0",
"windows-targets 0.52.4",
]
[[package]]
@@ -5843,7 +5821,7 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets 0.52.0",
"windows-targets 0.52.4",
]
[[package]]
@@ -5870,7 +5848,7 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.0",
"windows-targets 0.52.4",
]
[[package]]
@@ -5905,17 +5883,17 @@ dependencies = [
[[package]]
name = "windows-targets"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
"windows_aarch64_gnullvm 0.52.4",
"windows_aarch64_msvc 0.52.4",
"windows_i686_gnu 0.52.4",
"windows_i686_msvc 0.52.4",
"windows_x86_64_gnu 0.52.4",
"windows_x86_64_gnullvm 0.52.4",
"windows_x86_64_msvc 0.52.4",
]
[[package]]
@@ -5932,9 +5910,9 @@ checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
[[package]]
name = "windows_aarch64_msvc"
@@ -5950,9 +5928,9 @@ checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
[[package]]
name = "windows_i686_gnu"
@@ -5968,9 +5946,9 @@ checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
[[package]]
name = "windows_i686_msvc"
@@ -5986,9 +5964,9 @@ checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
[[package]]
name = "windows_x86_64_gnu"
@@ -6004,9 +5982,9 @@ checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
[[package]]
name = "windows_x86_64_gnullvm"
@@ -6022,9 +6000,9 @@ checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
[[package]]
name = "windows_x86_64_msvc"
@@ -6040,9 +6018,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
[[package]]
name = "winnow"
@@ -6140,7 +6118,7 @@ checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
"synstructure",
]
@@ -6161,7 +6139,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
]
[[package]]
@@ -6181,7 +6159,7 @@ checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
"syn 2.0.60",
"synstructure",
]

View File

@@ -197,6 +197,140 @@ pub(crate) mod test {
use super::*;
use crate::reader::v6::RuntimeTogglableFeatures;
#[test]
fn import_dump_v6_with_vectors() {
// dump containing two indexes
//
// "vector", configured with an embedder
// contains:
// - one document with an overriden vector,
// - one document with a natural vector
// - one document with a _vectors map containing one additional embedder name and a natural vector
// - one document with a _vectors map containing one additional embedder name and an overriden vector
//
// "novector", no embedder
// contains:
// - a document without vector
// - a document with a random _vectors field
let dump = File::open("tests/assets/v6-with-vectors.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"278f63325ef06ca04d01df98d8207b94");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_none());
assert!(update_files[2].is_none());
assert!(update_files[3].is_none());
assert!(update_files[4].is_none());
assert!(update_files[5].is_none());
assert!(update_files[6].is_none());
assert!(update_files[7].is_none());
assert!(update_files[8].is_none());
assert!(update_files[9].is_none());
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the index are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut vector_index = indexes.pop().unwrap();
let mut novector_index = indexes.pop().unwrap();
assert!(indexes.is_empty());
// vector
insta::assert_json_snapshot!(vector_index.metadata(), @r###"
{
"uid": "vector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:17.240962Z",
"updatedAt": "2024-05-16T15:40:55.723052Z"
}
"###);
{
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 4);
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
}
// novector
insta::assert_json_snapshot!(novector_index.metadata(), @r###"
{
"uid": "novector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:03.568055Z",
"updatedAt": "2024-05-16T15:33:07.530217Z"
}
"###);
insta::assert_json_snapshot!(novector_index.settings().unwrap().embedders, @"null");
{
let documents: Result<Vec<_>> = novector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 2);
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document, @r###"
{
"id": "e1",
"other": "random1",
"_vectors": "toto"
}
"###);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document, @r###"
{
"id": "e0",
"other": "random0"
}
"###);
}
}
assert_eq!(
dump.features().unwrap().unwrap(),
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
);
}
#[test]
fn import_dump_v6_experimental() {
let dump = File::open("tests/assets/v6-with-experimental.dump").unwrap();

View File

@@ -0,0 +1,783 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e3",
"desc": "overriden vector + map",
"_vectors": {
"default": [
0.2,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
],
"toto": [
0.1
]
}
}

View File

@@ -0,0 +1,786 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e2",
"desc": "natural vector + map",
"_vectors": {
"toto": [],
"default": {
"embeddings": [
[
-0.05189208313822746,
-0.9273212552070618,
0.1443813145160675,
0.0932632014155388,
0.2665371894836426,
0.36266782879829407,
0.6402910947799683,
0.32014018297195435,
0.030915971845388412,
-0.9312191605567932,
-0.3718109726905823,
-0.2700554132461548,
-1.1014580726623535,
0.9154956936836244,
-0.3406888246536255,
1.0077725648880005,
0.6577560901641846,
-0.3955195546150207,
-0.4148270785808563,
0.1855088472366333,
0.5062315464019775,
-0.3632686734199524,
-0.2277890294790268,
0.2560805082321167,
-0.3853609561920166,
-0.1604762226343155,
-0.13947471976280212,
-0.20147813856601715,
-0.4466346800327301,
-0.3761846721172333,
0.1443382054567337,
0.18205296993255615,
0.49359792470932007,
-0.22538000345230105,
-0.4996317625045776,
-0.22734887897968292,
-0.6034309267997742,
-0.7857939600944519,
-0.34923747181892395,
-0.3466345965862274,
0.21176661550998688,
-0.5101462006568909,
-0.3403083384037018,
0.000315118464641273,
0.236465722322464,
-0.10246097296476364,
-1.3013339042663574,
0.3419138789176941,
-0.32963496446609497,
-0.0901619717478752,
-0.5426247119903564,
0.22656650841236117,
-0.44758284091949463,
0.14151698350906372,
-0.1089438870549202,
0.5500766634941101,
-0.670711100101471,
-0.6227269768714905,
0.3894464075565338,
-0.27609574794769287,
0.7028202414512634,
-0.19697771966457367,
0.328511506319046,
0.5063360929489136,
0.4065195322036743,
0.2614171802997589,
-0.30274391174316406,
1.0393824577331543,
-0.7742937207221985,
-0.7874112129211426,
-0.6749666929244995,
0.5190866589546204,
0.004123548045754433,
-0.28312963247299194,
-0.038731709122657776,
-1.0142987966537476,
-0.09519586712121964,
0.8755272626876831,
0.4876938760280609,
0.7811151742935181,
0.85174959897995,
0.11826585978269576,
0.5373436808586121,
0.3649002015590668,
0.19064077734947205,
-0.00287026260048151,
-0.7305403351783752,
-0.015206154435873032,
-0.7899249196052551,
0.19407285749912265,
0.08596625179052353,
-0.28976231813430786,
-0.1525907665491104,
0.3798313438892365,
0.050306469202041626,
-0.5697937607765198,
0.4219021201133728,
0.276252806186676,
0.1559903472661972,
0.10030482709407806,
-0.4043720066547394,
-0.1969818025827408,
0.5739826560020447,
0.2116064727306366,
-1.4620544910430908,
-0.7802462577819824,
-0.24739810824394223,
-0.09791352599859238,
-0.4413802027702331,
0.21549351513385773,
-0.9520436525344848,
-0.08762510865926743,
0.08154498040676117,
-0.6154940724372864,
-1.01079523563385,
0.885427713394165,
0.6967288851737976,
0.27186504006385803,
-0.43194177746772766,
-0.11248451471328735,
0.7576630711555481,
0.4998855590820313,
0.0264343973249197,
0.9872855544090272,
0.5634694695472717,
0.053698331117630005,
0.19410227239131927,
0.3570743501186371,
-0.23670297861099243,
-0.9114483594894408,
0.07884842902421951,
0.7318344116210938,
0.44630110263824463,
0.08745364099740982,
-0.347101628780365,
-0.4314247667789459,
-0.5060274004936218,
0.003706763498485088,
0.44320008158683777,
-0.00788921769708395,
-0.1368623524904251,
-0.17391923069953918,
0.14473655819892883,
0.10927865654230118,
0.6974599361419678,
0.005052129738032818,
-0.016953065991401672,
-0.1256176233291626,
-0.036742497235536575,
0.5591985583305359,
-0.37619709968566895,
0.22429119050502777,
0.5403043031692505,
-0.8603790998458862,
-0.3456307053565979,
0.9292937517166138,
0.5074859261512756,
0.6310645937919617,
-0.3091641068458557,
0.46902573108673096,
0.7891915440559387,
0.4499550759792328,
0.2744995653629303,
0.2712305784225464,
-0.04349074140191078,
-0.3638863265514374,
0.7839881777763367,
0.7352104783058167,
-0.19457511603832245,
-0.5957832932472229,
-0.43704694509506226,
-1.084769368171692,
0.4904985725879669,
0.5385226011276245,
0.1891629993915558,
0.12338479608297348,
0.8315675258636475,
-0.07830192148685455,
1.0916285514831543,
-0.28066861629486084,
-1.3585069179534912,
0.5203898549079895,
0.08678033947944641,
-0.2566044330596924,
0.09484415501356123,
-0.0180208683013916,
1.0264745950698853,
-0.023572135716676712,
0.5864979028701782,
0.7625196576118469,
-0.2543414533138275,
-0.8877770900726318,
0.7611982822418213,
-0.06220436468720436,
0.937336564064026,
0.2704363465309143,
-0.37733694911003113,
0.5076137781143188,
-0.30641937255859375,
0.6252772808074951,
-0.0823579877614975,
-0.03736555948853493,
0.4131673276424408,
-0.6514252424240112,
0.12918265163898468,
-0.4483584463596344,
0.6750786304473877,
-0.37008383870124817,
-0.02324833907186985,
0.38027650117874146,
-0.26374951004981995,
0.4346931278705597,
0.42882832884788513,
-0.48798441886901855,
1.1882442235946655,
0.5132288336753845,
0.5284568667411804,
-0.03538886830210686,
0.29620853066444397,
-1.0683696269989014,
0.25936177372932434,
0.10404160618782043,
-0.25796034932136536,
0.027896970510482788,
-0.09225251525640488,
1.4811025857925415,
0.641173779964447,
-0.13838383555412292,
-0.3437179923057556,
0.5667019486427307,
-0.5400741696357727,
0.31090837717056274,
0.6470608115196228,
-0.3747067153453827,
-0.7364534735679626,
-0.07431528717279434,
0.5173454880714417,
-0.6578747034072876,
0.7107478976249695,
-0.7918999791145325,
-0.0648345872759819,
0.609937846660614,
-0.7329513430595398,
0.9741371870040894,
0.17912346124649048,
-0.02658769302070141,
0.5162150859832764,
-0.3978803157806397,
-0.7833885550498962,
-0.6497276425361633,
-0.3898126780986786,
-0.0952848568558693,
0.2663288116455078,
-0.1604052186012268,
0.373076468706131,
-0.8357769250869751,
-0.05217683315277099,
-0.2680160701274872,
0.8389158248901367,
0.6833611130714417,
-0.6712407469749451,
0.7406917214393616,
-0.44522786140441895,
-0.34645363688468933,
-0.27384576201438904,
-0.9878405928611756,
-0.8166060447692871,
0.06268279999494553,
0.38567957282066345,
-0.3274703919887543,
0.5296315550804138,
-0.11810623109340668,
0.23029841482639313,
0.08616159111261368,
-0.2195747196674347,
0.09430307894945145,
0.4057176411151886,
0.4892159104347229,
-0.1636916548013687,
-0.6071445345878601,
0.41256585717201233,
0.622254490852356,
-0.41223976016044617,
-0.6686707139015198,
-0.7474371790885925,
-0.8509522080421448,
-0.16754287481307983,
-0.9078601002693176,
-0.29653599858283997,
-0.5020652413368225,
0.4692700505256653,
0.01281109917908907,
-0.16071580350399017,
0.03388889133930206,
-0.020511148497462273,
0.5027827024459839,
-0.20729811489582065,
0.48107290267944336,
0.33669769763946533,
-0.5275911688804626,
0.48271527886390686,
0.2738940715789795,
-0.033152539283037186,
-0.13629786670207977,
-0.05965912342071533,
-0.26200807094573975,
0.04002794995903969,
-0.34095603227615356,
-3.986898899078369,
-0.46819332242012024,
-0.422744482755661,
-0.169097900390625,
0.6008929014205933,
0.058016058057546616,
-0.11401277780532836,
-0.3077819049358368,
-0.09595538675785063,
0.6723822355270386,
0.19367831945419312,
0.28304359316825867,
0.1609862744808197,
0.7567598819732666,
0.6889985799789429,
0.06907720118761063,
-0.04188092052936554,
-0.7434936165809631,
0.13321782648563385,
0.8456063270568848,
-0.10364038497209548,
-0.45084846019744873,
-0.4758241474628449,
0.43882066011428833,
-0.6432598829269409,
0.7217311859130859,
-0.24189773201942444,
0.12737572193145752,
-1.1008601188659668,
-0.3305315673351288,
0.14614742994308472,
-0.7819333076477051,
0.5287120342254639,
-0.055538054555654526,
0.1877404749393463,
-0.6907662153244019,
0.5616975426673889,
-0.4611121714115143,
-0.26109233498573303,
-0.12898315489292145,
-0.3724522292613983,
-0.7191406488418579,
-0.4425233602523804,
-0.644108235836029,
0.8424481153488159,
0.17532426118850708,
-0.5121750235557556,
-0.6467239260673523,
-0.0008507720194756985,
0.7866212129592896,
-0.02644744887948036,
-0.005045140627771616,
0.015782782807946205,
0.16334445774555206,
-0.1913367658853531,
-0.13697923719882965,
-0.6684983372688293,
0.18346354365348816,
-0.341105580329895,
0.5427411198616028,
0.3779832422733307,
-0.6778115034103394,
-0.2931850254535675,
-0.8805161714553833,
-0.4212774932384491,
-0.5368952751159668,
-1.3937891721725464,
-1.225494146347046,
0.4276703894138336,
1.1205668449401855,
-0.6005299687385559,
0.15732505917549133,
-0.3914784789085388,
-1.357046604156494,
-0.4707142114639282,
-0.1497287154197693,
-0.25035548210144043,
-0.34328439831733704,
0.39083412289619446,
0.1623048633337021,
-0.9275814294815063,
-0.6430015563964844,
0.2973862886428833,
0.5580436587333679,
-0.6232585310935974,
-0.6611042022705078,
0.4015969038009643,
-1.0232892036437988,
-0.2585645020008087,
-0.5431421399116516,
0.5021264553070068,
-0.48601630330085754,
-0.010242084041237833,
0.5862035155296326,
0.7316920161247253,
0.4036808013916016,
0.4269520044326782,
-0.705938458442688,
0.7747307419776917,
0.10164368897676468,
0.7887958884239197,
-0.9612497091293336,
0.12755516171455383,
0.06812842190265656,
-0.022603651508688927,
0.14722754061222076,
-0.5588505268096924,
-0.20689940452575684,
0.3557641804218292,
-0.6812759637832642,
0.2860803008079529,
-0.38954633474349976,
0.1759403496980667,
-0.5678874850273132,
-0.1692986786365509,
-0.14578519761562347,
0.5711379051208496,
1.0208125114440918,
0.7759483456611633,
-0.372348427772522,
-0.5460885763168335,
0.7190321683883667,
-0.6914990544319153,
0.13365162909030914,
-0.4854792356491089,
0.4054908752441406,
0.4502798914909363,
-0.3041122555732727,
-0.06726965308189392,
-0.05570871382951737,
-0.0455719493329525,
0.4785125255584717,
0.8867972493171692,
0.4107886850833893,
0.6121342182159424,
-0.20477132499217987,
-0.5598517656326294,
-0.6443566679954529,
-0.5905212759971619,
-0.5571200251579285,
0.17573799192905426,
-0.28621870279312134,
0.1685224026441574,
0.09719007462263109,
-0.04223639518022537,
-0.28623101115226746,
-0.1449810117483139,
-0.3789580464363098,
-0.5227636098861694,
-0.049728814512491226,
0.7849089503288269,
0.16792525351047516,
0.9849340915679932,
-0.6559549570083618,
0.35723909735679626,
-0.6822739243507385,
1.2873116731643677,
0.19993330538272855,
0.03512010723352432,
-0.6972134113311768,
0.18453484773635864,
-0.2437680810689926,
0.2156416028738022,
0.5230382680892944,
0.22020135819911957,
0.8314080238342285,
0.15627102553844452,
-0.7330264449119568,
0.3888184726238251,
-0.22034703195095065,
0.5457669496536255,
-0.48084837198257446,
-0.45576658844947815,
-0.09287727624177931,
-0.06968110054731369,
0.35125672817230225,
-0.4278119504451752,
0.2038476765155792,
0.11392722278833388,
0.9433983564376832,
-0.4097744226455689,
0.035297419875860214,
-0.4274404048919678,
-0.25100165605545044,
1.0943366289138794,
-0.07634022831916809,
-0.2925529479980469,
-0.7512530088424683,
0.2649727463722229,
-0.4078235328197479,
-0.3372223973274231,
0.05190162733197212,
0.005654910113662481,
-0.0001571219472680241,
-0.35445958375930786,
-0.7837416529655457,
0.1500556766986847,
0.4383024573326111,
0.6099548935890198,
0.05951934307813645,
-0.21325334906578064,
0.0199207104742527,
-0.22704418003559113,
-0.6481077671051025,
0.37442275881767273,
-1.015955924987793,
0.38637226819992065,
-0.06489371508359909,
-0.494120329618454,
0.3469836115837097,
0.15402406454086304,
-0.7660972476005554,
-0.7053225040435791,
-0.25964751839637756,
0.014004424214363098,
-0.2860170006752014,
-0.17565494775772095,
-0.45117494463920593,
-0.0031954257283359766,
0.09676837921142578,
-0.514464259147644,
0.41698193550109863,
-0.21642713248729703,
-0.5398141145706177,
-0.3647628426551819,
0.37005379796028137,
0.239425927400589,
-0.08833975344896317,
0.934946596622467,
-0.48340797424316406,
0.6241437792778015,
-0.7253676652908325,
-0.04303571209311485,
1.1125205755233765,
-0.15692919492721558,
-0.2914651036262512,
-0.5117168426513672,
0.21365483105182648,
0.4924402534961701,
0.5269662141799927,
0.0352792888879776,
-0.149167999625206,
-0.6019760370254517,
0.08245442807674408,
0.4900692105293274,
0.518824577331543,
-0.00005570516441366635,
-0.553304135799408,
0.22217543423175812,
0.5047767758369446,
0.135724738240242,
1.1511540412902832,
-0.3541218340396881,
-0.9712511897087096,
0.8353699445724487,
-0.39227569103240967,
-0.9117669463157654,
-0.26349931955337524,
0.05597023293375969,
0.20695461332798004,
0.3178807199001312,
1.0663238763809204,
0.5062212347984314,
0.7288597822189331,
0.09899299591779707,
0.553720235824585,
0.675009548664093,
-0.20067055523395536,
0.3138423264026642,
-0.6886593103408813,
-0.2910398542881012,
-1.3186300992965698,
-0.4684459865093231,
-0.095743365585804,
-0.1257995069026947,
-0.4858281314373016,
-0.4935407340526581,
-0.3266896903514862,
-0.3928797245025635,
-0.40803104639053345,
-0.9975396394729614,
0.4229583740234375,
0.37309643626213074,
0.4431034922599793,
0.30364808440208435,
-0.3765178918838501,
0.5616499185562134,
0.16904796659946442,
-0.7343707084655762,
0.2560209631919861,
0.6166825294494629,
0.3200829327106476,
-0.4483652710914612,
0.16224201023578644,
-0.31495288014411926,
-0.42713335156440735,
0.7270734906196594,
0.7049484848976135,
-0.0571461021900177,
0.04477125033736229,
-0.6647796034812927,
1.183672308921814,
0.36199676990509033,
0.046881116926670074,
0.4515796303749085,
0.9278061985969543,
0.31471705436706543,
-0.7073333859443665,
-0.3443860113620758,
0.5440067052841187,
-0.15020819008350372,
-0.541202962398529,
0.5203295946121216,
1.2192286252975464,
-0.9983593225479126,
-0.18758884072303772,
0.2758221924304962,
-0.6511523723602295,
-0.1584404855966568,
-0.236241415143013,
0.2692437767982483,
-0.4941152036190033,
0.4987454116344452,
-0.3331359028816223,
0.3163745701313019,
0.745529294013977,
-0.2905873656272888,
0.13602906465530396,
0.4679684340953827,
1.0555986166000366,
1.075700044631958,
0.5368486046791077,
-0.5118206739425659,
0.8668332099914551,
-0.5726966857910156,
-0.7811751961708069,
0.1938626915216446,
-0.1929349899291992,
0.1757766306400299,
0.6384295225143433,
0.26462844014167786,
0.9542630314826964,
0.19313029944896695,
1.264248013496399,
-0.6304428577423096,
0.0487106591463089,
-0.16211535036563873,
-0.7894763350486755,
0.3582514822483063,
-0.04153040423989296,
0.635784387588501,
0.6554391980171204,
-0.47010496258735657,
-0.8302040696144104,
-0.1350124627351761,
0.2568812072277069,
0.13614831864833832,
-0.2563649117946625,
-1.0434694290161133,
0.3232482671737671,
0.47882452607154846,
0.4298652410507202,
1.0563770532608032,
-0.28917592763900757,
-0.8533256649971008,
0.10648339986801147,
0.6376127004623413,
-0.20832888782024384,
0.2370245456695557,
0.0018312990432605147,
-0.2034837007522583,
0.01051164511591196,
-1.105310082435608,
0.29724350571632385,
0.15604574978351593,
0.1973688006401062,
0.44394731521606445,
0.3974513411521912,
-0.13625948131084442,
0.9571986198425292,
0.2257384955883026,
0.2323588728904724,
-0.5583669543266296,
-0.7854922413825989,
0.1647188365459442,
-1.6098142862319946,
0.318587988615036,
-0.13399995863437653,
-0.2172701060771942,
-0.767514705657959,
-0.5813586711883545,
-0.3195130527019501,
-0.04894036799669266,
0.2929930090904236,
-0.8213384747505188,
0.07181350141763687,
0.7469993829727173,
0.6407455801963806,
0.16365697979927063,
0.7870153188705444,
0.6524736881256104,
0.6399973630905151,
-0.04992736503481865,
-0.03959266096353531,
-0.2512352466583252,
0.8448855876922607,
-0.1422702670097351,
0.1216789186000824,
-1.2647287845611572,
0.5931149125099182,
0.7186052203178406,
-0.06118432432413101,
-1.1942816972732544,
-0.17677085101604462,
0.31543800234794617,
-0.32252824306488037,
0.8255583047866821,
-0.14529970288276672,
-0.2695446312427521,
-0.33378756046295166,
-0.1653425395488739,
0.1454019844532013,
-0.3920115828514099,
0.912214994430542,
-0.7279734015464783,
0.7374742031097412,
0.933980405330658,
0.13429680466651917,
-0.514870285987854,
0.3989711999893189,
-0.11613689363002776,
0.4022413492202759,
-0.9990655779838562,
-0.33749932050704956,
-0.4334589838981629,
-1.376373291015625,
-0.2993924915790558,
-0.09454808384180068,
-0.01314175222069025,
-0.001090060803107917,
0.2137461006641388,
0.2938512861728668,
0.17508235573768616,
0.8260607123374939,
-0.7218498587608337,
0.2414487451314926,
-0.47296759486198425,
-0.3002610504627228,
-1.238540768623352,
0.08663805574178696,
0.6805586218833923,
0.5909030437469482,
-0.42807504534721375,
-0.22887496650218964,
0.47537800669670105,
-1.0474627017974854,
0.6338009238243103,
0.06548397243022919,
0.4971011281013489,
1.3484878540039063
]
],
"userProvided": false
}
}
}

View File

@@ -0,0 +1,785 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e1",
"desc": "natural vector",
"_vectors": {
"default": {
"embeddings": [
[
-0.2979458272457123,
-0.5288640856742859,
-0.019957859069108963,
-0.18495318293571472,
0.7429973483085632,
0.5238497257232666,
0.432366281747818,
0.32744166254997253,
0.0020762972999364138,
-0.9507834911346436,
-0.35097137093544006,
0.08469701558351517,
-1.4176613092422483,
0.4647577106952667,
-0.69340580701828,
1.0372896194458008,
0.3716741800308227,
0.06031008064746857,
-0.6152024269104004,
0.007914665155112743,
0.7954924702644348,
-0.20773003995418549,
0.09376765787601472,
0.04508133605122566,
-0.2084471583366394,
-0.1518009901046753,
0.018195509910583496,
-0.07044368237257004,
-0.18119366466999057,
-0.4480230510234833,
0.3822529911994934,
0.1911812424659729,
0.4674372375011444,
0.06963984668254852,
-0.09341949224472046,
0.005675444379448891,
-0.6774799227714539,
-0.7066726684570313,
-0.39256376028060913,
0.04005039855837822,
0.2084812968969345,
-0.7872875928878784,
-0.8205880522727966,
0.2919981777667999,
-0.06004738807678223,
-0.4907574355602264,
-1.5937862396240234,
0.24249385297298431,
-0.14709846675395966,
-0.11860740929841997,
-0.8299489617347717,
0.472964346408844,
-0.497518390417099,
-0.22205302119255063,
-0.4196169078350067,
0.32697558403015137,
-0.360930860042572,
-0.9789686799049376,
0.1887447088956833,
-0.403737336397171,
0.18524253368377688,
0.3768732249736786,
0.3666233420372009,
0.3511938452720642,
0.6985810995101929,
0.41721710562705994,
0.09754953533411026,
0.6204307079315186,
-1.0762996673583984,
-0.06263761967420578,
-0.7376511693000793,
0.6849768161773682,
-0.1745152473449707,
-0.40449759364128113,
0.20757411420345304,
-0.8424443006515503,
0.330015629529953,
0.3489064872264862,
1.0954371690750122,
0.8487558960914612,
1.1076823472976685,
0.61430823802948,
0.4155903458595276,
0.4111340939998626,
0.05753209814429283,
-0.06429877132177353,
-0.765606164932251,
-0.41703930497169495,
-0.508820652961731,
0.19859947264194489,
-0.16607828438282013,
-0.28112146258354187,
0.11032675206661224,
0.38809511065483093,
-0.36498191952705383,
-0.48671194911003113,
0.6755134463310242,
0.03958442434668541,
0.4478721618652344,
-0.10335399955511092,
-0.9546685814857484,
-0.6087718605995178,
0.17498846352100372,
0.08320838958024979,
-1.4478336572647097,
-0.605027437210083,
-0.5867993235588074,
-0.14711688458919525,
-0.5447602272033691,
-0.026259321719408035,
-0.6997418403625488,
-0.07349082082509995,
0.10638900846242905,
-0.7133527398109436,
-0.9396815299987792,
1.087092399597168,
1.1885089874267578,
0.4011896848678589,
-0.4089202582836151,
-0.10938972979784012,
0.6726722121238708,
0.24576938152313232,
-0.24247920513153076,
1.1499971151351929,
0.47813335061073303,
-0.05331678315997124,
0.32338133454322815,
0.4870913326740265,
-0.23144258558750153,
-1.2023426294326782,
0.2349330335855484,
1.080536961555481,
0.29334118962287903,
0.391574501991272,
-0.15818795561790466,
-0.2948290705680847,
-0.024689948186278343,
0.06602869182825089,
0.5937030911445618,
-0.047901444137096405,
-0.512734591960907,
-0.35780075192451477,
0.28751692175865173,
0.4298716187477112,
0.9242428541183472,
-0.17208744585514069,
0.11515070497989656,
-0.0335976779460907,
-0.3422986567020416,
0.5344581604003906,
0.19895796477794647,
0.33001241087913513,
0.6390730142593384,
-0.6074934005737305,
-0.2553696632385254,
0.9644920229911804,
0.2699219584465027,
0.6403993368148804,
-0.6380003690719604,
-0.027310986071825027,
0.638815701007843,
0.27719101309776306,
-0.13553589582443237,
0.750195324420929,
0.1224869191646576,
-0.20613941550254825,
0.8444448709487915,
0.16200250387191772,
-0.24750925600528717,
-0.739950954914093,
-0.28443849086761475,
-1.176282525062561,
0.516107976436615,
0.3774825632572174,
0.10906043648719788,
0.07962015271186829,
0.7384604215621948,
-0.051241904497146606,
1.1730090379714966,
-0.4828610122203827,
-1.404372215270996,
0.8811132311820984,
-0.3839482367038727,
0.022516896948218346,
-0.0491158664226532,
-0.43027013540267944,
1.2049334049224854,
-0.27309560775756836,
0.6883630752563477,
0.8264574408531189,
-0.5020735263824463,
-0.4874092042446137,
0.6007202863693237,
-0.4965405762195587,
1.1302915811538696,
0.032572727650403976,
-0.3731859028339386,
0.658271849155426,
-0.9023059010505676,
0.7400162220001221,
0.014550759457051754,
-0.19699542224407196,
0.2319706380367279,
-0.789058268070221,
-0.14905710518360138,
-0.5826214551925659,
0.207652747631073,
-0.4507439732551574,
-0.3163885474205017,
0.3604124188423157,
-0.45119962096214294,
0.3428427278995514,
0.3005594313144684,
-0.36026081442832947,
1.1014249324798584,
0.40884315967559814,
0.34991952776908875,
-0.1806638240814209,
0.27440476417541504,
-0.7118373513221741,
0.4645499587059021,
0.214790478348732,
-0.2343102991580963,
0.10500429570674896,
-0.28034430742263794,
1.2267805337905884,
1.0561333894729614,
-0.497364342212677,
-0.6143305897712708,
0.24963727593421936,
-0.33136463165283203,
-0.01473914459347725,
0.495918869972229,
-0.6985538005828857,
-1.0033197402954102,
0.35937801003456116,
0.6325868368148804,
-0.6808838844299316,
1.0354058742523191,
-0.7214401960372925,
-0.33318862318992615,
0.874398410320282,
-0.6594992280006409,
0.6830640435218811,
-0.18534131348133087,
0.024834271520376205,
0.19901277124881744,
-0.5992477536201477,
-1.2126628160476685,
-0.9245557188987732,
-0.3898217976093292,
-0.1286519467830658,
0.4217943847179413,
-0.1143646091222763,
0.5630772709846497,
-0.5240639448165894,
0.21152715384960177,
-0.3792001008987427,
0.8266305327415466,
1.170984387397766,
-0.8072142004966736,
0.11382893472909927,
-0.17953898012638092,
-0.1789460331201553,
-0.15078622102737427,
-1.2082908153533936,
-0.7812382578849792,
-0.10903695970773696,
0.7303897142410278,
-0.39054441452026367,
0.19511254131793976,
-0.09121843427419662,
0.22400228679180145,
0.30143046379089355,
0.1141919493675232,
0.48112115263938904,
0.7307931780815125,
0.09701362252235413,
-0.2795647978782654,
-0.3997688889503479,
0.5540812611579895,
0.564578115940094,
-0.40065160393714905,
-0.3629159033298493,
-0.3789091110229492,
-0.7298538088798523,
-0.6996853351593018,
-0.4477842152118683,
-0.289089560508728,
-0.6430277824401855,
0.2344944179058075,
0.3742927014827728,
-0.5079357028007507,
0.28841453790664673,
0.06515737622976303,
0.707315981388092,
0.09498685598373412,
0.8365515470504761,
0.10002726316452026,
-0.7695478200912476,
0.6264724135398865,
0.7562043070793152,
-0.23112858831882477,
-0.2871039807796478,
-0.25010058283805847,
0.2783474028110504,
-0.03224996477365494,
-0.9119359850883484,
-3.6940200328826904,
-0.5099936127662659,
-0.1604711413383484,
0.17453284561634064,
0.41759559512138367,
0.1419190913438797,
-0.11362407356500626,
-0.33312007784843445,
0.11511333286762238,
0.4667884409427643,
-0.0031647447030991316,
0.15879854559898376,
0.3042248487472534,
0.5404849052429199,
0.8515422344207764,
0.06286454200744629,
0.43790125846862793,
-0.8682025074958801,
-0.06363756954669952,
0.5547921657562256,
-0.01483887154608965,
-0.07361344993114471,
-0.929947018623352,
0.3502565622329712,
-0.5080993175506592,
1.0380364656448364,
-0.2017953395843506,
0.21319580078125,
-1.0763001441955566,
-0.556368887424469,
0.1949922740459442,
-0.6445739269256592,
0.6791343688964844,
0.21188358962535855,
0.3736183941364288,
-0.21800459921360016,
0.7597446441650391,
-0.3732394874095917,
-0.4710160195827484,
0.025146087631583217,
0.05341297015547752,
-0.9522109627723694,
-0.6000866889953613,
-0.08469046652317047,
0.5966026186943054,
0.3444081246852875,
-0.461188405752182,
-0.5279349088668823,
0.10296865552663804,
0.5175143480300903,
-0.20671147108078003,
0.13392412662506104,
0.4812754988670349,
0.2993808686733246,
-0.3005635440349579,
0.5141698122024536,
-0.6239235401153564,
0.2877119481563568,
-0.4452739953994751,
0.5621107816696167,
0.5047508478164673,
-0.4226335883140564,
-0.18578553199768064,
-1.1967322826385498,
0.28178197145462036,
-0.8692031502723694,
-1.1812998056411743,
-1.4526212215423584,
0.4645712077617645,
0.9327932000160216,
-0.6560136675834656,
0.461549699306488,
-0.5621527433395386,
-1.328449010848999,
-0.08676894754171371,
0.00021918353741057217,
-0.18864136934280396,
0.1259666532278061,
0.18240638077259064,
-0.14919660985469818,
-0.8965857625007629,
-0.7539900541305542,
0.013973715715110302,
0.504276692867279,
-0.704748272895813,
-0.6428424119949341,
0.6303996443748474,
-0.5404738187789917,
-0.31176653504371643,
-0.21262824535369873,
0.18736739456653595,
-0.7998970746994019,
0.039946746081113815,
0.7390344738960266,
0.4283199906349182,
0.3795057237148285,
0.07204607129096985,
-0.9230587482452391,
0.9440426230430604,
0.26272690296173096,
0.5598306655883789,
-1.0520871877670288,
-0.2677186131477356,
-0.1888762265443802,
0.30426350235939026,
0.4746131896972656,
-0.5746733546257019,
-0.4197768568992615,
0.8565112948417664,
-0.6767723560333252,
0.23448683321475983,
-0.2010004222393036,
0.4112907350063324,
-0.6497949957847595,
-0.418667733669281,
-0.4950824975967407,
0.44438859820365906,
1.026281714439392,
0.482397586107254,
-0.26220494508743286,
-0.3640787005424499,
0.5907743573188782,
-0.8771642446517944,
0.09708411991596222,
-0.3671700060367584,
0.4331349730491638,
0.619417667388916,
-0.2684665620326996,
-0.5123821496963501,
-0.1502324342727661,
-0.012190685607492924,
0.3580845892429352,
0.8617186546325684,
0.3493645489215851,
1.0270192623138428,
0.18297909200191495,
-0.5881339311599731,
-0.1733516901731491,
-0.5040576457977295,
-0.340370237827301,
-0.26767754554748535,
-0.28570041060447693,
-0.032928116619586945,
0.6029254794120789,
0.17397655546665192,
0.09346921741962431,
0.27815181016921997,
-0.46699589490890503,
-0.8148876428604126,
-0.3964351713657379,
0.3812595009803772,
0.13547226786613464,
0.7126688361167908,
-0.3473474085330963,
-0.06573959439992905,
-0.6483767032623291,
1.4808889627456665,
0.30924928188323975,
-0.5085946917533875,
-0.8613000512123108,
0.3048902451992035,
-0.4241599142551422,
0.15909206867218018,
0.5764641761779785,
-0.07879110425710678,
1.015336513519287,
0.07599356025457382,
-0.7025855779647827,
0.30047643184661865,
-0.35094937682151794,
0.2522146999835968,
-0.2338722199201584,
-0.8326804637908936,
-0.13695412874221802,
-0.03452421352267265,
0.47974953055381775,
-0.18385636806488037,
0.32438594102859497,
0.1797013282775879,
0.787494957447052,
-0.12579888105392456,
-0.07507286965847015,
-0.4389670491218567,
0.2720070779323578,
0.8138866424560547,
0.01974171027541161,
-0.3057698905467987,
-0.6709924936294556,
0.0885881632566452,
-0.2862754464149475,
0.03475658595561981,
-0.1285519152879715,
0.3838353455066681,
-0.2944154739379883,
-0.4204859137535095,
-0.4416137933731079,
0.13426260650157928,
0.36733248829841614,
0.573428750038147,
-0.14928072690963745,
-0.026076916605234143,
0.33286052942276,
-0.5340145826339722,
-0.17279052734375,
-0.01154550164937973,
-0.6620771884918213,
0.18390542268753052,
-0.08265615254640579,
-0.2489682286977768,
0.2429984211921692,
-0.044153645634651184,
-0.986578404903412,
-0.33574509620666504,
-0.5387663841247559,
0.19767941534519196,
0.12540718913078308,
-0.3403128981590271,
-0.4154576361179352,
0.17275673151016235,
0.09407442808151244,
-0.5414086580276489,
0.4393929839134216,
0.1725579798221588,
-0.4998118281364441,
-0.6926208138465881,
0.16552448272705078,
0.6659538149833679,
-0.10949844866991044,
0.986426830291748,
0.01748848147690296,
0.4003709554672241,
-0.5430638194084167,
0.35347291827201843,
0.6887399554252625,
0.08274628221988678,
0.13407137989997864,
-0.591465950012207,
0.3446292281150818,
0.6069018244743347,
0.1935492902994156,
-0.0989871397614479,
0.07008486241102219,
-0.8503749370574951,
-0.09507356584072112,
0.6259510517120361,
0.13934025168418884,
0.06392545253038406,
-0.4112265408039093,
-0.08475656062364578,
0.4974113404750824,
-0.30606114864349365,
1.111435890197754,
-0.018766529858112335,
-0.8422622680664063,
0.4325508773326874,
-0.2832120656967163,
-0.4859798848628998,
-0.41498348116874695,
0.015977520495653152,
0.5292825698852539,
0.4538311660289765,
1.1328668594360352,
0.22632671892642975,
0.7918671369552612,
0.33401933312416077,
0.7306135296821594,
0.3548600673675537,
0.12506209313869476,
0.8573207855224609,
-0.5818327069282532,
-0.6953738927841187,
-1.6171947717666626,
-0.1699674427509308,
0.6318262815475464,
-0.05671752244234085,
-0.28145185112953186,
-0.3976689279079437,
-0.2041076272726059,
-0.5495951175689697,
-0.5152917504310608,
-0.9309796094894408,
0.101932130753994,
0.1367802917957306,
0.1490798443555832,
0.5304336547851563,
-0.5082434415817261,
0.06688683480024338,
0.14657628536224365,
-0.782435953617096,
0.2962816655635834,
0.6965363621711731,
0.8496337532997131,
-0.3042965829372406,
0.04343798756599426,
0.0330701619386673,
-0.5662598013877869,
1.1086925268173218,
0.756072998046875,
-0.204134538769722,
0.2404300570487976,
-0.47848284244537354,
1.3659011125564575,
0.5645433068275452,
-0.15836156904697418,
0.43395575881004333,
0.5944653749465942,
1.0043466091156006,
-0.49446743726730347,
-0.5954391360282898,
0.5341240763664246,
0.020598189905285835,
-0.4036853015422821,
0.4473709762096405,
1.1998231410980225,
-0.9317775368690492,
-0.23321466147899628,
0.2052552700042725,
-0.7423108816146851,
-0.19917210936546328,
-0.1722569614648819,
-0.034072667360305786,
-0.00671181408688426,
0.46396249532699585,
-0.1372445821762085,
0.053376372903585434,
0.7392690777778625,
-0.38447609543800354,
0.07497968524694443,
0.5197252631187439,
1.3746477365493774,
0.9060075879096984,
0.20000585913658145,
-0.4053704142570496,
0.7497360110282898,
-0.34087055921554565,
-1.101803183555603,
0.273650586605072,
-0.5125769376754761,
0.22472351789474487,
0.480757474899292,
-0.19845178723335263,
0.8857700824737549,
0.30752456188201904,
1.1109285354614258,
-0.6768012642860413,
0.524367094039917,
-0.22495046257972717,
-0.4224412739276886,
0.40753406286239624,
-0.23133376240730288,
0.3297771215438843,
0.4905449151992798,
-0.6813114285469055,
-0.7543983459472656,
-0.5599071383476257,
0.14351597428321838,
-0.029278717935085297,
-0.3970443606376648,
-0.303079217672348,
0.24161772429943085,
0.008353390730917454,
-0.0062365154735744,
1.0824860334396362,
-0.3704061508178711,
-1.0337258577346802,
0.04638749733567238,
1.163011074066162,
-0.31737643480300903,
0.013986887410283089,
0.19223114848136905,
-0.2260770797729492,
-0.210910826921463,
-1.0191949605941772,
0.22356095910072327,
0.09353553503751756,
0.18096882104873657,
0.14867214858531952,
0.43408671021461487,
-0.33312076330184937,
0.8173948526382446,
0.6428242921829224,
0.20215003192424777,
-0.6634518504142761,
-0.4132290482521057,
0.29815030097961426,
-1.579406976699829,
-0.0981958732008934,
-0.03941014781594277,
0.1709178239107132,
-0.5481140613555908,
-0.5338194966316223,
-0.3528362512588501,
-0.11561278253793716,
-0.21793591976165771,
-1.1570470333099363,
0.2157980799674988,
0.42083489894866943,
0.9639263153076172,
0.09747201204299928,
0.15671424567699432,
0.4034591615200043,
0.6728067994117737,
-0.5216875672340393,
0.09657668322324751,
-0.2416689097881317,
0.747975766658783,
0.1021689772605896,
0.11652665585279463,
-1.0484966039657593,
0.8489304780960083,
0.7169828414916992,
-0.09012343734502792,
-1.3173753023147583,
0.057890523225069046,
-0.006231260951608419,
-0.1018214002251625,
0.936040461063385,
-0.0502331368625164,
-0.4284322261810303,
-0.38209280371665955,
-0.22668412327766416,
0.0782942995429039,
-0.4881664514541626,
0.9268959760665894,
0.001867273123934865,
0.42261114716529846,
0.8283362984657288,
0.4256294071674347,
-0.7965338826179504,
0.4840078353881836,
-0.19861412048339844,
0.33977967500686646,
-0.4604192078113556,
-0.3107339143753052,
-0.2839638590812683,
-1.5734281539916992,
0.005220232997089624,
0.09239906817674635,
-0.7828494906425476,
-0.1397123783826828,
0.2576255202293396,
0.21372435986995697,
-0.23169949650764465,
0.4016408920288086,
-0.462497353553772,
-0.2186472862958908,
-0.5617868900299072,
-0.3649831712245941,
-1.1585862636566162,
-0.08222806453704834,
0.931126832962036,
0.4327389597892761,
-0.46451422572135925,
-0.5430706143379211,
-0.27434298396110535,
-0.9479129314422609,
0.1845661848783493,
0.3972720205783844,
0.4883299469947815,
1.04031240940094
]
],
"userProvided": false
}
}
}

View File

@@ -0,0 +1,780 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e0",
"desc": "overriden vector",
"_vectors": {
"default": [
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
]
}
}

Binary file not shown.

View File

@@ -568,7 +568,7 @@ pub mod tests {
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequencies
// but it also works with other sequences
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}

View File

@@ -22,7 +22,6 @@ flate2 = "1.0.28"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] }
rayon = "1.8.1"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
@@ -37,7 +36,7 @@ time = { version = "0.3.31", features = [
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.1"
ureq = "2.9.7"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]

View File

@@ -13,7 +13,7 @@ We can combine the two tasks in a single batch:
1. import documents X and Y
Processing this batch is functionally equivalent to processing the two
tasks individally, but should be much faster since we are only performing
tasks individually, but should be much faster since we are only performing
one indexing operation.
*/
@@ -31,6 +31,9 @@ use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::vector::parsed_vectors::{
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
};
use meilisearch_types::milli::{self, Filter};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
@@ -526,8 +529,6 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
puffin::profile_function!();
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@@ -636,8 +637,6 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(batch.to_string());
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
@@ -785,10 +784,12 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
// TODO We can't use the open_auth_store_env function here but we should
let auth = milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)?;
let auth = unsafe {
milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.auth_path)
}?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot
@@ -914,8 +915,55 @@ impl IndexScheduler {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
let (id, doc) = ret?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
'inject_vectors: {
let embeddings = index.embeddings(&rtxn, id)?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(&rtxn, std::iter::once(id))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={id}")
}
},
value: vectors.clone(),
},
)
.into());
};
for (embedder_name, embeddings) in embeddings {
// don't change the entry if it already exists, because it was user-provided
vectors.entry(embedder_name).or_insert_with(|| {
let embeddings = ExplicitVectors {
embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
),
user_provided: false,
};
serde_json::to_value(embeddings).unwrap()
});
}
}
index_dumper.push_document(&document)?;
}
@@ -1174,8 +1222,6 @@ impl IndexScheduler {
index: &'i Index,
operation: IndexOperation,
) -> Result<Vec<Task>> {
puffin::profile_function!();
match operation {
IndexOperation::DocumentClear { mut tasks, .. } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;

View File

@@ -68,19 +68,6 @@ impl RoFeatures {
.into())
}
}
pub fn check_puffin(&self) -> Result<()> {
if self.runtime.export_puffin_reports {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Outputting Puffin reports to disk",
feature: "export puffin reports",
issue_link: "https://github.com/meilisearch/product/discussions/693",
}
.into())
}
}
}
impl FeatureData {

View File

@@ -32,7 +32,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
features: _,
max_number_of_tasks: _,
max_number_of_batched_tasks: _,
puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,

View File

@@ -33,7 +33,6 @@ pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
@@ -59,7 +58,6 @@ use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfi
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use rayon::current_num_threads;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
@@ -344,9 +342,6 @@ pub struct IndexScheduler {
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@@ -401,7 +396,6 @@ impl IndexScheduler {
cleanup_enabled: self.cleanup_enabled,
max_number_of_tasks: self.max_number_of_tasks,
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
@@ -453,10 +447,12 @@ impl IndexScheduler {
)
};
let env = heed::EnvOpenOptions::new()
.max_dbs(11)
.map_size(budget.task_db_size)
.open(options.tasks_path)?;
let env = unsafe {
heed::EnvOpenOptions::new()
.max_dbs(11)
.map_size(budget.task_db_size)
.open(options.tasks_path)
}?;
let features = features::FeatureData::new(&env, options.instance_features)?;
@@ -498,7 +494,6 @@ impl IndexScheduler {
env,
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled,
cleanup_enabled: options.cleanup_enabled,
max_number_of_tasks: options.max_number_of_tasks,
@@ -585,9 +580,9 @@ impl IndexScheduler {
}
fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool {
if let Ok(env) =
if let Ok(env) = unsafe {
heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path)
{
} {
env.prepare_for_closing().wait();
true
} else {
@@ -619,10 +614,6 @@ impl IndexScheduler {
run.wake_up.wait();
loop {
let puffin_enabled = run.features().check_puffin().is_ok();
puffin::set_scopes_on(puffin_enabled);
puffin::GlobalProfiler::lock().new_frame();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
@@ -634,31 +625,6 @@ impl IndexScheduler {
}
}
}
// Let's write the previous frame to disk but only if
// the user wanted to profile with puffin.
if puffin_enabled {
let mut frame_view = run.puffin_frame.lock();
if !frame_view.is_empty() {
let now = OffsetDateTime::now_utc();
let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file,
Err(e) => {
tracing::error!("{e}");
continue;
}
};
if let Err(e) = frame_view.save_to_writer(&mut file) {
tracing::error!("{e}");
}
if let Err(e) = file.sync_all() {
tracing::error!("{e}");
}
// We erase this frame view as it is no more useful. We want to
// measure the new frames now that we exported the previous ones.
*frame_view = FrameView::default();
}
}
}
})
.unwrap();
@@ -1772,6 +1738,7 @@ mod tests {
use big_s::S;
use crossbeam::channel::RecvTimeoutError;
use file_store::File;
use insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_auth::AuthFilter;
use meilisearch_types::document_formats::DocumentFormatError;
@@ -1849,7 +1816,7 @@ mod tests {
// To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state.
let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(1)) {
let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) {
Ok(b) => b,
Err(RecvTimeoutError::Timeout) => {
panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.")
@@ -1960,7 +1927,7 @@ mod tests {
fn advance(&mut self) -> Breakpoint {
let (breakpoint_1, b) = match self
.test_breakpoint_rcv
.recv_timeout(std::time::Duration::from_secs(5))
.recv_timeout(std::time::Duration::from_secs(50))
{
Ok(b) => b,
Err(RecvTimeoutError::Timeout) => {
@@ -1981,7 +1948,7 @@ mod tests {
let (breakpoint_2, b) = match self
.test_breakpoint_rcv
.recv_timeout(std::time::Duration::from_secs(5))
.recv_timeout(std::time::Duration::from_secs(50))
{
Ok(b) => b,
Err(RecvTimeoutError::Timeout) => {
@@ -4980,4 +4947,233 @@ mod tests {
----------------------------------------------------------------------
"###);
}
#[test]
fn import_vectors() {
use meilisearch_types::settings::{Settings, Unchecked};
use milli::update::Setting;
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let mut new_settings: Box<Settings<Unchecked>> = Box::default();
let mut embedders = BTreeMap::default();
let embedding_settings = milli::vector::settings::EmbeddingSettings {
source: Setting::Set(milli::vector::settings::EmbedderSource::Rest),
api_key: Setting::Set(S("My super secret")),
url: Setting::Set(S("http://localhost:7777")),
dimensions: Setting::Set(384),
..Default::default()
};
embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings));
let embedding_settings = milli::vector::settings::EmbeddingSettings {
source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace),
model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")),
revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")),
document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")),
..Default::default()
};
embedders.insert(S("B_small_hf"), Setting::Set(embedding_settings));
new_settings.embedders = Setting::Set(embedders);
index_scheduler
.register(
KindWithContent::SettingsUpdate {
index_uid: S("doggos"),
new_settings,
is_deletion: false,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");
{
let rtxn = index_scheduler.read_txn().unwrap();
let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap();
let task = meilisearch_types::task_view::TaskView::from_task(&task);
insta::assert_json_snapshot!(task.details);
}
handle.advance_n_successful_batches(1);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
{
let rtxn = index_scheduler.read_txn().unwrap();
let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap();
let task = meilisearch_types::task_view::TaskView::from_task(&task);
insta::assert_json_snapshot!(task.details);
}
let (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) = {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let (name, fakerest_config) = configs.get(0).unwrap();
insta::assert_json_snapshot!(name, @r###""A_fakerest""###);
insta::assert_json_snapshot!(fakerest_config.embedder_options);
let fakerest_name = name.clone();
let (name, simple_hf_config) = configs.get(1).unwrap();
insta::assert_json_snapshot!(name, @r###""B_small_hf""###);
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();
let configs = index_scheduler.embedders(configs).unwrap();
let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
};
// add one doc, specifying vectors
let doc = serde_json::json!(
{
"id": 0,
"doggo": "Intel",
"breed": "beagle",
"_vectors": {
&fakerest_name: {
// this will never trigger regeneration, which is good because we can't actually generate with
// this embedder
"userProvided": true,
"embeddings": beagle_embed,
},
&simple_hf_name: {
// this will be regenerated on updates
"userProvided": false,
"embeddings": lab_embed,
},
"noise": [0.1, 0.2, 0.3]
}
}
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap();
let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap();
assert_eq!(documents_count, 1);
file.persist().unwrap();
index_scheduler
.register(
KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: UpdateDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds");
// check embeddings
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let embeddings = index.embeddings(&rtxn, 0).unwrap();
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),
fields_ids_map.id("breed").unwrap(),
fields_ids_map.id("_vectors").unwrap(),
],
&fields_ids_map,
doc,
)
.unwrap();
assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"});
}
// update the doc, specifying vectors
let doc = serde_json::json!(
{
"id": 0,
"doggo": "kefir",
"breed": "patou",
}
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1u128).unwrap();
let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap();
assert_eq!(documents_count, 1);
file.persist().unwrap();
index_scheduler
.register(
KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: None,
method: UpdateDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
},
None,
false,
)
.unwrap();
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds");
{
// check embeddings
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let embeddings = index.embeddings(&rtxn, 0).unwrap();
// automatically changed to patou
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
// remained beagle because set to userProvided
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),
fields_ids_map.id("breed").unwrap(),
fields_ids_map.id("_vectors").unwrap(),
],
&fields_ids_map,
doc,
)
.unwrap();
assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"});
}
}
}
}

View File

@@ -0,0 +1,19 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
"doggo": "kefir",
"breed": "patou",
"_vectors": {
"A_fakerest": {
"embeddings": "[vector]",
"userProvided": true
},
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@@ -0,0 +1,20 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"A_fakerest": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 384,
"url": "http://localhost:7777"
},
"B_small_hf": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
}
}
}

View File

@@ -0,0 +1,23 @@
---
source: index-scheduler/src/lib.rs
expression: fakerest_config.embedder_options
---
{
"Rest": {
"api_key": "My super secret",
"distribution": null,
"dimensions": 384,
"url": "http://localhost:7777",
"query": null,
"input_field": [
"input"
],
"path_to_embeddings": [
"data"
],
"embedding_object": [
"embedding"
],
"input_type": "text"
}
}

View File

@@ -0,0 +1,11 @@
---
source: index-scheduler/src/lib.rs
expression: simple_hf_config.embedder_options
---
{
"HuggingFace": {
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"distribution": null
}
}

View File

@@ -0,0 +1,19 @@
---
source: index-scheduler/src/lib.rs
expression: doc
---
{
"doggo": "Intel",
"breed": "beagle",
"_vectors": {
"A_fakerest": {
"embeddings": "[vector]",
"userProvided": true
},
"noise": [
0.1,
0.2,
0.3
]
}
}

View File

@@ -0,0 +1,20 @@
---
source: index-scheduler/src/lib.rs
expression: task.details
---
{
"embedders": {
"A_fakerest": {
"source": "rest",
"apiKey": "MyXXXX...",
"dimensions": 384,
"url": "http://localhost:7777"
},
"B_small_hf": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo"
}
}
}

View File

@@ -0,0 +1,49 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -0,0 +1,48 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [2,]
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,2,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,2,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000001
----------------------------------------------------------------------

View File

@@ -0,0 +1,45 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -0,0 +1,44 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -0,0 +1,36 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,]
----------------------------------------------------------------------
### Kind:
"settingsUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 0, field_distribution: {} }
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -272,9 +272,9 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
}
for index_uid in index_uids {
if index_uid == swap.0 {
*index_uid = swap.1.to_owned();
swap.1.clone_into(index_uid);
} else if index_uid == swap.1 {
*index_uid = swap.0.to_owned();
swap.0.clone_into(index_uid);
}
}
}

View File

@@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env>
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
options.open(path)
unsafe { options.open(path) }
}
impl HeedAuthStore {

View File

@@ -26,7 +26,7 @@ pub type DeserrQueryParamError<C = BadRequest> = DeserrError<DeserrQueryParam, C
/// A request deserialization error.
///
/// The first generic paramater is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
/// The first generic parameter is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
/// The second generic parameter is the default error code for the deserialization error, in case it is not given.
pub struct DeserrError<Format, C: Default + ErrorCode> {
pub msg: String,

View File

@@ -384,7 +384,6 @@ impl ErrorCode for milli::Error {
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
@@ -423,7 +422,6 @@ impl ErrorCode for HeedError {
HeedError::Mdb(_)
| HeedError::Encoding(_)
| HeedError::Decoding(_)
| HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal,
}

View File

@@ -6,7 +6,6 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: bool,
pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@@ -67,7 +67,6 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
@@ -75,7 +74,7 @@ reqwest = { version = "0.11.23", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.6"
rustls = "0.21.12"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }

View File

@@ -59,10 +59,12 @@ where
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_pattern = req.match_pattern();
let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str);
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.with_label_values(&[&request_method, metric_path])
.start_timer(),
);
}

View File

@@ -47,8 +47,6 @@ pub struct RuntimeTogglableFeatures {
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
}
async fn patch_features(
@@ -68,21 +66,13 @@ async fn patch_features(
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
export_puffin_reports: new_features
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures {
vector_store,
metrics,
logs_route,
export_puffin_reports,
} = new_features;
let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
new_features;
analytics.publish(
"Experimental features Updated".to_string(),
@@ -90,7 +80,6 @@ async fn patch_features(
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);

View File

@@ -730,7 +730,7 @@ pub fn perform_search(
let mut ids = BTreeSet::new();
for attr in attrs {
if attr == "*" {
ids = displayed_ids.clone();
ids.clone_from(&displayed_ids);
break;
}

View File

@@ -85,8 +85,13 @@ impl SearchQueue {
},
search_request = receive_new_searches.recv() => {
// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
let search_request = search_request.unwrap();
let search_request = match search_request {
Some(search_request) => search_request,
// This should never happen while actix-web is running, but it's not a reason to crash
// and it can generate a lot of noise in the tests.
None => continue,
};
if searches_running < usize::from(parallelism) && queue.is_empty() {
searches_running += 1;
// if the search requests die it's not a hard error on our side

View File

@@ -1859,8 +1859,7 @@ async fn import_dump_v6_containing_experimental_features() {
{
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);

View File

@@ -20,8 +20,7 @@ async fn experimental_features() {
{
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);
@@ -32,8 +31,7 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);
@@ -44,8 +42,7 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);
@@ -57,8 +54,7 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);
@@ -70,8 +66,7 @@ async fn experimental_features() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);
}
@@ -90,8 +85,7 @@ async fn experimental_feature_metrics() {
{
"vectorStore": false,
"metrics": true,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);
@@ -146,7 +140,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -5,7 +5,10 @@ use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
async fn index_with_documents_user_provided<'a>(
server: &'a Server,
documents: &Value,
) -> Index<'a> {
let index = server.index("test");
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
@@ -15,8 +18,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);
@@ -34,7 +36,38 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
index
}
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({ "embedders": {"default": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.title}}, {{doc.desc}}"
}}} ))
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
index
}
static SIMPLE_SEARCH_DOCUMENTS_VEC: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
@@ -56,7 +89,7 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
}])
});
static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
static SINGLE_DOCUMENT_VEC: Lazy<Value> = Lazy::new(|| {
json!([{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
@@ -65,10 +98,29 @@ static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
}])
});
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
}])
});
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
@@ -85,8 +137,8 @@ async fn simple_search() {
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"1");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"2");
let (response, code) = index
.search_post(
@@ -98,10 +150,59 @@ async fn simple_search() {
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn simple_search_hf() {
let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) =
index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index
.search_post(
// disable ranking score as the vectors between architectures are not equal
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"1");
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
snapshot!(response["semanticHitCount"], @"3");
let (response, code) = index
.search_post(
json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}]"###);
snapshot!(response["semanticHitCount"], @"3");
let (response, code) = index
.search_post(
json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn distribution_shift() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
let (response, code) = index.search_post(search.clone()).await;
@@ -133,7 +234,7 @@ async fn distribution_shift() {
#[actix_rt::test]
async fn highlighter() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
@@ -184,7 +285,7 @@ async fn highlighter() {
#[actix_rt::test]
async fn invalid_semantic_ratio() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
@@ -256,7 +357,7 @@ async fn invalid_semantic_ratio() {
#[actix_rt::test]
async fn single_document() {
let server = Server::new().await;
let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
let index = index_with_documents_user_provided(&server, &SINGLE_DOCUMENT_VEC).await;
let (response, code) = index
.search_post(
@@ -272,7 +373,7 @@ async fn single_document() {
#[actix_rt::test]
async fn query_combination() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
// search without query and vector, but with hybrid => still placeholder
let (response, code) = index
@@ -331,7 +432,7 @@ async fn query_combination() {
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.8848484848484849}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###);
snapshot!(response["semanticHitCount"], @"null");
// query + vector, no hybrid keyword =>
@@ -374,6 +475,6 @@ async fn query_combination() {
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848}]"###);
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###);
snapshot!(response["semanticHitCount"], @"0");
}

View File

@@ -680,6 +680,26 @@ async fn search_facet_distribution() {
},
)
.await;
index.update_settings(json!({"filterableAttributes": ["doggos.name"]})).await;
index.wait_task(5).await;
index
.search(
json!({
"facets": ["doggos.name"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 1);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
},
)
.await;
}
#[actix_rt::test]
@@ -895,9 +915,9 @@ async fn test_score_details() {
"id": "166428",
"_vectors": {
"manual": [
-100,
231,
32
-100.0,
231.0,
32.0
]
},
"_rankingScoreDetails": {
@@ -921,7 +941,7 @@ async fn test_score_details() {
"order": 3,
"attributeRankingOrderScore": 1.0,
"queryWordDistanceScore": 0.8095238095238095,
"score": 0.9727891156462584
"score": 0.8095238095238095
},
"exactness": {
"order": 4,
@@ -1096,9 +1116,9 @@ async fn experimental_feature_vector_store() {
"id": "287947",
"_vectors": {
"manual": [
1,
2,
3
1.0,
2.0,
3.0
]
},
"_rankingScore": 1.0
@@ -1108,9 +1128,9 @@ async fn experimental_feature_vector_store() {
"id": "299537",
"_vectors": {
"manual": [
1,
2,
54
1.0,
2.0,
54.0
]
},
"_rankingScore": 0.9129111766815186
@@ -1120,9 +1140,9 @@ async fn experimental_feature_vector_store() {
"id": "450465",
"_vectors": {
"manual": [
-100,
340,
90
-100.0,
340.0,
90.0
]
},
"_rankingScore": 0.8106412887573242
@@ -1132,9 +1152,9 @@ async fn experimental_feature_vector_store() {
"id": "166428",
"_vectors": {
"manual": [
-100,
231,
32
-100.0,
231.0,
32.0
]
},
"_rankingScore": 0.7412010431289673
@@ -1144,9 +1164,9 @@ async fn experimental_feature_vector_store() {
"id": "522681",
"_vectors": {
"manual": [
10,
-23,
32
10.0,
-23.0,
32.0
]
},
"_rankingScore": 0.6972063183784485
@@ -1405,9 +1425,9 @@ async fn simple_search_with_strange_synonyms() {
"id": "166428",
"_vectors": {
"manual": [
-100,
231,
32
-100.0,
231.0,
32.0
]
}
}
@@ -1426,9 +1446,9 @@ async fn simple_search_with_strange_synonyms() {
"id": "166428",
"_vectors": {
"manual": [
-100,
231,
32
-100.0,
231.0,
32.0
]
}
}
@@ -1447,9 +1467,9 @@ async fn simple_search_with_strange_synonyms() {
"id": "166428",
"_vectors": {
"manual": [
-100,
231,
32
-100.0,
231.0,
32.0
]
}
}

View File

@@ -75,9 +75,9 @@ async fn simple_search_single_index() {
"id": "450465",
"_vectors": {
"manual": [
-100,
340,
90
-100.0,
340.0,
90.0
]
}
}
@@ -96,9 +96,9 @@ async fn simple_search_single_index() {
"id": "299537",
"_vectors": {
"manual": [
1,
2,
54
1.0,
2.0,
54.0
]
}
}
@@ -194,9 +194,9 @@ async fn simple_search_two_indexes() {
"id": "450465",
"_vectors": {
"manual": [
-100,
340,
90
-100.0,
340.0,
90.0
]
}
}
@@ -227,9 +227,9 @@ async fn simple_search_two_indexes() {
"cattos": "pésti",
"_vectors": {
"manual": [
1,
2,
3
1.0,
2.0,
3.0
]
}
},
@@ -249,9 +249,9 @@ async fn simple_search_two_indexes() {
],
"_vectors": {
"manual": [
1,
2,
54
1.0,
2.0,
54.0
]
}
}

View File

@@ -285,10 +285,10 @@ async fn attributes_ranking_rule_order() {
@r###"
[
{
"id": "2"
"id": "1"
},
{
"id": "1"
"id": "2"
}
]
"###

View File

@@ -98,8 +98,7 @@ async fn secrets_are_hidden_in_settings() {
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"exportPuffinReports": false
"logsRoute": false
}
"###);

View File

@@ -1,6 +1,5 @@
use std::time::Duration;
use actix_rt::time::sleep;
use meili_snap::{json_string, snapshot};
use meilisearch::option::ScheduleSnapshot;
use meilisearch::Opt;
@@ -53,11 +52,29 @@ async fn perform_snapshot() {
index.load_test_set().await;
server.index("test1").create(Some("prim")).await;
let (task, code) = server.index("test1").create(Some("prim")).await;
meili_snap::snapshot!(code, @"202 Accepted");
index.wait_task(2).await;
index.wait_task(task.uid()).await;
sleep(Duration::from_secs(2)).await;
// wait for the _next task_ to process, aka the snapshot that should be enqueued at some point
println!("waited for the next task to finish");
let now = std::time::Instant::now();
let next_task = task.uid() + 1;
loop {
let (value, code) = index.get_task(next_task).await;
dbg!(&value);
if code != 404 && value["status"].as_str() == Some("succeeded") {
break;
}
if now.elapsed() > Duration::from_secs(30) {
panic!("The snapshot didn't schedule in 30s even though it was supposed to be scheduled every 2s: {}",
serde_json::to_string_pretty(&value).unwrap()
);
}
}
let temp = tempfile::tempdir().unwrap();

View File

@@ -80,9 +80,7 @@ fn main() -> anyhow::Result<()> {
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&path)
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) }
.with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database...");
@@ -129,7 +127,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
}
}
eprintln!("Sucessfully deleted {count} content files from disk!");
eprintln!("Successfully deleted {count} content files from disk!");
Ok(())
}
@@ -193,9 +191,7 @@ fn export_a_dump(
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&index_scheduler_path)
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys...");

View File

@@ -30,7 +30,7 @@ grenad = { version = "0.4.6", default-features = false, features = [
"rayon",
"tempfile",
] }
heed = { version = "0.20.0-alpha.9", default-features = false, features = [
heed = { version = "0.20.1", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
@@ -67,9 +67,6 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.11.0"
# profiling
puffin = "0.16.0"
csv = "1.3.0"
candle-core = { version = "0.4.1" }
candle-transformers = { version = "0.4.1" }
@@ -82,10 +79,10 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
arroy = "0.2.0"
arroy = "0.3.1"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.6", features = ["json"] }
ureq = { version = "2.9.7", features = ["json"] }
url = "2.5.0"
[dev-dependencies]

View File

@@ -48,7 +48,7 @@ fn main() -> Result<(), Box<dyn Error>> {
let start = Instant::now();
let mut ctx = SearchContext::new(&index, &txn);
let mut ctx = SearchContext::new(&index, &txn)?;
let universe = filtered_universe(&ctx, &None)?;
let docs = execute_search(

3
milli/fuzz/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
target
corpus
artifacts

View File

@@ -203,7 +203,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
"string" => (field_name, AllowedType::String),
"boolean" => (field_name, AllowedType::Boolean),
"number" => (field_name, AllowedType::Number),
// if the pattern isn't reconized, we keep the whole field.
// if the pattern isn't recognized, we keep the whole field.
_otherwise => (header, AllowedType::String),
},
None => (header, AllowedType::String),

View File

@@ -32,6 +32,8 @@ pub enum InternalError {
DatabaseClosing,
#[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
#[error("Missing {key} in the fieldids weights mapping.")]
FieldidsWeightsMapMissingEntry { key: FieldId },
#[error(transparent)]
FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
#[error("Missing {key} in the field id mapping.")]
@@ -46,8 +48,6 @@ pub enum InternalError {
GrenadInvalidFormatVersion,
#[error("Invalid merge while processing {process}")]
IndexingMergingKeys { process: &'static str },
#[error("{}", HeedError::InvalidDatabaseTyping)]
InvalidDatabaseTyping,
#[error(transparent)]
RayonThreadPool(#[from] ThreadPoolBuildError),
#[error(transparent)]
@@ -117,10 +117,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidGeoField(#[from] GeoError),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
#[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
InvalidVectorsType { document_id: Value, value: Value, subfield: String },
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
InvalidVectorsMapType { document_id: Value, value: Value },
InvalidVectorsMapType { document_id: String, value: Value },
#[error("{0}")]
InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
@@ -427,7 +425,6 @@ impl From<HeedError> for Error {
// TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
}

View File

@@ -0,0 +1,48 @@
//! The fieldids weights map is in charge of storing linking the searchable fields with their weights.
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::{FieldId, FieldsIdsMap, Weight};
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct FieldidsWeightsMap {
map: HashMap<FieldId, Weight>,
}
impl FieldidsWeightsMap {
/// Insert a field id -> weigth into the map.
/// If the map did not have this key present, `None` is returned.
/// If the map did have this key present, the value is updated, and the old value is returned.
pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
self.map.insert(fid, weight)
}
/// Create the map from the fields ids maps.
/// Should only be called in the case there are NO searchable attributes.
/// All the fields will be inserted in the order of the fields ids map with a weight of 0.
pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() }
}
/// Removes a field id from the map, returning the associated weight previously in the map.
pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
self.map.remove(&fid)
}
/// Returns weight corresponding to the key.
pub fn weight(&self, fid: FieldId) -> Option<Weight> {
self.map.get(&fid).copied()
}
/// Returns highest weight contained in the map if any.
pub fn max_weight(&self) -> Option<Weight> {
self.map.values().copied().max()
}
/// Return an iterator visiting all field ids in arbitrary order.
pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
self.map.keys().copied()
}
}

View File

@@ -195,7 +195,7 @@ mod tests {
fn merge_cbo_roaring_bitmaps() {
let mut buffer = Vec::new();
let small_data = vec![
let small_data = [
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..6).unwrap(),
@@ -209,7 +209,7 @@ mod tests {
let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
assert_eq!(bitmap, expected);
let medium_data = vec![
let medium_data = [
RoaringBitmap::from_sorted_iter(1..4).unwrap(),
RoaringBitmap::from_sorted_iter(2..5).unwrap(),
RoaringBitmap::from_sorted_iter(4..8).unwrap(),

View File

@@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::path::Path;
@@ -22,11 +23,12 @@ use crate::heed_codec::{
};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
use crate::vector::EmbeddingConfig;
use crate::vector::{Embedding, EmbeddingConfig};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -42,6 +44,7 @@ pub mod main_key {
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
pub const GEO_RTREE_KEY: &str = "geo-rtree";
pub const PRIMARY_KEY_KEY: &str = "primary-key";
@@ -181,7 +184,7 @@ impl Index {
options.max_dbs(25);
let env = options.open(path)?;
let env = unsafe { options.open(path) }?;
let mut wtxn = env.write_txn()?;
let main = env.database_options().name(MAIN).create(&mut wtxn)?;
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
@@ -291,6 +294,11 @@ impl Index {
self.env.read_txn()
}
/// Create a static read transaction to be able to read the index without keeping a reference to it.
pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> {
self.env.clone().static_read_txn()
}
/// Returns the canonicalized path where the heed `Env` of this `Index` lives.
pub fn path(&self) -> &Path {
self.env.path()
@@ -414,6 +422,65 @@ impl Index {
.unwrap_or_default())
}
/* fieldids weights map */
// This maps the fields ids to their weights.
// Their weights is defined by the ordering of the searchable attributes.
/// Writes the fieldids weights map which associates the field ids to their weights
pub(crate) fn put_fieldids_weights_map(
&self,
wtxn: &mut RwTxn,
map: &FieldidsWeightsMap,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::FIELDIDS_WEIGHTS_MAP_KEY,
map,
)
}
/// Get the fieldids weights map which associates the field ids to their weights
pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> {
self.main
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
.map(Ok)
.unwrap_or_else(|| {
Ok(FieldidsWeightsMap::from_field_id_map_without_searchable(
&self.fields_ids_map(rtxn)?,
))
})
}
/// Delete the fieldsids weights map
pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
}
pub fn searchable_fields_and_weights<'a>(
&self,
rtxn: &'a RoTxn,
) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
let fid_map = self.fields_ids_map(rtxn)?;
let weight_map = self.fieldids_weights_map(rtxn)?;
let searchable = self.searchable_fields(rtxn)?;
searchable
.into_iter()
.map(|field| -> Result<_> {
let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: field.to_string(),
process: "searchable_fields_and_weights",
})?;
let weight = weight_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
Ok((field, fid, weight))
})
.collect()
}
/* geo rtree */
/// Writes the provided `rtree` which associates coordinates to documents ids.
@@ -578,33 +645,42 @@ impl Index {
wtxn: &mut RwTxn,
user_fields: &[&str],
fields_ids_map: &FieldsIdsMap,
) -> heed::Result<()> {
) -> Result<()> {
// We can write the user defined searchable fields as-is.
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
let mut weights = FieldidsWeightsMap::default();
// Now we generate the real searchable fields:
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
// 2. Iterate over the user defined searchable fields.
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
// (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
let mut real_fields = user_fields.to_vec();
// (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
let mut real_fields = Vec::new();
for field_from_map in fields_ids_map.names() {
for user_field in user_fields {
for (id, field_from_map) in fields_ids_map.iter() {
for (weight, user_field) in user_fields.iter().enumerate() {
if crate::is_faceted_by(field_from_map, user_field)
&& !user_fields.contains(&field_from_map)
&& !real_fields.contains(&field_from_map)
{
real_fields.push(field_from_map);
let weight: u16 =
weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
weights.insert(id, weight);
}
}
}
self.put_searchable_fields(wtxn, &real_fields)
self.put_searchable_fields(wtxn, &real_fields)?;
self.put_fieldids_weights_map(wtxn, &weights)?;
Ok(())
}
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
self.delete_fieldids_weights_map(wtxn)?;
Ok(did_delete_searchable || did_delete_user_defined)
}
@@ -623,28 +699,31 @@ impl Index {
}
/// Returns the searchable fields, those are the fields that are indexed,
/// if the searchable fields aren't there it means that **all** the fields are indexed.
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> {
self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
.map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect()))
.unwrap_or_else(|| {
Ok(self
.fields_ids_map(rtxn)?
.names()
.map(|field| Cow::Owned(field.to_string()))
.collect())
})
}
/// Identical to `searchable_fields`, but returns the ids instead.
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
match self.searchable_fields(rtxn)? {
Some(fields) => {
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = Vec::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(name) {
fields_ids.push(field_id);
}
}
Ok(Some(fields_ids))
pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> {
let fields = self.searchable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = Vec::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(&name) {
fields_ids.push(field_id);
}
None => Ok(None),
}
Ok(fields_ids)
}
/// Writes the searchable fields, when this list is specified, only these are indexed.
@@ -1527,6 +1606,44 @@ impl Index {
pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
}
pub fn embeddings(
&self,
rtxn: &RoTxn<'_>,
docid: DocumentId,
) -> Result<BTreeMap<String, Vec<Embedding>>> {
let mut res = BTreeMap::new();
for row in self.embedder_category_id.iter(rtxn)? {
let (embedder_name, embedder_id) = row?;
let embedder_id = (embedder_id as u16) << 8;
let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata => Ok(None),
e => Err(e),
})
.transpose();
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader?.item_vector(rtxn, docid)?;
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
if !embeddings.is_empty() {
res.insert(embedder_name.to_owned(), embeddings);
}
}
Ok(res)
}
}
#[cfg(test)]
@@ -1710,10 +1827,14 @@ pub(crate) mod tests {
]))
.unwrap();
db_snap!(index, field_distribution, 1);
db_snap!(index, field_distribution, @r###"
age 1 |
id 2 |
name 2 |
"###);
db_snap!(index, word_docids,
@r###"
@r###"
1 [0, ]
2 [1, ]
20 [1, ]
@@ -1722,18 +1843,6 @@ pub(crate) mod tests {
"###
);
db_snap!(index, field_distribution);
db_snap!(index, field_distribution,
@r###"
age 1 |
id 2 |
name 2 |
"###
);
// snapshot_index!(&index, "1", include: "^field_distribution$");
// we add all the documents a second time. we are supposed to get the same
// field_distribution in the end
index
@@ -1820,7 +1929,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
let real = index.searchable_fields(&rtxn).unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
@@ -1840,7 +1949,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
let real = index.searchable_fields(&rtxn).unwrap();
assert_eq!(real, &["doggo", "name"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(user_defined, &["doggo", "name"]);
@@ -1856,7 +1965,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
let real = index.searchable_fields(&rtxn).unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
@@ -2395,6 +2504,14 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
"###);
index
.add_documents(documents!([
@@ -2410,6 +2527,16 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
1 a |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
"###);
index.delete_documents(Default::default());
@@ -2420,6 +2547,16 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
1 a |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
"###);
index
.add_documents(documents!([
@@ -2435,6 +2572,16 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, fields_ids_map, @r###"
0 primary_key |
1 a |
"###);
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
"###);
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
@@ -2520,4 +2667,104 @@ pub(crate) mod tests {
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
}
#[test]
fn swapping_searchable_attributes() {
// See https://github.com/meilisearch/meilisearch/issues/4484
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
})
.unwrap();
index
.add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
"###);
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("realName")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
})
.unwrap();
// The order of the field id map shouldn't change
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
3 1 |
"###);
}
#[test]
fn attribute_weights_after_swapping_searchable_attributes() {
// See https://github.com/meilisearch/meilisearch/issues/4484
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("beverage")]);
})
.unwrap();
index
.add_documents(documents!([
{ "id": 0, "name": "kefir", "beverage": "water" },
{ "id": 1, "name": "tamo", "beverage": "kefir" }
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("kefir").execute().unwrap();
// We should find kefir the dog first
insta::assert_debug_snapshot!(results.documents_ids, @r###"
[
0,
1,
]
"###);
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("beverage"), S("name")]);
})
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("kefir").execute().unwrap();
// We should find tamo first
insta::assert_debug_snapshot!(results.documents_ids, @r###"
[
1,
0,
]
"###);
}
}

View File

@@ -28,6 +28,7 @@ pub mod vector;
#[cfg(test)]
#[macro_use]
pub mod snapshot_tests;
mod fieldids_weights_map;
use std::collections::{BTreeMap, HashMap};
use std::convert::{TryFrom, TryInto};
@@ -52,6 +53,7 @@ pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
@@ -77,6 +79,7 @@ pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type FieldId = u16;
pub type Weight = u16;
pub type Object = serde_json::Map<String, serde_json::Value>;
pub type Position = u32;
pub type RelativePosition = u16;
@@ -351,43 +354,13 @@ pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator<Item = impl AsR
/// assert!(!is_faceted_by("animaux.chien", "animaux.chie"));
/// ```
pub fn is_faceted_by(field: &str, facet: &str) -> bool {
field.starts_with(facet)
&& field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true)
field.starts_with(facet) && field[facet.len()..].chars().next().map_or(true, |c| c == '.')
}
pub fn normalize_facet(original: &str) -> String {
CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
}
/// Represents either a vector or an array of multiple vectors.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(transparent)]
pub struct VectorOrArrayOfVectors {
#[serde(with = "either::serde_untagged_optional")]
inner: Option<either::Either<Vec<f32>, Vec<Vec<f32>>>>,
}
impl VectorOrArrayOfVectors {
pub fn into_array_of_vectors(self) -> Option<Vec<Vec<f32>>> {
match self.inner? {
either::Either::Left(vector) => Some(vec![vector]),
either::Either::Right(vectors) => Some(vectors),
}
}
}
/// Normalize a vector by dividing the dimensions by the length of it.
pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
let squared: f32 = vector.iter().map(|x| x * x).sum();
let length = squared.sqrt();
if length <= f32::EPSILON {
vector
} else {
vector.iter_mut().for_each(|x| *x /= length);
vector
}
}
#[cfg(test)]
mod tests {
use serde_json::json;

View File

@@ -147,7 +147,7 @@ impl<'a> Search<'a> {
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn);
let ctx = SearchContext::new(self.index, self.rtxn)?;
filtered_universe(&ctx, &self.filter)
} else {
Ok(self.execute()?.candidates)
@@ -155,10 +155,10 @@ impl<'a> Search<'a> {
}
pub fn execute(&self) -> Result<SearchResult> {
let mut ctx = SearchContext::new(self.index, self.rtxn);
let mut ctx = SearchContext::new(self.index, self.rtxn)?;
if let Some(searchable_attributes) = self.searchable_attributes {
ctx.searchable_attributes(searchable_attributes)?;
ctx.attributes_to_search_on(searchable_attributes)?;
}
let universe = filtered_universe(&ctx, &self.filter)?;

View File

@@ -101,7 +101,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
let mut ranking_rule_universes: Vec<RoaringBitmap> =
vec![RoaringBitmap::default(); ranking_rules_len];
ranking_rule_universes[0] = universe.clone();
ranking_rule_universes[0].clone_from(universe);
let mut cur_ranking_rule_index = 0;
/// Finish iterating over the current ranking rule, yielding
@@ -232,7 +232,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
cur_ranking_rule_index += 1;
ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone();
ranking_rule_universes[cur_ranking_rule_index].clone_from(&next_bucket.candidates);
logger.start_iteration_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),

View File

@@ -163,7 +163,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -192,7 +192,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -242,7 +242,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -271,7 +271,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -315,11 +315,7 @@ impl<'ctx> SearchContext<'ctx> {
.map_err(heed::Error::Decoding)?
} else {
// Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids
} else {
self.index.fields_ids_map(self.txn)?.ids().collect()
};
let fids = self.index.searchable_fields_ids(self.txn)?;
let mut docids = RoaringBitmap::new();
for fid in fids {
// for each field, intersect left word bitmap and right word bitmap,
@@ -408,11 +404,7 @@ impl<'ctx> SearchContext<'ctx> {
let prefix_docids = match proximity_precision {
ProximityPrecision::ByAttribute => {
// Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids
} else {
self.index.fields_ids_map(self.txn)?.ids().collect()
};
let fids = self.index.searchable_fields_ids(self.txn)?;
let mut prefix_docids = RoaringBitmap::new();
// for each field, intersect left word bitmap and right word bitmap,
// then merge the result in a global bitmap before storing it in the cache.

View File

@@ -184,13 +184,7 @@ impl State {
return Ok(State::Empty(query_graph.clone()));
}
let searchable_fields_ids = {
if let Some(fids) = ctx.index.searchable_fields_ids(ctx.txn)? {
fids
} else {
ctx.index.fields_ids_map(ctx.txn)?.ids().collect()
}
};
let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?;
let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
// then check that there exists at least one attribute that has all of the terms

View File

@@ -42,7 +42,7 @@ fn facet_number_values<'a>(
}
/// Define the strategy used by the geo sort.
/// The paramater represents the cache size, and, in the case of the Dynamic strategy,
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
#[derive(Debug, Clone, Copy)]
pub enum Strategy {

View File

@@ -258,7 +258,7 @@ pub(crate) mod tests {
fn matching_words() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let mut ctx = SearchContext::new(&temp_index, &rtxn);
let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
let mut builder = TokenizerBuilder::default();
let tokenizer = builder.build();
let tokens = tokenizer.tokenize("split this world");

View File

@@ -134,7 +134,7 @@ impl<'t> Matcher<'t, '_> {
for (token_position, word_position, word) in words_positions {
partial = match partial.match_token(word) {
// token matches the partial match, but the match is not full,
// we temporarly save the current token then we try to match the next one.
// we temporarily save the current token then we try to match the next one.
Some(MatchType::Partial(partial)) => {
potential_matches.push((token_position, word_position, partial.char_len()));
partial
@@ -506,7 +506,7 @@ mod tests {
impl<'a> MatcherBuilder<'a> {
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
let mut ctx = SearchContext::new(index, rtxn);
let mut ctx = SearchContext::new(index, rtxn).unwrap();
let universe = filtered_universe(&ctx, &None).unwrap();
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
@@ -722,7 +722,7 @@ mod tests {
@"…void void void void void split the world void void"
);
// Text containing matches with diferent density.
// Text containing matches with different density.
let text = "split void the void void world void void void void void void void void void void split the world void void";
let mut matcher = builder.build(text);
// crop should return 10 last words with a marker at the start.

View File

@@ -49,13 +49,12 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;
use crate::error::FieldIdMapMissingEntry;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::vector::Embedder;
use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
UserError,
UserError, Weight,
};
/// A structure used throughout the execution of a search query.
@@ -71,8 +70,21 @@ pub struct SearchContext<'ctx> {
}
impl<'ctx> SearchContext<'ctx> {
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
Self {
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result<Self> {
let searchable_fids = index.searchable_fields_and_weights(txn)?;
let exact_attributes_ids = index.exact_attributes_ids(txn)?;
let mut exact = Vec::new();
let mut tolerant = Vec::new();
for (_name, fid, weight) in searchable_fids {
if exact_attributes_ids.contains(&fid) {
exact.push((fid, weight));
} else {
tolerant.push((fid, weight));
}
}
Ok(Self {
index,
txn,
db_cache: <_>::default(),
@@ -81,42 +93,39 @@ impl<'ctx> SearchContext<'ctx> {
term_interner: <_>::default(),
phrase_docids: <_>::default(),
restricted_fids: None,
}
})
}
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
let fids_map = self.index.fields_ids_map(self.txn)?;
let searchable_names = self.index.searchable_fields(self.txn)?;
pub fn attributes_to_search_on(
&mut self,
attributes_to_search_on: &'ctx [String],
) -> Result<()> {
let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
let mut wildcard = false;
let mut restricted_fids = RestrictedFids::default();
let mut contains_wildcard = false;
for field_name in searchable_attributes {
for field_name in attributes_to_search_on {
if field_name == "*" {
contains_wildcard = true;
wildcard = true;
// we cannot early exit as we want to returns error in case of unknown fields
continue;
}
let searchable_contains_name =
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
let fid = match (fids_map.id(field_name), searchable_contains_name) {
let searchable_weight =
searchable_fields_weights.iter().find(|(name, _, _)| name == field_name);
let (fid, weight) = match searchable_weight {
// The Field id exist and the field is searchable
(Some(fid), Some(true)) | (Some(fid), None) => fid,
// The field is searchable but the Field id doesn't exist => Internal Error
(None, Some(true)) => {
return Err(FieldIdMapMissingEntry::FieldName {
field_name: field_name.to_string(),
process: "search",
}
.into())
}
// The field is not searchable, but the searchableAttributes are set to * => ignore field
(None, None) => continue,
Some((_name, fid, weight)) => (*fid, *weight),
// The field is not searchable but the user didn't define any searchable attributes
None if user_defined_searchable.is_none() => continue,
// The field is not searchable => User error
(_fid, Some(false)) => {
let (valid_fields, hidden_fields) = match searchable_names {
Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
};
None => {
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
self.txn,
searchable_fields_weights.iter().map(|(name, _, _)| name),
)?;
let field = field_name.to_string();
return Err(UserError::InvalidSearchableAttribute {
@@ -129,13 +138,17 @@ impl<'ctx> SearchContext<'ctx> {
};
if exact_attributes_ids.contains(&fid) {
restricted_fids.exact.push(fid);
restricted_fids.exact.push((fid, weight));
} else {
restricted_fids.tolerant.push(fid);
restricted_fids.tolerant.push((fid, weight));
};
}
self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
if wildcard {
self.restricted_fids = None;
} else {
self.restricted_fids = Some(restricted_fids);
}
Ok(())
}
@@ -158,13 +171,13 @@ impl Word {
#[derive(Debug, Clone, Default)]
pub struct RestrictedFids {
pub tolerant: Vec<FieldId>,
pub exact: Vec<FieldId>,
pub tolerant: Vec<(FieldId, Weight)>,
pub exact: Vec<(FieldId, Weight)>,
}
impl RestrictedFids {
pub fn contains(&self, fid: &FieldId) -> bool {
self.tolerant.contains(fid) || self.exact.contains(fid)
self.tolerant.iter().any(|(id, _)| id == fid) || self.exact.iter().any(|(id, _)| id == fid)
}
}

View File

@@ -119,7 +119,7 @@ pub fn located_query_terms_from_tokens(
if let Some(located_query_term) = phrase.build(ctx) {
// as we are evaluating a negative operator we put the phrase
// in the negative one *but* we don't reset the negative operator
// as we are immediatly starting a new negative phrase.
// as we are immediately starting a new negative phrase.
if negative_phrase {
negative_phrases.push(located_query_term);
} else {
@@ -366,7 +366,7 @@ mod tests {
let tokens = tokenizer.tokenize(".");
let index = temp_index_with_documents();
let rtxn = index.read_txn()?;
let mut ctx = SearchContext::new(&index, &rtxn);
let mut ctx = SearchContext::new(&index, &rtxn)?;
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
let ExtractedTokens { query_terms, .. } =
located_query_terms_from_tokens(&mut ctx, tokens, None)?;

View File

@@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
use crate::search::new::SearchContext;
use crate::Result;
use crate::{FieldId, InternalError, Result};
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct FidCondition {
term: LocatedQueryTermSubset,
fid: u16,
fid: Option<FieldId>,
}
pub enum FidGraph {}
@@ -26,13 +26,15 @@ impl RankingRuleGraphTrait for FidGraph {
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let FidCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
let mut docids = compute_query_term_subset_docids_within_field_id(
ctx,
&term.term_subset,
condition.fid,
)?;
docids &= universe;
let docids = if let Some(fid) = condition.fid {
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
let docids =
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?;
docids & universe
} else {
RoaringBitmap::new()
};
Ok(ComputedCondition {
docids,
@@ -68,34 +70,29 @@ impl RankingRuleGraphTrait for FidGraph {
all_fields.extend(fields);
}
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
let mut edges = vec![];
for fid in all_fields.iter().copied() {
let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
edges.push((
fid as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid }),
weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
));
}
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
let max_fid: Option<u16> = {
if let Some(max_fid) = ctx
.index
.searchable_fields_ids(ctx.txn)?
.map(|field_ids| field_ids.into_iter().max())
{
max_fid
} else {
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
}
};
let max_weight: Option<u16> = weights_map.max_weight();
if let Some(max_fid) = max_fid {
if !all_fields.contains(&max_fid) {
if let Some(max_weight) = max_weight {
if !all_fields.contains(&max_weight) {
edges.push((
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone
fid: max_fid,
fid: None,
}),
));
}

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -131,6 +131,19 @@ fn test_attribute_fid_simple() {
#[test]
fn test_attribute_fid_ngrams() {
let index = create_index();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 title |
2 description |
3 plot |
"###);
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
1 0 |
2 1 |
3 2 |
"###);
let txn = index.read_txn().unwrap();

View File

@@ -0,0 +1,244 @@
---
source: milli/src/search/new/tests/attribute_fid.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Fid(
Rank {
rank: 19,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 15,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 14,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 13,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 12,
max_rank: 19,
},
),
Position(
Rank {
rank: 83,
max_rank: 91,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 11,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 73,
max_rank: 91,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 7,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
10,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 78,
max_rank: 91,
},
),
],
),
(
14,
[
Fid(
Rank {
rank: 5,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 1,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
]

View File

@@ -308,6 +308,25 @@ pub fn snap_fields_ids_map(index: &Index) -> String {
}
snap
}
pub fn snap_fieldids_weights_map(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let weights_map = index.fieldids_weights_map(&rtxn).unwrap();
let mut snap = String::new();
writeln!(&mut snap, "fid weight").unwrap();
let mut field_ids: Vec<_> = weights_map.ids().collect();
field_ids.sort();
for field_id in field_ids {
let weight = weights_map.weight(field_id).unwrap();
writeln!(&mut snap, "{field_id:<3} {weight:<3} |").unwrap();
}
snap
}
pub fn snap_searchable_fields(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
format!("{searchable_fields:?}")
}
pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
@@ -469,6 +488,12 @@ macro_rules! full_snap_of_db {
($index:ident, fields_ids_map) => {{
$crate::snapshot_tests::snap_fields_ids_map(&$index)
}};
($index:ident, fieldids_weights_map) => {{
$crate::snapshot_tests::snap_fieldids_weights_map(&$index)
}};
($index:ident, searchable_fields) => {{
$crate::snapshot_tests::snap_searchable_fields(&$index)
}};
($index:ident, geo_faceted_documents_ids) => {{
$crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
}};

View File

@@ -21,8 +21,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
name = "clear_documents"
)]
pub fn execute(self) -> Result<u64> {
puffin::profile_function!();
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
let Index {
env: _env,

View File

@@ -499,7 +499,7 @@ impl FacetsUpdateIncrementalInner {
ModificationResult::Expand | ModificationResult::Reduce { .. }
)
{
// if any modification occured, insert it in the database.
// if any modification occurred, insert it in the database.
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
Ok(insertion_key_modification)
} else {

View File

@@ -379,7 +379,7 @@ pub(crate) mod test_helpers {
let mut options = heed::EnvOpenOptions::new();
let options = options.map_size(4096 * 4 * 1000 * 100);
let tempdir = tempfile::TempDir::new().unwrap();
let env = options.open(tempdir.path()).unwrap();
let env = unsafe { options.open(tempdir.path()) }.unwrap();
let mut wtxn = env.write_txn().unwrap();
let content = env.create_database(&mut wtxn, None).unwrap();
wtxn.commit().unwrap();

View File

@@ -29,8 +29,6 @@ pub fn enrich_documents_batch<R: Read + Seek>(
autogenerate_docids: bool,
reader: DocumentsBatchReader<R>,
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
puffin::profile_function!();
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;

View File

@@ -29,8 +29,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
puffin::profile_function!();
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();
@@ -186,7 +184,7 @@ fn searchable_fields_changed(
) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
if searchable_fields.contains(&field_id) {
let del_add = KvReaderDelAdd::new(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
@@ -298,7 +296,7 @@ fn lang_safe_tokens_from_document<'a>(
/// Extract words mapped with their positions of a document.
fn tokens_from_document<'a>(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<Vec<FieldId>>,
searchable_fields: &[FieldId],
tokenizer: &Tokenizer,
max_positions_per_attributes: u32,
del_add: DelAdd,
@@ -309,7 +307,7 @@ fn tokens_from_document<'a>(
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
for (field_id, field_bytes) in obkv.iter() {
// if field is searchable.
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
if searchable_fields.as_ref().contains(&field_id) {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
// parse json.

View File

@@ -23,8 +23,6 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut facet_number_docids_sorter = create_sorter(

View File

@@ -28,8 +28,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let options = NormalizerOption { lossy: true, ..Default::default() };

View File

@@ -37,7 +37,7 @@ pub struct ExtractedFacetValues {
/// Extracts the facet values of each faceted field of each document.
///
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
/// Returns the generated grenad reader containing the docid the fid and the original value as key
/// and the normalized value as value extracted from the given chunk of documents.
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
@@ -46,8 +46,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<ExtractedFacetValues> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut fid_docid_facet_numbers_sorter = create_sorter(

View File

@@ -26,8 +26,6 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut fid_word_count_docids_sorter = create_sorter(

View File

@@ -21,8 +21,6 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
primary_key_id: FieldId,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@@ -10,16 +10,16 @@ use bytemuck::cast_slice;
use grenad::Writer;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
use serde_json::{from_slice, Value};
use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::UserError;
use crate::prompt::Prompt;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::try_split_at;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::parsed_vectors::{ParsedVectorsDiff, RESERVED_VECTORS_FIELD_NAME};
use crate::vector::Embedder;
use crate::{DocumentId, InternalError, Result, ThreadPoolNoAbort, VectorOrArrayOfVectors};
use crate::{DocumentId, Result, ThreadPoolNoAbort};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
@@ -31,6 +31,10 @@ pub struct ExtractedVectorPoints {
pub remove_vectors: grenad::Reader<BufReader<File>>,
// docid -> prompt
pub prompts: grenad::Reader<BufReader<File>>,
// embedder
pub embedder_name: String,
pub embedder: Arc<Embedder>,
}
enum VectorStateDelta {
@@ -65,6 +69,19 @@ impl VectorStateDelta {
}
}
struct EmbedderVectorExtractor {
embedder_name: String,
embedder: Arc<Embedder>,
prompt: Arc<Prompt>,
// (docid, _index) -> KvWriterDelAdd -> Vector
manual_vectors_writer: Writer<BufWriter<File>>,
// (docid) -> (prompt)
prompts_writer: Writer<BufWriter<File>>,
// (docid) -> ()
remove_vectors_writer: Writer<BufWriter<File>>,
}
/// Extracts the embedding vector contained in each document under the `_vectors` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
@@ -73,34 +90,52 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
prompt: &Prompt,
embedder_name: &str,
) -> Result<ExtractedVectorPoints> {
puffin::profile_function!();
) -> Result<Vec<ExtractedVectorPoints>> {
let reindex_vectors = settings_diff.reindex_vectors();
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
// the vector field id may have changed
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
// filter the old vector fid if the settings has been changed forcing reindexing.
let old_vectors_fid = old_vectors_fid.filter(|_| !reindex_vectors);
// (docid, _index) -> KvWriterDelAdd -> Vector
let mut manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let new_vectors_fid = new_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
// (docid) -> (prompt)
let mut prompts_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut extractors = Vec::new();
for (embedder_name, (embedder, prompt)) in
settings_diff.new.embedding_configs.clone().into_iter()
{
// (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> ()
let mut remove_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> (prompt)
let prompts_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> ()
let remove_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
extractors.push(EmbedderVectorExtractor {
embedder_name,
embedder,
prompt,
manual_vectors_writer,
prompts_writer,
remove_vectors_writer,
});
}
let mut key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
@@ -114,152 +149,138 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
key_buffer.clear();
key_buffer.extend_from_slice(docid_bytes);
// since we only needs the primary key when we throw an error we create this getter to
// since we only need the primary key when we throw an error we create this getter to
// lazily get it when needed
let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
// the vector field id may have changed
let old_vectors_fid = old_fields_ids_map.id("_vectors");
// filter the old vector fid if the settings has been changed forcing reindexing.
let old_vectors_fid = old_vectors_fid.filter(|_| !settings_diff.reindex_vectors());
let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid)
.map_err(|error| error.to_crate_error(document_id().to_string()))?;
let new_vectors_fid = new_fields_ids_map.id("_vectors");
let vectors_field = {
let del = old_vectors_fid
.and_then(|vectors_fid| obkv.get(vectors_fid))
.map(KvReaderDelAdd::new)
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion, &document_id))
.transpose()?
.flatten();
let add = new_vectors_fid
.and_then(|vectors_fid| obkv.get(vectors_fid))
.map(KvReaderDelAdd::new)
.map(|obkv| to_vector_map(obkv, DelAdd::Addition, &document_id))
.transpose()?
.flatten();
(del, add)
};
for EmbedderVectorExtractor {
embedder_name,
embedder: _,
prompt,
manual_vectors_writer,
prompts_writer,
remove_vectors_writer,
} in extractors.iter_mut()
{
let delta = match parsed_vectors.remove(embedder_name) {
(Some(old), Some(new)) => {
// no autogeneration
let del_vectors = old.into_array_of_vectors();
let add_vectors = new.into_array_of_vectors();
let (del_map, add_map) = vectors_field;
let del_value = del_map.and_then(|mut map| map.remove(embedder_name));
let add_value = add_map.and_then(|mut map| map.remove(embedder_name));
let delta = match (del_value, add_value) {
(Some(old), Some(new)) => {
// no autogeneration
let del_vectors = extract_vectors(old, document_id, embedder_name)?;
let add_vectors = extract_vectors(new, document_id, embedder_name)?;
if add_vectors.len() > usize::from(u8::MAX) {
return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
document_id().to_string(),
add_vectors.len(),
)));
}
VectorStateDelta::ManualDelta(del_vectors, add_vectors)
}
(Some(_old), None) => {
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
// becomes autogenerated
VectorStateDelta::NowGenerated(prompt.render(
obkv,
DelAdd::Addition,
new_fields_ids_map,
)?)
} else {
VectorStateDelta::NowRemoved
}
}
(None, Some(new)) => {
// was possibly autogenerated, remove all vectors for that document
let add_vectors = extract_vectors(new, document_id, embedder_name)?;
if add_vectors.len() > usize::from(u8::MAX) {
return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
document_id().to_string(),
add_vectors.len(),
)));
}
VectorStateDelta::WasGeneratedNowManual(add_vectors)
}
(None, None) => {
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
// Don't give up if the old prompt was failing
let old_prompt = Some(prompt)
// TODO: this filter works because we erase the vec database when a embedding setting changes.
// When vector pipeline will be optimized, this should be removed.
.filter(|_| !settings_diff.reindex_vectors())
.map(|p| {
p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
});
let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
if old_prompt.as_ref() != Some(&new_prompt) {
let old_prompt = old_prompt.unwrap_or_default();
tracing::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
);
VectorStateDelta::NowGenerated(new_prompt)
} else {
tracing::trace!("⏭️ Prompt unmodified, skipping");
VectorStateDelta::NoChange
if add_vectors.len() > usize::from(u8::MAX) {
return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
document_id().to_string(),
add_vectors.len(),
)));
}
} else {
VectorStateDelta::NowRemoved
}
}
};
// and we finally push the unique vectors into the writer
push_vectors_diff(
&mut remove_vectors_writer,
&mut prompts_writer,
&mut manual_vectors_writer,
&mut key_buffer,
delta,
settings_diff,
)?;
VectorStateDelta::ManualDelta(del_vectors, add_vectors)
}
(Some(_old), None) => {
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
// becomes autogenerated
VectorStateDelta::NowGenerated(prompt.render(
obkv,
DelAdd::Addition,
new_fields_ids_map,
)?)
} else {
VectorStateDelta::NowRemoved
}
}
(None, Some(new)) => {
// was possibly autogenerated, remove all vectors for that document
let add_vectors = new.into_array_of_vectors();
if add_vectors.len() > usize::from(u8::MAX) {
return Err(crate::Error::UserError(crate::UserError::TooManyVectors(
document_id().to_string(),
add_vectors.len(),
)));
}
VectorStateDelta::WasGeneratedNowManual(add_vectors)
}
(None, None) => {
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
// Don't give up if the old prompt was failing
let old_prompt = Some(&prompt)
// TODO: this filter works because we erase the vec database when a embedding setting changes.
// When vector pipeline will be optimized, this should be removed.
.filter(|_| !settings_diff.reindex_vectors())
.map(|p| {
p.render(obkv, DelAdd::Deletion, old_fields_ids_map)
.unwrap_or_default()
});
let new_prompt =
prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
if old_prompt.as_ref() != Some(&new_prompt) {
let old_prompt = old_prompt.unwrap_or_default();
tracing::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
);
VectorStateDelta::NowGenerated(new_prompt)
} else {
tracing::trace!("⏭️ Prompt unmodified, skipping");
VectorStateDelta::NoChange
}
} else {
VectorStateDelta::NowRemoved
}
}
};
// and we finally push the unique vectors into the writer
push_vectors_diff(
remove_vectors_writer,
prompts_writer,
manual_vectors_writer,
&mut key_buffer,
delta,
reindex_vectors,
)?;
}
}
Ok(ExtractedVectorPoints {
// docid, _index -> KvWriterDelAdd -> Vector
manual_vectors: writer_into_reader(manual_vectors_writer)?,
// docid -> ()
remove_vectors: writer_into_reader(remove_vectors_writer)?,
// docid -> prompt
prompts: writer_into_reader(prompts_writer)?,
})
}
let mut results = Vec::new();
fn to_vector_map(
obkv: KvReaderDelAdd,
side: DelAdd,
document_id: &impl Fn() -> Value,
) -> Result<Option<serde_json::Map<String, Value>>> {
Ok(if let Some(value) = obkv.get(side) {
let Ok(value) = from_slice(value) else {
let value = from_slice(value).map_err(InternalError::SerdeJson)?;
return Err(crate::Error::UserError(UserError::InvalidVectorsMapType {
document_id: document_id(),
value,
}));
};
Some(value)
} else {
None
})
for EmbedderVectorExtractor {
embedder_name,
embedder,
prompt: _,
manual_vectors_writer,
prompts_writer,
remove_vectors_writer,
} in extractors
{
results.push(ExtractedVectorPoints {
// docid, _index -> KvWriterDelAdd -> Vector
manual_vectors: writer_into_reader(manual_vectors_writer)?,
// docid -> ()
remove_vectors: writer_into_reader(remove_vectors_writer)?,
// docid -> prompt
prompts: writer_into_reader(prompts_writer)?,
embedder,
embedder_name,
})
}
Ok(results)
}
/// Computes the diff between both Del and Add numbers and
@@ -270,14 +291,13 @@ fn push_vectors_diff(
manual_vectors_writer: &mut Writer<BufWriter<File>>,
key_buffer: &mut Vec<u8>,
delta: VectorStateDelta,
settings_diff: &InnerIndexSettingsDiff,
reindex_vectors: bool,
) -> Result<()> {
puffin::profile_function!();
let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
if must_remove
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
// When vector pipeline will be optimized, this should be removed.
&& !settings_diff.reindex_vectors()
&& !reindex_vectors
{
key_buffer.truncate(TRUNCATE_SIZE);
remove_vectors_writer.insert(&key_buffer, [])?;
@@ -308,7 +328,7 @@ fn push_vectors_diff(
EitherOrBoth::Left(vector) => {
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
// When vector pipeline will be optimized, this should be removed.
if !settings_diff.reindex_vectors() {
if !reindex_vectors {
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those vectors.
let mut obkv = KvWriterDelAdd::memory();
@@ -336,26 +356,6 @@ fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
}
/// Extracts the vectors from a JSON value.
fn extract_vectors(
value: Value,
document_id: impl Fn() -> Value,
name: &str,
) -> Result<Vec<Vec<f32>>> {
// FIXME: ugly clone of the vectors here
match serde_json::from_value(value.clone()) {
Ok(vectors) => {
Ok(VectorOrArrayOfVectors::into_array_of_vectors(vectors).unwrap_or_default())
}
Err(_) => Err(UserError::InvalidVectorsType {
document_id: document_id(),
value,
subfield: name.to_owned(),
}
.into()),
}
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_embeddings<R: io::Read + io::Seek>(
// docid, prompt
@@ -364,7 +364,6 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
embedder: Arc<Embedder>,
request_threads: &ThreadPoolNoAbort,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk

View File

@@ -36,8 +36,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
)> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_fid_docids_sorter = create_sorter(
@@ -167,8 +165,6 @@ fn words_into_sorter(
add_words: &BTreeSet<Vec<u8>>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
puffin::profile_function!();
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -26,7 +26,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord;
let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord;
@@ -71,8 +70,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
// if we change document, we fill the sorter
if current_document_id.map_or(false, |id| id != document_id) {
puffin::profile_scope!("Document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
let _entered = span.enter();
@@ -163,7 +160,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
if let Some(document_id) = current_document_id {
puffin::profile_scope!("Final document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
let _entered = span.enter();
@@ -176,7 +172,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
)?;
}
{
puffin::profile_scope!("sorter_into_reader");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
let _entered = span.enter();

View File

@@ -25,8 +25,6 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_position_docids_sorter = create_sorter(
@@ -104,8 +102,6 @@ fn words_position_into_sorter(
add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
puffin::profile_function!();
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@@ -46,8 +46,6 @@ pub(crate) fn data_from_obkv_documents(
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<()> {
puffin::profile_function!();
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|| {
original_obkv_chunks
@@ -88,7 +86,6 @@ pub(crate) fn data_from_obkv_documents(
lmdb_writer_sx.clone(),
extract_fid_word_count_docids,
TypedChunk::FieldIdWordCountDocids,
"field-id-wordcount-docids",
);
run_extraction_task::<
_,
@@ -115,7 +112,6 @@ pub(crate) fn data_from_obkv_documents(
word_fid_docids_reader,
}
},
"word-docids",
);
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
@@ -125,7 +121,6 @@ pub(crate) fn data_from_obkv_documents(
lmdb_writer_sx.clone(),
extract_word_position_docids,
TypedChunk::WordPositionDocids,
"word-position-docids",
);
run_extraction_task::<
@@ -139,7 +134,6 @@ pub(crate) fn data_from_obkv_documents(
lmdb_writer_sx.clone(),
extract_facet_string_docids,
TypedChunk::FieldIdFacetStringDocids,
"field-id-facet-string-docids",
);
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
@@ -149,7 +143,6 @@ pub(crate) fn data_from_obkv_documents(
lmdb_writer_sx.clone(),
extract_facet_number_docids,
TypedChunk::FieldIdFacetNumberDocids,
"field-id-facet-number-docids",
);
run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
@@ -159,7 +152,6 @@ pub(crate) fn data_from_obkv_documents(
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
TypedChunk::WordPairProximityDocids,
"word-pair-proximity-docids",
);
}
@@ -183,7 +175,6 @@ fn run_extraction_task<FE, FS, M>(
lmdb_writer_sx: Sender<Result<TypedChunk>>,
extract_fn: FE,
serialize_fn: FS,
name: &'static str,
) where
FE: Fn(
grenad::Reader<CursorClonableMmap>,
@@ -201,7 +192,7 @@ fn run_extraction_task<FE, FS, M>(
rayon::spawn(move || {
let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: &current_span, "extract_multiple_chunks");
let _entered = child_span.enter();
puffin::profile_scope!("extract_multiple_chunks", name);
match extract_fn(chunk, indexer, &settings_diff) {
Ok(chunk) => {
let _ = lmdb_writer_sx.send(Ok(serialize_fn(chunk)));
@@ -224,27 +215,31 @@ fn send_original_documents_data(
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
let request_threads = ThreadPoolNoAbortBuilder::new()
.num_threads(crate::vector::REQUEST_PARALLELISM)
.thread_name(|index| format!("embedding-request-{index}"))
.build()?;
if settings_diff.reindex_vectors() || !settings_diff.settings_update_only() {
let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only())
// no point in indexing vectors without embedders
&& (!settings_diff.new.embedding_configs.inner_as_ref().is_empty());
if index_vectors {
let settings_diff = settings_diff.clone();
let original_documents_chunk = original_documents_chunk.clone();
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
for (name, (embedder, prompt)) in settings_diff.new.embedding_configs.clone() {
let result = extract_vector_points(
documents_chunk_cloned.clone(),
indexer,
&settings_diff,
&prompt,
&name,
);
match result {
Ok(ExtractedVectorPoints { manual_vectors, remove_vectors, prompts }) => {
match extract_vector_points(original_documents_chunk.clone(), indexer, &settings_diff) {
Ok(extracted_vectors) => {
for ExtractedVectorPoints {
manual_vectors,
remove_vectors,
prompts,
embedder_name,
embedder,
} in extracted_vectors
{
let embeddings = match extract_embeddings(
prompts,
indexer,
@@ -253,28 +248,26 @@ fn send_original_documents_data(
) {
Ok(results) => Some(results),
Err(error) => {
let _ = lmdb_writer_sx_cloned.send(Err(error));
let _ = lmdb_writer_sx.send(Err(error));
None
}
};
if !(remove_vectors.is_empty()
&& manual_vectors.is_empty()
&& embeddings.as_ref().map_or(true, |e| e.is_empty()))
{
let _ = lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::VectorPoints {
remove_vectors,
embeddings,
expected_dimension: embedder.dimensions(),
manual_vectors,
embedder_name: name,
embedder_name,
}));
}
}
Err(error) => {
let _ = lmdb_writer_sx_cloned.send(Err(error));
}
}
Err(error) => {
let _ = lmdb_writer_sx.send(Err(error));
}
}
});

View File

@@ -61,7 +61,6 @@ pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@@ -182,8 +181,6 @@ where
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
{
puffin::profile_function!();
let mut buffer = Vec::new();
let database = database.remap_types::<Bytes, Bytes>();

View File

@@ -6,6 +6,7 @@ mod typed_chunk;
use std::collections::{HashMap, HashSet};
use std::io::{Read, Seek};
use std::iter;
use std::num::NonZeroU32;
use std::result::Result as StdResult;
use std::sync::Arc;
@@ -140,8 +141,6 @@ where
mut self,
reader: DocumentsBatchReader<R>,
) -> Result<(Self, StdResult<u64, UserError>)> {
puffin::profile_function!();
// Early return when there is no document to add
if reader.is_empty() {
return Ok((self, Ok(0)));
@@ -186,8 +185,6 @@ where
mut self,
to_delete: Vec<String>,
) -> Result<(Self, StdResult<u64, UserError>)> {
puffin::profile_function!();
// Early return when there is no document to add
if to_delete.is_empty() {
// Maintains Invariant: remove documents actually always returns Ok for the inner result
@@ -222,8 +219,6 @@ where
mut self,
to_delete: &RoaringBitmap,
) -> Result<(Self, u64)> {
puffin::profile_function!();
// Early return when there is no document to add
if to_delete.is_empty() {
return Ok((self, 0));
@@ -248,8 +243,6 @@ where
name = "index_documents"
)]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
puffin::profile_function!();
if self.added_documents == 0 && self.deleted_documents == 0 {
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
@@ -278,8 +271,6 @@ where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
let TransformOutput {
primary_key,
mut settings_diff,
@@ -337,7 +328,10 @@ where
let min_chunk_size = 1024 * 512; // 512KiB
// compute the chunk size from the number of available threads and the inputed data size.
let total_size = flattened_documents.metadata().map(|m| m.len());
let total_size = match flattened_documents.as_ref() {
Some(flattened_documents) => flattened_documents.metadata().map(|m| m.len()),
None => Ok(default_chunk_size as u64),
};
let current_num_threads = pool.current_num_threads();
// if we have more than 2 thread, create a number of chunk equal to 3/4 threads count
let chunk_count = if current_num_threads > 2 {
@@ -351,8 +345,14 @@ where
}
};
let original_documents = grenad::Reader::new(original_documents)?;
let flattened_documents = grenad::Reader::new(flattened_documents)?;
let original_documents = match original_documents {
Some(original_documents) => Some(grenad::Reader::new(original_documents)?),
None => None,
};
let flattened_documents = match flattened_documents {
Some(flattened_documents) => Some(grenad::Reader::new(flattened_documents)?),
None => None,
};
let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;
@@ -371,15 +371,23 @@ where
pool.install(|| {
rayon::spawn(move || {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
let _enter = child_span.enter();
puffin::profile_scope!("extract_and_send_grenad_chunks");
// split obkv file into several chunks
let original_chunk_iter =
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
let _enter = child_span.enter();
// split obkv file into several chunks
let flattened_chunk_iter =
grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size);
let original_chunk_iter = match original_documents {
Some(original_documents) => {
grenad_obkv_into_chunks(original_documents,pool_params,documents_chunk_size).map(either::Left)
},
None => Ok(either::Right(iter::empty())),
};
// split obkv file into several chunks
let flattened_chunk_iter = match flattened_documents {
Some(flattened_documents) => {
grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size).map(either::Left)
},
None => Ok(either::Right(iter::empty())),
};
let result = original_chunk_iter.and_then(|original_chunk| {
let flattened_chunk = flattened_chunk_iter?;
@@ -533,7 +541,7 @@ where
let writer_index = (embedder_index as u16) << 8;
for k in 0..=u8::MAX {
let writer =
arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension)?;
arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension);
if writer.is_empty(wtxn)? {
break;
}
@@ -571,8 +579,6 @@ where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
// Merged databases are already been indexed, we start from this count;
let mut databases_seen = MERGED_DATABASE_COUNT;
@@ -616,7 +622,6 @@ where
{
let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
let _entered = span.enter();
puffin::profile_scope!("compute_prefix_diffs");
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
@@ -756,8 +761,6 @@ fn execute_word_prefix_docids(
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
puffin::profile_function!();
let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
builder.chunk_compression_type = indexer_config.chunk_compression_type;
builder.chunk_compression_level = indexer_config.chunk_compression_level;
@@ -3237,6 +3240,7 @@ mod tests {
}
#[test]
#[cfg(feature = "all-tokenizations")]
fn stored_detected_script_and_language_should_not_return_deleted_documents() {
use charabia::{Language, Script};
let index = TempIndex::new();

View File

@@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::collections::btree_map::Entry as BEntry;
use std::collections::hash_map::Entry as HEntry;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io::{Read, Seek};
@@ -20,21 +20,21 @@ use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key};
use crate::update::del_add::{
del_add_from_two_obkvs, into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd,
};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};
use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
};
pub struct TransformOutput {
pub primary_key: String,
pub settings_diff: InnerIndexSettingsDiff,
pub field_distribution: FieldDistribution,
pub documents_count: usize,
pub original_documents: File,
pub flattened_documents: File,
pub original_documents: Option<File>,
pub flattened_documents: Option<File>,
}
/// Extract the external ids, deduplicate and compute the new internal documents ids
@@ -161,8 +161,6 @@ impl<'a, 'i> Transform<'a, 'i> {
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
let external_documents_ids = self.index.external_documents_ids();
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
@@ -375,8 +373,6 @@ impl<'a, 'i> Transform<'a, 'i> {
where
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
// there may be duplicates in the documents to remove.
to_remove.sort_unstable();
to_remove.dedup();
@@ -466,8 +462,6 @@ impl<'a, 'i> Transform<'a, 'i> {
where
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
let mut documents_deleted = 0;
let mut document_sorter_value_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
@@ -686,8 +680,6 @@ impl<'a, 'i> Transform<'a, 'i> {
where
F: Fn(UpdateIndexingStep) + Sync,
{
puffin::profile_function!();
let primary_key = self
.index
.primary_key(wtxn)?
@@ -808,11 +800,15 @@ impl<'a, 'i> Transform<'a, 'i> {
})?;
let old_inner_settings = InnerIndexSettings::from_index(self.index, wtxn)?;
let fields_ids_map = self.fields_ids_map;
let primary_key_id = self.index.primary_key(wtxn)?.and_then(|name| fields_ids_map.id(name));
let mut new_inner_settings = old_inner_settings.clone();
new_inner_settings.fields_ids_map = self.fields_ids_map;
new_inner_settings.fields_ids_map = fields_ids_map;
let settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
primary_key_id,
embedding_configs_updated: false,
settings_update_only: false,
};
@@ -822,10 +818,12 @@ impl<'a, 'i> Transform<'a, 'i> {
settings_diff,
field_distribution,
documents_count: self.documents_count,
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
flattened_documents: flattened_documents
.into_inner()
.map_err(|err| err.into_error())?,
original_documents: Some(
original_documents.into_inner().map_err(|err| err.into_error())?,
),
flattened_documents: Some(
flattened_documents.into_inner().map_err(|err| err.into_error())?,
),
})
}
@@ -835,34 +833,66 @@ impl<'a, 'i> Transform<'a, 'i> {
fn rebind_existing_document(
old_obkv: KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
original_obkv_buffer: &mut Vec<u8>,
flattened_obkv_buffer: &mut Vec<u8>,
modified_faceted_fields: &HashSet<String>,
original_obkv_buffer: Option<&mut Vec<u8>>,
flattened_obkv_buffer: Option<&mut Vec<u8>>,
) -> Result<()> {
let mut old_fields_ids_map = settings_diff.old.fields_ids_map.clone();
let mut new_fields_ids_map = settings_diff.new.fields_ids_map.clone();
// Always keep the primary key.
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only the `searchableAttributes` has been changed, keep only the searchable fields.
let must_reindex_searchables = settings_diff.reindex_searchable();
let necessary_searchable_field = |id: FieldId| -> bool {
must_reindex_searchables
&& (settings_diff.old.searchable_fields_ids.contains(&id)
|| settings_diff.new.searchable_fields_ids.contains(&id))
};
// If only a faceted field has been added, keep only this field.
let must_reindex_facets = settings_diff.reindex_facets();
let necessary_faceted_field = |id: FieldId| -> bool {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
must_reindex_facets
&& modified_faceted_fields
.iter()
.any(|long| is_faceted_by(long, field_name) || is_faceted_by(field_name, long))
};
// Alway provide all fields when vectors are involved because
// we need the fields for the prompt/templating.
let reindex_vectors = settings_diff.reindex_vectors();
let mut obkv_writer = KvWriter::<_, FieldId>::memory();
// We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv.
for (id, name) in new_fields_ids_map.iter() {
if let Some(val) = old_fields_ids_map.id(name).and_then(|id| old_obkv.get(id)) {
for (id, val) in old_obkv.iter() {
if is_primary_key(id)
|| necessary_searchable_field(id)
|| necessary_faceted_field(id)
|| reindex_vectors
{
obkv_writer.insert(id, val)?;
}
}
let data = obkv_writer.into_inner()?;
let new_obkv = KvReader::<FieldId>::new(&data);
let obkv = KvReader::<FieldId>::new(&data);
// take the non-flattened version if flatten_from_fields_ids_map returns None.
let old_flattened = Self::flatten_from_fields_ids_map(&old_obkv, &mut old_fields_ids_map)?;
let old_flattened =
old_flattened.as_deref().map_or_else(|| old_obkv, KvReader::<FieldId>::new);
let new_flattened = Self::flatten_from_fields_ids_map(&new_obkv, &mut new_fields_ids_map)?;
let new_flattened =
new_flattened.as_deref().map_or_else(|| new_obkv, KvReader::<FieldId>::new);
if let Some(original_obkv_buffer) = original_obkv_buffer {
original_obkv_buffer.clear();
into_del_add_obkv(obkv, DelAddOperation::DeletionAndAddition, original_obkv_buffer)?;
}
original_obkv_buffer.clear();
flattened_obkv_buffer.clear();
if let Some(flattened_obkv_buffer) = flattened_obkv_buffer {
// take the non-flattened version if flatten_from_fields_ids_map returns None.
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
del_add_from_two_obkvs(&old_obkv, &new_obkv, original_obkv_buffer)?;
del_add_from_two_obkvs(&old_flattened, &new_flattened, flattened_obkv_buffer)?;
flattened_obkv_buffer.clear();
into_del_add_obkv(
flattened,
DelAddOperation::DeletionAndAddition,
flattened_obkv_buffer,
)?;
}
Ok(())
}
@@ -891,46 +921,63 @@ impl<'a, 'i> Transform<'a, 'i> {
let documents_count = documents_ids.len() as usize;
// We initialize the sorter with the user indexing settings.
let mut original_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
);
let mut original_sorter = if settings_diff.reindex_vectors() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
))
} else {
None
};
// We initialize the sorter with the user indexing settings.
let mut flattened_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
);
let mut flattened_sorter =
if settings_diff.reindex_searchable() || settings_diff.reindex_facets() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
))
} else {
None
};
let mut original_obkv_buffer = Vec::new();
let mut flattened_obkv_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
for result in self.index.external_documents_ids().iter(wtxn)? {
let (external_id, docid) = result?;
let old_obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?;
if original_sorter.is_some() || flattened_sorter.is_some() {
let modified_faceted_fields = settings_diff.modified_faceted_fields();
let mut original_obkv_buffer = Vec::new();
let mut flattened_obkv_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
for result in self.index.external_documents_ids().iter(wtxn)? {
let (external_id, docid) = result?;
let old_obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?;
Self::rebind_existing_document(
old_obkv,
&settings_diff,
&mut original_obkv_buffer,
&mut flattened_obkv_buffer,
)?;
Self::rebind_existing_document(
old_obkv,
&settings_diff,
&modified_faceted_fields,
Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()),
Some(&mut flattened_obkv_buffer).filter(|_| flattened_sorter.is_some()),
)?;
document_sorter_key_buffer.clear();
document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes());
document_sorter_key_buffer.extend_from_slice(external_id.as_bytes());
original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?;
flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?;
if let Some(original_sorter) = original_sorter.as_mut() {
document_sorter_key_buffer.clear();
document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes());
document_sorter_key_buffer.extend_from_slice(external_id.as_bytes());
original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?;
}
if let Some(flattened_sorter) = flattened_sorter.as_mut() {
flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?;
}
}
}
let grenad_params = GrenadParameters {
@@ -941,17 +988,22 @@ impl<'a, 'i> Transform<'a, 'i> {
};
// Once we have written all the documents, we merge everything into a Reader.
let original_documents = sorter_into_reader(original_sorter, grenad_params)?;
let flattened_documents = sorter_into_reader(flattened_sorter, grenad_params)?;
let flattened_documents = match flattened_sorter {
Some(flattened_sorter) => Some(sorter_into_reader(flattened_sorter, grenad_params)?),
None => None,
};
let original_documents = match original_sorter {
Some(original_sorter) => Some(sorter_into_reader(original_sorter, grenad_params)?),
None => None,
};
Ok(TransformOutput {
primary_key,
field_distribution,
settings_diff,
documents_count,
original_documents: original_documents.into_inner().into_inner(),
flattened_documents: flattened_documents.into_inner().into_inner(),
original_documents: original_documents.map(|od| od.into_inner().into_inner()),
flattened_documents: flattened_documents.map(|fd| fd.into_inner().into_inner()),
})
}
}

View File

@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::{BTreeSet, HashMap};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
@@ -118,65 +118,6 @@ impl TypedChunk {
}
}
impl TypedChunk {
pub fn to_debug_string(&self) -> String {
match self {
TypedChunk::FieldIdDocidFacetStrings(grenad) => {
format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::Documents(grenad) => {
format!("Documents {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdWordCountDocids(grenad) => {
format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
} => format!(
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {}, word_fid_docids_reader: {} }}",
word_docids_reader.len(),
exact_word_docids_reader.len(),
word_fid_docids_reader.len()
),
TypedChunk::WordPositionDocids(grenad) => {
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordPairProximityDocids(grenad) => {
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetStringDocids((grenad, _)) => {
format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetNumberDocids(grenad) => {
format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetExistsDocids(grenad) => {
format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::GeoPoints(grenad) => {
format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::VectorPoints{ remove_vectors, manual_vectors, embeddings, expected_dimension, embedder_name } => {
format!("VectorPoints {{ remove_vectors: {}, manual_vectors: {}, embeddings: {}, dimension: {}, embedder_name: {} }}", remove_vectors.len(), manual_vectors.len(), embeddings.as_ref().map(|e| e.len()).unwrap_or_default(), expected_dimension, embedder_name)
}
TypedChunk::ScriptLanguageDocids(sl_map) => {
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", sl_map.len())
}
}
}
}
/// Write typed chunk in the corresponding LMDB database of the provided index.
/// Return new documents seen.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
@@ -185,14 +126,16 @@ pub(crate) fn write_typed_chunk_into_index(
index: &Index,
wtxn: &mut RwTxn,
) -> Result<(RoaringBitmap, bool)> {
puffin::profile_function!(typed_chunks[0].to_debug_string());
let mut is_merged_database = false;
match typed_chunks[0] {
TypedChunk::Documents(_) => {
let span = tracing::trace_span!(target: "indexing::write_db", "documents");
let _entered = span.enter();
let fields_ids_map = index.fields_ids_map(wtxn)?;
let vectors_fid =
fields_ids_map.id(crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
let mut builder = MergerBuilder::new(keep_latest_obkv as MergeFn);
for typed_chunk in typed_chunks {
let TypedChunk::Documents(chunk) = typed_chunk else {
@@ -206,6 +149,10 @@ pub(crate) fn write_typed_chunk_into_index(
let mut docids = index.documents_ids(wtxn)?;
let mut iter = merger.into_stream_merger_iter()?;
let embedders: BTreeSet<_> =
index.embedding_configs(wtxn)?.into_iter().map(|(k, _v)| k).collect();
let mut vectors_buffer = Vec::new();
while let Some((key, reader)) = iter.next()? {
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
let reader: KvReader<FieldId> = KvReader::new(reader);
@@ -219,7 +166,35 @@ pub(crate) fn write_typed_chunk_into_index(
let del_add_reader = KvReaderDelAdd::new(value);
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
writer.insert(field_id, addition)?;
let addition = if vectors_fid == Some(field_id) {
'vectors: {
vectors_buffer.clear();
let Ok(mut vectors) =
crate::vector::parsed_vectors::ParsedVectors::from_bytes(
addition,
)
else {
// if the `_vectors` field cannot be parsed as map of vectors, just write it as-is
break 'vectors Some(addition);
};
vectors.retain_user_provided_vectors(&embedders);
let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors;
if vectors.is_empty() {
// skip writing empty `_vectors` map
break 'vectors None;
}
serde_json::to_writer(&mut vectors_buffer, &vectors)
.map_err(InternalError::SerdeJson)?;
Some(vectors_buffer.as_slice())
}
} else {
Some(addition)
};
if let Some(addition) = addition {
writer.insert(field_id, addition)?;
}
}
}
@@ -661,7 +636,7 @@ pub(crate) fn write_typed_chunk_into_index(
)?;
let writer_index = (embedder_index as u16) << 8;
// FIXME: allow customizing distance
let writers: std::result::Result<Vec<_>, _> = (0..=u8::MAX)
let writers: Vec<_> = (0..=u8::MAX)
.map(|k| {
arroy::Writer::new(
index.vector_arroy,
@@ -670,7 +645,6 @@ pub(crate) fn write_typed_chunk_into_index(
)
})
.collect();
let writers = writers?;
// remove vectors for docids we want them removed
let merger = remove_vectors_builder.build();
@@ -842,7 +816,6 @@ where
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
{
puffin::profile_function!();
let mut buffer = Vec::new();
let database = database.remap_types::<Bytes, Bytes>();

View File

@@ -398,8 +398,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
// if the settings are set before any document update, we don't need to do anything, and
// will set the primary key during the first document addition.
if self.index.number_of_documents(self.wtxn)? == 0 {
@@ -461,50 +459,39 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(true)
}
/// Updates the index's searchable attributes. This causes the field map to be recomputed to
/// reflect the order of the searchable attributes.
/// Updates the index's searchable attributes.
fn update_searchable(&mut self) -> Result<bool> {
match self.searchable_fields {
Setting::Set(ref fields) => {
// Check to see if the searchable fields changed before doing anything else
let old_fields = self.index.searchable_fields(self.wtxn)?;
let did_change = match old_fields {
// If old_fields is Some, let's check to see if the fields actually changed
Some(old_fields) => {
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
new_fields != old_fields
}
// If old_fields is None, the fields have changed (because they are being set)
None => true,
let did_change = {
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
new_fields != old_fields
};
if !did_change {
return Ok(false);
}
// every time the searchable attributes are updated, we need to update the
// ids for any settings that uses the facets. (distinct_fields, filterable_fields).
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_fields_ids_map = FieldsIdsMap::new();
// Since we're updating the settings we can only add new fields at the end of the field id map
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// fields are deduplicated, only the first occurrence is taken into account
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
// Add all the searchable attributes to the field map, and then add the
// remaining fields from the old field map to the new one
for name in names.iter() {
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
for (_, name) in old_fields_ids_map.iter() {
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
// The fields ids map won't change the field id of already present elements thus only the
// new fields will be inserted.
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn,
&names,
&new_fields_ids_map,
&fields_ids_map,
)?;
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
Ok(true)
}
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
@@ -1078,10 +1065,17 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage
let embedding_configs_updated = self.update_embedding_configs()?;
let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?;
let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?;
new_inner_settings.recompute_facets(self.wtxn, self.index)?;
let primary_key_id = self
.index
.primary_key(self.wtxn)?
.and_then(|name| new_inner_settings.fields_ids_map.id(name));
let inner_settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only: true,
};
@@ -1097,10 +1091,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
pub struct InnerIndexSettingsDiff {
pub(crate) old: InnerIndexSettings,
pub(crate) new: InnerIndexSettings,
pub(crate) primary_key_id: Option<FieldId>,
// TODO: compare directly the embedders.
pub(crate) embedding_configs_updated: bool,
pub(crate) settings_update_only: bool,
}
@@ -1110,13 +1103,8 @@ impl InnerIndexSettingsDiff {
}
pub fn reindex_searchable(&self) -> bool {
self.old
.fields_ids_map
.iter()
.zip(self.new.fields_ids_map.iter())
.any(|(old, new)| old != new)
|| self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
!= self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
!= self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|| self.old.allowed_separators != self.new.allowed_separators
|| self.old.dictionary != self.new.dictionary
|| self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields
@@ -1143,15 +1131,7 @@ impl InnerIndexSettingsDiff {
return true;
}
let faceted_updated =
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields);
self.old
.fields_ids_map
.iter()
.zip(self.new.fields_ids_map.iter())
.any(|(old, new)| old != new)
|| faceted_updated
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
}
pub fn reindex_vectors(&self) -> bool {
@@ -1181,7 +1161,7 @@ pub(crate) struct InnerIndexSettings {
pub user_defined_faceted_fields: HashSet<String>,
pub user_defined_searchable_fields: Option<Vec<String>>,
pub faceted_fields_ids: HashSet<FieldId>,
pub searchable_fields_ids: Option<Vec<FieldId>>,
pub searchable_fields_ids: Vec<FieldId>,
pub exact_attributes: HashSet<FieldId>,
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
@@ -1262,18 +1242,21 @@ impl InnerIndexSettings {
// find and insert the new field ids
pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
let searchable_fields = self
.user_defined_searchable_fields
.as_ref()
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
// in case new fields were introduced we're going to recreate the searchable fields.
if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() {
let searchable_fields =
searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>();
if let Some(searchable_fields) = searchable_fields {
index.put_all_searchable_fields_from_fields_ids_map(
wtxn,
&searchable_fields,
&self.fields_ids_map,
)?;
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
self.searchable_fields_ids = searchable_fields_ids;
}
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
self.searchable_fields_ids = searchable_fields_ids;
Ok(())
}
@@ -1546,12 +1529,13 @@ mod tests {
use big_s::S;
use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset};
use meili_snap::snapshot;
use super::*;
use crate::error::Error;
use crate::index::tests::TempIndex;
use crate::update::ClearDocuments;
use crate::{Criterion, Filter, SearchResult};
use crate::{db_snap, Criterion, Filter, SearchResult};
#[test]
fn set_and_reset_searchable_fields() {
@@ -1580,6 +1564,17 @@ mod tests {
wtxn.commit().unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |
2 age |
"###);
db_snap!(index, searchable_fields, @r###"["name"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
1 0 |
"###);
// Check that the searchable field is correctly set to "name" only.
let rtxn = index.read_txn().unwrap();
// When we search for something that is not in
@@ -1591,8 +1586,9 @@ mod tests {
// we must find the appropriate document.
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
assert_eq!(documents.len(), 1);
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
drop(rtxn);
// We change the searchable fields to be the "name" field only.
@@ -1602,14 +1598,31 @@ mod tests {
})
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |
2 age |
"###);
db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
2 0 |
"###);
// Check that the searchable field have been reset and documents are found now.
let rtxn = index.read_txn().unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
// the searchable fields should contain all the fields
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
assert_eq!(searchable_fields, None);
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
let result = index.search(&rtxn).query("23").execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
}
#[test]

View File

@@ -52,8 +52,6 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
puffin::profile_function!();
// It is forbidden to keep a mutable reference into the database
// and write into it at the same time, therefore we write into another file.
let mut prefix_docids_sorter = create_sorter(

View File

@@ -57,7 +57,6 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
puffin::profile_function!();
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
let mut prefix_integer_docids_sorter = create_sorter(

View File

@@ -45,8 +45,6 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
name = "words_prefix_fst"
)]
pub fn execute(self) -> Result<()> {
puffin::profile_function!();
let words_fst = self.index.words_fst(self.wtxn)?;
let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];

View File

@@ -13,6 +13,7 @@ pub mod error;
pub mod hf;
pub mod manual;
pub mod openai;
pub mod parsed_vectors;
pub mod settings;
pub mod ollama;
@@ -147,6 +148,10 @@ impl EmbeddingConfigs {
self.get(self.get_default_embedder_name())
}
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
&self.0
}
/// Get the name of the default embedder configuration.
///
/// The default embedder is determined as follows:

View File

@@ -0,0 +1,207 @@
use std::collections::{BTreeMap, BTreeSet};
use obkv::KvReader;
use serde_json::{from_slice, Value};
use super::Embedding;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::{FieldId, InternalError, UserError};
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(untagged)]
pub enum Vectors {
ImplicitlyUserProvided(VectorOrArrayOfVectors),
Explicit(ExplicitVectors),
}
impl Vectors {
pub fn into_array_of_vectors(self) -> Vec<Embedding> {
match self {
Vectors::ImplicitlyUserProvided(embeddings)
| Vectors::Explicit(ExplicitVectors { embeddings, user_provided: _ }) => {
embeddings.into_array_of_vectors().unwrap_or_default()
}
}
}
}
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ExplicitVectors {
pub embeddings: VectorOrArrayOfVectors,
pub user_provided: bool,
}
pub struct ParsedVectorsDiff {
pub old: Option<BTreeMap<String, Vectors>>,
pub new: Option<BTreeMap<String, Vectors>>,
}
impl ParsedVectorsDiff {
pub fn new(
documents_diff: KvReader<'_, FieldId>,
old_vectors_fid: Option<FieldId>,
new_vectors_fid: Option<FieldId>,
) -> Result<Self, Error> {
let old = match old_vectors_fid
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
.map(KvReaderDelAdd::new)
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion))
.transpose()
{
Ok(del) => del,
// ignore wrong shape for old version of documents, use an empty map in this case
Err(Error::InvalidMap(value)) => {
tracing::warn!(%value, "Previous version of the `_vectors` field had a wrong shape");
Default::default()
}
Err(error) => {
return Err(error);
}
}
.flatten();
let new = new_vectors_fid
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
.map(KvReaderDelAdd::new)
.map(|obkv| to_vector_map(obkv, DelAdd::Addition))
.transpose()?
.flatten();
Ok(Self { old, new })
}
pub fn remove(&mut self, embedder_name: &str) -> (Option<Vectors>, Option<Vectors>) {
let old = self.old.as_mut().and_then(|old| old.remove(embedder_name));
let new = self.new.as_mut().and_then(|new| new.remove(embedder_name));
(old, new)
}
}
pub struct ParsedVectors(pub BTreeMap<String, Vectors>);
impl ParsedVectors {
pub fn from_bytes(value: &[u8]) -> Result<Self, Error> {
let Ok(value) = from_slice(value) else {
let value = from_slice(value).map_err(Error::InternalSerdeJson)?;
return Err(Error::InvalidMap(value));
};
Ok(ParsedVectors(value))
}
pub fn retain_user_provided_vectors(&mut self, embedders: &BTreeSet<String>) {
self.0.retain(|k, v| match v {
Vectors::ImplicitlyUserProvided(_) => true,
Vectors::Explicit(ExplicitVectors { embeddings: _, user_provided }) => {
*user_provided
// if the embedder is not in the config, then never touch it
|| !embedders.contains(k)
}
});
}
}
pub enum Error {
InvalidMap(Value),
InternalSerdeJson(serde_json::Error),
}
impl Error {
pub fn to_crate_error(self, document_id: String) -> crate::Error {
match self {
Error::InvalidMap(value) => {
crate::Error::UserError(UserError::InvalidVectorsMapType { document_id, value })
}
Error::InternalSerdeJson(error) => {
crate::Error::InternalError(InternalError::SerdeJson(error))
}
}
}
}
fn to_vector_map(
obkv: KvReaderDelAdd,
side: DelAdd,
) -> Result<Option<BTreeMap<String, Vectors>>, Error> {
Ok(if let Some(value) = obkv.get(side) {
let ParsedVectors(parsed_vectors) = ParsedVectors::from_bytes(value)?;
Some(parsed_vectors)
} else {
None
})
}
/// Represents either a vector or an array of multiple vectors.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(transparent)]
pub struct VectorOrArrayOfVectors {
#[serde(with = "either::serde_untagged_optional")]
inner: Option<either::Either<Vec<Embedding>, Embedding>>,
}
impl VectorOrArrayOfVectors {
pub fn into_array_of_vectors(self) -> Option<Vec<Embedding>> {
match self.inner? {
either::Either::Left(vectors) => Some(vectors),
either::Either::Right(vector) => Some(vec![vector]),
}
}
pub fn from_array_of_vectors(array_of_vec: Vec<Embedding>) -> Self {
Self { inner: Some(either::Either::Left(array_of_vec)) }
}
}
#[cfg(test)]
mod test {
use super::VectorOrArrayOfVectors;
#[test]
fn array_of_vectors() {
let null: VectorOrArrayOfVectors = serde_json::from_str("null").unwrap();
let empty: VectorOrArrayOfVectors = serde_json::from_str("[]").unwrap();
let one: VectorOrArrayOfVectors = serde_json::from_str("[0.1]").unwrap();
let two: VectorOrArrayOfVectors = serde_json::from_str("[0.1, 0.2]").unwrap();
let one_vec: VectorOrArrayOfVectors = serde_json::from_str("[[0.1, 0.2]]").unwrap();
let two_vecs: VectorOrArrayOfVectors =
serde_json::from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap();
insta::assert_json_snapshot!(null.into_array_of_vectors(), @"null");
insta::assert_json_snapshot!(empty.into_array_of_vectors(), @"[]");
insta::assert_json_snapshot!(one.into_array_of_vectors(), @r###"
[
[
0.1
]
]
"###);
insta::assert_json_snapshot!(two.into_array_of_vectors(), @r###"
[
[
0.1,
0.2
]
]
"###);
insta::assert_json_snapshot!(one_vec.into_array_of_vectors(), @r###"
[
[
0.1,
0.2
]
]
"###);
insta::assert_json_snapshot!(two_vecs.into_array_of_vectors(), @r###"
[
[
0.1,
0.2
],
[
0.3,
0.4
]
]
"###);
}
}

View File

@@ -54,7 +54,7 @@
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
}
},
"commands": [
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
@@ -78,8 +78,10 @@
]
}
},
"synchronous": "DontWait"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",

View File

@@ -11,7 +11,7 @@
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
@@ -30,8 +30,10 @@
]
}
},
"synchronous": "DontWait"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",

View File

@@ -11,7 +11,7 @@
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
}
},
"commands": [
"precommands": [
{
"route": "experimental-features",
"method": "PATCH",
@@ -55,7 +55,9 @@
}
},
"synchronous": "WaitForTask"
},
}
],
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",

View File

@@ -11,7 +11,7 @@
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
}
},
"commands": [
"precommands": [
{
"route": "experimental-features",
"method": "PATCH",
@@ -49,7 +49,9 @@
"asset": "movies-100.json"
},
"synchronous": "WaitForTask"
},
}
],
"commands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",

View File

@@ -11,7 +11,7 @@
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
}
},
"commands": [
"precommands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",
@@ -59,7 +59,9 @@
"asset": "150k-people.json"
},
"synchronous": "WaitForTask"
},
}
],
"commands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",

View File

@@ -11,7 +11,7 @@
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
}
},
"commands": [
"precommands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",
@@ -61,7 +61,9 @@
"asset": "150k-people.json"
},
"synchronous": "WaitForTask"
},
}
],
"commands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",

View File

@@ -11,7 +11,7 @@
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
}
},
"commands": [
"precommands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",
@@ -61,7 +61,9 @@
"asset": "150k-people.json"
},
"synchronous": "WaitForTask"
},
}
],
"commands": [
{
"route": "indexes/peoples/settings",
"method": "PATCH",

Some files were not shown because too many files have changed in this diff Show More