Small commit to add hybrid search and autoembedding

This commit is contained in:
Louis Dureuil
2023-11-15 15:46:37 +01:00
parent 21bcf32109
commit 13c2c6c16b
42 changed files with 4045 additions and 246 deletions

281
Cargo.lock generated
View File

@ -46,7 +46,7 @@ dependencies = [
"actix-tls",
"actix-utils",
"ahash 0.8.3",
"base64 0.21.2",
"base64 0.21.5",
"bitflags 1.3.2",
"brotli",
"bytes",
@ -120,7 +120,7 @@ dependencies = [
"futures-util",
"mio",
"num_cpus",
"socket2",
"socket2 0.4.9",
"tokio",
"tracing",
]
@ -201,7 +201,7 @@ dependencies = [
"serde_json",
"serde_urlencoded",
"smallvec",
"socket2",
"socket2 0.4.9",
"time",
"url",
]
@ -365,6 +365,12 @@ dependencies = [
"backtrace",
]
[[package]]
name = "anymap2"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "arbitrary"
version = "1.3.0"
@ -455,9 +461,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "base64"
version = "0.21.2"
version = "0.21.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d"
checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"
[[package]]
name = "base64ct"
@ -508,6 +514,21 @@ dependencies = [
"serde",
]
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -555,12 +576,12 @@ dependencies = [
[[package]]
name = "bstr"
version = "1.6.0"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
checksum = "542f33a8835a0884b006a0c3df3dadd99c0c3f296ed26c2fdc8028e01ad6230c"
dependencies = [
"memchr",
"regex-automata 0.3.6",
"regex-automata 0.4.3",
"serde",
]
@ -1346,6 +1367,12 @@ dependencies = [
"syn 2.0.28",
]
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "doxygen-rs"
version = "0.2.2"
@ -1562,6 +1589,16 @@ dependencies = [
"cc",
]
[[package]]
name = "fancy-regex"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
dependencies = [
"bit-set",
"regex",
]
[[package]]
name = "fastrand"
version = "2.0.0"
@ -1690,9 +1727,9 @@ checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a"
[[package]]
name = "futures"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335"
dependencies = [
"futures-channel",
"futures-core",
@ -1705,9 +1742,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2"
checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb"
dependencies = [
"futures-core",
"futures-sink",
@ -1715,15 +1752,15 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c"
[[package]]
name = "futures-executor"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0"
checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc"
dependencies = [
"futures-core",
"futures-task",
@ -1732,15 +1769,15 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa"
[[package]]
name = "futures-macro"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
dependencies = [
"proc-macro2",
"quote",
@ -1749,21 +1786,21 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e"
checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817"
[[package]]
name = "futures-task"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2"
[[package]]
name = "futures-util"
version = "0.3.28"
version = "0.3.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104"
dependencies = [
"futures-channel",
"futures-core",
@ -2207,7 +2244,7 @@ dependencies = [
"httpdate",
"itoa",
"pin-project-lite",
"socket2",
"socket2 0.4.9",
"tokio",
"tower-service",
"tracing",
@ -2949,7 +2986,7 @@ version = "8.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378"
dependencies = [
"base64 0.21.2",
"base64 0.21.5",
"pem",
"ring",
"serde",
@ -2957,6 +2994,16 @@ dependencies = [
"simple_asn1",
]
[[package]]
name = "kstring"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747"
dependencies = [
"serde",
"static_assertions",
]
[[package]]
name = "language-tags"
version = "0.3.2"
@ -2980,9 +3027,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.147"
version = "0.2.150"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
[[package]]
name = "libgit2-sys"
@ -3251,6 +3298,63 @@ version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
[[package]]
name = "liquid"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f68ae1011499ae2ef879f631891f21c78e309755f4a5e483c4a8f12e10b609"
dependencies = [
"doc-comment",
"liquid-core",
"liquid-derive",
"liquid-lib",
"serde",
]
[[package]]
name = "liquid-core"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79e0724dfcaad5cfb7965ea0f178ca0870b8d7315178f4a7179f5696f7f04d5f"
dependencies = [
"anymap2",
"itertools 0.10.5",
"kstring",
"liquid-derive",
"num-traits",
"pest",
"pest_derive",
"regex",
"serde",
"time",
]
[[package]]
name = "liquid-derive"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc2fb41a9bb4257a3803154bdf7e2df7d45197d1941c9b1a90ad815231630721"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.28",
]
[[package]]
name = "liquid-lib"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2a17e273a6fb1fb6268f7a5867ddfd0bd4683c7e19b51084f3d567fad4348c0"
dependencies = [
"itertools 0.10.5",
"liquid-core",
"once_cell",
"percent-encoding",
"regex",
"time",
"unicode-segmentation",
]
[[package]]
name = "litemap"
version = "0.6.1"
@ -3483,7 +3587,7 @@ dependencies = [
name = "meilisearch-auth"
version = "1.5.1"
dependencies = [
"base64 0.21.2",
"base64 0.21.5",
"enum-iterator",
"hmac",
"maplit",
@ -3544,9 +3648,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.5.0"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "memmap2"
@ -3589,6 +3693,7 @@ dependencies = [
"filter-parser",
"flatten-serde-json",
"fst",
"futures",
"fxhash",
"geoutils",
"grenad",
@ -3600,6 +3705,7 @@ dependencies = [
"itertools 0.11.0",
"json-depth-checker",
"levenshtein_automata",
"liquid",
"log",
"logging_timer",
"maplit",
@ -3607,6 +3713,7 @@ dependencies = [
"meili-snap",
"memmap2",
"mimalloc",
"nolife",
"obkv",
"once_cell",
"ordered-float",
@ -3614,6 +3721,7 @@ dependencies = [
"rand",
"rand_pcg",
"rayon",
"reqwest",
"roaring",
"rstar",
"serde",
@ -3624,8 +3732,10 @@ dependencies = [
"smartstring",
"tempfile",
"thiserror",
"tiktoken-rs",
"time",
"tokenizers",
"tokio",
"uuid 1.5.0",
]
@ -3671,9 +3781,9 @@ dependencies = [
[[package]]
name = "mio"
version = "0.8.8"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
dependencies = [
"libc",
"log",
@ -3725,6 +3835,12 @@ name = "nelson"
version = "0.1.0"
source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a"
[[package]]
name = "nolife"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc52aaf087e8a52e7a2692f83f2dac6ac7ff9d0136bf9c6ac496635cfe3e50dc"
[[package]]
name = "nom"
version = "7.1.3"
@ -4480,6 +4596,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
[[package]]
name = "regex-syntax"
version = "0.7.4"
@ -4488,11 +4610,11 @@ checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "reqwest"
version = "0.11.18"
version = "0.11.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55"
checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
dependencies = [
"base64 0.21.2",
"base64 0.21.5",
"bytes",
"encoding_rs",
"futures-core",
@ -4514,6 +4636,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
"system-configuration",
"tokio",
"tokio-rustls 0.24.1",
"tower-service",
@ -4521,7 +4644,7 @@ dependencies = [
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"webpki-roots 0.22.6",
"webpki-roots 0.25.3",
"winreg",
]
@ -4582,6 +4705,12 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc_version"
version = "0.4.0"
@ -4648,7 +4777,7 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2"
dependencies = [
"base64 0.21.2",
"base64 0.21.5",
]
[[package]]
@ -4977,6 +5106,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "socket2"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
dependencies = [
"libc",
"windows-sys 0.48.0",
]
[[package]]
name = "spin"
version = "0.5.2"
@ -5097,6 +5236,27 @@ dependencies = [
"winapi",
]
[[package]]
name = "system-configuration"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
dependencies = [
"bitflags 1.3.2",
"core-foundation",
"system-configuration-sys",
]
[[package]]
name = "system-configuration-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "tar"
version = "0.4.40"
@ -5159,6 +5319,21 @@ dependencies = [
"syn 2.0.28",
]
[[package]]
name = "tiktoken-rs"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4427b6b1c6b38215b92dd47a83a0ecc6735573d0a5a4c14acc0ac5b33b28adb"
dependencies = [
"anyhow",
"base64 0.21.5",
"bstr",
"fancy-regex",
"lazy_static",
"parking_lot",
"rustc-hash",
]
[[package]]
name = "time"
version = "0.3.30"
@ -5258,11 +5433,10 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.29.1"
version = "1.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da"
checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9"
dependencies = [
"autocfg",
"backtrace",
"bytes",
"libc",
@ -5271,16 +5445,16 @@ dependencies = [
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"socket2 0.5.5",
"tokio-macros",
"windows-sys 0.48.0",
]
[[package]]
name = "tokio-macros"
version = "2.1.0"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
@ -5508,7 +5682,7 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
dependencies = [
"base64 0.21.2",
"base64 0.21.5",
"flate2",
"log",
"native-tls",
@ -5758,6 +5932,12 @@ dependencies = [
"rustls-webpki 0.100.2",
]
[[package]]
name = "webpki-roots"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
[[package]]
name = "whatlang"
version = "0.16.2"
@ -5942,11 +6122,12 @@ dependencies = [
[[package]]
name = "winreg"
version = "0.10.1"
version = "0.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [
"winapi",
"cfg-if",
"windows-sys 0.48.0",
]
[[package]]