Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-20 05:20:36 +00:00)

Compare commits: 34 commits between `binary-qua…` and `document-d…`

Commits (SHA1):
deee22b5da
fd8c90b858
4ceade43cd
e95e47d258
e18b06ddda
b15e8aacb6
767f20e30d
0d63d02ab2
bf5d9f68fa
e9d6b4222b
2f0567fad1
2099b4f0dd
0d5bc4578e
8f60ad0a23
9570139eeb
9d6885793e
98cd6a865c
5f4530ce57
0ecaf861fa
4d5005b01a
952e742321
ee9aa63044
43db4f4242
9feba5028d
0a40a98bb6
aac15f6719
53a359286c
4aa7d386d8
84fabb9314
cd46ebd6b5
ef8d9a20f8
6afa578688
300bdfc2a7
e7e74c0099
.github/workflows/test-suite.yml (vendored): 2 changes
@@ -167,7 +167,7 @@ jobs:
      - uses: helix-editor/rust-toolchain@v1
        with:
          profile: minimal
-          toolchain: nightly-2024-06-25
+          toolchain: nightly-2024-07-09
          override: true
          components: rustfmt
      - name: Cache dependencies
@@ -52,6 +52,16 @@ cargo test

This command will be triggered to each PR as a requirement for merging it.

+#### Faster build
+
+You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
+It'll store some built artifacts in the directory of your choice.
+
+We recommend using the standard `$HOME/.cache/lindera` directory:
+```sh
+export LINDERA_CACHE=$HOME/.cache/lindera
+```
+
#### Snapshot-based tests

We are using [insta](https://insta.rs) to perform snapshot-based testing.

@@ -63,7 +73,7 @@ Furthermore, we provide some macros on top of insta, notably a way to use snapsh

To effectively debug snapshot-based hashes, we recommend you export the `MEILI_TEST_FULL_SNAPS` environment variable so that snapshot are fully created locally:

-```
+```sh
export MEILI_TEST_FULL_SNAPS=true # add this to your .bashrc, .zshrc, ...
```
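For readers unfamiliar with the snapshot workflow described above, a minimal insta test could look like the sketch below. The helper function and its output are made up for illustration; only the `insta::assert_snapshot!` call and the inline `@"..."` snapshot syntax follow insta's documented usage, and `cargo insta review` (from the separate cargo-insta tool) is the usual way to refresh stored snapshots.

```rust
#[cfg(test)]
mod tests {
    // Hypothetical helper, used only to illustrate snapshot-based testing.
    fn render_greeting(name: &str) -> String {
        format!("Hello, {name}!")
    }

    #[test]
    fn greeting_snapshot() {
        // The expected value lives inline after `@`; when the output changes,
        // `cargo insta review` proposes the updated snapshot for acceptance.
        insta::assert_snapshot!(render_greeting("Meilisearch"), @"Hello, Meilisearch!");
    }
}
```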
Cargo.lock (generated): 1563 changes. File diff suppressed because it is too large.
@@ -1,7 +1,7 @@
# Compile
-FROM rust:1.75.0-alpine3.18 AS compiler
+FROM rust:1.79.0-alpine3.20 AS compiler

-RUN apk add -q --update-cache --no-cache build-base openssl-dev
+RUN apk add -q --no-cache build-base openssl-dev

WORKDIR /

@@ -20,13 +20,12 @@ RUN set -eux; \
    cargo build --release -p meilisearch -p meilitool

# Run
-FROM alpine:3.16
+FROM alpine:3.20

ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker

-RUN apk update --quiet \
-    && apk add -q --no-cache libgcc tini curl
+RUN apk add -q --no-cache libgcc tini curl

# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.
@@ -11,24 +11,24 @@ edition.workspace = true
license.workspace = true

[dependencies]
-anyhow = "1.0.79"
+anyhow = "1.0.86"
csv = "1.3.0"
milli = { path = "../milli" }
-mimalloc = { version = "0.1.39", default-features = false }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+mimalloc = { version = "0.1.43", default-features = false }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
-roaring = "0.10.2"
+roaring = "0.10.6"

[build-dependencies]
-anyhow = "1.0.79"
-bytes = "1.5.0"
+anyhow = "1.0.86"
+bytes = "1.6.0"
convert_case = "0.6.0"
-flate2 = "1.0.28"
-reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
+flate2 = "1.0.30"
+reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }

[features]
default = ["milli/all-tokenizations"]
@@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
-time = { version = "0.3.34", features = ["parsing"] }
+time = { version = "0.3.36", features = ["parsing"] }

[build-dependencies]
-anyhow = "1.0.80"
-vergen-git2 = "1.0.0-beta.2"
+anyhow = "1.0.86"
+vergen-git2 = "1.0.0"
@@ -11,22 +11,21 @@ readme.workspace = true
license.workspace = true

[dependencies]
-anyhow = "1.0.79"
-flate2 = "1.0.28"
-http = "0.2.11"
-meilisearch-auth = { path = "../meilisearch-auth" }
+anyhow = "1.0.86"
+flate2 = "1.0.30"
+http = "1.1.0"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
-regex = "1.10.2"
-roaring = { version = "0.10.2", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
-tar = "0.4.40"
-tempfile = "3.9.0"
-thiserror = "1.0.56"
-time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+regex = "1.10.5"
+roaring = { version = "0.10.6", features = ["serde"] }
+serde = { version = "1.0.204", features = ["derive"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
+tar = "0.4.41"
+tempfile = "3.10.1"
+thiserror = "1.0.61"
+time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+uuid = { version = "1.10.0", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"
@@ -425,7 +425,7 @@ pub(crate) mod test {
        let mut dump = v2::V2Reader::open(dir).unwrap().to_v3();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
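The same one-line change repeats across the dump and filter-parser test modules in this compare: `insta::assert_display_snapshot!` becomes `insta::assert_snapshot!`, which matches newer insta releases where the Display-based macro is deprecated and the plain snapshot macro accepts any value implementing `Display`. A small sketch of the rename (the `Version` type and its `Display` impl are hypothetical, used only for illustration):

```rust
use std::fmt;

// Hypothetical type used only to illustrate the macro rename.
struct Version(u32, u32);

impl fmt::Display for Version {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "v{}.{}", self.0, self.1)
    }
}

#[test]
fn version_snapshot() {
    // Before: insta::assert_display_snapshot!(Version(1, 9), @"v1.9");
    // After the rename, the same inline snapshot goes through assert_snapshot!:
    insta::assert_snapshot!(Version(1, 9), @"v1.9");
}
```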
@@ -358,7 +358,7 @@ pub(crate) mod test {
        let mut dump = v3::V3Reader::open(dir).unwrap().to_v4();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

@@ -394,8 +394,8 @@ pub(crate) mod test {
        let mut dump = v4::V4Reader::open(dir).unwrap().to_v5();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

@@ -442,8 +442,8 @@ pub(crate) mod test {
        let mut dump = v5::V5Reader::open(dir).unwrap().to_v6();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

        // tasks
        let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
@@ -216,7 +216,7 @@ pub(crate) mod test {
        let mut dump = DumpReader::open(dump).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
        insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");

        // tasks

@@ -337,7 +337,7 @@ pub(crate) mod test {
        let mut dump = DumpReader::open(dump).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
        insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");

        // tasks

@@ -383,8 +383,8 @@ pub(crate) mod test {
        let mut dump = DumpReader::open(dump).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

        // tasks
        let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();

@@ -463,8 +463,8 @@ pub(crate) mod test {
        let mut dump = DumpReader::open(dump).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

        // tasks
        let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();

@@ -540,7 +540,7 @@ pub(crate) mod test {
        let mut dump = DumpReader::open(dump).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
        assert_eq!(dump.instance_uid().unwrap(), None);

        // tasks

@@ -633,7 +633,7 @@ pub(crate) mod test {
        let mut dump = DumpReader::open(dump).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
        assert_eq!(dump.instance_uid().unwrap(), None);

        // tasks

@@ -726,7 +726,7 @@ pub(crate) mod test {
        let mut dump = DumpReader::open(dump).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
        assert_eq!(dump.instance_uid().unwrap(), None);

        // tasks
@@ -252,7 +252,7 @@ pub(crate) mod test {
        let mut dump = V2Reader::open(dir).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

@@ -349,7 +349,7 @@ pub(crate) mod test {
        let mut dump = V2Reader::open(dir).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -267,7 +267,7 @@ pub(crate) mod test {
        let mut dump = V3Reader::open(dir).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -262,8 +262,8 @@ pub(crate) mod test {
        let mut dump = V4Reader::open(dir).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
-        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
+        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -299,8 +299,8 @@ pub(crate) mod test {
        let mut dump = V5Reader::open(dir).unwrap();

        // top level infos
-        insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
-        insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
+        insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
+        insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");

        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -281,7 +281,7 @@ pub(crate) mod test {
        let dump_path = dump.path();

        // ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
-        insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###"
+        insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
        .
        ├---- indexes/
        │   └---- doggos/
@@ -11,10 +11,7 @@ edition.workspace = true
license.workspace = true

[dependencies]
-tempfile = "3.9.0"
-thiserror = "1.0.56"
+tempfile = "3.10.1"
+thiserror = "1.0.61"
tracing = "0.1.40"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
-
-[dev-dependencies]
-faux = "0.1.10"
+uuid = { version = "1.10.0", features = ["serde", "v4"] }
@@ -14,7 +14,7 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
-unescaper = "0.1.3"
+unescaper = "0.1.5"

[dev-dependencies]
-insta = "1.34.0"
+insta = "1.39.0"
@ -564,121 +564,121 @@ pub mod tests {
|
||||
|
||||
#[test]
|
||||
fn parse_escaped() {
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
|
||||
insta::assert_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
|
||||
insta::assert_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
|
||||
insta::assert_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
|
||||
insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
|
||||
// but it also works with other sequences
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
|
||||
insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
// Test equal
|
||||
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
|
||||
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
|
||||
insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
|
||||
insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
|
||||
insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
|
||||
insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
|
||||
insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
|
||||
insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
|
||||
insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
|
||||
insta::assert_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
|
||||
insta::assert_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
|
||||
insta::assert_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
|
||||
insta::assert_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
|
||||
insta::assert_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
|
||||
insta::assert_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
|
||||
insta::assert_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
|
||||
insta::assert_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
|
||||
insta::assert_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
|
||||
|
||||
// Test IN
|
||||
insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]");
|
||||
insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
|
||||
insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
|
||||
insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
|
||||
insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
|
||||
insta::assert_snapshot!(p("colour IN[]"), @"{colour} IN[]");
|
||||
insta::assert_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
|
||||
insta::assert_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
|
||||
insta::assert_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
|
||||
insta::assert_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
|
||||
|
||||
// Test IN + OR/AND/()
|
||||
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
|
||||
insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
|
||||
insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
|
||||
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
|
||||
insta::assert_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
|
||||
insta::assert_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
|
||||
|
||||
// Test whitespace start/end
|
||||
insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}");
|
||||
insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
|
||||
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
|
||||
insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
|
||||
insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
|
||||
insta::assert_snapshot!(p(" colour = green "), @"{colour} = {green}");
|
||||
insta::assert_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
|
||||
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
|
||||
insta::assert_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
|
||||
insta::assert_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
|
||||
|
||||
// Test conditions
|
||||
insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
|
||||
insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
|
||||
insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
|
||||
insta::assert_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
|
||||
insta::assert_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
|
||||
insta::assert_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
|
||||
insta::assert_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
|
||||
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
|
||||
insta::assert_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
|
||||
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
|
||||
insta::assert_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
|
||||
|
||||
// Test NOT
|
||||
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
|
||||
insta::assert_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
|
||||
insta::assert_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
|
||||
|
||||
// Test NULL + NOT NULL
|
||||
insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
insta::assert_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
|
||||
insta::assert_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
insta::assert_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
|
||||
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
|
||||
// Test EMPTY + NOT EMPTY
|
||||
insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
insta::assert_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
|
||||
insta::assert_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
|
||||
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
|
||||
// Test EXISTS + NOT EXITS
|
||||
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
|
||||
// Test nested NOT
|
||||
insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
|
||||
insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
|
||||
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
|
||||
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
|
||||
|
||||
// Test geo radius
|
||||
insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
|
||||
insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
|
||||
insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
|
||||
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
|
||||
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
|
||||
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
|
||||
|
||||
// Test geo bounding box
|
||||
insta::assert_display_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
|
||||
insta::assert_display_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
|
||||
insta::assert_display_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
|
||||
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
|
||||
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
|
||||
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
|
||||
|
||||
// Test OR + AND
|
||||
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
|
||||
insta::assert_display_snapshot!(
|
||||
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
|
||||
insta::assert_snapshot!(
|
||||
p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"),
|
||||
@"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]"
|
||||
);
|
||||
|
||||
// Test parentheses
|
||||
insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
|
||||
insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
|
||||
insta::assert_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
|
||||
insta::assert_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
|
||||
|
||||
// Test recursion
|
||||
// This is the most that is allowed
|
||||
insta::assert_display_snapshot!(
|
||||
insta::assert_snapshot!(
|
||||
p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"),
|
||||
@"{x} = {1}"
|
||||
);
|
||||
insta::assert_display_snapshot!(
|
||||
insta::assert_snapshot!(
|
||||
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
|
||||
@"NOT ({x} = {1})"
|
||||
);
|
||||
|
||||
// Confusing keywords
|
||||
insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
|
||||
insta::assert_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -689,182 +689,182 @@ pub mod tests {
|
||||
Fc::parse(s).unwrap_err().to_string()
|
||||
}
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###"
|
||||
insta::assert_snapshot!(p("channel = Ponce = 12"), @r###"
|
||||
Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule.
|
||||
17:21 channel = Ponce = 12
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = "), @r###"
|
||||
insta::assert_snapshot!(p("channel = "), @r###"
|
||||
Was expecting a value but instead got nothing.
|
||||
14:14 channel =
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = 🐻"), @r###"
|
||||
insta::assert_snapshot!(p("channel = 🐻"), @r###"
|
||||
Was expecting a value but instead got `🐻`.
|
||||
11:12 channel = 🐻
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
|
||||
insta::assert_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
|
||||
Was expecting a value but instead got `🐻`.
|
||||
11:12 channel = 🐻 AND followers < 100
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("'OR'"), @r###"
|
||||
insta::assert_snapshot!(p("'OR'"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
|
||||
1:5 'OR'
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("OR"), @r###"
|
||||
insta::assert_snapshot!(p("OR"), @r###"
|
||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||
1:3 OR
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
|
||||
insta::assert_snapshot!(p("channel Ponce"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
|
||||
1:14 channel Ponce
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
|
||||
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
|
||||
19:19 channel = Ponce OR
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoRadius"), @r###"
|
||||
insta::assert_snapshot!(p("_geoRadius"), @r###"
|
||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
||||
1:11 _geoRadius
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###"
|
||||
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
|
||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
||||
1:16 _geoRadius = 12
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###"
|
||||
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
|
||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||
1:16 _geoBoundingBox
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###"
|
||||
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
|
||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||
1:21 _geoBoundingBox = 12
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
|
||||
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
|
||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||
1:26 _geoBoundingBox(1.0, 1.0)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
|
||||
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
1:22 _geoPoint(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
|
||||
insta::assert_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
13:34 position <= _geoPoint(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
|
||||
insta::assert_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
|
||||
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
1:25 _geoDistance(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
|
||||
insta::assert_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
|
||||
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
13:37 position <= _geoDistance(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
|
||||
insta::assert_snapshot!(p("_geo(12, 13, 14)"), @r###"
|
||||
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
1:17 _geo(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
|
||||
insta::assert_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
|
||||
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
13:29 position <= _geo(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
|
||||
insta::assert_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
|
||||
The `_geoRadius` filter is an operation and can't be used as a value.
|
||||
13:35 position <= _geoRadius(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = 'ponce"), @r###"
|
||||
insta::assert_snapshot!(p("channel = 'ponce"), @r###"
|
||||
Expression `\'ponce` is missing the following closing delimiter: `'`.
|
||||
11:17 channel = 'ponce
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = \"ponce"), @r###"
|
||||
insta::assert_snapshot!(p("channel = \"ponce"), @r###"
|
||||
Expression `\"ponce` is missing the following closing delimiter: `"`.
|
||||
11:17 channel = "ponce
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
|
||||
insta::assert_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
|
||||
Expression `(followers >= 1000` is missing the following closing delimiter: `)`.
|
||||
17:35 channel = mv OR (followers >= 1000
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
|
||||
insta::assert_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
|
||||
Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule.
|
||||
34:35 channel = mv OR followers >= 1000)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
|
||||
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
|
||||
1:17 colour NOT EXIST
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
|
||||
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
|
||||
1:23 subscribers 100 TO1000
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
|
||||
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
|
||||
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
|
||||
17:44 channel = ponce ORdog != 'bernese mountain'
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###"
|
||||
insta::assert_snapshot!(p("colour IN blue, green]"), @r###"
|
||||
Expected `[` after `IN` keyword.
|
||||
11:23 colour IN blue, green]
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
|
||||
insta::assert_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
|
||||
Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`.
|
||||
32:36 colour IN [blue, green, 'blue' > 2]
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###"
|
||||
insta::assert_snapshot!(p("colour IN [blue, green, AND]"), @r###"
|
||||
Expected only comma-separated field names inside `IN[..]` but instead found `AND]`.
|
||||
25:29 colour IN [blue, green, AND]
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###"
|
||||
insta::assert_snapshot!(p("colour IN [blue, green"), @r###"
|
||||
Expected matching `]` after the list of field names given to `IN[`
|
||||
23:23 colour IN [blue, green
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###"
|
||||
insta::assert_snapshot!(p("colour IN ['blue, green"), @r###"
|
||||
Expression `\'blue, green` is missing the following closing delimiter: `'`.
|
||||
12:24 colour IN ['blue, green
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("x = EXISTS"), @r###"
|
||||
insta::assert_snapshot!(p("x = EXISTS"), @r###"
|
||||
Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes.
|
||||
5:11 x = EXISTS
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("AND = 8"), @r###"
|
||||
insta::assert_snapshot!(p("AND = 8"), @r###"
|
||||
Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes.
|
||||
1:4 AND = 8
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
|
||||
insta::assert_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
|
||||
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
|
||||
51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(
|
||||
insta::assert_snapshot!(
|
||||
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
|
||||
@r###"
|
||||
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
|
||||
@ -872,40 +872,40 @@ pub mod tests {
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
|
||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
|
||||
1:11 value NULL
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
|
||||
1:15 value NOT NULL
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
|
||||
1:12 value EMPTY
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
|
||||
1:16 value NOT EMPTY
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value IS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
|
||||
1:9 value IS
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
|
||||
1:13 value IS NOT
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
|
||||
1:16 value IS EXISTS
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
||||
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
|
||||
1:20 value IS NOT EXISTS
|
||||
"###);
|
||||
|
@@ -12,9 +12,9 @@ license.workspace = true

[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
-clap = { version = "4.4.17", features = ["derive"] }
-fastrand = "2.0.1"
+clap = { version = "4.5.9", features = ["derive"] }
+fastrand = "2.1.0"
milli = { path = "../milli" }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
-tempfile = "3.9.0"
+serde = { version = "1.0.204", features = ["derive"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
+tempfile = "3.10.1"
@@ -110,7 +110,7 @@ fn main() {

                // after executing a batch we check if the database is corrupted
                let res = index.search(&wtxn).execute().unwrap();
-                index.documents(&wtxn, res.documents_ids).unwrap();
+                index.compressed_documents(&wtxn, res.documents_ids).unwrap();
                progression.fetch_add(1, Ordering::Relaxed);
            }
            wtxn.abort();
@@ -11,38 +11,38 @@ edition.workspace = true
license.workspace = true

[dependencies]
-anyhow = "1.0.79"
+anyhow = "1.0.86"
bincode = "1.3.3"
csv = "1.3.0"
-derive_builder = "0.12.0"
+derive_builder = "0.20.0"
dump = { path = "../dump" }
-enum-iterator = "1.5.0"
+enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
-flate2 = "1.0.28"
+flate2 = "1.0.30"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
-page_size = "0.5.0"
-rayon = "1.8.1"
-roaring = { version = "0.10.2", features = ["serde"] }
-serde = { version = "1.0.195", features = ["derive"] }
-serde_json = { version = "1.0.111", features = ["preserve_order"] }
+page_size = "0.6.0"
+rayon = "1.10.0"
+roaring = { version = "0.10.6", features = ["serde"] }
+serde = { version = "1.0.204", features = ["derive"] }
+serde_json = { version = "1.0.120", features = ["preserve_order"] }
synchronoise = "1.0.1"
-tempfile = "3.9.0"
-thiserror = "1.0.56"
-time = { version = "0.3.31", features = [
+tempfile = "3.10.1"
+thiserror = "1.0.61"
+time = { version = "0.3.36", features = [
    "serde-well-known",
    "formatting",
    "parsing",
    "macros",
] }
tracing = "0.1.40"
-ureq = "2.9.7"
-uuid = { version = "1.6.1", features = ["serde", "v4"] }
+ureq = "2.10.0"
+uuid = { version = "1.10.0", features = ["serde", "v4"] }

[dev-dependencies]
arroy = "0.4.0"
big_s = "1.0.2"
crossbeam = "0.8.4"
-insta = { version = "1.34.0", features = ["json", "redactions"] }
+insta = { version = "1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
@@ -908,16 +908,22 @@ impl IndexScheduler {
            let mut index_dumper = dump.create_index(uid, &metadata)?;

            let fields_ids_map = index.fields_ids_map(&rtxn)?;
+            let dictionary = index.document_decompression_dictionary(&rtxn)?;
            let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
            let embedding_configs = index.embedding_configs(&rtxn)?;
+            let mut buffer = Vec::new();

            // 3.1. Dump the documents
-            for ret in index.all_documents(&rtxn)? {
+            for ret in index.all_compressed_documents(&rtxn)? {
                if self.must_stop_processing.get() {
                    return Err(Error::AbortedTask);
                }

-                let (id, doc) = ret?;
+                let (id, compressed) = ret?;
+                let doc = compressed.decompress_with_optional_dictionary(
+                    &mut buffer,
+                    dictionary.as_ref(),
+                )?;

                let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
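The dump path above now iterates compressed documents: it fetches the index's optional decompression dictionary once, reuses a scratch buffer, and decompresses each entry before converting it to JSON. Below is a rough sketch of that flow, reusing the names that appear in this diff (`all_compressed_documents`, `decompress_with_optional_dictionary`, `obkv_to_json`); the exact milli types, imports, and signatures are assumptions rather than verified API.

```rust
use milli::heed::RoTxn;                 // assumed re-export of heed
use milli::{obkv_to_json, Index};       // names taken from this diff

// Sketch only: mirrors the pattern in the hunk above, not a drop-in function.
fn collect_documents(index: &Index, rtxn: &RoTxn) -> anyhow::Result<Vec<serde_json::Value>> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
    // Indexes without a compression dictionary return `None`; decompression
    // then falls back to dictionary-less decoding.
    let dictionary = index.document_decompression_dictionary(rtxn)?;
    let mut buffer = Vec::new();
    let mut documents = Vec::new();

    for entry in index.all_compressed_documents(rtxn)? {
        let (_docid, compressed) = entry?;
        // The scratch buffer is reused across iterations to avoid reallocating
        // once per document.
        let doc = compressed.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
        documents.push(serde_json::Value::Object(obkv_to_json(&all_fields, &fields_ids_map, doc)?));
    }
    Ok(documents)
}
```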
@@ -2465,12 +2465,20 @@ mod tests {
        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
+        let mut buffer = Vec::new();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
            .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_id, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
    }

@@ -2525,12 +2533,20 @@ mod tests {
        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
+        let mut buffer = Vec::new();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
            .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_id, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
    }

@@ -2904,12 +2920,20 @@ mod tests {
        // has everything being pushed successfully in milli?
        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
+        let mut buffer = Vec::new();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
            .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_id, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
    }

@@ -2955,12 +2979,20 @@ mod tests {
        // has everything being pushed successfully in milli?
        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
+        let mut buffer = Vec::new();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
            .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_id, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
    }

@@ -3011,12 +3043,20 @@ mod tests {
        // has everything being pushed successfully in milli?
        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
+        let mut buffer = Vec::new();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
            .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_id, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
    }

@@ -3129,12 +3169,20 @@ mod tests {
        // has everything being pushed successfully in milli?
        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
+        let mut buffer = Vec::new();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
            .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_id, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
    }

@@ -3184,12 +3232,20 @@ mod tests {
        // has everything being pushed successfully in milli?
        let index = index_scheduler.index("doggos").unwrap();
        let rtxn = index.read_txn().unwrap();
+        let mut buffer = Vec::new();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
        let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
            .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_id, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
            .collect::<Vec<_>>();
        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
    }
@ -3898,12 +3954,20 @@ mod tests {
|
||||
// Has everything being pushed successfully in milli?
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut buffer = Vec::new();
|
||||
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.all_compressed_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.map(|ret| {
|
||||
let (_id, compressed_doc) = ret.unwrap();
|
||||
let doc = compressed_doc
|
||||
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||
.unwrap();
|
||||
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||
}
|
||||
@ -3969,12 +4033,20 @@ mod tests {
|
||||
// Has everything being pushed successfully in milli?
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut buffer = Vec::new();
|
||||
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.all_compressed_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.map(|ret| {
|
||||
let (_id, compressed_doc) = ret.unwrap();
|
||||
let doc = compressed_doc
|
||||
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||
.unwrap();
|
||||
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||
}
|
||||
@ -4037,12 +4109,20 @@ mod tests {
|
||||
// Has everything being pushed successfully in milli?
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut buffer = Vec::new();
|
||||
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.all_compressed_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.map(|ret| {
|
||||
let (_id, compressed_doc) = ret.unwrap();
|
||||
let doc = compressed_doc
|
||||
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||
.unwrap();
|
||||
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||
}
|
||||
@ -4098,12 +4178,20 @@ mod tests {
|
||||
// Has everything being pushed successfully in milli?
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut buffer = Vec::new();
|
||||
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.all_compressed_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.map(|ret| {
|
||||
let (_id, compressed_doc) = ret.unwrap();
|
||||
let doc = compressed_doc
|
||||
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
|
||||
.unwrap();
|
||||
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||
}
|
||||
@@ -4159,6 +4247,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");

@@ -4166,9 +4256,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}

@@ -4220,6 +4316,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");

@@ -4227,9 +4325,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}

@@ -4303,6 +4407,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");

@@ -4310,9 +4416,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}

@@ -4389,6 +4501,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"paw");

@@ -4396,9 +4510,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}

@@ -4468,6 +4588,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"doggoid");

@@ -4475,9 +4597,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -5120,6 +5248,8 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();

// Ensure the document have been inserted into the relevant bitamp
let configs = index.embedding_configs(&rtxn).unwrap();

@@ -5139,8 +5269,12 @@ mod tests {
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");

let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),

@@ -5194,6 +5328,8 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();

// Ensure the document have been inserted into the relevant bitamp
let configs = index.embedding_configs(&rtxn).unwrap();

@@ -5216,8 +5352,12 @@ mod tests {
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");

let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),
@@ -5309,12 +5449,20 @@ mod tests {

let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");

@@ -5348,12 +5496,20 @@ mod tests {

let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
// the all the vectors linked to the new specified embedder have been removed
// Only the unknown embedders stays in the document DB

@@ -5456,9 +5612,15 @@ mod tests {

// the document with the id 3 should have its original embedding updated
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, Some(docid)).unwrap().remove(0);
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(&field_ids, &field_ids_map, doc).unwrap();
snapshot!(json_string!(doc), @r###"
{
"id": 3,
@@ -5570,12 +5732,20 @@ mod tests {

let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();

@@ -5610,12 +5780,20 @@ mod tests {

let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
let conf = index.embedding_configs(&rtxn).unwrap();

@@ -5726,12 +5904,20 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
}

@@ -5761,12 +5947,20 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
}

@@ -5794,12 +5988,20 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();

// FIXME: redaction
@@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true

[dependencies]
insta = { version = "^1.34.0", features = ["json", "redactions"] }
insta = { version = "^1.39.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.19"
@@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true

[dependencies]
base64 = "0.21.7"
enum-iterator = "1.5.0"
base64 = "0.22.1"
enum-iterator = "2.1.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] }
@@ -11,36 +11,36 @@ edition.workspace = true
license.workspace = true

[dependencies]
actix-web = { version = "4.6.0", default-features = false }
anyhow = "1.0.79"
actix-web = { version = "4.8.0", default-features = false }
anyhow = "1.0.86"
convert_case = "0.6.0"
csv = "1.3.0"
deserr = { version = "0.6.1", features = ["actix-web"] }
either = { version = "1.9.0", features = ["serde"] }
enum-iterator = "1.5.0"
deserr = { version = "0.6.2", features = ["actix-web"] }
either = { version = "1.13.0", features = ["serde"] }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.30"
fst = "0.4.7"
memmap2 = "0.7.1"
memmap2 = "0.9.4"
milli = { path = "../milli" }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
serde_json = "1.0.120"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
tokio = "1.38"
uuid = { version = "1.10.0", features = ["serde", "v4"] }

[dev-dependencies]
insta = "1.34.0"
insta = "1.39.0"
meili-snap = { path = "../meili-snap" }

[features]
@@ -12,7 +12,7 @@ pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, Index};
pub use milli::{heed, zstd, Index};
use uuid::Uuid;
pub use versioning::VERSION_FILE_NAME;
pub use {milli, serde_cs};
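The hunk above re-exports `zstd` from milli, which appears to be the foundation of the new document compression: documents are stored zstd-compressed, optionally with a shared dictionary (the milli Cargo.toml further down adds `zstd` with the `zdict_builder` feature). A rough illustration of dictionary-based compression with the `zstd` crate, a sketch only; milli's own wrapper types are not reproduced here and the exact APIs it uses may differ:

```rust
use std::io;

// Sketch: train a dictionary on sample documents, then round-trip one payload.
fn roundtrip(samples: &[Vec<u8>], payload: &[u8]) -> io::Result<Vec<u8>> {
    // A small shared dictionary helps a lot for many short, similar documents.
    let dict = zstd::dict::from_samples(samples, 10 * 1024)?;

    // Compress with the dictionary at level 3.
    let compressed = zstd::bulk::Compressor::with_dictionary(3, &dict)?.compress(payload)?;

    // Decompression needs the same dictionary and an upper bound on the output size.
    zstd::bulk::Decompressor::with_dictionary(&dict)?.decompress(&compressed, 1024 * 1024)
}
```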
@@ -14,104 +14,99 @@ default-run = "meilisearch"

[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.7.0", default-features = false, features = [
actix-http = { version = "3.8.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.6.0", default-features = false, features = [
actix-web = { version = "4.8.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
] }
actix-web-static-files = { version = "4.0.1", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
bstr = "1.9.0"
byte-unit = { version = "4.0.19", default-features = false, features = [
anyhow = { version = "1.0.86", features = ["backtrace"] }
async-trait = "0.1.81"
bstr = "1.9.1"
byte-unit = { version = "5.1.4", default-features = false, features = [
"std",
"byte",
"serde",
] }
bytes = "1.5.0"
clap = { version = "4.4.17", features = ["derive", "env"] }
crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] }
bytes = "1.6.0"
clap = { version = "4.5.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.13"
deserr = { version = "0.6.2", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.9.0"
either = "1.13.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.30"
fst = "0.4.7"
futures = "0.3.30"
futures-util = "0.3.30"
http = "0.2.11"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10"
itertools = "0.11.0"
jsonwebtoken = "9.2.0"
lazy_static = "1.4.0"
indexmap = { version = "2.2.6", features = ["serde"] }
is-terminal = "0.4.12"
itertools = "0.13.0"
jsonwebtoken = "9.3.0"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.1"
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.0"
parking_lot = "0.12.1"
ordered-float = "4.2.1"
parking_lot = "0.12.3"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
pin-project-lite = "0.2.14"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
prometheus = { version = "0.13.4", features = ["process"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
reqwest = { version = "0.11.23", features = [
rayon = "1.10.0"
regex = "1.10.5"
reqwest = { version = "0.12.5", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.12"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
rustls-pemfile = "1.0.4"
segment = { version = "0.2.4", optional = true }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"
siphasher = "1.0.0"
siphasher = "1.0.1"
slice-group-by = "0.3.1"
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
static-files = { version = "0.2.4", optional = true }
sysinfo = "0.30.13"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.35.1", features = ["full"] }
tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
tokio = { version = "1.38.0", features = ["full"] }
toml = "0.8.14"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] }
url = { version = "2.5.2", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.10"
tracing-actix-web = "0.7.11"
build-info = { version = "1.7.0", path = "../build-info" }

[dev-dependencies]
actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
actix-rt = "2.10.0"
brotli = "6.0.0"
insta = "1.34.0"
insta = "1.39.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

@@ -120,23 +115,22 @@ urlencoding = "2.1.3"
yaup = "0.3.1"

[build-dependencies]
anyhow = { version = "1.0.79", optional = true }
cargo_toml = { version = "0.18.0", optional = true }
anyhow = { version = "1.0.86", optional = true }
cargo_toml = { version = "0.20.3", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.23", features = [
reqwest = { version = "0.12.5", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
zip = { version = "0.6.6", optional = true }
static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.10.1", optional = true }
zip = { version = "2.1.3", default-features = false, features = ["deflate"], optional = true }

[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
"anyhow",
"cargo_toml",
@@ -5,10 +5,9 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};

use actix_web::http::header::USER_AGENT;
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::InstanceUid;
@@ -1,6 +1,6 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::Byte;
use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};

@@ -33,7 +33,7 @@ pub enum MeilisearchHttpError {
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
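The only substantive change in this error is the byte-unit 4 to 5 migration: `Byte::from_bytes` and `get_appropriate_unit(true)` become `Byte::from_u64` and `get_appropriate_unit(UnitType::Binary)`, and the rendered message drops the trailing decimals (see the `10 MiB` snapshot further down). A small sketch of the new calls, using only the APIs that appear in this diff:

```rust
use byte_unit::{Byte, Unit, UnitType};

fn main() {
    // Format a raw byte count with a binary (KiB/MiB/GiB) unit.
    let limit = Byte::from_u64(10 * 1024 * 1024);
    println!("{}", limit.get_appropriate_unit(UnitType::Binary)); // prints "10 MiB"

    // Build a Byte from a value plus an explicit unit, then read it back as u64.
    let max_index_size = Byte::from_u64_with_unit(100, Unit::MiB).unwrap();
    assert_eq!(max_index_size.as_u64(), 100 * 1024 * 1024);
}
```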
@@ -15,6 +15,7 @@ use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::num::NonZeroUsize;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::thread::{self, available_parallelism};
use std::time::Duration;

@@ -23,13 +24,13 @@ use actix_cors::Cors;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError;
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};

@@ -167,7 +168,7 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.get(USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)

@@ -300,15 +301,15 @@ fn open_or_create_database_unchecked(
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
})?)

@@ -476,7 +477,7 @@ pub fn configure_data(
opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
);
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
@@ -9,7 +9,7 @@ use std::str::FromStr;
use std::sync::Arc;
use std::{env, fmt, fs};

use byte_unit::{Byte, ByteError};
use byte_unit::{Byte, ParseError, UnitType};
use clap::Parser;
use meilisearch_types::features::InstanceTogglableFeatures;
use meilisearch_types::milli::update::IndexerConfig;

@@ -674,7 +674,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {

Ok(Self {
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,

@@ -688,23 +688,25 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
pub struct MaxMemory(Option<Byte>);

impl FromStr for MaxMemory {
type Err = ByteError;
type Err = ParseError;

fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
fn from_str(s: &str) -> Result<MaxMemory, Self::Err> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}

impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes))
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
}
}

impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
Some(memory) => {
write!(f, "{}", memory.get_appropriate_unit(UnitType::Binary))
}
None => f.write_str("unknown"),
}
}

@@ -844,11 +846,11 @@ fn default_env() -> String {
}

fn default_max_index_size() -> Byte {
Byte::from_bytes(INDEX_SIZE)
Byte::from_u64(INDEX_SIZE)
}

fn default_max_task_db_size() -> Byte {
Byte::from_bytes(TASK_DB_SIZE)
Byte::from_u64(TASK_DB_SIZE)
}

fn default_http_payload_size_limit() -> Byte {
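`MaxMemory` keeps its behaviour through the migration: parse any human-readable size, default to two thirds of the detected RAM, and display the value with a binary unit. A hedged sketch of that default rule, where `total_memory_bytes` stands in for the sysinfo-based helper used in option.rs:

```rust
use byte_unit::{Byte, UnitType};

// Sketch of the default-memory rule: two thirds of the machine's RAM,
// expressed as a Byte so it can be displayed with binary units.
fn default_max_memory(total_memory_bytes: Option<u64>) -> Option<Byte> {
    total_memory_bytes.map(|bytes| bytes * 2 / 3).map(Byte::from_u64)
}

fn main() {
    match default_max_memory(Some(16 * 1024 * 1024 * 1024)) {
        Some(limit) => println!("max indexing memory: {}", limit.get_appropriate_unit(UnitType::Binary)),
        None => println!("max indexing memory: unknown"),
    }
}
```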
@@ -603,42 +603,51 @@ fn some_documents<'a, 't: 'a>(
retrieve_vectors: RetrieveVectors,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let dictionary = index.document_decompression_dictionary(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
let mut buffer = Vec::new();

Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
);
Ok(index.iter_compressed_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(
|(key, compressed_document)| -> Result<_, ResponseError> {
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
// Clippy is simply wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings)
.map_err(MeilisearchHttpError::from)?,
);
}
document.insert("_vectors".into(), vectors.into());
}
document.insert("_vectors".into(), vectors.into());
}
}

Ok(document)
})
Ok(document)
},
)
}))
}
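Beyond the added decompression step, `some_documents` keeps its `_vectors` handling: when vectors are requested, the stored embeddings are merged back into the JSON document and flagged with `regenerate` depending on whether they were user-provided. A stripped-down sketch of that merge using plain serde_json types; the `ExplicitVectors` wrapper from the real code is replaced here by an inline object:

```rust
use serde_json::{json, Map, Value};

// Sketch: inject named embeddings into a document's `_vectors` object.
// `user_provided` marks embedders whose vectors came from the user and
// therefore must not be regenerated on the next indexing pass.
fn merge_vectors(
    document: &mut Map<String, Value>,
    embeddings: Vec<(String, Vec<Vec<f32>>)>,
    user_provided: impl Fn(&str) -> bool,
) {
    let mut vectors = match document.remove("_vectors") {
        Some(Value::Object(map)) => map,
        _ => Map::default(),
    };
    for (name, embedding) in embeddings {
        let regenerate = !user_provided(&name);
        vectors.insert(name, json!({ "embeddings": embedding, "regenerate": regenerate }));
    }
    document.insert("_vectors".into(), Value::Object(vectors));
}
```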
@@ -1123,10 +1123,16 @@ fn make_hits(
formatter_builder.crop_marker(format.crop_marker);
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
let decompression_dictionary = index.document_decompression_dictionary(rtxn)?;
let mut buffer = Vec::new();
let mut documents = Vec::new();
let embedding_configs = index.embedding_configs(rtxn)?;
let documents_iter = index.documents(rtxn, documents_ids)?;
for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
let documents_iter = index.compressed_documents(rtxn, documents_ids)?;
for ((id, compressed), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
let obkv = compressed
.decompress_with_optional_dictionary(&mut buffer, decompression_dictionary.as_ref())
// TODO use a better error?
.map_err(|e| MeilisearchHttpError::HeedError(e.into()))?;
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

@@ -1150,6 +1156,8 @@ fn make_hits(
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);

if retrieve_vectors == RetrieveVectors::Retrieve {
// Clippy is wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
@@ -1,5 +1,5 @@
use actix_web::http::StatusCode;
use actix_web::test;
use http::StatusCode;
use jsonwebtoken::{EncodingKey, Header};
use meili_snap::*;
use uuid::Uuid;
@@ -6,7 +6,7 @@ use std::time::Duration;
use actix_http::body::MessageBody;
use actix_web::dev::ServiceResponse;
use actix_web::http::StatusCode;
use byte_unit::{Byte, ByteUnit};
use byte_unit::{Byte, Unit};
use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};

@@ -231,9 +231,9 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
env: "development".to_owned(),
#[cfg(feature = "analytics")]
no_analytics: true,
max_index_size: Byte::from_unit(100.0, ByteUnit::MiB).unwrap(),
max_task_db_size: Byte::from_unit(1.0, ByteUnit::GiB).unwrap(),
http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(),
max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(),
http_payload_size_limit: Byte::from_u64_with_unit(10, Unit::MiB).unwrap(),
snapshot_dir: ".".into(),
indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context.
@@ -2274,7 +2274,7 @@ async fn error_add_documents_payload_size() {
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.",
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10 MiB.",
"code": "payload_too_large",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#payload_too_large"
@@ -1,5 +1,5 @@
use actix_web::http::header::ACCEPT_ENCODING;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::*;
use urlencoding::encode as urlencode;

@@ -1,6 +1,5 @@
use actix_web::http::header::ContentType;
use actix_web::http::header::{ContentType, ACCEPT_ENCODING};
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
@@ -9,11 +9,11 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.79"
clap = { version = "4.4.17", features = ["derive"] }
anyhow = "1.0.86"
clap = { version = "4.5.9", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.31", features = ["formatting"] }
uuid = { version = "1.6.1", features = ["v4"], default-features = false }
time = { version = "0.3.36", features = ["formatting"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }
@@ -260,6 +260,7 @@ fn export_a_dump(

// 4. Dump the indexes
let mut count = 0;
let mut buffer = Vec::new();
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string());

@@ -268,6 +269,7 @@ fn export_a_dump(
})?;

let rtxn = index.read_txn()?;
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),

@@ -280,8 +282,11 @@ fn export_a_dump(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

// 4.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
for ret in index.all_compressed_documents(&rtxn)? {
let (_id, compressed_doc) = ret?;
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
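The dump path follows the same recipe as the tests: one scratch buffer per dump, decompress each document, convert it to JSON, and hand it to the dumper. A simplified sketch of that loop which writes newline-delimited JSON to any writer instead of going through `index_dumper` (the helper shown here is illustrative only, assuming the milli accessors used above):

```rust
use std::io::Write;

use milli::{obkv_to_json, Index};

// Sketch: dump every document of an index as newline-delimited JSON.
fn dump_documents(index: &Index, mut out: impl Write) -> anyhow::Result<()> {
    let rtxn = index.read_txn()?;
    let dictionary = index.document_decompression_dictionary(&rtxn)?;
    let fields_ids_map = index.fields_ids_map(&rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
    let mut buffer = Vec::new();
    for ret in index.all_compressed_documents(&rtxn)? {
        let (_id, compressed_doc) = ret?;
        // Mirrors the meilitool code above, which unwraps the decompression result.
        let doc = compressed_doc
            .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
            .unwrap();
        let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
        serde_json::to_writer(&mut out, &document)?;
        out.write_all(b"\n")?;
    }
    Ok(())
}
```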
@@ -1,6 +1,6 @@
[package]
name = "milli"
edition = "2018"
edition = "2021"
publish = false

version.workspace = true

@@ -14,81 +14,81 @@ license.workspace = true
[dependencies]
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.11", default-features = false }
charabia = { version = "0.8.12", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"
either = { version = "1.9.0", features = ["serde"] }
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.6", default-features = false, features = [
grenad = { version = "0.4.7", default-features = false, features = [
"rayon",
"tempfile",
] }
heed = { version = "0.20.1", default-features = false, features = [
heed = { version = "0.20.3", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
] }
indexmap = { version = "2.1.0", features = ["serde"] }
indexmap = { version = "2.2.6", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.7.1"
obkv = "0.2.1"
zstd = { version = "0.13.1", features = ["zdict_builder", "experimental"] }
memmap2 = "0.9.4"
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.0"
rand_pcg = { version = "0.3.1", features = ["serde1"] }
rayon = "1.8.0"
roaring = { version = "0.10.2", features = ["serde"] }
rstar = { version = "0.11.0", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.12.0"
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.6.1", features = ["v4"] }
uuid = { version = "1.10.0", features = ["v4"] }

filter-parser = { path = "../filter-parser" }

# documents words self-join
itertools = "0.11.0"
itertools = "0.13.0"

csv = "1.3.0"
candle-core = { version = "0.4.1" }
candle-transformers = { version = "0.4.1" }
candle-nn = { version = "0.4.1" }
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }
candle-nn = { version = "0.6.0" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
arroy = { git = "https://github.com/meilisearch/arroy", branch = "binary-quantization" }
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
arroy = "0.4.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }
url = "2.5.0"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"

[dev-dependencies]
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.34.0"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
@ -30,6 +30,7 @@ fn main() -> Result<(), Box<dyn Error>> {

let index = Index::new(options, dataset)?;
let txn = index.read_txn()?;
let dictionary = index.document_decompression_dictionary(&txn).unwrap();
let mut query = String::new();
while stdin().read_line(&mut query)? > 0 {
for _ in 0..2 {
@ -49,6 +50,7 @@ fn main() -> Result<(), Box<dyn Error>> {
let start = Instant::now();

let mut ctx = SearchContext::new(&index, &txn)?;
let mut buffer = Vec::new();
let universe = filtered_universe(ctx.index, ctx.txn, &None)?;

let docs = execute_search(
@ -75,11 +77,14 @@ fn main() -> Result<(), Box<dyn Error>> {
let elapsed = start.elapsed();
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
if print_documents {
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
let compressed_documents = index
.compressed_documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, obkv)| {
.map(|(id, compressed_obkv)| {
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let mut object = serde_json::Map::default();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
@ -90,17 +95,20 @@ fn main() -> Result<(), Box<dyn Error>> {
})
.collect::<Vec<_>>();

for (id, document) in documents {
for (id, document) in compressed_documents {
println!("{id}:");
println!("{document}");
}

let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
let compressed_documents = index
.compressed_documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, obkv)| {
.map(|(id, compressed_obkv)| {
let mut object = serde_json::Map::default();
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
@ -110,7 +118,7 @@ fn main() -> Result<(), Box<dyn Error>> {
})
.collect::<Vec<_>>();
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
for (id, document) in documents {
for (id, document) in compressed_documents {
println!("{id}:");
println!("{document}");
}
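Note: the hunks above (presumably milli's `search` example binary) show the new read path introduced by this branch: documents are stored compressed, so the caller fetches the optional decompression dictionary once and decompresses each document into a reusable buffer before reading it as an obkv. A minimal sketch of that pattern, assuming `document_decompression_dictionary` and `compressed_documents` behave as used above; error handling is collapsed into `anyhow` for brevity:

```rust
// Sketch only, not repository code.
use milli::Index;

fn print_documents(index: &Index, docids: &[u32]) -> anyhow::Result<()> {
    let txn = index.read_txn()?;
    // `None` as long as no compression dictionary has been computed yet.
    let dictionary = index.document_decompression_dictionary(&txn)?;
    let mut buffer = Vec::new();

    for (docid, compressed) in index.compressed_documents(&txn, docids.iter().copied())? {
        // The buffer is reused across documents to avoid reallocating.
        let obkv =
            compressed.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
        println!("{docid}: {} fields", obkv.iter().count());
    }
    Ok(())
}
```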
@ -95,7 +95,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchCursor<R> {
|
||||
/// `next_document` advance the document reader until all the documents have been read.
|
||||
pub fn next_enriched_document(
|
||||
&mut self,
|
||||
) -> Result<Option<EnrichedDocument>, DocumentsBatchCursorError> {
|
||||
) -> Result<Option<EnrichedDocument<'_>>, DocumentsBatchCursorError> {
|
||||
let document = self.documents.next_document()?;
|
||||
let document_id = match self.external_ids.move_on_next()? {
|
||||
Some((_, bytes)) => serde_json::from_slice(bytes).map(Some)?,
|
||||
|
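Note: most of the remaining hunks in this comparison repeat one mechanical change: lifetimes that used to be elided in type paths (`KvReader<FieldId>`, `RoTxn`, `SearchContext`, `fmt::Formatter`) are now written explicitly as `<'_>`. A reduced sketch of the pattern, assuming the crate enables the `elided_lifetimes_in_paths` lint (the lint configuration itself is not part of this diff):

```rust
// Illustration only, not repository code.
#![warn(elided_lifetimes_in_paths)]

use std::marker::PhantomData;

pub struct KvReader<'a, K> {
    pub bytes: &'a [u8],
    _tag: PhantomData<K>,
}

// Writing `-> KvReader<u16>` would trigger the lint; the borrow is spelled out
// instead, exactly like the `KvReader<'_, FieldId>` signatures in the hunks below.
pub fn reader_for(bytes: &[u8]) -> KvReader<'_, u16> {
    KvReader { bytes, _tag: PhantomData }
}
```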
@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
|
||||
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
|
||||
|
||||
/// Helper function to convert an obkv reader into a JSON object.
|
||||
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
|
||||
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
|
||||
obkv.iter()
|
||||
.map(|(field_id, value)| {
|
||||
let field_name = index
|
||||
@ -64,7 +64,7 @@ impl DocumentsBatchIndex {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> bimap::hash::Iter<FieldId, String> {
|
||||
pub fn iter(&self) -> bimap::hash::Iter<'_, FieldId, String> {
|
||||
self.0.iter()
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
|
||||
self.0.get_by_right(name).cloned()
|
||||
}
|
||||
|
||||
pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
|
||||
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
|
||||
let mut map = Object::new();
|
||||
|
||||
for (k, v) in document.iter() {
|
||||
|
@ -52,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
|
||||
|
||||
pub fn document_id(
|
||||
&self,
|
||||
document: &obkv::KvReader<FieldId>,
|
||||
document: &obkv::KvReader<'_, FieldId>,
|
||||
fields: &impl FieldIdMapper,
|
||||
) -> Result<StdResult<String, DocumentIdExtractionError>> {
|
||||
match self {
|
||||
|
@ -76,7 +76,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
|
||||
/// `next_document` advance the document reader until all the documents have been read.
|
||||
pub fn next_document(
|
||||
&mut self,
|
||||
) -> Result<Option<KvReader<FieldId>>, DocumentsBatchCursorError> {
|
||||
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
|
||||
match self.cursor.move_on_next()? {
|
||||
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
|
||||
Ok(Some(KvReader::new(value)))
|
||||
@ -108,7 +108,7 @@ impl From<serde_json::Error> for DocumentsBatchCursorError {
|
||||
impl error::Error for DocumentsBatchCursorError {}
|
||||
|
||||
impl fmt::Display for DocumentsBatchCursorError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
DocumentsBatchCursorError::Grenad(e) => e.fmt(f),
|
||||
DocumentsBatchCursorError::SerdeJson(e) => e.fmt(f),
|
||||
|
@ -56,7 +56,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
|
||||
Ok(Ok(()))
|
||||
}
|
||||
|
||||
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "a documents, or a sequence of documents.")
|
||||
}
|
||||
}
|
||||
|
@ -24,17 +24,21 @@ impl ExternalDocumentsIds {
|
||||
}
|
||||
|
||||
/// Returns `true` if hard and soft external documents lists are empty.
|
||||
pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
|
||||
pub fn is_empty(&self, rtxn: &RoTxn<'_>) -> heed::Result<bool> {
|
||||
self.0.is_empty(rtxn).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
|
||||
pub fn get<A: AsRef<str>>(
|
||||
&self,
|
||||
rtxn: &RoTxn<'_>,
|
||||
external_id: A,
|
||||
) -> heed::Result<Option<u32>> {
|
||||
self.0.get(rtxn, external_id.as_ref())
|
||||
}
|
||||
|
||||
/// A helper function to debug this type, returns a `HashMap` of both,
|
||||
/// soft and hard fst maps, combined.
|
||||
pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
|
||||
pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashMap<String, u32>> {
|
||||
let mut map = HashMap::default();
|
||||
for result in self.0.iter(rtxn)? {
|
||||
let (external, internal) = result?;
|
||||
@ -51,7 +55,11 @@ impl ExternalDocumentsIds {
|
||||
///
|
||||
/// - If attempting to delete a document that doesn't exist
|
||||
/// - If attempting to create a document that already exists
|
||||
pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
|
||||
pub fn apply(
|
||||
&self,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
operations: Vec<DocumentOperation>,
|
||||
) -> heed::Result<()> {
|
||||
for DocumentOperation { external_id, internal_id, kind } in operations {
|
||||
match kind {
|
||||
DocumentOperationKind::Create => {
|
||||
@ -69,7 +77,7 @@ impl ExternalDocumentsIds {
|
||||
}
|
||||
|
||||
/// Returns an iterator over all the external ids.
|
||||
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
|
||||
pub fn iter<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<RoIter<'t, Str, BEU32>> {
|
||||
self.0.iter(rtxn)
|
||||
}
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ pub enum FacetType {
|
||||
}
|
||||
|
||||
impl fmt::Display for FacetType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
FacetType::String => f.write_str("string"),
|
||||
FacetType::Number => f.write_str("number"),
|
||||
@ -37,7 +37,7 @@ impl FromStr for FacetType {
|
||||
pub struct InvalidFacetType;
|
||||
|
||||
impl fmt::Display for InvalidFacetType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(r#"Invalid facet type, must be "string" or "number""#)
|
||||
}
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
|
||||
type EItem = (u16, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 2);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
|
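Note: the codec hunks in this stretch all make the same adjustment: the `Cow` returned by `bytes_encode` now names the `'a` of the borrowed item instead of leaving it elided. A reduced sketch of such a codec, mirroring the heed `BytesEncode` usage shown in these hunks (the codec name is made up for illustration):

```rust
use std::borrow::Cow;

use heed::BoxedError;

pub struct PrefixedStrCodec;

impl<'a> heed::BytesEncode<'a> for PrefixedStrCodec {
    type EItem = (u16, &'a str);

    // `Cow<'a, [u8]>` ties the encoded bytes to the `&'a str` being encoded.
    fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(2 + s.len());
        bytes.extend_from_slice(&n.to_be_bytes());
        bytes.extend_from_slice(s.as_bytes());
        Ok(Cow::Owned(bytes))
    }
}
```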
@ -20,7 +20,7 @@ impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
|
||||
type EItem = (u32, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 4);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
|
89
milli/src/heed_codec/compressed_obkv_codec.rs
Normal file
@ -0,0 +1,89 @@
use std::borrow::Cow;
use std::io;
use std::io::ErrorKind;

use heed::BoxedError;
use obkv::KvReaderU16;
use zstd::bulk::{Compressor, Decompressor};
use zstd::dict::{DecoderDictionary, EncoderDictionary};

pub struct CompressedObkvCodec;

impl<'a> heed::BytesDecode<'a> for CompressedObkvCodec {
    type DItem = CompressedKvReaderU16<'a>;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        Ok(CompressedKvReaderU16(bytes))
    }
}

impl heed::BytesEncode<'_> for CompressedObkvCodec {
    type EItem = CompressedKvWriterU16;

    fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
        Ok(Cow::Borrowed(&item.0))
    }
}

pub struct CompressedKvReaderU16<'a>(&'a [u8]);

impl<'a> CompressedKvReaderU16<'a> {
    /// Decompresses the KvReader into the buffer using the provided dictionary.
    pub fn decompress_with<'b>(
        &self,
        buffer: &'b mut Vec<u8>,
        dictionary: &DecoderDictionary,
    ) -> io::Result<KvReaderU16<'b>> {
        const TWO_GIGABYTES: usize = 2 * 1024 * 1024 * 1024;

        let mut decompressor = Decompressor::with_prepared_dictionary(dictionary)?;
        let mut max_size = self.0.len() * 4;
        let size = loop {
            buffer.resize(max_size, 0);
            match decompressor.decompress_to_buffer(self.0, &mut buffer[..max_size]) {
                Ok(size) => break size,
                // TODO don't do that !!! But what should I do?
                Err(e) if e.kind() == ErrorKind::Other && max_size <= TWO_GIGABYTES => {
                    max_size *= 2
                }
                Err(e) => return Err(e),
            }
        };
        Ok(KvReaderU16::new(&buffer[..size]))
    }

    /// Returns the KvReader as if it were not compressed.
    /// Happens when there is no dictionary yet.
    pub fn as_non_compressed(&self) -> KvReaderU16<'a> {
        KvReaderU16::new(self.0)
    }

    /// Decompresses this KvReader if necessary.
    pub fn decompress_with_optional_dictionary<'b>(
        &self,
        buffer: &'b mut Vec<u8>,
        dictionary: Option<&DecoderDictionary>,
    ) -> io::Result<KvReaderU16<'b>>
    where
        'a: 'b,
    {
        match dictionary {
            Some(dict) => self.decompress_with(buffer, dict),
            None => Ok(self.as_non_compressed()),
        }
    }
}

pub struct CompressedKvWriterU16(Vec<u8>);

impl CompressedKvWriterU16 {
    // TODO ask for a KvReaderU16 here
    pub fn new_with_dictionary(input: &[u8], dictionary: &EncoderDictionary) -> io::Result<Self> {
        let mut compressor = Compressor::with_prepared_dictionary(dictionary)?;
        compressor.compress(input).map(CompressedKvWriterU16)
    }

    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }
}
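Note: to make the role of the two dictionary types concrete, here is a hedged round-trip sketch built on the codec above. The raw dictionary bytes are a placeholder (in the index they would be trained from sampled documents and stored next to them), and `0` is simply zstd's default compression level:

```rust
use heed::BytesDecode;
use obkv::KvReaderU16;
use zstd::dict::{DecoderDictionary, EncoderDictionary};

// `CompressedKvWriterU16`, `CompressedObkvCodec` and the reader are the types
// defined in the new file above; this sketch assumes it lives in the same crate.
fn roundtrip(raw_obkv: &[u8], raw_dictionary: &[u8]) -> std::io::Result<()> {
    // Prepared dictionaries are built once and reused for every document.
    let encoder_dict = EncoderDictionary::copy(raw_dictionary, 0);
    let decoder_dict = DecoderDictionary::copy(raw_dictionary);

    // Compress the serialized obkv exactly as the codec would store it in LMDB.
    let compressed = CompressedKvWriterU16::new_with_dictionary(raw_obkv, &encoder_dict)?;

    // Reading back goes through the codec, then through a growable buffer.
    let reader = CompressedObkvCodec::bytes_decode(compressed.as_bytes()).unwrap();
    let mut buffer = Vec::new();
    let obkv = reader.decompress_with(&mut buffer, &decoder_dict)?;
    assert_eq!(obkv.iter().count(), KvReaderU16::new(raw_obkv).iter().count());
    Ok(())
}
```

The doubling retry in `decompress_with` exists because the uncompressed size is not stored alongside the document, so the decompressor can only report that the output buffer was too small; the `TODO` marks the two-gigabyte cap on that growth.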
@ -35,7 +35,7 @@ where
|
||||
|
||||
fn bytes_encode(
|
||||
(field_id, document_id, value): &'a Self::EItem,
|
||||
) -> Result<Cow<[u8]>, BoxedError> {
|
||||
) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(32);
|
||||
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
|
||||
|
@ -24,7 +24,7 @@ impl<'a> BytesDecode<'a> for OrderedF64Codec {
|
||||
impl heed::BytesEncode<'_> for OrderedF64Codec {
|
||||
type EItem = f64;
|
||||
|
||||
fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode(f: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
||||
let mut buffer = [0u8; 16];
|
||||
|
||||
// write the globally ordered float
|
||||
|
@ -21,7 +21,7 @@ impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
|
||||
type EItem = (FieldId, u8);
|
||||
|
||||
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(2 + 1);
|
||||
bytes.extend_from_slice(&field_id.to_be_bytes());
|
||||
bytes.push(*word_count);
|
||||
|
@ -1,6 +1,7 @@
mod beu16_str_codec;
mod beu32_str_codec;
mod byte_slice_ref;
mod compressed_obkv_codec;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
@ -19,6 +20,9 @@ use thiserror::Error;

pub use self::beu16_str_codec::BEU16StrCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::compressed_obkv_codec::{
    CompressedKvReaderU16, CompressedKvWriterU16, CompressedObkvCodec,
};
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
@ -16,7 +16,7 @@ impl<'a> heed::BytesDecode<'a> for ObkvCodec {
|
||||
impl heed::BytesEncode<'_> for ObkvCodec {
|
||||
type EItem = KvWriterU16<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
||||
item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
|
||||
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
||||
let mut out = Vec::new();
|
||||
BoRoaringBitmapCodec::serialize_into(item, &mut out);
|
||||
Ok(Cow::Owned(out))
|
||||
|
@ -167,7 +167,7 @@ impl BytesDecodeOwned for CboRoaringBitmapCodec {
|
||||
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
||||
let mut vec = Vec::with_capacity(Self::serialized_size(item));
|
||||
Self::serialize_into(item, &mut vec);
|
||||
Ok(Cow::Owned(vec))
|
||||
|
@ -26,7 +26,7 @@ impl BytesDecodeOwned for RoaringBitmapCodec {
|
||||
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(item.serialized_size());
|
||||
item.serialize_into(&mut bytes)?;
|
||||
Ok(Cow::Owned(bytes))
|
||||
|
@ -25,7 +25,7 @@ impl RoaringBitmapLenCodec {
|
||||
}
|
||||
};
|
||||
|
||||
if size > u16::max_value() as usize + 1 {
|
||||
if size > u16::MAX as usize + 1 {
|
||||
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
|
||||
type EItem = (Script, Language);
|
||||
|
||||
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let script_name = script.name().as_bytes();
|
||||
let lan_name = lan.name().as_bytes();
|
||||
|
||||
|
@ -30,7 +30,7 @@ impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
|
||||
type EItem = (&'a str, u32);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + pos.len());
|
||||
@ -66,7 +66,7 @@ impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
|
||||
type EItem = (&'a str, u16);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
|
||||
|
@ -24,7 +24,7 @@ impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
|
||||
type EItem = (u8, &'a str, &'a str);
|
||||
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1.as_bytes());
|
||||
@ -51,7 +51,7 @@ impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
|
||||
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
|
||||
type EItem = (u8, &'a [u8], &'a [u8]);
|
||||
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.push(*n);
|
||||
bytes.extend_from_slice(s1);
|
||||
|
File diff suppressed because it is too large
@ -45,7 +45,7 @@ pub use search::new::{
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {charabia as tokenizer, heed};
pub use {charabia as tokenizer, heed, zstd};

pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::criterion::{default_criteria, Criterion, CriterionError};
@ -211,7 +211,7 @@ pub fn bucketed_position(relative: u16) -> u16 {
pub fn obkv_to_json(
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
obkv: obkv::KvReaderU16<'_>,
) -> Result<Object> {
displayed_fields
.iter()
@ -229,7 +229,7 @@ pub fn obkv_to_json(
}

/// Transform every field of a raw obkv store into a JSON Object.
pub fn all_obkv_to_json(obkv: obkv::KvReaderU16, fields_ids_map: &FieldsIdsMap) -> Result<Object> {
pub fn all_obkv_to_json(
obkv: obkv::KvReaderU16<'_>,
fields_ids_map: &FieldsIdsMap,
) -> Result<Object> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
}
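Note: re-exporting `zstd` next to `charabia` and `heed` lets crates that depend on milli handle compression dictionaries without pinning their own copy of the crate. A hedged sketch of what that enables; `from_samples` is zstd's stock dictionary-training helper, and the sample payloads are assumed to be serialized documents rather than anything shown in this diff:

```rust
use milli::zstd::dict::{from_samples, DecoderDictionary, EncoderDictionary};

// Trains a shared dictionary from sample documents and prepares both sides.
fn build_dictionaries(
    samples: &[Vec<u8>],
) -> std::io::Result<(EncoderDictionary<'static>, DecoderDictionary<'static>)> {
    // 64 KiB is an arbitrary upper bound on the trained dictionary size.
    let raw = from_samples(samples, 64 * 1024)?;
    Ok((EncoderDictionary::copy(&raw, 0), DecoderDictionary::copy(&raw)))
}
```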
@ -47,7 +47,7 @@ pub struct FacetDistribution<'a> {
|
||||
}
|
||||
|
||||
impl<'a> FacetDistribution<'a> {
|
||||
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> {
|
||||
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> FacetDistribution<'a> {
|
||||
FacetDistribution {
|
||||
facets: None,
|
||||
candidates: None,
|
||||
@ -374,7 +374,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
}
|
||||
|
||||
impl fmt::Debug for FacetDistribution<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let FacetDistribution {
|
||||
facets,
|
||||
candidates,
|
||||
|
@ -221,14 +221,14 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Filter<'a> {
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
|
||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields, None)
|
||||
}
|
||||
|
||||
fn evaluate_operator(
|
||||
rtxn: &heed::RoTxn,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
field_id: FieldId,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
@ -313,7 +313,7 @@ impl<'a> Filter<'a> {
|
||||
/// Aggregates the documents ids that are part of the specified range automatically
|
||||
/// going deeper through the levels.
|
||||
fn explore_facet_number_levels(
|
||||
rtxn: &heed::RoTxn,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
field_id: FieldId,
|
||||
left: Bound<f64>,
|
||||
@ -338,7 +338,7 @@ impl<'a> Filter<'a> {
|
||||
|
||||
fn inner_evaluate(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
filterable_fields: &HashSet<String>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
|
@ -33,7 +33,7 @@ fn facet_extreme_value<'t>(
|
||||
|
||||
pub fn facet_min_value<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
@ -44,7 +44,7 @@ pub fn facet_min_value<'t>(
|
||||
|
||||
pub fn facet_max_value<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
@ -55,7 +55,7 @@ pub fn facet_max_value<'t>(
|
||||
|
||||
/// Get the first facet value in the facet database
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn,
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
@ -79,7 +79,7 @@ where
|
||||
|
||||
/// Get the last facet value in the facet database
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn,
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
|
@ -55,7 +55,7 @@ pub struct Search<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Search<'a> {
|
||||
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
||||
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Search<'a> {
|
||||
Search {
|
||||
query: None,
|
||||
filter: None,
|
||||
@ -253,7 +253,7 @@ impl<'a> Search<'a> {
|
||||
}
|
||||
|
||||
impl fmt::Debug for Search<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let Search {
|
||||
query,
|
||||
filter,
|
||||
|
@ -47,7 +47,7 @@ pub struct DatabaseCache<'ctx> {
|
||||
}
|
||||
impl<'ctx> DatabaseCache<'ctx> {
|
||||
fn get_value<'v, K1, KC, DC>(
|
||||
txn: &'ctx RoTxn,
|
||||
txn: &'ctx RoTxn<'_>,
|
||||
cache_key: K1,
|
||||
db_key: &'v KC::EItem,
|
||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||
@ -77,7 +77,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
}
|
||||
|
||||
fn get_value_from_keys<'v, K1, KC, DC>(
|
||||
txn: &'ctx RoTxn,
|
||||
txn: &'ctx RoTxn<'_>,
|
||||
cache_key: K1,
|
||||
db_keys: &'v [KC::EItem],
|
||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||
@ -99,7 +99,7 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
.iter()
|
||||
.filter_map(|key| db.get(txn, key).transpose())
|
||||
.map(|v| v.map(Cow::Borrowed))
|
||||
.collect::<std::result::Result<Vec<Cow<[u8]>>, _>>()?;
|
||||
.collect::<std::result::Result<Vec<Cow<'_, [u8]>>, _>>()?;
|
||||
|
||||
if bitmaps.is_empty() {
|
||||
None
|
||||
|
@ -23,7 +23,7 @@ pub struct DistinctOutput {
|
||||
/// - `excluded`: the set of document ids that contain a value for the given field that occurs
|
||||
/// in the given candidates.
|
||||
pub fn apply_distinct_rule(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
field_id: u16,
|
||||
candidates: &RoaringBitmap,
|
||||
) -> Result<DistinctOutput> {
|
||||
@ -42,7 +42,7 @@ pub fn apply_distinct_rule(
|
||||
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
|
||||
pub fn distinct_single_docid(
|
||||
index: &Index,
|
||||
txn: &RoTxn,
|
||||
txn: &RoTxn<'_>,
|
||||
field_id: u16,
|
||||
docid: u32,
|
||||
excluded: &mut RoaringBitmap,
|
||||
@ -72,7 +72,7 @@ pub fn distinct_single_docid(
|
||||
/// Return all the docids containing the given value in the given field
|
||||
fn facet_value_docids(
|
||||
database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
txn: &RoTxn,
|
||||
txn: &RoTxn<'_>,
|
||||
field_id: u16,
|
||||
facet_value: &[u8],
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
@ -86,7 +86,7 @@ fn facet_number_values<'a>(
|
||||
docid: u32,
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
txn: &'a RoTxn,
|
||||
txn: &'a RoTxn<'a>,
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
|
||||
let key = facet_values_prefix_key(field_id, docid);
|
||||
|
||||
@ -104,7 +104,7 @@ pub fn facet_string_values<'a>(
|
||||
docid: u32,
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
txn: &'a RoTxn,
|
||||
txn: &'a RoTxn<'a>,
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
|
||||
let key = facet_values_prefix_key(field_id, docid);
|
||||
|
||||
|
@ -28,7 +28,7 @@ fn facet_number_values<'a>(
|
||||
docid: u32,
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
txn: &'a RoTxn,
|
||||
txn: &'a RoTxn<'a>,
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<OrderedF64Codec>, Unit>> {
|
||||
let key = facet_values_prefix_key(field_id, docid);
|
||||
|
||||
@ -109,7 +109,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
||||
/// Drop the rtree if we don't need it anymore.
|
||||
fn fill_buffer(
|
||||
&mut self,
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
geo_candidates: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng");
|
||||
@ -182,7 +182,7 @@ fn geo_value(
|
||||
field_lat: u16,
|
||||
field_lng: u16,
|
||||
index: &Index,
|
||||
rtxn: &RoTxn,
|
||||
rtxn: &RoTxn<'_>,
|
||||
) -> Result<[f64; 2]> {
|
||||
let extract_geo = |geo_field: u16| -> Result<f64> {
|
||||
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
|
||||
|
@ -375,7 +375,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
/// docids and the previous path docids is empty.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn visit_path_condition<G: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
graph: &mut RankingRuleGraph<G>,
|
||||
universe: &RoaringBitmap,
|
||||
dead_ends_cache: &mut DeadEndsCache<G::Condition>,
|
||||
|
@ -20,13 +20,13 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
fn query_for_initial_universe(&mut self, _query: &Q);
|
||||
|
||||
/// Logs the ranking rules used to perform the search query
|
||||
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<Q>]);
|
||||
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<'_, Q>]);
|
||||
|
||||
/// Logs the start of a ranking rule's iteration.
|
||||
fn start_iteration_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<Q>,
|
||||
_ranking_rule: &dyn RankingRule<'_, Q>,
|
||||
_query: &Q,
|
||||
_universe: &RoaringBitmap,
|
||||
) {
|
||||
@ -35,7 +35,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
fn next_bucket_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<Q>,
|
||||
_ranking_rule: &dyn RankingRule<'_, Q>,
|
||||
_universe: &RoaringBitmap,
|
||||
_candidates: &RoaringBitmap,
|
||||
) {
|
||||
@ -44,7 +44,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
fn skip_bucket_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<Q>,
|
||||
_ranking_rule: &dyn RankingRule<'_, Q>,
|
||||
_candidates: &RoaringBitmap,
|
||||
) {
|
||||
}
|
||||
@ -52,7 +52,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
fn end_iteration_ranking_rule(
|
||||
&mut self,
|
||||
_ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<Q>,
|
||||
_ranking_rule: &dyn RankingRule<'_, Q>,
|
||||
_universe: &RoaringBitmap,
|
||||
) {
|
||||
}
|
||||
@ -73,7 +73,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
|
||||
fn query_for_initial_universe(&mut self, _query: &Q) {}
|
||||
|
||||
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<Q>]) {}
|
||||
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<'_, Q>]) {}
|
||||
|
||||
fn add_to_results(&mut self, _docids: &[u32]) {}
|
||||
|
||||
|
@ -69,14 +69,14 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
|
||||
fn initial_universe(&mut self, universe: &RoaringBitmap) {
|
||||
self.initial_universe = Some(universe.clone());
|
||||
}
|
||||
fn ranking_rules(&mut self, rr: &[BoxRankingRule<QueryGraph>]) {
|
||||
fn ranking_rules(&mut self, rr: &[BoxRankingRule<'_, QueryGraph>]) {
|
||||
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
|
||||
}
|
||||
|
||||
fn start_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
ranking_rule: &dyn RankingRule<'_, QueryGraph>,
|
||||
_query: &QueryGraph,
|
||||
universe: &RoaringBitmap,
|
||||
) {
|
||||
@ -97,7 +97,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
|
||||
fn next_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
bucket: &RoaringBitmap,
|
||||
) {
|
||||
@ -110,7 +110,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
|
||||
fn skip_bucket_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
|
||||
bucket: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleSkipBucket {
|
||||
@ -122,7 +122,7 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
|
||||
fn end_iteration_ranking_rule(
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
|
||||
_universe: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleEndIteration { ranking_rule_idx });
|
||||
|
@ -32,7 +32,7 @@ pub struct MatchingWords {
|
||||
}
|
||||
|
||||
impl MatchingWords {
|
||||
pub fn new(ctx: SearchContext, located_terms: Vec<LocatedQueryTerm>) -> Self {
|
||||
pub fn new(ctx: SearchContext<'_>, located_terms: Vec<LocatedQueryTerm>) -> Self {
|
||||
let mut phrases = Vec::new();
|
||||
let mut words = Vec::new();
|
||||
|
||||
@ -74,7 +74,7 @@ impl MatchingWords {
|
||||
}
|
||||
|
||||
/// Try to match the token with one of the located_words.
|
||||
fn match_unique_words<'a>(&'a self, token: &Token) -> Option<MatchType<'a>> {
|
||||
fn match_unique_words<'a>(&'a self, token: &Token<'_>) -> Option<MatchType<'a>> {
|
||||
for located_words in &self.words {
|
||||
for word in &located_words.value {
|
||||
let word = self.word_interner.get(*word);
|
||||
@ -166,7 +166,7 @@ impl<'a> PartialMatch<'a> {
|
||||
/// - None if the given token breaks the partial match
|
||||
/// - Partial if the given token matches the partial match but doesn't complete it
|
||||
/// - Full if the given token completes the partial match
|
||||
pub fn match_token(self, token: &Token) -> Option<MatchType<'a>> {
|
||||
pub fn match_token(self, token: &Token<'_>) -> Option<MatchType<'a>> {
|
||||
let Self { mut matching_words, ids, .. } = self;
|
||||
|
||||
let is_matching = match matching_words.first()? {
|
||||
@ -198,7 +198,7 @@ impl<'a> PartialMatch<'a> {
|
||||
}
|
||||
|
||||
impl fmt::Debug for MatchingWords {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let MatchingWords { word_interner, phrase_interner, phrases, words } = self;
|
||||
|
||||
let phrases: Vec<_> = phrases
|
||||
|
@ -123,7 +123,7 @@ impl<'t> Matcher<'t, '_> {
|
||||
/// some words are counted as matches only if they are close together and in the good order,
|
||||
/// compute_partial_match peek into next words to validate if the match is complete.
|
||||
fn compute_partial_match<'a>(
|
||||
mut partial: PartialMatch,
|
||||
mut partial: PartialMatch<'a>,
|
||||
token_position: usize,
|
||||
word_position: usize,
|
||||
words_positions: &mut impl Iterator<Item = (usize, usize, &'a Token<'a>)>,
|
||||
@ -244,7 +244,12 @@ impl<'t> Matcher<'t, '_> {
|
||||
}
|
||||
|
||||
/// Returns the bounds in byte index of the crop window.
|
||||
fn crop_bounds(&self, tokens: &[Token], matches: &[Match], crop_size: usize) -> (usize, usize) {
|
||||
fn crop_bounds(
|
||||
&self,
|
||||
tokens: &[Token<'_>],
|
||||
matches: &[Match],
|
||||
crop_size: usize,
|
||||
) -> (usize, usize) {
|
||||
// if there is no match, we start from the beginning of the string by default.
|
||||
let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0);
|
||||
let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0);
|
||||
@ -505,7 +510,7 @@ mod tests {
|
||||
use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
|
||||
|
||||
impl<'a> MatcherBuilder<'a> {
|
||||
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
||||
fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
|
||||
let mut ctx = SearchContext::new(index, rtxn).unwrap();
|
||||
let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
|
||||
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
||||
|
@ -183,7 +183,7 @@ impl RestrictedFids {
|
||||
|
||||
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
|
||||
fn resolve_maximally_reduced_query_graph(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
universe: &RoaringBitmap,
|
||||
query_graph: &QueryGraph,
|
||||
matching_strategy: TermsMatchingStrategy,
|
||||
@ -214,7 +214,7 @@ fn resolve_maximally_reduced_query_graph(
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
|
||||
fn resolve_universe(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
initial_universe: &RoaringBitmap,
|
||||
query_graph: &QueryGraph,
|
||||
matching_strategy: TermsMatchingStrategy,
|
||||
@ -231,7 +231,7 @@ fn resolve_universe(
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
||||
fn resolve_negative_words(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
negative_words: &[Word],
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut negative_bitmap = RoaringBitmap::new();
|
||||
@ -245,7 +245,7 @@ fn resolve_negative_words(
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
||||
fn resolve_negative_phrases(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
negative_phrases: &[LocatedQueryTerm],
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut negative_bitmap = RoaringBitmap::new();
|
||||
@ -267,7 +267,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
let mut sort = false;
|
||||
let mut sorted_fields = HashSet::new();
|
||||
let mut geo_sorted = false;
|
||||
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
|
||||
let mut ranking_rules: Vec<BoxRankingRule<'ctx, PlaceholderQuery>> = vec![];
|
||||
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
||||
for rr in settings_ranking_rules {
|
||||
match rr {
|
||||
@ -326,7 +326,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
let mut geo_sorted = false;
|
||||
|
||||
let mut vector = false;
|
||||
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
|
||||
let mut ranking_rules: Vec<BoxRankingRule<'ctx, PlaceholderQuery>> = vec![];
|
||||
|
||||
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
||||
for rr in settings_ranking_rules {
|
||||
@ -406,7 +406,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||
words = true;
|
||||
}
|
||||
|
||||
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
|
||||
let mut ranking_rules: Vec<BoxRankingRule<'ctx, QueryGraph>> = vec![];
|
||||
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
||||
for rr in settings_ranking_rules {
|
||||
// Add Words before any of: typo, proximity, attribute
|
||||
@ -552,7 +552,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
pub fn filtered_universe(
|
||||
index: &Index,
|
||||
txn: &RoTxn<'_>,
|
||||
filters: &Option<Filter>,
|
||||
filters: &Option<Filter<'_>>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
Ok(if let Some(filters) = filters {
|
||||
filters.evaluate(txn, index)?
|
||||
@ -563,7 +563,7 @@ pub fn filtered_universe(
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn execute_vector_search(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
vector: &[f32],
|
||||
scoring_strategy: ScoringStrategy,
|
||||
universe: RoaringBitmap,
|
||||
@ -622,7 +622,7 @@ pub fn execute_vector_search(
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::main")]
|
||||
pub fn execute_search(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
query: Option<&str>,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
scoring_strategy: ScoringStrategy,
|
||||
@ -775,7 +775,10 @@ pub fn execute_search(
|
||||
})
|
||||
}
|
||||
|
||||
fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>) -> Result<()> {
|
||||
fn check_sort_criteria(
|
||||
ctx: &SearchContext<'_>,
|
||||
sort_criteria: Option<&Vec<AscDesc>>,
|
||||
) -> Result<()> {
|
||||
let sort_criteria = if let Some(sort_criteria) = sort_criteria {
|
||||
sort_criteria
|
||||
} else {
|
||||
|
@ -93,7 +93,7 @@ impl QueryGraph {
|
||||
/// Build the query graph from the parsed user search query, return an updated list of the located query terms
|
||||
/// which contains ngrams.
|
||||
pub fn from_query(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
// The terms here must be consecutive
|
||||
terms: &[LocatedQueryTerm],
|
||||
) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
|
||||
@ -294,7 +294,7 @@ impl QueryGraph {
|
||||
|
||||
pub fn removal_order_for_terms_matching_strategy_frequency(
|
||||
&self,
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
) -> Result<Vec<SmallBitmap<QueryNode>>> {
|
||||
// lookup frequency for each term
|
||||
let mut term_with_frequency: Vec<(u8, u64)> = {
|
||||
@ -316,7 +316,7 @@ impl QueryGraph {
|
||||
term_docids
|
||||
.into_iter()
|
||||
.map(|(idx, docids)| match docids.len() {
|
||||
0 => (idx, u64::max_value()),
|
||||
0 => (idx, u64::MAX),
|
||||
frequency => (idx, frequency),
|
||||
})
|
||||
.collect()
|
||||
@ -337,7 +337,7 @@ impl QueryGraph {
|
||||
|
||||
pub fn removal_order_for_terms_matching_strategy_last(
|
||||
&self,
|
||||
ctx: &SearchContext,
|
||||
ctx: &SearchContext<'_>,
|
||||
) -> Vec<SmallBitmap<QueryNode>> {
|
||||
let (first_term_idx, last_term_idx) = {
|
||||
let mut first_term_idx = u8::MAX;
|
||||
@ -370,7 +370,7 @@ impl QueryGraph {
|
||||
|
||||
pub fn removal_order_for_terms_matching_strategy(
|
||||
&self,
|
||||
ctx: &SearchContext,
|
||||
ctx: &SearchContext<'_>,
|
||||
order: impl Fn(u8) -> u16,
|
||||
) -> Vec<SmallBitmap<QueryNode>> {
|
||||
let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
|
||||
@ -398,7 +398,7 @@ impl QueryGraph {
|
||||
}
|
||||
|
||||
/// Number of words in the phrases in this query graph
|
||||
pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext) -> usize {
|
||||
pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext<'_>) -> usize {
|
||||
let mut word_count = 0;
|
||||
for (_, node) in self.nodes.iter() {
|
||||
match &node.data {
|
||||
|
@ -27,7 +27,7 @@ pub enum ZeroOrOneTypo {
|
||||
}
|
||||
|
||||
impl Interned<QueryTerm> {
|
||||
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> {
|
||||
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
|
||||
let s = ctx.term_interner.get_mut(self);
|
||||
if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() {
|
||||
assert!(s.two_typo.is_uninit());
|
||||
@ -48,7 +48,7 @@ impl Interned<QueryTerm> {
|
||||
|
||||
fn find_zero_typo_prefix_derivations(
|
||||
word_interned: Interned<String>,
|
||||
fst: fst::Set<Cow<[u8]>>,
|
||||
fst: fst::Set<Cow<'_, [u8]>>,
|
||||
word_interner: &mut DedupInterner<String>,
|
||||
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
|
||||
) -> Result<()> {
|
||||
@ -71,7 +71,7 @@ fn find_zero_typo_prefix_derivations(
|
||||
}
|
||||
|
||||
fn find_zero_one_typo_derivations(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
word_interned: Interned<String>,
|
||||
is_prefix: bool,
|
||||
mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
|
||||
@ -114,7 +114,7 @@ fn find_zero_one_typo_derivations(
|
||||
fn find_zero_one_two_typo_derivations(
|
||||
word_interned: Interned<String>,
|
||||
is_prefix: bool,
|
||||
fst: fst::Set<Cow<[u8]>>,
|
||||
fst: fst::Set<Cow<'_, [u8]>>,
|
||||
word_interner: &mut DedupInterner<String>,
|
||||
mut visit: impl FnMut(Interned<String>, NumberOfTypos) -> Result<ControlFlow<()>>,
|
||||
) -> Result<()> {
|
||||
@ -172,7 +172,7 @@ fn find_zero_one_two_typo_derivations(
|
||||
}
|
||||
|
||||
pub fn partially_initialized_term_from_word(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
word: &str,
|
||||
max_typo: u8,
|
||||
is_prefix: bool,
|
||||
@ -265,7 +265,7 @@ pub fn partially_initialized_term_from_word(
|
||||
})
|
||||
}
|
||||
|
||||
fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Interned<Phrase>>> {
|
||||
fn find_split_words(ctx: &mut SearchContext<'_>, word: &str) -> Result<Option<Interned<Phrase>>> {
|
||||
if let Some((l, r)) = split_best_frequency(ctx, word)? {
|
||||
Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
|
||||
} else {
|
||||
@ -274,7 +274,7 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern
|
||||
}
|
||||
|
||||
impl Interned<QueryTerm> {
|
||||
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
|
||||
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext<'_>) -> Result<()> {
|
||||
let self_mut = ctx.term_interner.get_mut(self);
|
||||
|
||||
let allows_split_words = self_mut.allows_split_words();
|
||||
@ -340,7 +340,7 @@ impl Interned<QueryTerm> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
|
||||
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext<'_>) -> Result<()> {
|
||||
let self_mut = ctx.term_interner.get_mut(self);
|
||||
let QueryTerm {
|
||||
original,
|
||||
@ -406,7 +406,7 @@ impl Interned<QueryTerm> {
|
||||
///
|
||||
/// Return `None` if the original word cannot be split.
|
||||
fn split_best_frequency(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
original: &str,
|
||||
) -> Result<Option<(Interned<String>, Interned<String>)>> {
|
||||
let chars = original.char_indices().skip(1);
|
||||
|
@ -128,7 +128,7 @@ impl QueryTermSubset {
|
||||
pub fn make_mandatory(&mut self) {
|
||||
self.mandatory = true;
|
||||
}
|
||||
pub fn exact_term(&self, ctx: &SearchContext) -> Option<ExactTerm> {
|
||||
pub fn exact_term(&self, ctx: &SearchContext<'_>) -> Option<ExactTerm> {
|
||||
let full_query_term = ctx.term_interner.get(self.original);
|
||||
if full_query_term.ngram_words.is_some() {
|
||||
return None;
|
||||
@ -174,7 +174,7 @@ impl QueryTermSubset {
|
||||
self.two_typo_subset.intersect(&other.two_typo_subset);
|
||||
}
|
||||
|
||||
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
|
||||
pub fn use_prefix_db(&self, ctx: &SearchContext<'_>) -> Option<Word> {
|
||||
let original = ctx.term_interner.get(self.original);
|
||||
let use_prefix_db = original.zero_typo.use_prefix_db?;
|
||||
let word = match &self.zero_typo_subset {
|
||||
@ -198,7 +198,7 @@ impl QueryTermSubset {
|
||||
}
|
||||
pub fn all_single_words_except_prefix_db(
|
||||
&self,
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
) -> Result<BTreeSet<Word>> {
|
||||
let mut result = BTreeSet::default();
|
||||
if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
|
||||
@ -290,7 +290,7 @@ impl QueryTermSubset {
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
pub fn all_phrases(&self, ctx: &mut SearchContext) -> Result<BTreeSet<Interned<Phrase>>> {
|
||||
pub fn all_phrases(&self, ctx: &mut SearchContext<'_>) -> Result<BTreeSet<Interned<Phrase>>> {
|
||||
let mut result = BTreeSet::default();
|
||||
|
||||
if !self.one_typo_subset.is_empty() {
|
||||
@ -328,7 +328,7 @@ impl QueryTermSubset {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn original_phrase(&self, ctx: &SearchContext) -> Option<Interned<Phrase>> {
|
||||
pub fn original_phrase(&self, ctx: &SearchContext<'_>) -> Option<Interned<Phrase>> {
|
||||
let t = ctx.term_interner.get(self.original);
|
||||
if let Some(p) = t.zero_typo.phrase {
|
||||
if self.zero_typo_subset.contains_phrase(p) {
|
||||
@ -337,7 +337,7 @@ impl QueryTermSubset {
|
||||
}
|
||||
None
|
||||
}
|
||||
pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
|
||||
pub fn max_typo_cost(&self, ctx: &SearchContext<'_>) -> u8 {
|
||||
let t = ctx.term_interner.get(self.original);
|
||||
match t.max_levenshtein_distance {
|
||||
0 => {
|
||||
@ -368,7 +368,7 @@ impl QueryTermSubset {
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
pub fn keep_only_exact_term(&mut self, ctx: &SearchContext) {
|
||||
pub fn keep_only_exact_term(&mut self, ctx: &SearchContext<'_>) {
|
||||
if let Some(term) = self.exact_term(ctx) {
|
||||
match term {
|
||||
ExactTerm::Phrase(p) => {
|
||||
@ -399,7 +399,7 @@ impl QueryTermSubset {
|
||||
pub fn clear_two_typo_subset(&mut self) {
|
||||
self.two_typo_subset = NTypoTermSubset::Nothing;
|
||||
}
|
||||
pub fn description(&self, ctx: &SearchContext) -> String {
|
||||
pub fn description(&self, ctx: &SearchContext<'_>) -> String {
|
||||
let t = ctx.term_interner.get(self.original);
|
||||
ctx.word_interner.get(t.original).to_owned()
|
||||
}
|
||||
@ -446,7 +446,7 @@ impl QueryTerm {
|
||||
|
||||
impl Interned<QueryTerm> {
|
||||
/// Return the original word from the given query term
|
||||
fn original_single_word(self, ctx: &SearchContext) -> Option<Interned<String>> {
|
||||
fn original_single_word(self, ctx: &SearchContext<'_>) -> Option<Interned<String>> {
|
||||
let self_ = ctx.term_interner.get(self);
|
||||
if self_.ngram_words.is_some() {
|
||||
None
|
||||
@ -477,7 +477,7 @@ impl QueryTerm {
|
||||
pub fn is_prefix(&self) -> bool {
|
||||
self.is_prefix
|
||||
}
|
||||
pub fn original_word(&self, ctx: &SearchContext) -> String {
|
||||
pub fn original_word(&self, ctx: &SearchContext<'_>) -> String {
|
||||
ctx.word_interner.get(self.original).clone()
|
||||
}
|
||||
|
||||
|
@ -23,8 +23,8 @@ pub struct ExtractedTokens {
|
||||
/// Convert the tokenised search query into a list of located query terms.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
|
||||
pub fn located_query_terms_from_tokens(
|
||||
ctx: &mut SearchContext,
|
||||
query: NormalizedTokenIter,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
query: NormalizedTokenIter<'_, '_>,
|
||||
words_limit: Option<usize>,
|
||||
) -> Result<ExtractedTokens> {
|
||||
let nbr_typos = number_of_typos_allowed(ctx)?;
|
||||
@ -214,7 +214,7 @@ pub fn number_of_typos_allowed<'ctx>(
|
||||
}
|
||||
|
||||
pub fn make_ngram(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
terms: &[LocatedQueryTerm],
|
||||
number_of_typos_allowed: &impl Fn(&str) -> u8,
|
||||
) -> Result<Option<LocatedQueryTerm>> {
|
||||
@ -297,7 +297,12 @@ impl PhraseBuilder {
|
||||
}
|
||||
|
||||
// precondition: token has kind Word or StopWord
|
||||
fn push_word(&mut self, ctx: &mut SearchContext, token: &charabia::Token, position: u16) {
|
||||
fn push_word(
|
||||
&mut self,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
token: &charabia::Token<'_>,
|
||||
position: u16,
|
||||
) {
|
||||
if self.is_empty() {
|
||||
self.start = position;
|
||||
}
|
||||
@ -311,7 +316,7 @@ impl PhraseBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
fn build(self, ctx: &mut SearchContext) -> Option<LocatedQueryTerm> {
|
||||
fn build(self, ctx: &mut SearchContext<'_>) -> Option<LocatedQueryTerm> {
|
||||
if self.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
@ -10,11 +10,11 @@ pub struct Phrase {
|
||||
pub words: Vec<Option<Interned<String>>>,
|
||||
}
|
||||
impl Interned<Phrase> {
|
||||
pub fn description(self, ctx: &SearchContext) -> String {
|
||||
pub fn description(self, ctx: &SearchContext<'_>) -> String {
|
||||
let p = ctx.phrase_interner.get(self);
|
||||
p.words.iter().flatten().map(|w| ctx.word_interner.get(*w)).join(" ")
|
||||
}
|
||||
pub fn words(self, ctx: &SearchContext) -> Vec<Option<Interned<String>>> {
|
||||
pub fn words(self, ctx: &SearchContext<'_>) -> Vec<Option<Interned<String>>> {
|
||||
let p = ctx.phrase_interner.get(self);
|
||||
p.words.clone()
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ use crate::Result;
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
/// Build the ranking rule graph from the given query graph
|
||||
pub fn build(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
query_graph: QueryGraph,
|
||||
cost_of_ignoring_node: MappedInterner<QueryNode, Option<(u32, SmallBitmap<QueryNode>)>>,
|
||||
) -> Result<Self> {
|
||||
|
@ -117,7 +117,7 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> {
|
||||
}
|
||||
|
||||
/// See module documentation
|
||||
pub fn visit_paths(mut self, visit: VisitFn<G>) -> Result<()> {
|
||||
pub fn visit_paths(mut self, visit: VisitFn<'_, G>) -> Result<()> {
|
||||
let _ =
|
||||
self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?;
|
||||
Ok(())
|
||||
@ -132,8 +132,8 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
fn visit_node(
|
||||
&mut self,
|
||||
from_node: Interned<QueryNode>,
|
||||
visit: VisitFn<G>,
|
||||
ctx: &mut VisitorContext<G>,
|
||||
visit: VisitFn<'_, G>,
|
||||
ctx: &mut VisitorContext<'_, G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
// any valid path will be found from this point
|
||||
// if a valid path was found, then we know that the DeadEndsCache may have been updated,
|
||||
@ -189,8 +189,8 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
&mut self,
|
||||
dest_node: Interned<QueryNode>,
|
||||
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||
visit: VisitFn<G>,
|
||||
ctx: &mut VisitorContext<G>,
|
||||
visit: VisitFn<'_, G>,
|
||||
ctx: &mut VisitorContext<'_, G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
if !ctx
|
||||
.all_costs_from_node
|
||||
@ -228,8 +228,8 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
condition: Interned<G::Condition>,
|
||||
dest_node: Interned<QueryNode>,
|
||||
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||
visit: VisitFn<G>,
|
||||
ctx: &mut VisitorContext<G>,
|
||||
visit: VisitFn<'_, G>,
|
||||
ctx: &mut VisitorContext<'_, G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
assert!(dest_node != ctx.graph.query_graph.end_node);
|
||||
|
||||
|
@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||
/// and inserted in the cache.
|
||||
pub fn get_computed_condition<'s>(
|
||||
&'s mut self,
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
interned_condition: Interned<G::Condition>,
|
||||
graph: &mut RankingRuleGraph<G>,
|
||||
universe: &RoaringBitmap,
|
||||
|
@ -17,7 +17,7 @@ pub enum ExactnessCondition {
|
||||
pub enum ExactnessGraph {}
|
||||
|
||||
fn compute_docids(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap> {
|
||||
@ -46,7 +46,7 @@ impl RankingRuleGraphTrait for ExactnessGraph {
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
@ -74,7 +74,7 @@ impl RankingRuleGraphTrait for ExactnessGraph {
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
|
||||
fn build_edges(
|
||||
_ctx: &mut SearchContext,
|
||||
_ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_source_node: Option<&LocatedQueryTermSubset>,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
|
@ -22,7 +22,7 @@ impl RankingRuleGraphTrait for FidGraph {
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
@ -47,7 +47,7 @@ impl RankingRuleGraphTrait for FidGraph {
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_from: Option<&LocatedQueryTermSubset>,
|
||||
to_term: &LocatedQueryTermSubset,
|
||||
|
@ -99,14 +99,14 @@ pub trait RankingRuleGraphTrait: Sized + 'static {
|
||||
/// Compute the document ids associated with the given edge condition,
|
||||
/// restricted to the given universe.
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition>;
|
||||
|
||||
/// Return the costs and conditions of the edges going from the source node to the destination node
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
source_node: Option<&LocatedQueryTermSubset>,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
|
@@ -22,7 +22,7 @@ impl RankingRuleGraphTrait for PositionGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn resolve_condition(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {

@@ -47,7 +47,7 @@ impl RankingRuleGraphTrait for PositionGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn build_edges(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,

@@ -8,7 +8,7 @@ use crate::search::new::SearchContext;
use crate::Result;

pub fn build_edges(
-_ctx: &mut SearchContext,
+_ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<ProximityCondition>,
left_term: Option<&LocatedQueryTermSubset>,
right_term: &LocatedQueryTermSubset,

@@ -13,7 +13,7 @@ use crate::search::new::{SearchContext, Word};
use crate::Result;

pub fn compute_docids(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
condition: &ProximityCondition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {

@@ -110,7 +110,7 @@ pub fn compute_docids(
}

fn compute_prefix_edges(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
left_word: Interned<String>,
right_prefix: Interned<String>,
left_phrase: Option<Interned<Phrase>>,

@@ -166,7 +166,7 @@ fn compute_prefix_edges(
}

fn compute_non_prefix_edges(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
word1: Interned<String>,
word2: Interned<String>,
left_phrase: Option<Interned<Phrase>>,

@@ -209,7 +209,7 @@ fn compute_non_prefix_edges(
}

fn last_words_of_term_derivations(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
t: &QueryTermSubset,
) -> Result<BTreeSet<(Option<Interned<Phrase>>, Word)>> {
let mut result = BTreeSet::new();

@@ -228,7 +228,7 @@ fn last_words_of_term_derivations(
Ok(result)
}
fn first_word_of_term_iter(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
t: &QueryTermSubset,
) -> Result<BTreeSet<(Interned<String>, Option<Interned<Phrase>>)>> {
let mut result = BTreeSet::new();

@@ -23,7 +23,7 @@ impl RankingRuleGraphTrait for ProximityGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn resolve_condition(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {

@@ -32,7 +32,7 @@ impl RankingRuleGraphTrait for ProximityGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn build_edges(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
source_term: Option<&LocatedQueryTermSubset>,
dest_term: &LocatedQueryTermSubset,

@@ -21,7 +21,7 @@ impl RankingRuleGraphTrait for TypoGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn resolve_condition(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {

@@ -40,7 +40,7 @@ impl RankingRuleGraphTrait for TypoGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn build_edges(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,

@@ -20,7 +20,7 @@ impl RankingRuleGraphTrait for WordsGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn resolve_condition(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {

@@ -39,7 +39,7 @@ impl RankingRuleGraphTrait for WordsGraph {

#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn build_edges(
-_ctx: &mut SearchContext,
+_ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,

@@ -30,7 +30,7 @@ impl<'ctx> SearchContext<'ctx> {
}
}
pub fn compute_query_term_subset_docids(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
term: &QueryTermSubset,
) -> Result<RoaringBitmap> {
let mut docids = RoaringBitmap::new();

@@ -53,7 +53,7 @@ pub fn compute_query_term_subset_docids(
}

pub fn compute_query_term_subset_docids_within_field_id(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
term: &QueryTermSubset,
fid: u16,
) -> Result<RoaringBitmap> {

@@ -86,7 +86,7 @@ pub fn compute_query_term_subset_docids_within_field_id(
}

pub fn compute_query_term_subset_docids_within_position(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
term: &QueryTermSubset,
position: u16,
) -> Result<RoaringBitmap> {

@@ -121,7 +121,7 @@ pub fn compute_query_term_subset_docids_within_position(

/// Returns the subset of the input universe that satisfies the contraints of the input query graph.
pub fn compute_query_graph_docids(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
q: &QueryGraph,
universe: &RoaringBitmap,
) -> Result<RoaringBitmap> {

@@ -178,7 +178,7 @@ pub fn compute_query_graph_docids(
}

pub fn compute_phrase_docids(
-ctx: &mut SearchContext,
+ctx: &mut SearchContext<'_>,
phrase: Interned<Phrase>,
) -> Result<RoaringBitmap> {
let Phrase { words } = ctx.phrase_interner.get(phrase).clone();

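Taken together, every hunk in this compare makes the same mechanical edit: a `&mut SearchContext` parameter becomes `&mut SearchContext<'_>`. The compare itself does not state the motivation, but this is the spelling requested by rustc's allow-by-default `elided_lifetimes_in_paths` lint, and it changes nothing for the borrow checker; it only makes the hidden lifetime parameter of `SearchContext<'ctx>` visible in each signature. A standalone sketch of the pattern, using made-up `Search`/`run` names rather than the real Meilisearch types:

```rust
// Standalone sketch (illustrative names, not Meilisearch's API). With the
// allow-by-default lint enabled, the elided form warns; the `<'_>` form does
// not, and both mean exactly the same thing to the borrow checker.
#![warn(elided_lifetimes_in_paths)]

struct Search<'a> {
    query: &'a str,
}

// Would warn under the lint: `Search` silently hides its lifetime parameter.
// fn run(search: &mut Search) -> usize { search.query.len() }

// Same signature with the lifetime made explicit; behavior is unchanged.
fn run(search: &mut Search<'_>) -> usize {
    search.query.len()
}

fn main() {
    let q = String::from("ranking rules");
    let mut search = Search { query: &q };
    println!("{}", run(&mut search)); // prints 13
}
```

A lint-driven sweep of this kind is typically enabled crate-wide (the lint is part of the `rust-2018-idioms` group), which would explain why so many files change in exactly the same way here.
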
Some files were not shown because too many files have changed in this diff.