Compare commits


34 Commits

Author SHA1 Message Date
Clément Renault
deee22b5da Use an experimental feature to avoid copying 64k in memory 2024-07-10 16:42:01 +02:00
Clément Renault
fd8c90b858 Clean up some parts of the code 2024-07-10 16:37:21 +02:00
Clément Renault
4ceade43cd Make the tests pass 2024-07-10 16:37:21 +02:00
Clément Renault
e95e47d258 Simplify optional document decompression usage 2024-07-10 16:37:21 +02:00
Clément Renault
e18b06ddda Use the zstd library directly to be able to define the compression level 2024-07-10 16:37:20 +02:00
Clément Renault
b15e8aacb6 Fix merging of documents to support compressed documents 2024-07-10 16:34:45 +02:00
Clément Renault
767f20e30d Generate the dictionary from the first 10k documents 2024-07-10 16:34:45 +02:00
Clément Renault
0d63d02ab2 Prefer encoding the output size when compressing documents 2024-07-10 16:33:39 +02:00
Clément Renault
bf5d9f68fa First version compressing the documents 2024-07-10 16:33:39 +02:00
Clément Renault
e9d6b4222b First compiling version with compressed documents iterators 2024-07-10 16:33:39 +02:00
Clément Renault
2f0567fad1 Introduce the compressed obkv readers and writers 2024-07-10 16:32:22 +02:00
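(Aside: the commits above describe a document-compression scheme: train a zstd dictionary on a sample of documents, compress every stored document with an explicitly chosen level, and record the uncompressed size so the reader can allocate an exactly-sized buffer. The sketch below is a hypothetical, minimal illustration of that idea using the `zstd` crate's dictionary and bulk APIs; the function names, constants, and in-memory `Vec<Vec<u8>>` documents are invented for the example and are not the compressed obkv readers/writers these commits actually introduce.)

```rust
use std::io;

// Illustrative constants, not values taken from the commits above.
const SAMPLE_SIZE: usize = 10_000; // "first 10k documents"
const COMPRESSION_LEVEL: i32 = 19; // chosen explicitly instead of zstd's default
const MAX_DICT_SIZE: usize = 64 * 1024; // maximum size of the trained dictionary

/// Train a dictionary on the first documents, then compress each document,
/// keeping its original length next to the compressed bytes.
fn compress_documents(documents: &[Vec<u8>]) -> io::Result<(Vec<u8>, Vec<(u32, Vec<u8>)>)> {
    let samples = &documents[..documents.len().min(SAMPLE_SIZE)];
    let dictionary = zstd::dict::from_samples(samples, MAX_DICT_SIZE)?;

    let mut compressor =
        zstd::bulk::Compressor::with_dictionary(COMPRESSION_LEVEL, &dictionary)?;
    let mut compressed = Vec::with_capacity(documents.len());
    for doc in documents {
        // Storing the uncompressed length means decompression knows the
        // output size up front and never has to guess a buffer size.
        compressed.push((doc.len() as u32, compressor.compress(doc)?));
    }
    Ok((dictionary, compressed))
}

/// Decompress one document into a buffer of exactly the recorded size.
fn decompress_document(
    dictionary: &[u8],
    original_len: u32,
    compressed: &[u8],
) -> io::Result<Vec<u8>> {
    let mut decompressor = zstd::bulk::Decompressor::with_dictionary(dictionary)?;
    decompressor.decompress(compressed, original_len as usize)
}
```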
meili-bors[bot]
2099b4f0dd Merge #4786
4786: Update dependencies r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes #4753

## What does this PR do?
- Update all dependencies except rustls
- [x] Release charabia
- [x] Update charabia
- [x] Double check that the docker build works after updating charabia



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-10 13:23:54 +00:00
Tamo
0d5bc4578e Update CONTRIBUTING.md
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-10 15:21:43 +02:00
Tamo
8f60ad0a23 apply review comments 2024-07-10 14:38:19 +02:00
Tamo
9570139eeb update contributing.md with the new lindera update 2024-07-10 14:28:43 +02:00
Clément Renault
9d6885793e Upgrade dependencies 2024-07-10 13:46:24 +02:00
Clément Renault
98cd6a865c Update dependencies after removing useless ones 2024-07-10 13:37:24 +02:00
Clément Renault
5f4530ce57 Remove more unused dependencies 2024-07-10 13:36:34 +02:00
Tamo
0ecaf861fa fix ci 2024-07-10 10:06:59 +02:00
Tamo
4d5005b01a make clippy happy 2024-07-10 10:06:59 +02:00
Tamo
952e742321 update charabia 2024-07-09 23:41:29 +02:00
Tamo
ee9aa63044 update rust version 2024-07-09 23:41:29 +02:00
Tamo
43db4f4242 update fxprof_processed_profile 2024-07-09 23:41:29 +02:00
Tamo
9feba5028d update byte-unit 2024-07-09 23:41:29 +02:00
hanbings
0a40a98bb6 Make milli use edition 2021 (#4770)
* Make milli use edition 2021

* Add lifetime annotations to milli.

* Run cargo fmt
2024-07-09 17:25:39 +02:00
meili-bors[bot]
aac15f6719 Merge #4781
4781: Correct apk usages in Dockerfile r=curquiza a=PeterDaveHello


# Pull Request

## Related issue

No issue was created because this is very trivial.

## What does this PR do?

Correct apk usages in Dockerfile

There is no need to use apk with `update` or `--update-cache` when `--no-cache` is used: `--no-cache` already makes sure the package index is up to date and leaves no temporary files behind.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Peter Dave Hello <hsu@peterdavehello.org>
2024-07-09 08:51:29 +00:00
meili-bors[bot]
53a359286c Merge #4785
4785: Bump zerovec from 0.10.1 to 0.10.4 r=dureuill a=dependabot[bot]

Bumps [zerovec](https://github.com/unicode-org/icu4x) from 0.10.1 to 0.10.4.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/unicode-org/icu4x/blob/main/CHANGELOG.md">zerovec's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>icu4x 1.5.x</h2>
<ul>
<li><code>icu_calendar</code>
<ul>
<li>(1.5.1) Fix Japanese calendar Gregorian era year 0 (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4968">unicode-org/icu4x#4968</a>)</li>
<li>(1.5.2) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
<li><code>icu_datetime</code>
<ul>
<li>(1.5.1) Fix incorrect assertion in week-of-year formatting (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4977">unicode-org/icu4x#4977</a>)</li>
</ul>
</li>
<li><code>icu_casemap</code>
<ul>
<li>(1.5.1) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
<li><code>icu_capi</code>
<ul>
<li>(1.5.1) Fix situations in which <code>libc_alloc</code> is specified as a dependency (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5119">unicode-org/icu4x#5119</a>)</li>
</ul>
</li>
<li><code>icu_properties</code>
<ul>
<li>(1.5.1) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
<li><code>zerovec</code>
<ul>
<li>(0.10.3) Fix size regression by making <code>twox-hash</code> dep <code>no_std</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5007">unicode-org/icu4x#5007</a>)</li>
<li>(0.10.3) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
<li>(0.10.4) Enforce C,packed on OptionVarULE (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5143">unicode-org/icu4x#5143</a>)</li>
</ul>
</li>
<li><code>zerovec_derive</code>
<ul>
<li>(0.10.3) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
</ul>
<h2>icu4x 1.5 (May 28, 2024)</h2>
<ul>
<li>Components
<ul>
<li>General
<ul>
<li>Compiled data updated to CLDR 45 and ICU 75 (unicode-org#4782)</li>
</ul>
</li>
<li><code>icu_calendar</code>
<ul>
<li>Fix duration offsetting and negative-year bugs in several calendars including Chinese, Islamic, Coptic, Ethiopian, and Hebrew (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4904">#4904</a>)</li>
<li>Improved approximation for Persian calendrical calculations (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4713">unicode-org/icu4x#4713</a>)</li>
<li>Fix weekday calculations in negative ISO years (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4894">unicode-org/icu4x#4894</a>)</li>
<li>New <code>DateTime::local_unix_epoch()</code> convenience constructor (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4479">unicode-org/icu4x#4479</a>)</li>
<li>Add caching for all islamic calendars (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4785">unicode-org/icu4x#4785</a>)</li>
<li>Add caching for chinese based calendars (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4411">unicode-org/icu4x#4411</a>, <a href="https://redirect.github.com/unicode-org/icu4x/pull/4468">unicode-org/icu4x#4468</a>)</li>
<li>Switch Hebrew to faster keviyah/Four Gates calculations (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4504">unicode-org/icu4x#4504</a>)</li>
<li>Replace 2820-year with 33-year cycle in Persian calendar, with override table (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4770">unicode-org/icu4x#4770</a>, <a href="https://redirect.github.com/unicode-org/icu4x/pull/4775">unicode-org/icu4x#4775</a>, <a href="https://redirect.github.com/unicode-org/icu4x/pull/4796">unicode-org/icu4x#4796</a>)</li>
<li>Fix bugs in several calendars with new continuity test (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4904">unicode-org/icu4x#4904</a>)</li>
<li>Fix year 2319 in the Chinese calendar (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4929">unicode-org/icu4x#4929</a>)</li>
<li>Fix ISO weekday calculations in negative years (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4894">unicode-org/icu4x#4894</a>)</li>
</ul>
</li>
<li><code>icu_collections</code>
<ul>
<li>Switch from <code>wasmer</code> to <code>wasmi</code> in <code>icu_codepointtrie_builder</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4621">unicode-org/icu4x#4621</a>)</li>
</ul>
</li>
<li><code>icu_normalizer</code>
<ul>
<li>Make UTS 46 normalization non-experimental (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4712">#4712</a>)</li>
</ul>
</li>
<li><code>icu_datetime</code>
<ul>
<li>Experimental &quot;neo&quot; datetime formatter with support for semantic skeleta and fine-grained data slicing (<a href="https://redirect.github.com/unicode-org/icu4x/issues/1317">unicode-org/icu4x#1317</a>, <a href="https://redirect.github.com/unicode-org/icu4x/issues/3347">unicode-org/icu4x#3347</a>)</li>
<li><code>Writeable</code> and <code>Display</code> implementations now don't return <code>fmt::Error</code>s that don't originate from the <code>fmt::Write</code> anymore (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4732">#4732</a>, <a href="https://redirect.github.com/unicode-org/icu4x/issues/4851">#4851</a>, <a href="https://redirect.github.com/unicode-org/icu4x/issues/4863">#4863</a>)</li>
<li>Make <code>CldrCalendar</code> trait sealed except with experimental feature (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4392">unicode-org/icu4x#4392</a>)</li>
<li><code>FormattedDateTime</code> and <code>FormattedZonedDateTime</code> now implement <code>Clone</code> and <code>Copy</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4476">unicode-org/icu4x#4476</a>)</li>
</ul>
</li>
<li><code>icu_experimental</code></li>
</ul>
</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/unicode-org/icu4x/commits/ind/zerovec@0.10.4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=zerovec&package-manager=cargo&previous-version=0.10.1&new-version=0.10.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-09 08:02:04 +00:00
Tamo
4aa7d386d8 remove http and uses actix_web::http instead 2024-07-08 21:17:10 +02:00
dependabot[bot]
84fabb9314 Bump zerovec from 0.10.1 to 0.10.4
Bumps [zerovec](https://github.com/unicode-org/icu4x) from 0.10.1 to 0.10.4.
- [Release notes](https://github.com/unicode-org/icu4x/releases)
- [Changelog](https://github.com/unicode-org/icu4x/blob/main/CHANGELOG.md)
- [Commits](https://github.com/unicode-org/icu4x/commits/ind/zerovec@0.10.4)

---
updated-dependencies:
- dependency-name: zerovec
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-07-08 18:38:44 +00:00
Tamo
cd46ebd6b5 remove insta deprecating 2024-07-08 18:38:05 +02:00
Tamo
ef8d9a20f8 update actix-web 2024-07-08 18:36:32 +02:00
Tamo
6afa578688 update most incompatible dependencies 2024-07-08 18:31:15 +02:00
Tamo
300bdfc2a7 update most dependencies 2024-07-08 18:09:12 +02:00
Peter Dave Hello
e7e74c0099 Correct apk usages in Dockerfile
There is no need to use apk with `update` or `--update-cache` when `--no-cache` is used: `--no-cache` already makes sure the package index is up to date and leaves no temporary files behind.
2024-07-08 21:53:58 +08:00
65 changed files with 1996 additions and 1263 deletions

View File

@@ -167,7 +167,7 @@ jobs:
- uses: helix-editor/rust-toolchain@v1
with:
profile: minimal
toolchain: nightly-2024-06-25
toolchain: nightly-2024-07-09
override: true
components: rustfmt
- name: Cache dependencies

View File

@@ -52,6 +52,16 @@ cargo test
This command will be triggered to each PR as a requirement for merging it.
#### Faster build
You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
It'll store some built artifacts in the directory of your choice.
We recommend using the standard `$HOME/.cache/lindera` directory:
```sh
export LINDERA_CACHE=$HOME/.cache/lindera
```
#### Snapshot-based tests
We are using [insta](https://insta.rs) to perform snapshot-based testing.
@@ -63,7 +73,7 @@ Furthermore, we provide some macros on top of insta, notably a way to use snapsh
To effectively debug snapshot-based hashes, we recommend you export the `MEILI_TEST_FULL_SNAPS` environment variable so that snapshot are fully created locally:
```
```sh
export MEILI_TEST_FULL_SNAPS=true # add this to your .bashrc, .zshrc, ...
```
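
(Aside: the snapshot tests mentioned above rely on insta's inline snapshots, where the expected output lives in the source file after an `@` sign and is refreshed with the `cargo insta review` tool. The sketch below is a made-up, self-contained example of that form, not code from this repository; `assert_display_snapshot!` was the older, now-deprecated name for essentially the same Display-based assertion, which is why the test diffs further down mechanically rename it to `assert_snapshot!`.)

```rust
// A hypothetical test using insta's inline snapshot syntax (insta as a
// dev-dependency). When the value no longer matches, the test fails and a
// pending snapshot is recorded; `cargo insta review` accepts or rejects it
// and rewrites the string after `@` in place.
#[test]
fn renders_greeting() {
    let greeting = format!("Hello, {}!", "Meilisearch");
    insta::assert_snapshot!(greeting, @"Hello, Meilisearch!");
}
```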

Cargo.lock (generated, 1541 lines changed)

File diff suppressed because it is too large.

View File

@@ -1,7 +1,7 @@
# Compile
FROM rust:1.75.0-alpine3.18 AS compiler
FROM rust:1.79.0-alpine3.20 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
RUN apk add -q --no-cache build-base openssl-dev
WORKDIR /
@@ -20,13 +20,12 @@ RUN set -eux; \
cargo build --release -p meilisearch -p meilitool
# Run
FROM alpine:3.16
FROM alpine:3.20
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
RUN apk add -q --no-cache libgcc tini curl
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.

View File

@@ -11,24 +11,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
anyhow = "1.0.86"
csv = "1.3.0"
milli = { path = "../milli" }
mimalloc = { version = "0.1.39", default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
mimalloc = { version = "0.1.43", default-features = false }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.2"
roaring = "0.10.6"
[build-dependencies]
anyhow = "1.0.79"
bytes = "1.5.0"
anyhow = "1.0.86"
bytes = "1.6.0"
convert_case = "0.6.0"
flate2 = "1.0.28"
reqwest = { version = "0.11.23", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.30"
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.34", features = ["parsing"] }
time = { version = "0.3.36", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.80"
vergen-git2 = "1.0.0-beta.2"
anyhow = "1.0.86"
vergen-git2 = "1.0.0"

View File

@@ -11,22 +11,21 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
flate2 = "1.0.28"
http = "0.2.11"
meilisearch-auth = { path = "../meilisearch-auth" }
anyhow = "1.0.86"
flate2 = "1.0.30"
http = "1.1.0"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.2"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
regex = "1.10.5"
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@@ -425,7 +425,7 @@ pub(crate) mod test {
let mut dump = v2::V2Reader::open(dir).unwrap().to_v3();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -358,7 +358,7 @@ pub(crate) mod test {
let mut dump = v3::V3Reader::open(dir).unwrap().to_v4();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -394,8 +394,8 @@ pub(crate) mod test {
let mut dump = v4::V4Reader::open(dir).unwrap().to_v5();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -442,8 +442,8 @@ pub(crate) mod test {
let mut dump = v5::V5Reader::open(dir).unwrap().to_v6();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -216,7 +216,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// tasks
@@ -337,7 +337,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// tasks
@@ -383,8 +383,8 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
@@ -463,8 +463,8 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
@@ -540,7 +540,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
@@ -633,7 +633,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
@@ -726,7 +726,7 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks

View File

@@ -252,7 +252,7 @@ pub(crate) mod test {
let mut dump = V2Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
@@ -349,7 +349,7 @@ pub(crate) mod test {
let mut dump = V2Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -267,7 +267,7 @@ pub(crate) mod test {
let mut dump = V3Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -262,8 +262,8 @@ pub(crate) mod test {
let mut dump = V4Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -299,8 +299,8 @@ pub(crate) mod test {
let mut dump = V5Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@@ -281,7 +281,7 @@ pub(crate) mod test {
let dump_path = dump.path();
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###"
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
.
├---- indexes/
│ └---- doggos/

View File

@@ -11,10 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.9.0"
thiserror = "1.0.56"
tempfile = "3.10.1"
thiserror = "1.0.61"
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]
faux = "0.1.10"
uuid = { version = "1.10.0", features = ["serde", "v4"] }

View File

@@ -14,7 +14,7 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.3"
unescaper = "0.1.5"
[dev-dependencies]
insta = "1.34.0"
insta = "1.39.0"

View File

@@ -564,121 +564,121 @@ pub mod tests {
#[test]
fn parse_escaped() {
insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}
#[test]
fn parse() {
// Test equal
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
insta::assert_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
insta::assert_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
insta::assert_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
insta::assert_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
insta::assert_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
insta::assert_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
insta::assert_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
insta::assert_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
// Test IN
insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]");
insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
insta::assert_snapshot!(p("colour IN[]"), @"{colour} IN[]");
insta::assert_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
insta::assert_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
insta::assert_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
// Test IN + OR/AND/()
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
insta::assert_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
// Test whitespace start/end
insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}");
insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
insta::assert_snapshot!(p(" colour = green "), @"{colour} = {green}");
insta::assert_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
insta::assert_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
insta::assert_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
insta::assert_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
// Test conditions
insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
insta::assert_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
insta::assert_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
insta::assert_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
insta::assert_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
insta::assert_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
// Test NOT
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
insta::assert_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
// Test NULL + NOT NULL
insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
insta::assert_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
insta::assert_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
// Test EMPTY + NOT EMPTY
insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
// Test EXISTS + NOT EXITS
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
// Test nested NOT
insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
// Test geo radius
insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
// Test geo bounding box
insta::assert_display_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_display_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_display_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
// Test OR + AND
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
insta::assert_display_snapshot!(
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
insta::assert_snapshot!(
p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"),
@"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]"
);
// Test parentheses
insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
insta::assert_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
insta::assert_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
// Test recursion
// This is the most that is allowed
insta::assert_display_snapshot!(
insta::assert_snapshot!(
p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"),
@"{x} = {1}"
);
insta::assert_display_snapshot!(
insta::assert_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@"NOT ({x} = {1})"
);
// Confusing keywords
insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
insta::assert_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
}
#[test]
@@ -689,182 +689,182 @@ pub mod tests {
Fc::parse(s).unwrap_err().to_string()
}
insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###"
insta::assert_snapshot!(p("channel = Ponce = 12"), @r###"
Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule.
17:21 channel = Ponce = 12
"###);
insta::assert_display_snapshot!(p("channel = "), @r###"
insta::assert_snapshot!(p("channel = "), @r###"
Was expecting a value but instead got nothing.
14:14 channel =
"###);
insta::assert_display_snapshot!(p("channel = 🐻"), @r###"
insta::assert_snapshot!(p("channel = 🐻"), @r###"
Was expecting a value but instead got `🐻`.
11:12 channel = 🐻
"###);
insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
insta::assert_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
Was expecting a value but instead got `🐻`.
11:12 channel = 🐻 AND followers < 100
"###);
insta::assert_display_snapshot!(p("'OR'"), @r###"
insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);
insta::assert_display_snapshot!(p("OR"), @r###"
insta::assert_snapshot!(p("OR"), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
1:3 OR
"###);
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);
insta::assert_display_snapshot!(p("_geoRadius"), @r###"
insta::assert_snapshot!(p("_geoRadius"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:11 _geoRadius
"###);
insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###"
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:16 _geoRadius = 12
"###);
insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:16 _geoBoundingBox
"###);
insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:21 _geoBoundingBox = 12
"###);
insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:26 _geoBoundingBox(1.0, 1.0)
"###);
insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:22 _geoPoint(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
insta::assert_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:34 position <= _geoPoint(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
insta::assert_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:25 _geoDistance(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
insta::assert_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:37 position <= _geoDistance(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
insta::assert_snapshot!(p("_geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:17 _geo(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
insta::assert_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:29 position <= _geo(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
insta::assert_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
The `_geoRadius` filter is an operation and can't be used as a value.
13:35 position <= _geoRadius(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("channel = 'ponce"), @r###"
insta::assert_snapshot!(p("channel = 'ponce"), @r###"
Expression `\'ponce` is missing the following closing delimiter: `'`.
11:17 channel = 'ponce
"###);
insta::assert_display_snapshot!(p("channel = \"ponce"), @r###"
insta::assert_snapshot!(p("channel = \"ponce"), @r###"
Expression `\"ponce` is missing the following closing delimiter: `"`.
11:17 channel = "ponce
"###);
insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
insta::assert_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
Expression `(followers >= 1000` is missing the following closing delimiter: `)`.
17:35 channel = mv OR (followers >= 1000
"###);
insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
insta::assert_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule.
34:35 channel = mv OR followers >= 1000)
"###);
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
17:44 channel = ponce ORdog != 'bernese mountain'
"###);
insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###"
insta::assert_snapshot!(p("colour IN blue, green]"), @r###"
Expected `[` after `IN` keyword.
11:23 colour IN blue, green]
"###);
insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
insta::assert_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`.
32:36 colour IN [blue, green, 'blue' > 2]
"###);
insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###"
insta::assert_snapshot!(p("colour IN [blue, green, AND]"), @r###"
Expected only comma-separated field names inside `IN[..]` but instead found `AND]`.
25:29 colour IN [blue, green, AND]
"###);
insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###"
insta::assert_snapshot!(p("colour IN [blue, green"), @r###"
Expected matching `]` after the list of field names given to `IN[`
23:23 colour IN [blue, green
"###);
insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###"
insta::assert_snapshot!(p("colour IN ['blue, green"), @r###"
Expression `\'blue, green` is missing the following closing delimiter: `'`.
12:24 colour IN ['blue, green
"###);
insta::assert_display_snapshot!(p("x = EXISTS"), @r###"
insta::assert_snapshot!(p("x = EXISTS"), @r###"
Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes.
5:11 x = EXISTS
"###);
insta::assert_display_snapshot!(p("AND = 8"), @r###"
insta::assert_snapshot!(p("AND = 8"), @r###"
Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes.
1:4 AND = 8
"###);
insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
insta::assert_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))
"###);
insta::assert_display_snapshot!(
insta::assert_snapshot!(
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
@r###"
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
@@ -872,40 +872,40 @@ pub mod tests {
"###
);
insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);
insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);

View File

@@ -12,9 +12,9 @@ license.workspace = true
[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
clap = { version = "4.4.17", features = ["derive"] }
fastrand = "2.0.1"
clap = { version = "4.5.9", features = ["derive"] }
fastrand = "2.1.0"
milli = { path = "../milli" }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
tempfile = "3.9.0"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tempfile = "3.10.1"

View File

@@ -110,7 +110,7 @@ fn main() {
// after executing a batch we check if the database is corrupted
let res = index.search(&wtxn).execute().unwrap();
index.documents(&wtxn, res.documents_ids).unwrap();
index.compressed_documents(&wtxn, res.documents_ids).unwrap();
progression.fetch_add(1, Ordering::Relaxed);
}
wtxn.abort();

View File

@@ -11,38 +11,38 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
anyhow = "1.0.86"
bincode = "1.3.3"
csv = "1.3.0"
derive_builder = "0.12.0"
derive_builder = "0.20.0"
dump = { path = "../dump" }
enum-iterator = "1.5.0"
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.30"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
rayon = "1.8.1"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.7"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.4.0"
big_s = "1.0.2"
crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] }
insta = { version = "1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

View File

@@ -908,16 +908,22 @@ impl IndexScheduler {
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let dictionary = index.document_decompression_dictionary(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(&rtxn)?;
let mut buffer = Vec::new();
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
for ret in index.all_compressed_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (id, doc) = ret?;
let (id, compressed) = ret?;
let doc = compressed.decompress_with_optional_dictionary(
&mut buffer,
dictionary.as_ref(),
)?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
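The decompress-then-convert pattern introduced here recurs throughout the test changes below: fetch the optional decompression dictionary once, reuse a single scratch buffer, and only then turn the obkv into JSON. A hypothetical helper sketching that pattern, using only the `all_compressed_documents`, `document_decompression_dictionary`, and `decompress_with_optional_dictionary` APIs added by this changeset:

```rust
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::{self, Index};
use serde_json::Value;

// Hypothetical helper name; the body mirrors the dump loop above.
fn dump_compressed_documents(index: &Index, rtxn: &RoTxn) -> milli::Result<Vec<Value>> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
    let dictionary = index.document_decompression_dictionary(rtxn)?;
    let mut buffer = Vec::new();
    let mut documents = Vec::new();
    for ret in index.all_compressed_documents(rtxn)? {
        let (_docid, compressed) = ret?;
        // One scratch buffer is reused for every document to avoid reallocations.
        let doc = compressed
            .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
        documents.push(Value::Object(milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?));
    }
    Ok(documents)
}
```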

View File

@@ -2465,12 +2465,20 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -2525,12 +2533,20 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -2904,12 +2920,20 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -2955,12 +2979,20 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3011,12 +3043,20 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3129,12 +3169,20 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3184,12 +3232,20 @@ mod tests {
// has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3898,12 +3954,20 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -3969,12 +4033,20 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4037,12 +4109,20 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4098,12 +4178,20 @@ mod tests {
// Has everything being pushed successfully in milli?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4159,6 +4247,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");
@@ -4166,9 +4256,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4220,6 +4316,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");
@@ -4227,9 +4325,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4303,6 +4407,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"id");
@@ -4310,9 +4416,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4389,6 +4501,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"paw");
@@ -4396,9 +4510,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -4468,6 +4588,8 @@ mod tests {
// Is the primary key still what we expect?
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let primary_key = index.primary_key(&rtxn).unwrap().unwrap();
snapshot!(primary_key, @"doggoid");
@@ -4475,9 +4597,15 @@ mod tests {
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
@@ -5120,6 +5248,8 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
// Ensure the document has been inserted into the relevant bitmap
let configs = index.embedding_configs(&rtxn).unwrap();
@@ -5139,8 +5269,12 @@ mod tests {
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),
@@ -5194,6 +5328,8 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
// Ensure the document has been inserted into the relevant bitmap
let configs = index.embedding_configs(&rtxn).unwrap();
@@ -5216,8 +5352,12 @@ mod tests {
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, std::iter::once(0)).unwrap().remove(0);
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(
&[
fields_ids_map.id("doggo").unwrap(),
@@ -5309,12 +5449,20 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push");
@@ -5348,12 +5496,20 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
// all the vectors linked to the newly specified embedder have been removed
// Only the unknown embedders stay in the document DB
@@ -5456,9 +5612,15 @@ mod tests {
// the document with the id 3 should have its original embedding updated
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let doc = index.documents(&rtxn, Some(docid)).unwrap()[0];
let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap();
let (_id, compressed_doc) =
index.compressed_documents(&rtxn, Some(docid)).unwrap().remove(0);
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(&field_ids, &field_ids_map, doc).unwrap();
snapshot!(json_string!(doc), @r###"
{
"id": 3,
@@ -5570,12 +5732,20 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
@@ -5610,12 +5780,20 @@ mod tests {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
let conf = index.embedding_configs(&rtxn).unwrap();
@@ -5726,12 +5904,20 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###);
}
@@ -5761,12 +5947,20 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
}
@@ -5794,12 +5988,20 @@ mod tests {
{
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.all_compressed_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.map(|ret| {
let (_id, compressed_doc) = ret.unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
})
.collect::<Vec<_>>();
// FIXME: redaction

View File

@@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
insta = { version = "^1.34.0", features = ["json", "redactions"] }
insta = { version = "^1.39.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.19"

View File

@@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true
[dependencies]
base64 = "0.21.7"
enum-iterator = "1.5.0"
base64 = "0.22.1"
enum-iterator = "2.1.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.6.1", features = ["serde", "v4"] }
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] }

View File

@@ -11,36 +11,36 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.6.0", default-features = false }
anyhow = "1.0.79"
actix-web = { version = "4.8.0", default-features = false }
anyhow = "1.0.86"
convert_case = "0.6.0"
csv = "1.3.0"
deserr = { version = "0.6.1", features = ["actix-web"] }
either = { version = "1.9.0", features = ["serde"] }
enum-iterator = "1.5.0"
deserr = { version = "0.6.2", features = ["actix-web"] }
either = { version = "1.13.0", features = ["serde"] }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.30"
fst = "0.4.7"
memmap2 = "0.7.1"
memmap2 = "0.9.4"
milli = { path = "../milli" }
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.111"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
serde_json = "1.0.120"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.35"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
tokio = "1.38"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
[dev-dependencies]
insta = "1.34.0"
insta = "1.39.0"
meili-snap = { path = "../meili-snap" }
[features]

View File

@@ -12,7 +12,7 @@ pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, Index};
pub use milli::{heed, zstd, Index};
use uuid::Uuid;
pub use versioning::VERSION_FILE_NAME;
pub use {milli, serde_cs};
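Re-exporting `zstd` next to `heed` lets downstream crates name the dictionary types without taking a direct dependency on the crate. A minimal sketch of a hypothetical consumer:

```rust
// Hypothetical downstream usage enabled by the re-export above.
use meilisearch_types::zstd::dict::DecoderDictionary;

fn accepts_dictionary(_dict: Option<&DecoderDictionary<'_>>) {}
```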

View File

@@ -14,104 +14,99 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.7.0", default-features = false, features = [
actix-http = { version = "3.8.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_21",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.6.0", default-features = false, features = [
actix-web = { version = "4.8.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_21",
] }
actix-web-static-files = { version = "4.0.1", optional = true }
anyhow = { version = "1.0.79", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.77"
bstr = "1.9.0"
byte-unit = { version = "4.0.19", default-features = false, features = [
anyhow = { version = "1.0.86", features = ["backtrace"] }
async-trait = "0.1.81"
bstr = "1.9.1"
byte-unit = { version = "5.1.4", default-features = false, features = [
"std",
"byte",
"serde",
] }
bytes = "1.5.0"
clap = { version = "4.4.17", features = ["derive", "env"] }
crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] }
bytes = "1.6.0"
clap = { version = "4.5.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.13"
deserr = { version = "0.6.2", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.9.0"
either = "1.13.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
flate2 = "1.0.30"
fst = "0.4.7"
futures = "0.3.30"
futures-util = "0.3.30"
http = "0.2.11"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.1.0", features = ["serde"] }
is-terminal = "0.4.10"
itertools = "0.11.0"
jsonwebtoken = "9.2.0"
lazy_static = "1.4.0"
indexmap = { version = "2.2.6", features = ["serde"] }
is-terminal = "0.4.12"
itertools = "0.13.0"
jsonwebtoken = "9.3.0"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.1"
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.0"
parking_lot = "0.12.1"
ordered-float = "4.2.1"
parking_lot = "0.12.3"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.13"
pin-project-lite = "0.2.14"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
prometheus = { version = "0.13.4", features = ["process"] }
rand = "0.8.5"
rayon = "1.8.0"
regex = "1.10.2"
reqwest = { version = "0.11.23", features = [
rayon = "1.10.0"
regex = "1.10.5"
reqwest = { version = "0.12.5", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = "0.21.12"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
rustls-pemfile = "1.0.4"
segment = { version = "0.2.4", optional = true }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
sha2 = "0.10.8"
siphasher = "1.0.0"
siphasher = "1.0.1"
slice-group-by = "0.3.1"
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.30.5"
tar = "0.4.40"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
static-files = { version = "0.2.4", optional = true }
sysinfo = "0.30.13"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.35.1", features = ["full"] }
tokio-stream = "0.1.14"
toml = "0.8.8"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
walkdir = "2.4.0"
tokio = { version = "1.38.0", features = ["full"] }
toml = "0.8.14"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] }
url = { version = "2.5.2", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.10"
tracing-actix-web = "0.7.11"
build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies]
actix-rt = "2.9.0"
assert-json-diff = "2.0.2"
actix-rt = "2.10.0"
brotli = "6.0.0"
insta = "1.34.0"
insta = "1.39.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
@@ -120,23 +115,22 @@ urlencoding = "2.1.3"
yaup = "0.3.1"
[build-dependencies]
anyhow = { version = "1.0.79", optional = true }
cargo_toml = { version = "0.18.0", optional = true }
anyhow = { version = "1.0.86", optional = true }
cargo_toml = { version = "0.20.3", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.23", features = [
reqwest = { version = "0.12.5", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
zip = { version = "0.6.6", optional = true }
static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.10.1", optional = true }
zip = { version = "2.1.3", default-features = false, features = ["deflate"], optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
"anyhow",
"cargo_toml",

View File

@@ -5,10 +5,9 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
use actix_web::http::header::USER_AGENT;
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::InstanceUid;

View File

@@ -1,6 +1,6 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::Byte;
use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
@@ -33,7 +33,7 @@ pub enum MeilisearchHttpError {
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()

View File

@@ -15,6 +15,7 @@ use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::num::NonZeroUsize;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::thread::{self, available_parallelism};
use std::time::Duration;
@@ -23,13 +24,13 @@ use actix_cors::Cors;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError;
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
@@ -167,7 +168,7 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.get(USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
@@ -300,15 +301,15 @@ fn open_or_create_database_unchecked(
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
})?)
@@ -476,7 +477,7 @@ pub fn configure_data(
opt.experimental_search_queue_size,
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
);
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)

View File

@@ -9,7 +9,7 @@ use std::str::FromStr;
use std::sync::Arc;
use std::{env, fmt, fs};
use byte_unit::{Byte, ByteError};
use byte_unit::{Byte, ParseError, UnitType};
use clap::Parser;
use meilisearch_types::features::InstanceTogglableFeatures;
use meilisearch_types::milli::update::IndexerConfig;
@@ -674,7 +674,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
Ok(Self {
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
@@ -688,23 +688,25 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
type Err = ByteError;
type Err = ParseError;
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
fn from_str(s: &str) -> Result<MaxMemory, Self::Err> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes))
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64))
}
}
impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
Some(memory) => {
write!(f, "{}", memory.get_appropriate_unit(UnitType::Binary))
}
None => f.write_str("unknown"),
}
}
@@ -844,11 +846,11 @@ fn default_env() -> String {
}
fn default_max_index_size() -> Byte {
Byte::from_bytes(INDEX_SIZE)
Byte::from_u64(INDEX_SIZE)
}
fn default_max_task_db_size() -> Byte {
Byte::from_bytes(TASK_DB_SIZE)
Byte::from_u64(TASK_DB_SIZE)
}
fn default_http_payload_size_limit() -> Byte {
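The `byte_unit` hunks in this file and the ones above follow the crate's 4 → 5 migration: `from_bytes`/`get_bytes` become `from_u64`/`as_u64`, `from_unit(f64, ByteUnit)` becomes `from_u64_with_unit(u64, Unit)`, `get_appropriate_unit` now takes a `UnitType`, and the parse error type is `ParseError`. A minimal sketch of the new calls (the formatted output is inferred from the payload-size snapshot change further below):

```rust
use byte_unit::{Byte, Unit, UnitType};

fn main() {
    // 10 MiB expressed with the byte-unit 5.x constructors.
    let limit = Byte::from_u64_with_unit(10, Unit::MiB).unwrap();
    assert_eq!(limit.as_u64(), 10 * 1024 * 1024);

    // Binary formatting now prints "10 MiB" rather than "10.00 MiB", which is
    // why the payload-too-large snapshot changes later in this compare.
    println!("{}", limit.get_appropriate_unit(UnitType::Binary));

    // Parsing still goes through `FromStr`; only the error type changed.
    let growth: Byte = "10GiB".parse().unwrap();
    assert_eq!(growth.as_u64(), 10 * 1024 * 1024 * 1024);
}
```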

View File

@@ -603,42 +603,51 @@ fn some_documents<'a, 't: 'a>(
retrieve_vectors: RetrieveVectors,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let dictionary = index.document_decompression_dictionary(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
let mut buffer = Vec::new();
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
);
Ok(index.iter_compressed_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(
|(key, compressed_document)| -> Result<_, ResponseError> {
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
match retrieve_vectors {
RetrieveVectors::Ignore => {}
RetrieveVectors::Hide => {
document.remove("_vectors");
}
RetrieveVectors::Retrieve => {
// Clippy is simply wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, vector) in index.embeddings(rtxn, key)? {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_provided.contains(key));
let embeddings = ExplicitVectors {
embeddings: Some(vector.into()),
regenerate: !user_provided,
};
vectors.insert(
name,
serde_json::to_value(embeddings)
.map_err(MeilisearchHttpError::from)?,
);
}
document.insert("_vectors".into(), vectors.into());
}
document.insert("_vectors".into(), vectors.into());
}
}
Ok(document)
})
Ok(document)
},
)
}))
}

View File

@@ -1123,10 +1123,16 @@ fn make_hits(
formatter_builder.crop_marker(format.crop_marker);
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
let decompression_dictionary = index.document_decompression_dictionary(rtxn)?;
let mut buffer = Vec::new();
let mut documents = Vec::new();
let embedding_configs = index.embedding_configs(rtxn)?;
let documents_iter = index.documents(rtxn, documents_ids)?;
for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
let documents_iter = index.compressed_documents(rtxn, documents_ids)?;
for ((id, compressed), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
let obkv = compressed
.decompress_with_optional_dictionary(&mut buffer, decompression_dictionary.as_ref())
// TODO use a better error?
.map_err(|e| MeilisearchHttpError::HeedError(e.into()))?;
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
@@ -1150,6 +1156,8 @@ fn make_hits(
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
if retrieve_vectors == RetrieveVectors::Retrieve {
// Clippy is wrong
#[allow(clippy::manual_unwrap_or_default)]
let mut vectors = match document.remove("_vectors") {
Some(Value::Object(map)) => map,
_ => Default::default(),

View File

@@ -1,5 +1,5 @@
use actix_web::http::StatusCode;
use actix_web::test;
use http::StatusCode;
use jsonwebtoken::{EncodingKey, Header};
use meili_snap::*;
use uuid::Uuid;

View File

@@ -6,7 +6,7 @@ use std::time::Duration;
use actix_http::body::MessageBody;
use actix_web::dev::ServiceResponse;
use actix_web::http::StatusCode;
use byte_unit::{Byte, ByteUnit};
use byte_unit::{Byte, Unit};
use clap::Parser;
use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
@@ -231,9 +231,9 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
env: "development".to_owned(),
#[cfg(feature = "analytics")]
no_analytics: true,
max_index_size: Byte::from_unit(100.0, ByteUnit::MiB).unwrap(),
max_task_db_size: Byte::from_unit(1.0, ByteUnit::GiB).unwrap(),
http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(),
max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(),
http_payload_size_limit: Byte::from_u64_with_unit(10, Unit::MiB).unwrap(),
snapshot_dir: ".".into(),
indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context.

View File

@@ -2274,7 +2274,7 @@ async fn error_add_documents_payload_size() {
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.",
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10 MiB.",
"code": "payload_too_large",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#payload_too_large"

View File

@@ -1,5 +1,5 @@
use actix_web::http::header::ACCEPT_ENCODING;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::*;
use urlencoding::encode as urlencode;

View File

@@ -1,6 +1,5 @@
use actix_web::http::header::ContentType;
use actix_web::http::header::{ContentType, ACCEPT_ENCODING};
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;

View File

@@ -9,11 +9,11 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.79"
clap = { version = "4.4.17", features = ["derive"] }
anyhow = "1.0.86"
clap = { version = "4.5.9", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.31", features = ["formatting"] }
uuid = { version = "1.6.1", features = ["v4"], default-features = false }
time = { version = "0.3.36", features = ["formatting"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }

View File

@@ -260,6 +260,7 @@ fn export_a_dump(
// 4. Dump the indexes
let mut count = 0;
let mut buffer = Vec::new();
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string());
@@ -268,6 +269,7 @@ fn export_a_dump(
})?;
let rtxn = index.read_txn()?;
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
@@ -280,8 +282,11 @@ fn export_a_dump(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 4.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
for ret in index.all_compressed_documents(&rtxn)? {
let (_id, compressed_doc) = ret?;
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}

View File

@@ -14,81 +14,81 @@ license.workspace = true
[dependencies]
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.11", default-features = false }
charabia = { version = "0.8.12", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"
either = { version = "1.9.0", features = ["serde"] }
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.6", default-features = false, features = [
grenad = { version = "0.4.7", default-features = false, features = [
"rayon",
"tempfile",
] }
heed = { version = "0.20.1", default-features = false, features = [
heed = { version = "0.20.3", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
] }
indexmap = { version = "2.1.0", features = ["serde"] }
indexmap = { version = "2.2.6", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.7.1"
obkv = "0.2.1"
zstd = { version = "0.13.1", features = ["zdict_builder", "experimental"] }
memmap2 = "0.9.4"
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.0"
rand_pcg = { version = "0.3.1", features = ["serde1"] }
rayon = "1.8.0"
roaring = { version = "0.10.2", features = ["serde"] }
rstar = { version = "0.11.0", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.12.0"
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.9.0"
thiserror = "1.0.56"
time = { version = "0.3.31", features = [
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.6.1", features = ["v4"] }
uuid = { version = "1.10.0", features = ["v4"] }
filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.11.0"
itertools = "0.13.0"
csv = "1.3.0"
candle-core = { version = "0.4.1" }
candle-transformers = { version = "0.4.1" }
candle-nn = { version = "0.4.1" }
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }
candle-nn = { version = "0.6.0" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
arroy = "0.4.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }
url = "2.5.0"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
[dev-dependencies]
mimalloc = { version = "0.1.39", default-features = false }
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.34.0"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }

View File

@@ -30,6 +30,7 @@ fn main() -> Result<(), Box<dyn Error>> {
let index = Index::new(options, dataset)?;
let txn = index.read_txn()?;
let dictionary = index.document_decompression_dictionary(&txn).unwrap();
let mut query = String::new();
while stdin().read_line(&mut query)? > 0 {
for _ in 0..2 {
@@ -49,6 +50,7 @@ fn main() -> Result<(), Box<dyn Error>> {
let start = Instant::now();
let mut ctx = SearchContext::new(&index, &txn)?;
let mut buffer = Vec::new();
let universe = filtered_universe(ctx.index, ctx.txn, &None)?;
let docs = execute_search(
@@ -75,11 +77,14 @@ fn main() -> Result<(), Box<dyn Error>> {
let elapsed = start.elapsed();
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
if print_documents {
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
let compressed_documents = index
.compressed_documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, obkv)| {
.map(|(id, compressed_obkv)| {
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let mut object = serde_json::Map::default();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
@@ -90,17 +95,20 @@ fn main() -> Result<(), Box<dyn Error>> {
})
.collect::<Vec<_>>();
for (id, document) in documents {
for (id, document) in compressed_documents {
println!("{id}:");
println!("{document}");
}
let documents = index
.documents(&txn, docs.documents_ids.iter().copied())
let compressed_documents = index
.compressed_documents(&txn, docs.documents_ids.iter().copied())
.unwrap()
.into_iter()
.map(|(id, obkv)| {
.map(|(id, compressed_obkv)| {
let mut object = serde_json::Map::default();
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
let value = obkv.get(fid).unwrap();
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
@@ -110,7 +118,7 @@ fn main() -> Result<(), Box<dyn Error>> {
})
.collect::<Vec<_>>();
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
for (id, document) in documents {
for (id, document) in compressed_documents {
println!("{id}:");
println!("{document}");
}

View File

@@ -0,0 +1,89 @@
use std::borrow::Cow;
use std::io;
use std::io::ErrorKind;
use heed::BoxedError;
use obkv::KvReaderU16;
use zstd::bulk::{Compressor, Decompressor};
use zstd::dict::{DecoderDictionary, EncoderDictionary};
pub struct CompressedObkvCodec;
impl<'a> heed::BytesDecode<'a> for CompressedObkvCodec {
type DItem = CompressedKvReaderU16<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(CompressedKvReaderU16(bytes))
}
}
impl heed::BytesEncode<'_> for CompressedObkvCodec {
type EItem = CompressedKvWriterU16;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(&item.0))
}
}
pub struct CompressedKvReaderU16<'a>(&'a [u8]);
impl<'a> CompressedKvReaderU16<'a> {
/// Decompresses the KvReader into the buffer using the provided dictionary.
pub fn decompress_with<'b>(
&self,
buffer: &'b mut Vec<u8>,
dictionary: &DecoderDictionary,
) -> io::Result<KvReaderU16<'b>> {
const TWO_GIGABYTES: usize = 2 * 1024 * 1024 * 1024;
let mut decompressor = Decompressor::with_prepared_dictionary(dictionary)?;
let mut max_size = self.0.len() * 4;
let size = loop {
buffer.resize(max_size, 0);
match decompressor.decompress_to_buffer(self.0, &mut buffer[..max_size]) {
Ok(size) => break size,
// TODO don't do that !!! But what should I do?
Err(e) if e.kind() == ErrorKind::Other && max_size <= TWO_GIGABYTES => {
max_size *= 2
}
Err(e) => return Err(e),
}
};
Ok(KvReaderU16::new(&buffer[..size]))
}
/// Returns the KvReader as if it were not compressed.
/// This happens when there is no dictionary yet.
pub fn as_non_compressed(&self) -> KvReaderU16<'a> {
KvReaderU16::new(self.0)
}
/// Decompresses this KvReader if necessary.
pub fn decompress_with_optional_dictionary<'b>(
&self,
buffer: &'b mut Vec<u8>,
dictionary: Option<&DecoderDictionary>,
) -> io::Result<KvReaderU16<'b>>
where
'a: 'b,
{
match dictionary {
Some(dict) => self.decompress_with(buffer, dict),
None => Ok(self.as_non_compressed()),
}
}
}
pub struct CompressedKvWriterU16(Vec<u8>);
impl CompressedKvWriterU16 {
// TODO ask for a KvReaderU16 here
pub fn new_with_dictionary(input: &[u8], dictionary: &EncoderDictionary) -> io::Result<Self> {
let mut compressor = Compressor::with_prepared_dictionary(dictionary)?;
compressor.compress(input).map(CompressedKvWriterU16)
}
pub fn as_bytes(&self) -> &[u8] {
&self.0
}
}
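
A minimal, self-contained round-trip using the same `zstd` bulk calls as the codec above may help when reading it. The raw-content dictionary bytes, the document, and the buffer size below are illustrative assumptions, not values from this PR; in the PR the dictionary is trained on the indexed documents.

use std::io;

use zstd::bulk::{Compressor, Decompressor};
use zstd::dict::{DecoderDictionary, EncoderDictionary};

fn main() -> io::Result<()> {
    // Any shared byte sequence can serve as a raw-content dictionary, as long
    // as both sides agree on it. Level 19 mirrors the level used in this PR.
    let raw_dict = br#""id":"name":"description":"#;
    let encoder_dict = EncoderDictionary::copy(raw_dict, 19);
    let decoder_dict = DecoderDictionary::copy(raw_dict);

    let document = br#"{"id":42,"name":"kevin","description":"a doggo"}"#;

    // Compress a single document with the prepared dictionary.
    let mut compressor = Compressor::with_prepared_dictionary(&encoder_dict)?;
    let compressed = compressor.compress(document)?;

    // Decompress into a caller-provided buffer, as `decompress_with` does.
    // If the buffer were too small, the call would fail and the caller could
    // retry with a larger one, which is what the doubling loop above does.
    let mut buffer = vec![0u8; document.len() * 4];
    let mut decompressor = Decompressor::with_prepared_dictionary(&decoder_dict)?;
    let size = decompressor.decompress_to_buffer(&compressed[..], &mut buffer[..])?;

    assert_eq!(&buffer[..size], document);
    Ok(())
}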


@@ -1,6 +1,7 @@
mod beu16_str_codec;
mod beu32_str_codec;
mod byte_slice_ref;
mod compressed_obkv_codec;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
@@ -19,6 +20,9 @@ use thiserror::Error;
pub use self::beu16_str_codec::BEU16StrCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::compressed_obkv_codec::{
CompressedKvReaderU16, CompressedKvWriterU16, CompressedObkvCodec,
};
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;


@@ -25,7 +25,7 @@ impl RoaringBitmapLenCodec {
}
};
if size > u16::max_value() as usize + 1 {
if size > u16::MAX as usize + 1 {
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
}


@@ -11,6 +11,7 @@ use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use zstd::dict::{DecoderDictionary, EncoderDictionary};
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
@@ -20,7 +21,8 @@ use crate::heed_codec::facet::{
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
BEU16StrCodec, CompressedKvReaderU16, CompressedObkvCodec, FstSetCodec, ScriptLanguageCodec,
StrBEU16Codec, StrRefCodec,
};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
@@ -29,8 +31,8 @@ use crate::vector::{Embedding, EmbeddingConfig};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
FieldidsWeightsMap, GeoPoint, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search,
U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -73,6 +75,7 @@ pub mod main_key {
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
pub const SEARCH_CUTOFF: &str = "search_cutoff";
pub const DOCUMENT_COMPRESSION_DICTIONARY: &str = "document-compression-dictionary";
}
pub mod db_name {
@@ -172,7 +175,7 @@ pub struct Index {
pub vector_arroy: arroy::Database<arroy::distances::Angular>,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
pub(crate) documents: Database<BEU32, CompressedObkvCodec>,
}
impl Index {
@@ -339,6 +342,50 @@ impl Index {
self.env.prepare_for_closing()
}
/* document compression dictionary */
/// Writes the dictionary that will later be used to compress the documents.
pub fn put_document_compression_dictionary(
&self,
wtxn: &mut RwTxn,
dictionary: &[u8],
) -> heed::Result<()> {
self.main.remap_types::<Str, Bytes>().put(
wtxn,
main_key::DOCUMENT_COMPRESSION_DICTIONARY,
dictionary,
)
}
/// Deletes the document compression dictionary.
pub fn delete_document_compression_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::DOCUMENT_COMPRESSION_DICTIONARY)
}
/// Returns the optional raw bytes dictionary to be used when reading or writing the OBKV documents.
pub fn document_compression_raw_dictionary<'t>(
&self,
rtxn: &'t RoTxn,
) -> heed::Result<Option<&'t [u8]>> {
self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::DOCUMENT_COMPRESSION_DICTIONARY)
}
pub fn document_decompression_dictionary<'t>(
&self,
rtxn: &'t RoTxn,
) -> heed::Result<Option<DecoderDictionary<'t>>> {
self.document_compression_raw_dictionary(rtxn).map(|opt| opt.map(DecoderDictionary::new))
}
pub fn document_compression_dictionary(
&self,
rtxn: &RoTxn,
) -> heed::Result<Option<EncoderDictionary<'static>>> {
const COMPRESSION_LEVEL: i32 = 19;
self.document_compression_raw_dictionary(rtxn)
.map(|opt| opt.map(|bytes| EncoderDictionary::copy(bytes, COMPRESSION_LEVEL)))
}
/* documents ids */
/// Writes the documents ids that correspond to the user-ids-documents-ids FST.
@@ -1261,36 +1308,36 @@ impl Index {
/* documents */
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
pub fn iter_documents<'a, 't: 'a>(
/// Returns an iterator over the requested compressed documents. The next item will be an error if a document is missing.
pub fn iter_compressed_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, CompressedKvReaderU16<'t>)>> + 'a> {
Ok(ids.into_iter().map(move |id| {
let kv = self
let compressed = self
.documents
.get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
Ok((id, kv))
Ok((id, compressed))
}))
}
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
pub fn documents<'t>(
pub fn compressed_documents<'t>(
&self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
self.iter_documents(rtxn, ids)?.collect()
) -> Result<Vec<(DocumentId, CompressedKvReaderU16<'t>)>> {
self.iter_compressed_documents(rtxn, ids)?.collect()
}
/// Returns an iterator over all the documents in the index.
pub fn all_documents<'a, 't: 'a>(
pub fn all_compressed_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
) -> Result<impl Iterator<Item = Result<(DocumentId, CompressedKvReaderU16<'t>)>> + 'a> {
self.iter_compressed_documents(rtxn, self.documents_ids(rtxn)?)
}
pub fn external_id_of<'a, 't: 'a>(
@@ -1311,8 +1358,13 @@ impl Index {
process: "external_id_of",
})
})?;
Ok(self.iter_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
let (_docid, obkv) = entry?;
let dictionary =
self.document_compression_raw_dictionary(rtxn)?.map(DecoderDictionary::copy);
let mut buffer = Vec::new();
Ok(self.iter_compressed_documents(rtxn, ids)?.map(move |entry| -> Result<_> {
let (_docid, compressed_obkv) = entry?;
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
match primary_key.document_id(&obkv, &fields)? {
Ok(document_id) => Ok(document_id),
Err(_) => Err(InternalError::DocumentsError(
@@ -2120,7 +2172,7 @@ pub(crate) mod tests {
.filter(Filter::from_str("_geoBoundingBox([-80, 0], [80, 0])").unwrap().unwrap())
.execute()
.unwrap_err();
insta::assert_display_snapshot!(
insta::assert_snapshot!(
error,
@r###"
The top latitude `-80` is below the bottom latitude `80`.
@@ -2133,7 +2185,7 @@ pub(crate) mod tests {
.filter(Filter::from_str("_geoBoundingBox([-10, 0], [10, 0])").unwrap().unwrap())
.execute()
.unwrap_err();
insta::assert_display_snapshot!(
insta::assert_snapshot!(
error,
@r###"
The top latitude `-10` is below the bottom latitude `10`.
@@ -2441,7 +2493,12 @@ pub(crate) mod tests {
"###);
let rtxn = index.read_txn().unwrap();
let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0];
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let (_docid, compressed_obkv) = index.compressed_documents(&rtxn, [0]).unwrap().remove(0);
let mut buffer = Vec::new();
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
insta::assert_debug_snapshot!(json, @r###"
{
@@ -2450,7 +2507,10 @@ pub(crate) mod tests {
"###);
// Furthermore, when we retrieve document 34, it is not the result of merging 35 with 34
let (_docid, obkv) = index.documents(&rtxn, [2]).unwrap()[0];
let (_docid, compressed_obkv) = index.compressed_documents(&rtxn, [2]).unwrap().remove(0);
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
insta::assert_debug_snapshot!(json, @r###"
{
@@ -2459,6 +2519,7 @@ pub(crate) mod tests {
}
"###);
drop(dictionary);
drop(rtxn);
// Add new documents again
@@ -2657,11 +2718,16 @@ pub(crate) mod tests {
} = search.execute().unwrap();
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
documents_ids.sort_unstable();
let docs = index.documents(&rtxn, documents_ids).unwrap();
let compressed_docs = index.compressed_documents(&rtxn, documents_ids).unwrap();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let mut buffer = Vec::new();
let mut all_ids = HashSet::new();
for (_docid, obkv) in docs {
let id = obkv.get(primary_key_id).unwrap();
assert!(all_ids.insert(id));
for (_docid, compressed) in compressed_docs {
let doc = compressed
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let id = doc.get(primary_key_id).unwrap();
assert!(all_ids.insert(id.to_vec()));
}
}
@@ -2715,7 +2781,7 @@ pub(crate) mod tests {
documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best" }}),
)
.unwrap_err();
insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"doggo":"are the best"}`."###);
insta::assert_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"doggo":"are the best"}`."###);
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
@@ -2725,7 +2791,7 @@ pub(crate) mod tests {
documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best", "and": { "all": ["cats", { "are": "beautiful" } ] } } }),
)
.unwrap_err();
insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"and":{"all":["cats",{"are":"beautiful"}]},"doggo":"are the best"}`."###);
insta::assert_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"and":{"all":["cats",{"are":"beautiful"}]},"doggo":"are the best"}`."###);
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
}
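
A side note on the two dictionary constructors used above: `document_decompression_dictionary` builds its `DecoderDictionary<'t>` with `DecoderDictionary::new`, so the prepared dictionary borrows the raw bytes and stays tied to the read transaction, while the `copy` variants (`DecoderDictionary::copy`, `EncoderDictionary::copy`) own a copy and are `'static`. The sketch below, with invented documents, shows the reuse pattern the callers in this PR follow: build the prepared dictionaries once, then compress or decompress many documents with a single scratch buffer.

use std::io;

use zstd::bulk::{Compressor, Decompressor};
use zstd::dict::{DecoderDictionary, EncoderDictionary};

fn main() -> io::Result<()> {
    // Stand-in for the raw bytes stored under the
    // `document-compression-dictionary` main key.
    let raw_dict = br#""id":"name":"#;
    let encoder_dict = EncoderDictionary::copy(raw_dict, 19);
    let decoder_dict = DecoderDictionary::copy(raw_dict);

    // Compress a few invented documents with one prepared dictionary.
    let mut compressor = Compressor::with_prepared_dictionary(&encoder_dict)?;
    let compressed_docs: Vec<Vec<u8>> = (0..3u32)
        .map(|i| compressor.compress(format!(r#"{{"id":{i},"name":"doc {i}"}}"#).as_bytes()))
        .collect::<io::Result<_>>()?;

    // Decompress them all while reusing a single scratch buffer, which is the
    // calling pattern used by the tests and iterators in this PR.
    let mut decompressor = Decompressor::with_prepared_dictionary(&decoder_dict)?;
    let mut buffer = vec![0u8; 256];
    for compressed in &compressed_docs {
        let size = decompressor.decompress_to_buffer(compressed.as_slice(), &mut buffer[..])?;
        println!("{}", std::str::from_utf8(&buffer[..size]).unwrap());
    }
    Ok(())
}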


@@ -45,7 +45,7 @@ pub use search::new::{
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {charabia as tokenizer, heed};
pub use {charabia as tokenizer, heed, zstd};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::criterion::{default_criteria, Criterion, CriterionError};


@@ -316,7 +316,7 @@ impl QueryGraph {
term_docids
.into_iter()
.map(|(idx, docids)| match docids.len() {
0 => (idx, u64::max_value()),
0 => (idx, u64::MAX),
frequency => (idx, frequency),
})
.collect()


@@ -24,8 +24,13 @@ fn collect_field_values(
) -> Vec<String> {
let mut values = vec![];
let fid = index.fields_ids_map(txn).unwrap().id(fid).unwrap();
for doc in index.documents(txn, docids.iter().copied()).unwrap() {
if let Some(v) = doc.1.get(fid) {
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(txn).unwrap();
for (_id, compressed_doc) in index.compressed_documents(txn, docids.iter().copied()).unwrap() {
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
if let Some(v) = doc.get(fid) {
let v: serde_json::Value = serde_json::from_slice(v).unwrap();
let v = v.to_string();
values.push(v);


@@ -407,9 +407,15 @@ pub fn snap_documents(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let display = fields_ids_map.ids().collect::<Vec<_>>();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let mut buffer = Vec::new();
for document in index.all_documents(&rtxn).unwrap() {
let doc = obkv_to_json(&display, &fields_ids_map, document.unwrap().1).unwrap();
for result in index.all_compressed_documents(&rtxn).unwrap() {
let (_id, compressed_document) = result.unwrap();
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let doc = obkv_to_json(&display, &fields_ids_map, document).unwrap();
snap.push_str(&serde_json::to_string(&doc).unwrap());
snap.push('\n');
}


@@ -15,7 +15,7 @@ impl AvailableDocumentsIds {
available -= docids;
let iter = match last_id.checked_add(1) {
Some(id) => id..=u32::max_value(),
Some(id) => id..=u32::MAX,
#[allow(clippy::reversed_empty_ranges)]
None => 1..=0, // empty range iterator
};
@@ -24,7 +24,7 @@ impl AvailableDocumentsIds {
}
None => {
let empty = RoaringBitmap::new().into_iter();
AvailableDocumentsIds { iter: empty.chain(0..=u32::max_value()) }
AvailableDocumentsIds { iter: empty.chain(0..=u32::MAX) }
}
}
}
@@ -46,7 +46,7 @@ mod tests {
fn empty() {
let base = RoaringBitmap::new();
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = 0..=u32::max_value();
let right = 0..=u32::MAX;
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
@@ -59,7 +59,7 @@ mod tests {
base.insert(405);
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
let right = (0..=u32::MAX).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
}


@@ -63,6 +63,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
self.index.delete_geo_rtree(self.wtxn)?;
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
self.index.delete_document_compression_dictionary(self.wtxn)?;
// Remove all user-provided bits from the configs
let mut configs = self.index.embedding_configs(self.wtxn)?;


@@ -5,7 +5,7 @@ mod transform;
mod typed_chunk;
use std::collections::{HashMap, HashSet};
use std::io::{Read, Seek};
use std::io::{BufWriter, Read, Seek, Write};
use std::iter;
use std::num::NonZeroU32;
use std::result::Result as StdResult;
@@ -13,8 +13,8 @@ use std::sync::Arc;
use crossbeam_channel::{Receiver, Sender};
use grenad::{Merger, MergerBuilder};
use heed::types::Str;
use heed::Database;
use heed::types::{Bytes, Str};
use heed::{Database, PutFlags};
use rand::SeedableRng;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
@@ -34,13 +34,14 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::heed_codec::{CompressedKvWriterU16, CompressedObkvCodec};
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::vector::EmbeddingConfigs;
use crate::{CboRoaringBitmapCodec, Index, Result};
use crate::{CboRoaringBitmapCodec, Index, Result, BEU32};
static MERGED_DATABASE_COUNT: usize = 7;
static PREFIX_DATABASE_COUNT: usize = 4;
@@ -266,7 +267,7 @@ where
target = "indexing::details",
name = "index_documents_raw"
)]
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
pub fn execute_raw(mut self, output: TransformOutput) -> Result<u64>
where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
@@ -565,6 +566,10 @@ where
word_fid_docids.map(MergerBuilder::build),
)?;
// This call contains an internal condition to ensure we do not regenerate
// the compression dictionary and recompress the documents on every run.
self.manage_compression_dictionary()?;
Ok(number_of_documents)
}
@@ -575,7 +580,7 @@ where
name = "index_documents_prefix_databases"
)]
pub fn execute_prefix_databases(
self,
&mut self,
word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
exact_word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
word_position_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
@@ -747,6 +752,64 @@ where
Ok(())
}
/// Computes a new dictionary and compresses the documents with it in the database.
///
/// Documents still need to be compressed directly when they are written to the database and a dictionary exists.
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::compression",
name = "compress_documents_database"
)]
pub fn manage_compression_dictionary(&mut self) -> Result<()> {
/// The size of the dictionary generated from a sample of the documents already
/// in the database. It will be used when compressing and decompressing documents.
const COMPRESSION_DICTIONARY_SIZE: usize = 64_000;
/// The minimum number of documents to trigger the generation of the compression dictionary.
const COMPRESSION_ON_NUMBER_OF_DOCUMENTS: usize = 10_000;
if self.index.number_of_documents(self.wtxn)? < COMPRESSION_ON_NUMBER_OF_DOCUMENTS as u64
|| self.index.document_compression_dictionary(self.wtxn)?.is_some()
{
return Ok(());
}
let mut sample_file = tempfile::tempfile().map(BufWriter::new)?;
let mut sample_sizes = Vec::new();
// TODO make this 1_000 be 10k and const
let documents = self.index.documents.remap_types::<BEU32, Bytes>();
for result in documents.iter(self.wtxn)?.take(COMPRESSION_ON_NUMBER_OF_DOCUMENTS) {
let (_id, bytes) = result?;
sample_file.write_all(bytes)?;
sample_sizes.push(bytes.len());
}
let sample_file = sample_file.into_inner().map_err(|ie| ie.into_error())?;
let sample_data = unsafe { memmap2::Mmap::map(&sample_file)? };
let dictionary =
zstd::dict::from_continuous(&sample_data, &sample_sizes, COMPRESSION_DICTIONARY_SIZE)?;
self.index.put_document_compression_dictionary(self.wtxn, &dictionary)?;
// safety: We just set the dictionary above. It must be there when we get it back.
let dictionary = self.index.document_compression_dictionary(self.wtxn)?.unwrap();
let mut iter = self.index.documents.iter_mut(self.wtxn)?;
while let Some(result) = iter.next() {
let (docid, document) = result?;
let document = document.as_non_compressed().as_bytes();
let compressed = CompressedKvWriterU16::new_with_dictionary(document, &dictionary)?;
// safety: the compressed document is entirely owned
unsafe {
iter.put_current_with_options::<CompressedObkvCodec>(
PutFlags::empty(),
&docid,
&compressed,
)?;
}
}
Ok(())
}
}
/// Run the word prefix docids update operation.
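
For readers who have not used zstd dictionary training before, here is a standalone sketch of the `zstd::dict::from_continuous` call made by `manage_compression_dictionary`. The synthetic documents and the 10 KiB target size are assumptions for illustration (the PR samples real documents and targets 64 kB); training may fail if the sample set is too small or not varied enough.

use std::io;

fn main() -> io::Result<()> {
    // Build a "continuous" sample buffer plus the length of each sample,
    // mirroring how the indexer streams its first documents into a temp file.
    let samples: Vec<Vec<u8>> = (0..10_000u32)
        .map(|i| {
            format!(
                r#"{{"id":{i},"name":"user {i}","description":"user number {i} living in city {}"}}"#,
                i % 100
            )
            .into_bytes()
        })
        .collect();
    let sample_sizes: Vec<usize> = samples.iter().map(Vec::len).collect();
    let sample_data: Vec<u8> = samples.concat();

    // Train a dictionary capped at 10 KiB from the concatenated samples.
    let dictionary = zstd::dict::from_continuous(&sample_data, &sample_sizes, 10 * 1024)?;
    println!("trained a dictionary of {} bytes", dictionary.len());
    Ok(())
}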
@@ -834,7 +897,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 3);
let count = index.all_documents(&rtxn).unwrap().count();
let count = index.all_compressed_documents(&rtxn).unwrap().count();
assert_eq!(count, 3);
drop(rtxn);
@@ -843,6 +906,7 @@ mod tests {
#[test]
fn simple_document_merge() {
let mut index = TempIndex::new();
let mut buffer = Vec::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
// First we send 3 documents with duplicate ids and
@@ -861,16 +925,21 @@ mod tests {
assert_eq!(count, 1);
// Check that we get only one document from the database.
let docs = index.documents(&rtxn, Some(0)).unwrap();
assert_eq!(docs.len(), 1);
let (id, doc) = docs[0];
let mut compressed_docs = index.compressed_documents(&rtxn, Some(0)).unwrap();
assert_eq!(compressed_docs.len(), 1);
let (id, compressed_doc) = compressed_docs.remove(0);
assert_eq!(id, 0);
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
// Check that this document is equal to the last one sent.
let mut doc_iter = doc.iter();
assert_eq!(doc_iter.next(), Some((0, &b"1"[..])));
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
assert_eq!(doc_iter.next(), None);
drop(dictionary);
drop(rtxn);
// Second we send 1 document with id 1, to force it to be merged with the previous one.
@@ -882,10 +951,14 @@ mod tests {
assert_eq!(count, 1);
// Check that we get only one document from the database.
let docs = index.documents(&rtxn, Some(0)).unwrap();
assert_eq!(docs.len(), 1);
let (id, doc) = docs[0];
let mut compressed_docs = index.compressed_documents(&rtxn, Some(0)).unwrap();
assert_eq!(compressed_docs.len(), 1);
let (id, compressed_doc) = compressed_docs.remove(0);
assert_eq!(id, 0);
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
// Check that this document is equal to the last one sent.
let mut doc_iter = doc.iter();
@@ -893,6 +966,7 @@ mod tests {
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
assert_eq!(doc_iter.next(), Some((2, &b"25"[..])));
assert_eq!(doc_iter.next(), None);
drop(dictionary);
drop(rtxn);
}
@@ -917,6 +991,7 @@ mod tests {
#[test]
fn simple_auto_generated_documents_ids() {
let mut index = TempIndex::new();
let mut buffer = Vec::new();
index.index_documents_config.autogenerate_docids = true;
// First we send 3 documents with ids from 1 to 3.
index
@@ -929,12 +1004,26 @@ mod tests {
// Check that there are 3 documents now.
let rtxn = index.read_txn().unwrap();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 3);
let docs = index.documents(&rtxn, vec![0, 1, 2]).unwrap();
let (_id, obkv) = docs.iter().find(|(_id, kv)| kv.get(0) == Some(br#""kevin""#)).unwrap();
let compressed_docs = index.compressed_documents(&rtxn, vec![0, 1, 2]).unwrap();
let (_id, compressed_obkv) = compressed_docs
.iter()
.find(|(_id, compressed_doc)| {
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
doc.get(0) == Some(br#""kevin""#)
})
.unwrap();
let obkv = compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let kevin_uuid: String = serde_json::from_slice(obkv.get(1).unwrap()).unwrap();
drop(dictionary);
drop(rtxn);
// Second we send 1 document with the generated uuid, to erase the previous ones.
@@ -942,21 +1031,34 @@ mod tests {
// Check that there are **always** 3 documents.
let rtxn = index.read_txn().unwrap();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 3);
// the document 0 has been deleted and reinserted with the id 3
let docs = index.documents(&rtxn, vec![1, 2, 0]).unwrap();
let kevin_position =
docs.iter().position(|(_, d)| d.get(0).unwrap() == br#""updated kevin""#).unwrap();
let mut compressed_docs = index.compressed_documents(&rtxn, vec![1, 2, 0]).unwrap();
let kevin_position = compressed_docs
.iter()
.position(|(_, compressed_doc)| {
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
doc.get(0).unwrap() == br#""updated kevin""#
})
.unwrap();
assert_eq!(kevin_position, 2);
let (_, doc) = docs[kevin_position];
let (_, compressed_doc) = compressed_docs.remove(kevin_position);
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
// Check that this document is equal to the last
// one sent and that a UUID has been generated.
assert_eq!(doc.get(0), Some(&br#""updated kevin""#[..]));
// This is a UUID; it must be 36 bytes long plus the 2 surrounding string quotes (").
assert_eq!(doc.get(1).unwrap().len(), 36 + 2);
drop(dictionary);
drop(rtxn);
}
@@ -1088,7 +1190,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 6);
let count = index.all_documents(&rtxn).unwrap().count();
let count = index.all_compressed_documents(&rtxn).unwrap().count();
assert_eq!(count, 6);
db_snap!(index, word_docids, "updated");
@@ -1506,7 +1608,7 @@ mod tests {
index.add_documents(documents!({ "a" : { "b" : { "c" : 1 }}})).unwrap();
let rtxn = index.read_txn().unwrap();
let all_documents_count = index.all_documents(&rtxn).unwrap().count();
let all_documents_count = index.all_compressed_documents(&rtxn).unwrap().count();
assert_eq!(all_documents_count, 1);
let external_documents_ids = index.external_documents_ids();
assert!(external_documents_ids.get(&rtxn, "1").unwrap().is_some());
@@ -2229,10 +2331,10 @@ mod tests {
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
insta::assert_snapshot!(added.unwrap(), @"3");
let (builder, removed) = builder.remove_documents(vec![S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
insta::assert_snapshot!(removed.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2271,17 +2373,17 @@ mod tests {
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
insta::assert_snapshot!(added.unwrap(), @"3");
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
insta::assert_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
insta::assert_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2319,7 +2421,7 @@ mod tests {
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
insta::assert_snapshot!(added.unwrap(), @"3");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2354,10 +2456,10 @@ mod tests {
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
insta::assert_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
insta::assert_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2390,14 +2492,14 @@ mod tests {
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
insta::assert_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
insta::assert_snapshot!(added.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2432,7 +2534,7 @@ mod tests {
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
insta::assert_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
@@ -2440,11 +2542,11 @@ mod tests {
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
insta::assert_snapshot!(added.unwrap(), @"3");
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
insta::assert_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2480,7 +2582,7 @@ mod tests {
{ "id": 1, "doggo": "kevin" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
insta::assert_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2509,13 +2611,13 @@ mod tests {
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
insta::assert_snapshot!(removed.unwrap(), @"1");
let documents = documents!([
{ "id": 1, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
insta::assert_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2692,7 +2794,7 @@ mod tests {
{ "id": 1, "doggo": "bernese" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
insta::assert_snapshot!(added.unwrap(), @"1");
// FINISHING
let addition = builder.execute().unwrap();
@@ -2731,13 +2833,13 @@ mod tests {
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
insta::assert_snapshot!(removed.unwrap(), @"1");
let documents = documents!([
{ "id": 0, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
insta::assert_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2777,7 +2879,7 @@ mod tests {
{ "id": 1, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
insta::assert_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
@@ -2796,7 +2898,7 @@ mod tests {
// Ensuring all the returned IDs actually exist
let rtxn = index.read_txn().unwrap();
let res = index.search(&rtxn).execute().unwrap();
index.documents(&rtxn, res.documents_ids).unwrap();
index.compressed_documents(&rtxn, res.documents_ids).unwrap();
}
fn delete_documents<'t>(
@@ -3163,7 +3265,7 @@ mod tests {
let deleted_internal_ids = delete_documents(&mut wtxn, &index, &deleted_external_ids);
// list all documents
let results = index.all_documents(&wtxn).unwrap();
let results = index.all_compressed_documents(&wtxn).unwrap();
for result in results {
let (id, _) = result.unwrap();
assert!(


@@ -168,10 +168,12 @@ impl<'a, 'i> Transform<'a, 'i> {
let external_documents_ids = self.index.external_documents_ids();
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
let dictionary = self.index.document_decompression_dictionary(wtxn)?;
let primary_key = cursor.primary_key().to_string();
let primary_key_id =
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
let mut decompression_buffer = Vec::new();
let mut obkv_buffer = Vec::new();
let mut document_sorter_value_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
@@ -247,18 +249,17 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut skip_insertion = false;
if let Some(original_docid) = original_docid {
let original_key = original_docid;
let base_obkv = self
.index
.documents
.remap_data_type::<heed::types::Bytes>()
.get(wtxn, &original_key)?
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::DOCUMENTS,
key: None,
})?;
let base_compressed_obkv = self.index.documents.get(wtxn, &original_key)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?;
let base_obkv = base_compressed_obkv.decompress_with_optional_dictionary(
&mut decompression_buffer,
dictionary.as_ref(),
)?;
// we check if the two documents are exactly equal. If it's the case we can skip this document entirely
if base_obkv == obkv_buffer {
if base_obkv.as_bytes() == obkv_buffer {
// we're not replacing anything
self.replaced_documents_ids.remove(original_docid);
// and we need to put back the original id as it was before
@@ -278,13 +279,12 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(base_obkv),
base_obkv,
deladd_operation,
&mut document_sorter_value_buffer,
)?;
self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let base_obkv = KvReader::new(base_obkv);
if let Some(flattened_obkv) =
Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)?
{
@@ -348,9 +348,12 @@ impl<'a, 'i> Transform<'a, 'i> {
documents_seen: documents_count,
});
drop(dictionary);
self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
self.index.put_primary_key(wtxn, &primary_key)?;
self.documents_count += documents_count;
// Now that we have a valid sorter that contains the user id and the obkv we
// give it to the last transforming function which returns the TransformOutput.
Ok(documents_count)
@@ -1035,15 +1038,21 @@ impl<'a, 'i> Transform<'a, 'i> {
if original_sorter.is_some() || flattened_sorter.is_some() {
let modified_faceted_fields = settings_diff.modified_faceted_fields();
let dictionary = self.index.document_decompression_dictionary(wtxn)?;
let mut original_obkv_buffer = Vec::new();
let mut flattened_obkv_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
let mut buffer = Vec::new();
for result in self.index.external_documents_ids().iter(wtxn)? {
let (external_id, docid) = result?;
let old_obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
let old_compressed_obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?;
let old_obkv = old_compressed_obkv
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;
let injected_vectors: std::result::Result<
serde_json::Map<String, serde_json::Value>,
arroy::Error,


@@ -19,6 +19,7 @@ use super::helpers::{
use super::MergeFn;
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
use crate::facet::FacetType;
use crate::heed_codec::CompressedKvWriterU16;
use crate::index::db_name::DOCUMENTS;
use crate::index::IndexEmbeddingConfig;
use crate::proximity::MAX_DISTANCE;
@@ -162,6 +163,7 @@ pub(crate) fn write_typed_chunk_into_index(
.into_iter()
.map(|IndexEmbeddingConfig { name, .. }| name)
.collect();
let dictionary = index.document_compression_dictionary(wtxn)?;
let mut vectors_buffer = Vec::new();
while let Some((key, reader)) = iter.next()? {
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
@@ -211,7 +213,17 @@ pub(crate) fn write_typed_chunk_into_index(
let db = index.documents.remap_data_type::<Bytes>();
if !writer.is_empty() {
db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
let uncompressed_document_bytes = writer.into_inner().unwrap();
match dictionary.as_ref() {
Some(dictionary) => {
let compressed = CompressedKvWriterU16::new_with_dictionary(
&uncompressed_document_bytes,
dictionary,
)?;
db.put(wtxn, &docid, compressed.as_bytes())?
}
None => db.put(wtxn, &docid, &uncompressed_document_bytes)?,
}
operations.push(DocumentOperation {
external_id: external_id.to_string(),
internal_id: docid,

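The branch above boils down to a small decision: once a dictionary exists, serialized documents are compressed before being written; before that, the raw obkv bytes are stored and later read back through `as_non_compressed`. The hypothetical `maybe_compress` helper below (not part of the PR) restates that logic in isolation; the dictionary bytes and document are illustrative.

use std::io;

use zstd::bulk::Compressor;
use zstd::dict::EncoderDictionary;

// Hypothetical helper mirroring the write path: compress the serialized
// document when a dictionary is available, otherwise keep the bytes as-is.
fn maybe_compress(
    document: &[u8],
    dictionary: Option<&EncoderDictionary<'_>>,
) -> io::Result<Vec<u8>> {
    match dictionary {
        Some(dict) => Compressor::with_prepared_dictionary(dict)?.compress(document),
        None => Ok(document.to_vec()),
    }
}

fn main() -> io::Result<()> {
    let dict = EncoderDictionary::copy(br#""id":"name":"#, 19);
    let document = br#"{"id":1,"name":"kevin"}"#;

    assert_eq!(maybe_compress(document, None)?, document.to_vec());
    assert_ne!(maybe_compress(document, Some(&dict))?, document.to_vec());
    Ok(())
}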

@@ -1769,6 +1769,8 @@ mod tests {
// Check that the searchable field is correctly set to "name" only.
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
// When we search for something that is not in
// the searchable fields it must not return any document.
let result = index.search(&rtxn).query("23").execute().unwrap();
@@ -1777,10 +1779,17 @@ mod tests {
// When we search for something that is in the searchable fields
// we must find the appropriate document.
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
let mut compressed_documents =
index.compressed_documents(&rtxn, result.documents_ids).unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
assert_eq!(documents.len(), 1);
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
assert_eq!(compressed_documents.len(), 1);
let (_id, compressed_document) = compressed_documents.remove(0);
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
assert_eq!(document.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
drop(dictionary);
drop(rtxn);
// We change the searchable fields to be the "name" field only.
@@ -1805,6 +1814,7 @@ mod tests {
// Check that the searchable fields have been reset and documents are found now.
let rtxn = index.read_txn().unwrap();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
@@ -1813,8 +1823,13 @@ mod tests {
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
let result = index.search(&rtxn).query("23").execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
let mut compressed_documents =
index.compressed_documents(&rtxn, result.documents_ids).unwrap();
let (_id, compressed_document) = compressed_documents.remove(0);
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
assert_eq!(document.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
}
#[test]
@@ -1949,15 +1964,20 @@ mod tests {
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let mut buffer = Vec::new();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset! { S("age") });
// Only count the field_id 0 and level 0 facet values.
// TODO we must support typed CSVs for numbers to be understood.
let fidmap = index.fields_ids_map(&rtxn).unwrap();
for document in index.all_documents(&rtxn).unwrap() {
let document = document.unwrap();
let json = crate::obkv_to_json(&fidmap.ids().collect::<Vec<_>>(), &fidmap, document.1)
for result in index.all_compressed_documents(&rtxn).unwrap() {
let (_id, compressed_document) = result.unwrap();
let document = compressed_document
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let json =
crate::obkv_to_json(&fidmap.ids().collect::<Vec<_>>(), &fidmap, document).unwrap();
println!("json: {:?}", json);
}
let count = index
@@ -1968,6 +1988,7 @@ mod tests {
.unwrap()
.count();
assert_eq!(count, 3);
drop(dictionary);
drop(rtxn);
// Index a few more documents with new and current facet values.
@@ -2057,6 +2078,7 @@ mod tests {
#[test]
fn set_asc_desc_field() {
let mut index = TempIndex::new();
let mut buffer = Vec::new();
index.index_documents_config.autogenerate_docids = true;
// Set the filterable fields to be the age.
@@ -2078,12 +2100,16 @@ mod tests {
// Run an empty query just to ensure that the search results are ordered.
let rtxn = index.read_txn().unwrap();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
let documents = index.documents(&rtxn, documents_ids).unwrap();
let compressed_documents = index.compressed_documents(&rtxn, documents_ids).unwrap();
// Fetch the documents' "age" field in the order in which the documents appear.
let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap();
let iter = documents.into_iter().map(|(_, doc)| {
let iter = compressed_documents.into_iter().map(|(_, compressed_doc)| {
let doc = compressed_doc
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let bytes = doc.get(age_field_id).unwrap();
let string = std::str::from_utf8(bytes).unwrap();
string.parse::<u32>().unwrap()
@@ -2480,6 +2506,7 @@ mod tests {
#[test]
fn setting_impact_relevancy() {
let mut index = TempIndex::new();
let mut buffer = Vec::new();
index.index_documents_config.autogenerate_docids = true;
// Set the genres setting
@@ -2512,8 +2539,12 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let SearchResult { documents_ids, .. } = index.search(&rtxn).query("S").execute().unwrap();
let first_id = documents_ids[0];
let documents = index.documents(&rtxn, documents_ids).unwrap();
let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap();
let documents = index.compressed_documents(&rtxn, documents_ids).unwrap();
let (_, compressed_content) = documents.iter().find(|(id, _)| *id == first_id).unwrap();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let content = compressed_content
.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
.unwrap();
let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap();
let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap();
@@ -2681,7 +2712,7 @@ mod tests {
wtxn.commit().unwrap();
let rtxn = index.write_txn().unwrap();
let docs: StdResult<Vec<_>, _> = index.all_documents(&rtxn).unwrap().collect();
let docs: StdResult<Vec<_>, _> = index.all_compressed_documents(&rtxn).unwrap().collect();
let docs = docs.unwrap();
assert_eq!(docs.len(), 5);
}


@@ -317,7 +317,20 @@ fn criteria_ascdesc() {
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
let documents = index.all_documents(&rtxn).unwrap().map(|doc| doc.unwrap()).collect::<Vec<_>>();
let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
let mut buffers = vec![Vec::new(); index.number_of_documents(&rtxn).unwrap() as usize];
let documents = index
.all_compressed_documents(&rtxn)
.unwrap()
.zip(buffers.iter_mut())
.map(|(compressed, buffer)| {
let (id, compressed) = compressed.unwrap();
let doc = compressed
.decompress_with_optional_dictionary(buffer, dictionary.as_ref())
.unwrap();
(id, doc)
})
.collect::<Vec<_>>();
for criterion in [Asc(S("name")), Desc(S("name")), Asc(S("age")), Desc(S("age"))] {
eprintln!("Testing with criterion: {:?}", &criterion);


@@ -1,3 +1,3 @@
[toolchain]
channel = "1.75.0"
channel = "1.79.0"
components = ["clippy"]


@@ -7,17 +7,18 @@ edition = "2021"
[dependencies]
color-spantrace = "0.2.1"
fxprof-processed-profile = "0.6.0"
serde = { version = "1.0.195", features = ["derive"] }
serde_json = "1.0.111"
fxprof-processed-profile = "0.7.0"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
tracing = "0.1.40"
tracing-error = "0.2.0"
tracing-subscriber = "0.3.18"
byte-unit = { version = "4.0.19", default-features = false, features = [
byte-unit = { version = "5.1.4", default-features = false, features = [
"std",
"byte",
"serde",
] }
tokio = { version = "1.35.1", features = ["sync"] }
tokio = { version = "1.38.0", features = ["sync"] }
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.2"
libproc = "0.14.8"


@@ -1,9 +1,10 @@
use std::collections::HashMap;
use fxprof_processed_profile::{
CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField,
MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, ProcessHandle, Profile,
ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp,
CategoryHandle, CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo,
MarkerDynamicField, MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField,
ProcessHandle, Profile, ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle,
Timestamp,
};
use serde_json::json;
@@ -129,6 +130,7 @@ pub fn to_firefox_profile<R: std::io::Read>(
profile.add_marker_with_stack(
*thread_handle,
CategoryHandle::OTHER,
&callsite.name,
marker,
fxprof_processed_profile::MarkerTiming::Interval(
@@ -179,6 +181,7 @@ pub fn to_firefox_profile<R: std::io::Read>(
profile.add_marker_with_stack(
*thread_handle,
CategoryHandle::OTHER,
&callsite.name,
marker,
fxprof_processed_profile::MarkerTiming::Instant(timestamp),


@@ -1,6 +1,8 @@
use std::collections::HashMap;
use std::io::Read;
use byte_unit::UnitType;
use crate::entry::{
Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter,
SpanExit, SpanId,
@@ -190,6 +192,6 @@ fn print_duration(duration: std::time::Duration) -> String {
/// Format only the allocated bytes, deallocated bytes and reallocated bytes in GiB, MiB, KiB, Bytes.
fn print_memory(MemoryStats { resident }: MemoryStats) -> String {
use byte_unit::Byte;
let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true);
let rss_bytes = Byte::from_u64(resident).get_appropriate_unit(UnitType::Binary);
format!("RSS {rss_bytes:.2}")
}
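
For context on the byte-unit 4 → 5 migration above: the constructor is now type-suffixed (`Byte::from_u64`) and the binary/decimal choice is an explicit `UnitType` instead of a boolean. A tiny sketch, with an invented RSS value:

use byte_unit::{Byte, UnitType};

fn main() {
    // 3_674_210_304 bytes is an arbitrary example value, not a real measurement.
    let rss = Byte::from_u64(3_674_210_304).get_appropriate_unit(UnitType::Binary);
    println!("RSS {rss:.2}"); // prints something like "RSS 3.42 GiB"
}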


@@ -11,27 +11,27 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.79"
anyhow = "1.0.86"
build-info = { version = "1.7.0", path = "../build-info" }
cargo_metadata = "0.18.1"
clap = { version = "4.4.14", features = ["derive"] }
clap = { version = "4.5.9", features = ["derive"] }
futures-core = "0.3.30"
futures-util = "0.3.30"
reqwest = { version = "0.11.23", features = [
reqwest = { version = "0.12.5", features = [
"stream",
"json",
"rustls-tls",
], default-features = false }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = "1.0.111"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
sha2 = "0.10.8"
sysinfo = "0.30.5"
time = { version = "0.3.32", features = [
sysinfo = "0.30.13"
time = { version = "0.3.36", features = [
"serde",
"serde-human-readable",
"macros",
] }
tokio = { version = "1.35.1", features = [
tokio = { version = "1.38.0", features = [
"rt",
"net",
"time",
@@ -41,4 +41,4 @@ tokio = { version = "1.35.1", features = [
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.7.0", features = ["v7", "serde"] }
uuid = { version = "1.10.0", features = ["v7", "serde"] }