Compare commits

...

47 Commits

Author SHA1 Message Date
Tamo
c0ea1a2c5a change the version of arroy 2025-02-27 15:51:23 +01:00
meili-bors[bot]
296ca1d58f Merge #5376
5376: Support dumpless upgrade for all v1.13 patches r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5373 

## What does this PR do?
- Ensure the v1.13 versions are known to the index scheduler upgrade code
- Forbid opening a db of v1.13.x from v1.13.y
- Keep the old stat format to make sure the number of documents is available in stats during the dumpless upgrade
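A minimal sketch of the gating idea, with a simplified, hypothetical signature (the real logic lives in the index scheduler's upgrade module, shown in the diff further down this page): every v1.12.x and v1.13.x source maps to the start of the upgrade chain, while a database newer than the binary is rejected.

```rust
/// Illustrative only: `from` is the database version, `current` the binary
/// version. Returns the index of the first upgrade function to run.
fn upgrade_start_step(
    from: (u32, u32, u32),
    current: (u32, u32, u32),
) -> Result<usize, String> {
    match from {
        // All v1.12.x and v1.13.x patches are known upgrade sources.
        (1, 12, _) | (1, 13, _) => Ok(0),
        (major, minor, patch) => {
            // Tuple comparison is lexicographic: major, then minor, then patch.
            if (major, minor, patch) > current {
                Err(format!("downgrade from v{major}.{minor}.{patch} is not supported"))
            } else {
                Err(format!("no dumpless upgrade path from v{major}.{minor}.{patch}"))
            }
        }
    }
}
```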


Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-27 11:02:40 +00:00
Louis Dureuil
d43a1644b1 Keep old stat format to make sure the number of documents is available during dumpless upgrade 2025-02-27 11:57:57 +01:00
Louis Dureuil
8cbcb1476e Forbid opening a db of v1.13.x from v1.13.y 2025-02-27 11:43:58 +01:00
Louis Dureuil
79110bf7b1 Support dumpless upgrade for all v1.13 patches 2025-02-27 10:56:41 +01:00
Louis Dureuil
3d5575a3e9 Update snapshots following version bump 2025-02-27 10:56:25 +01:00
Kerollmops
8524b59e83 Update version for the next release (v1.13.2) in Cargo.toml 2025-02-27 09:40:33 +00:00
meili-bors[bot]
ceec68cf7a Merge #5360
5360: Fix the dumpless upgrade log r=Kerollmops a=Kerollmops

This PR fixes a dumpless upgrade log issue where the current and target versions were the same value, and the logs therefore displayed invalid messages like: _upgrading from v1.12.8 to v1.12.8_.
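A self-contained sketch of the corrected loop, simplified from the diff further down this page: each step logs its own source version against the step's target instead of printing the same value twice.

```rust
/// Simplified from the real upgrade loop: `local_from` tracks the version we
/// are upgrading from at each step, so the log no longer reads
/// "upgrading from v1.12.8 to v1.12.8".
fn log_upgrade_steps(from: (u32, u32, u32), targets: &[(u32, u32, u32)]) {
    let mut local_from = from;
    for &target in targets {
        println!(
            "Upgrading from v{}.{}.{} to v{}.{}.{}",
            local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
        );
        local_from = target; // the next step starts from this step's target
    }
}
```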

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-26 11:24:10 +00:00
meili-bors[bot]
f296c325ad Merge #5325
5325: Documents database stats r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5319

## List

- Create a DatabaseStats struct
- Compute and store the documents database stats in the IndexStats
- Force dumpless upgrade to update the index stats
- When a document addition/modification/deletion is made, we only recompute the database stats for the added/modified/deleted documents
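A hedged sketch of what such a struct could look like; the field and method names are inferred from the `documents_database_stats` payloads and accessors visible in the diffs below, and the real definition lives in milli's `database_stats` module.

```rust
use serde::{Deserialize, Serialize};

/// Illustrative only; names inferred from the serialized stats shown in the
/// diffs below.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DatabaseStats {
    number_of_entries: u64,
    total_key_size: u64,
    total_value_size: u64,
}

impl DatabaseStats {
    pub fn number_of_entries(&self) -> u64 {
        self.number_of_entries
    }

    pub fn total_value_size(&self) -> u64 {
        self.total_value_size
    }

    /// `checked_div` guards the zero-document case (see the
    /// "Use checked_div in average computation" commit below).
    pub fn average_value_size(&self) -> u64 {
        self.total_value_size.checked_div(self.number_of_entries).unwrap_or(0)
    }
}
```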

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2025-02-26 10:03:45 +00:00
ManyTheFish
df2fcac36c Fix fmt 2025-02-26 10:35:03 +01:00
Many the fish
5035487208 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 10:28:51 +01:00
Many the fish
8ccd090f40 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 10:28:25 +01:00
Kerollmops
834995b293 Fix the dumpless upgrade log 2025-02-26 10:14:55 +01:00
meili-bors[bot]
52b8abadf4 Merge #5367
5367: Bump mini-dashboard to v0.2.17 r=curquiza a=Strift

# Pull Request

## Related issue
Fixes #5361 

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Strift <lau.cazanove@gmail.com>
2025-02-25 13:28:49 +00:00
Strift
21f7c6f5af Bump 2025-02-25 20:42:43 +08:00
meili-bors[bot]
e937ba90c2 Merge #5346
5346: Hotfix typo tolerance bug r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #5240

## What does this PR do?
- Add a test reproducing the bug
- Fix the bug by relying on the exact_word database

## Explanation

The new indexer introduced in v1.12 does not put the exact-attribute words in the word FST, whereas the old indexer did.
So two fixes were possible:
1) Add the words from the exact-words database to the FST, knowing that they should never be retrieved with a typo
2) Make the search check the exact-word database in addition to the word FST to know whether the word exists

This PR implements the second fix.
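A hedged sketch of the second fix, with illustrative names rather than milli's exact API (the real check happens while computing the zero-typo query, per the "Check the exact_word database when computing zero typo query" commit below): a word qualifies if it is found in the words FST or in the exact-word database.

```rust
use std::collections::HashSet;

/// Illustrative only: since the new indexer no longer adds exact-attribute
/// words to the FST, the exact-word database must be consulted as well.
fn is_zero_typo_candidate(
    word: &str,
    words_fst: &fst::Set<Vec<u8>>,
    exact_words: &HashSet<String>,
) -> bool {
    words_fst.contains(word) || exact_words.contains(word)
}
```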

## Impact of the bug

A word can't be retrieved if it only appears in attributes listed in the `typoTolerance.disableOnAttributes` setting.


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-20 08:42:46 +00:00
meili-bors[bot]
8607a166d0 Merge #5353
5353: Update version for the next release (v1.13.1) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-18 10:57:55 +00:00
Kerollmops
abfce7d9b8 Update the snapshots 2025-02-18 11:31:20 +01:00
Kerollmops
9e94b101ea Update version for the next release (v1.13.1) in Cargo.toml 2025-02-18 09:19:17 +00:00
ManyTheFish
57b26f8441 fix clippy 2025-02-17 16:41:34 +01:00
ManyTheFish
9505f15c85 Dumpless upgrade 2025-02-17 16:37:17 +01:00
ManyTheFish
285c72a960 Update Snapshots 2025-02-17 16:36:58 +01:00
ManyTheFish
9a33628331 Implement Incremental document database stats computing 2025-02-17 16:36:33 +01:00
ManyTheFish
1bd57a9a94 Use checked_div in average computation 2025-02-17 11:02:04 +01:00
ManyTheFish
be676f9977 Fix zero division 2025-02-17 11:01:06 +01:00
ManyTheFish
fa27327db5 fix clippy 2025-02-17 11:01:06 +01:00
ManyTheFish
cd4ba395e4 fix snapshots 2025-02-17 11:01:06 +01:00
ManyTheFish
22bdec7e74 Add document database stats 2025-02-17 11:01:06 +01:00
ManyTheFish
96ba62da36 Check the exact_word database when computing zero typo query 2025-02-13 14:02:53 +01:00
meili-bors[bot]
5e7803632d Merge #5342
5342: Fix workload sha r=dureuill a=ManyTheFish

The dataset shasum was wrong for some workloads, making the `/bench workloads/*.json` command crash

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 16:27:09 +00:00
meili-bors[bot]
885710a07b Merge #5341
5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- Force dumpless upgrade to recompute stats
- Add tests


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 15:46:37 +00:00
ManyTheFish
c55fdad2c3 Fix dumpless upgrade target version 2025-02-12 16:35:05 +01:00
ManyTheFish
1caad4c4b0 Add multiple embeddings for the same embedder in tests 2025-02-12 16:13:34 +01:00
ManyTheFish
8419ed52a1 fix clippy 2025-02-12 14:38:51 +01:00
ManyTheFish
a65c52cc97 Convert dump test into snapshots 2025-02-12 14:14:10 +01:00
ManyTheFish
49e9655c24 Update snapshots 2025-02-12 14:05:32 +01:00
meili-bors[bot]
fa763ca5dc Merge #5339
5339: Add back timeout from v1.11.3 r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5337

## What does this PR do?
- Fix a regression compared with v1.11 by reintroducing the 30s timeout on all REST API calls.
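A hedged illustration of the kind of change, assuming the REST calls go through a `ureq` agent (the actual client construction in Meilisearch may differ):

```rust
use std::time::Duration;

/// Illustrative only: build the HTTP agent used for REST API calls with an
/// overall 30-second timeout per request, as in v1.11.3.
fn rest_agent() -> ureq::Agent {
    ureq::AgentBuilder::new()
        .timeout(Duration::from_secs(30))
        .build()
}
```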

Thanks to `@migueltarga` for reporting the issue


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-12 12:50:27 +00:00
ManyTheFish
c7aeb554b2 Add tests 2025-02-12 13:37:41 +01:00
ManyTheFish
88d9d47928 Fix benchmark sha 2025-02-12 13:27:15 +01:00
Louis Dureuil
8e0d8d31f9 Add back timeout from v1.11.3 2025-02-12 11:53:00 +01:00
meili-bors[bot]
81a38099ec Merge #5336
5336: Meilitool Hair Dryer r=dureuill a=Kerollmops

This pull request introduces a new subcommand to hair dry a specific part of specific indexes. It is useful when [the memory-mapped pages are not hot in the cache](https://arc.net/l/quote/ixhcdwcq) and must be. Hair drying those interesting pages makes the search requests using the vector store much faster.

The previous technique used the "cat method," which consists of reading the whole LMDB data file and piping it into the null file descriptor. By doing that, the whole LMDB data file becomes hot in the cache. However, when the database is large, at least 30% of it consists of free, unused pages, and many other pages don't need to be hot either, e.g., raw JSON documents or uninteresting parts of the inverted index.

This new subcommand reads all the Arroy pages of a given index to make them hot, and only those. More coming...

The current algorithm is single-threaded and takes a lot of time. I am in the process of multithreading it. This is the time it takes to hair dry a 305GiB database with a single thread:

```
real    21m51.054s
user    0m3.155s
sys     0m19.393s
```
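A rough sketch of the idea, assuming heed's iterator API (this is not the meilitool code): iterating over every entry of a database forces LMDB to fault the corresponding memory-mapped pages into the OS page cache.

```rust
use std::hint::black_box;

/// Illustrative only: touch every key/value so the backing pages become hot.
fn warm_database(
    rtxn: &heed::RoTxn,
    db: heed::Database<heed::types::Bytes, heed::types::Bytes>,
) -> heed::Result<()> {
    for entry in db.iter(rtxn)? {
        let (key, value) = entry?;
        // Reading the lengths is enough to fault the pages in; black_box
        // keeps the compiler from optimizing the reads away.
        black_box((key.len(), value.len()));
    }
    Ok(())
}
```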

## To Do
- [ ] (optional) Do the reads in parallel.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-12 10:45:16 +00:00
ManyTheFish
bd27fe7d02 force dumpless upgrade to recompute stats 2025-02-12 11:45:02 +01:00
ManyTheFish
41203f0931 Add embedders stats 2025-02-12 11:37:47 +01:00
meili-bors[bot]
70305b9f71 Merge #5332
5332: Fix geo update r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5331

## What does this PR do?
- Use the merged version that contains all fields instead of the updated version that contains only the updated fields
- Add a test that detects the problem
- As it is the second time that `changes.updated` has caused a bug, I'm changing its name to `only_changed_fields`, hopefully better communicating that old fields are not there
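A hedged sketch of why the merged version matters, using plain `serde_json` maps rather than milli's document types: a partial update carries only the changed fields, so reading `_geo` from it would miss an unchanged geo field; the old document and the update must be merged first.

```rust
use serde_json::{Map, Value};

/// Illustrative only: overlay the changed fields on top of the old document
/// before extracting fields like `_geo`.
fn merged_document(
    old: &Map<String, Value>,
    only_changed_fields: &Map<String, Value>,
) -> Map<String, Value> {
    let mut doc = old.clone();
    for (key, value) in only_changed_fields {
        doc.insert(key.clone(), value.clone());
    }
    doc
}
```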


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-11 18:51:33 +00:00
Louis Dureuil
b83275c9c5 Change the updated* functions to only_new functions, hopefully better communicating what they do 2025-02-11 15:27:10 +01:00
Louis Dureuil
d7f35ee3ba Use merged document instead of updated 2025-02-11 15:27:10 +01:00
Louis Dureuil
1dce341bfb Add test 2025-02-11 15:27:10 +01:00
53 changed files with 1294 additions and 158 deletions

Cargo.lock generated
View File

@@ -394,8 +394,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "arroy"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e"
source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7"
dependencies = [
"bytemuck",
"byteorder",
@@ -414,7 +413,7 @@ dependencies = [
[[package]]
name = "arroy"
version = "0.5.0"
source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7"
source = "git+https://github.com/meilisearch/arroy?rev=55fb0e8006f4f00ad3197b06bda348133f6ffffa#55fb0e8006f4f00ad3197b06bda348133f6ffffa"
dependencies = [
"bytemuck",
"byteorder",
@@ -427,7 +426,7 @@ dependencies = [
"rayon",
"roaring",
"tempfile",
"thiserror 1.0.69",
"thiserror 2.0.9",
]
[[package]]
@@ -503,7 +502,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
[[package]]
name = "benchmarks"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"anyhow",
"bumpalo",
@@ -694,7 +693,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"anyhow",
"time",
@@ -1671,7 +1670,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"anyhow",
"big_s",
@@ -1873,7 +1872,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"tempfile",
"thiserror 2.0.9",
@@ -1895,7 +1894,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"insta",
"nom",
@@ -1915,7 +1914,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"criterion",
"serde_json",
@@ -2054,7 +2053,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"arbitrary",
"bumpalo",
@@ -2743,10 +2742,10 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"anyhow",
"arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"arroy 0.5.0 (git+https://github.com/meilisearch/arroy?rev=55fb0e8006f4f00ad3197b06bda348133f6ffffa)",
"big_s",
"bincode",
"bumpalo",
@@ -2950,7 +2949,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"criterion",
"serde_json",
@@ -3513,9 +3512,9 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
[[package]]
name = "log"
version = "0.4.21"
version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
[[package]]
name = "lzma-rs"
@@ -3569,7 +3568,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"insta",
"md5",
@@ -3578,7 +3577,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"actix-cors",
"actix-http",
@@ -3670,7 +3669,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -3689,7 +3688,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"actix-web",
"anyhow",
@@ -3723,7 +3722,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"anyhow",
"arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
@@ -3758,10 +3757,10 @@ dependencies = [
[[package]]
name = "milli"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"allocator-api2",
"arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"arroy 0.5.0 (git+https://github.com/meilisearch/arroy?rev=55fb0e8006f4f00ad3197b06bda348133f6ffffa)",
"bbqueue",
"big_s",
"bimap",
@@ -4270,7 +4269,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"big_s",
"serde_json",
@@ -6847,7 +6846,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.13.0"
version = "1.13.2"
dependencies = [
"anyhow",
"build-info",

View File

@@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.13.0"
version = "1.13.2"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -44,7 +44,7 @@ ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.5.0"
arroy = { git = "https://github.com/meilisearch/arroy", rev = "55fb0e8006f4f00ad3197b06bda348133f6ffffa" }
big_s = "1.0.2"
crossbeam-channel = "0.5.14"
# fixed version due to format breakages in v1.40

View File

@@ -6,6 +6,7 @@ use std::{fs, thread};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli;
use meilisearch_types::milli::database_stats::DatabaseStats;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
@@ -98,14 +99,25 @@ pub enum IndexStatus {
/// The statistics that can be computed from an `Index` object.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
/// Stats of the documents database.
#[serde(default)]
pub documents_database_stats: DatabaseStats,
#[serde(default, skip_serializing)]
pub number_of_documents: Option<u64>,
/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Number of embeddings in the index.
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index.
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
pub number_of_embedded_documents: Option<u64>,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
@@ -130,8 +142,12 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let arroy_stats = index.arroy_stats(rtxn)?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
number_of_embedded_documents: Some(arroy_stats.documents.len()),
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
number_of_documents: None,
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),

View File

@@ -365,7 +365,8 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
let stats = mapper.stats_of(rtxn, &name).unwrap();
s.push_str(&format!(
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
stats.number_of_documents, stats.field_distribution
stats.documents_database_stats.number_of_entries(),
stats.field_distribution
));
}

View File

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -58,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }

View File

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -38,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -41,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -44,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -903,15 +903,21 @@ fn create_and_list_index() {
index_scheduler.index("kefir").unwrap();
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#"
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
[
1,
[
[
"kefir",
{
"number_of_documents": 0,
"documents_database_stats": {
"numberOfEntries": 0,
"totalKeySize": 0,
"totalValueSize": 0
},
"database_size": "[bytes]",
"number_of_embeddings": 0,
"number_of_embedded_documents": 0,
"used_database_size": "[bytes]",
"primary_key": null,
"field_distribution": {},
@@ -921,5 +927,5 @@ fn create_and_list_index() {
]
]
]
"#);
"###);
}

View File

@@ -24,10 +24,11 @@ pub fn upgrade_index_scheduler(
let current_minor = to.1;
let current_patch = to.2;
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&V1_12_ToCurrent {}];
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&ToCurrentNoOp {}];
let start = match from {
(1, 12, _) => 0,
(1, 13, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
@@ -46,20 +47,19 @@ pub fn upgrade_index_scheduler(
}
};
let mut current_version = from;
info!("Upgrading the task queue");
let mut local_from = from;
for upgrade in upgrade_functions[start..].iter() {
let target = upgrade.target_version();
info!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
from.0, from.1, from.2, current_version.0, current_version.1, current_version.2
local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
);
let mut wtxn = env.write_txn()?;
upgrade.upgrade(env, &mut wtxn, from)?;
upgrade.upgrade(env, &mut wtxn, local_from)?;
versioning.set_version(&mut wtxn, target)?;
wtxn.commit()?;
current_version = target;
local_from = target;
}
let mut wtxn = env.write_txn()?;
@@ -86,9 +86,9 @@ pub fn upgrade_index_scheduler(
}
#[allow(non_camel_case_types)]
struct V1_12_ToCurrent {}
struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for V1_12_ToCurrent {
impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade(
&self,
_env: &Env,

View File

@@ -169,5 +169,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.16/build.zip"
sha1 = "68f83438a114aabbe76bc9fe480071e741996662"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.17/build.zip"
sha1 = "29e92ce25f306208a9c86f013279c736bdc1e034"

View File

@@ -364,7 +364,7 @@ fn check_version(
let (bin_major, bin_minor, bin_patch) = binary_version;
let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?;
if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch {
if db_major != bin_major || db_minor != bin_minor || db_patch != bin_patch {
if opt.experimental_dumpless_upgrade {
update_version_file_for_dumpless_upgrade(
opt,

View File

@@ -494,8 +494,18 @@ pub async fn delete_index(
pub struct IndexStats {
/// Number of documents in the index
pub number_of_documents: u64,
/// Size of the documents database, in bytes.
pub raw_document_db_size: u64,
/// Average size of a document in the documents database.
pub avg_document_size: u64,
/// Whether or not the index is currently ingesting document
pub is_indexing: bool,
/// Number of embeddings in the index
#[serde(skip_serializing_if = "Option::is_none")]
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index
#[serde(skip_serializing_if = "Option::is_none")]
pub number_of_embedded_documents: Option<u64>,
/// Association of every field name with the number of times it occurs in the documents.
#[schema(value_type = HashMap<String, u64>)]
pub field_distribution: FieldDistribution,
@@ -504,8 +514,15 @@ pub struct IndexStats {
impl From<index_scheduler::IndexStats> for IndexStats {
fn from(stats: index_scheduler::IndexStats) -> Self {
IndexStats {
number_of_documents: stats.inner_stats.number_of_documents,
number_of_documents: stats
.inner_stats
.number_of_documents
.unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(),
avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
is_indexing: stats.is_indexing,
number_of_embeddings: stats.inner_stats.number_of_embeddings,
number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents,
field_distribution: stats.inner_stats.field_distribution,
}
}
@@ -524,6 +541,10 @@ impl From<index_scheduler::IndexStats> for IndexStats {
(status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!(
{
"numberOfDocuments": 10,
"rawDocumentDbSize": 10,
"avgDocumentSize": 10,
"numberOfEmbeddings": 10,
"numberOfEmbeddedDocuments": 10,
"isIndexing": true,
"fieldDistribution": {
"genre": 10,

View File

@@ -392,6 +392,9 @@ pub struct Stats {
"indexes": {
"movies": {
"numberOfDocuments": 10,
"rawDocumentDbSize": 100,
"maxDocumentSize": 16,
"avgDocumentSize": 10,
"isIndexing": true,
"fieldDistribution": {
"genre": 10,

View File

@@ -1803,6 +1803,275 @@ async fn add_documents_with_geo_field() {
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"results": [
{
"id": "1"
},
{
"id": "2",
"_geo": null
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
}
},
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
}
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
}
#[actix_rt::test]
async fn update_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
let (task, _status_code) = index.add_documents(documents, None).await;
let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"batchUid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
let updated_documents = json!([{
"id": "3",
"doggo": "kefir",
}]);
let (task, _status_code) = index.update_documents(updated_documents, None).await;
let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 2,
"batchUid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"results": [
{
"id": "1"
},
{
"id": "2",
"_geo": null
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"doggo": "kefir"
},
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
}
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK");
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
},
"doggo": "kefir",
"_geoDistance": 5522018
},
{
"id": "1"
},
{
"id": "2",
"_geo": null
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"###);
}
#[actix_rt::test]

View File

@@ -160,7 +160,11 @@ async fn delete_document_by_filter() {
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 4,
"rawDocumentDbSize": 42,
"avgDocumentSize": 10,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 3,
"id": 4
@@ -207,7 +211,11 @@ async fn delete_document_by_filter() {
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 16,
"avgDocumentSize": 8,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 1,
"id": 2
@@ -273,7 +281,11 @@ async fn delete_document_by_filter() {
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 1,
"rawDocumentDbSize": 12,
"avgDocumentSize": 12,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"color": 1,
"id": 1

View File

@@ -27,9 +27,26 @@ async fn import_dump_v1_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -172,7 +189,11 @@ async fn import_dump_v1_movie_with_settings() {
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
@@ -333,9 +354,26 @@ async fn import_dump_v1_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -483,9 +521,26 @@ async fn import_dump_v2_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -623,9 +678,26 @@ async fn import_dump_v2_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -773,9 +845,26 @@ async fn import_dump_v2_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -920,9 +1009,26 @@ async fn import_dump_v3_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1060,9 +1166,26 @@ async fn import_dump_v3_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1210,9 +1333,26 @@ async fn import_dump_v3_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1357,9 +1497,26 @@ async fn import_dump_v4_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1497,9 +1654,26 @@ async fn import_dump_v4_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"genres": 53,
"id": 53,
"overview": 53,
"poster": 53,
"release_date": 53,
"title": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1647,9 +1821,26 @@ async fn import_dump_v4_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(
stats,
json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"description": 53,
"id": 53,
"name": 53,
"summary": 53,
"total_downloads": 53,
"version": 53
}
}
"###
);
let (settings, code) = index.settings().await;
@@ -1798,33 +1989,37 @@ async fn import_dump_v5() {
server.wait_task(task["uid"].as_u64().unwrap()).await;
}
let expected_stats = json!({
"numberOfDocuments": 10,
"isIndexing": false,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
});
let index1 = server.index("test");
let index2 = server.index("test2");
let (stats, code) = index1.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(stats, expected_stats);
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 10,
"rawDocumentDbSize": 6782,
"avgDocumentSize": 678,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
}
"###);
let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
@@ -1835,7 +2030,34 @@ async fn import_dump_v5() {
let (stats, code) = index2.stats().await;
snapshot!(code, @"200 OK");
assert_eq!(stats, expected_stats);
snapshot!(
json_string!(stats),
@r###"
{
"numberOfDocuments": 10,
"rawDocumentDbSize": 6782,
"avgDocumentSize": 678,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"cast": 10,
"director": 10,
"genres": 10,
"id": 10,
"overview": 10,
"popularity": 10,
"poster_path": 10,
"producer": 10,
"production_companies": 10,
"release_date": 10,
"tagline": 10,
"title": 10,
"vote_average": 10,
"vote_count": 10
}
}
"###);
let (keys, code) = server.list_api_keys("").await;
snapshot!(code, @"200 OK");

View File

@@ -128,6 +128,40 @@ async fn search_with_stop_word() {
.await;
}
#[actix_rt::test]
async fn search_with_typo_settings() {
// related to https://github.com/meilisearch/meilisearch/issues/5240
let server = Server::new().await;
let index = server.index("test");
let (_, code) = index
.update_settings(json!({"typoTolerance": { "disableOnAttributes": ["title", "id"]}}))
.await;
meili_snap::snapshot!(code, @"202 Accepted");
let documents = DOCUMENTS.clone();
let (task, _status_code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await.succeeded();
index
.search(json!({"q": "287947" }), |response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"title": "Shazam!",
"id": "287947",
"color": [
"green",
"blue"
]
}
]
"###);
})
.await;
}
#[actix_rt::test]
async fn phrase_search_with_stop_word() {
// related to https://github.com/meilisearch/meilisearch/issues/3521

View File

@@ -1,3 +1,4 @@
use meili_snap::{json_string, snapshot};
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
@@ -74,3 +75,269 @@ async fn stats() {
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1);
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1);
}
#[actix_rt::test]
async fn add_remove_embeddings() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// 2 embedded documents for 5 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 2 embedded documents for 3 embeddings in total
let documents = json!([
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 2 embedded documents for 2 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// 1 embedded documents for 2 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }},
]);
let (response, code) = index.update_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 1,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
}
#[actix_rt::test]
async fn add_remove_embedded_documents() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// 2 embedded documents for 5 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// delete one embedded document, remaining 1 embedded documents for 3 embeddings in total
let (response, code) = index.delete_document(0).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 1,
"rawDocumentDbSize": 13,
"avgDocumentSize": 13,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 1,
"fieldDistribution": {
"id": 1,
"name": 1
}
}
"###);
}
#[actix_rt::test]
async fn update_embedder_settings() {
let server = Server::new().await;
let index = server.index("doggo");
// 2 embedded documents for 3 embeddings in total
// but no embedders are added in the settings yet so we expect 0 embedded documents for 0 embeddings in total
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
{"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
]);
let (response, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 108,
"avgDocumentSize": 54,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
// add embedders to the settings
// 2 embedded documents for 3 embeddings in total
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
},
"handcrafted": {
"source": "userProvided",
"dimensions": 3,
},
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 108,
"avgDocumentSize": 54,
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,
"fieldDistribution": {
"id": 2,
"name": 2
}
}
"###);
}

View File

@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.13.0");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.13.2");
}
#[actix_rt::test]
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.13.1 is higher than the Meilisearch version 1.13.0. Downgrade is not supported");
snapshot!(err, @"Database version 1.13.3 is higher than the Meilisearch version 1.13.2. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -1,6 +1,5 @@
---
source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
-snapshot_kind: text
---
{
"results": [
@@ -9,7 +8,7 @@ snapshot_kind: text
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -1,6 +1,5 @@
---
source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
-snapshot_kind: text
---
{
"results": [
@@ -13,7 +12,7 @@ snapshot_kind: text
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.0"
"upgradeTo": "v1.13.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -134,7 +134,11 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"rawDocumentDbSize": 109,
"avgDocumentSize": 109,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"age": 1,
"description": 1,
@@ -214,7 +218,11 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"rawDocumentDbSize": 109,
"avgDocumentSize": 109,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"age": 1,
"description": 1,
@@ -228,10 +236,14 @@ async fn check_the_index_scheduler(server: &Server) {
"###);
let index = server.index("kefir");
let (stats, _) = index.stats().await;
snapshot!(stats, @r#"
snapshot!(stats, @r###"
{
"numberOfDocuments": 1,
"rawDocumentDbSize": 109,
"avgDocumentSize": 109,
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
"fieldDistribution": {
"age": 1,
"description": 1,
@@ -240,7 +252,7 @@ async fn check_the_index_scheduler(server: &Server) {
"surname": 1
}
}
"#);
"###);
// Delete all the tasks of a specific batch
let (task, _) = server.delete_tasks("batchUids=10").await;

View File

@@ -85,7 +85,7 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
"no_time",
"sync",
] }
arroy = "0.5.0"
arroy = { git = "https://github.com/meilisearch/arroy", rev = "55fb0e8006f4f00ad3197b06bda348133f6ffffa" }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -0,0 +1,96 @@
use heed::types::Bytes;
use heed::Database;
use heed::RoTxn;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
/// The stats of a database.
pub struct DatabaseStats {
/// The number of entries in the database.
number_of_entries: u64,
/// The total size of the keys in the database.
total_key_size: u64,
/// The total size of the values in the database.
total_value_size: u64,
}
impl DatabaseStats {
/// Returns the stats of the database.
///
/// This function iterates over the whole database and computes the stats.
/// It is not efficient, and the result should be cached somewhere.
pub(crate) fn new(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'_>) -> heed::Result<Self> {
let mut database_stats =
Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 };
let mut iter = database.iter(rtxn)?;
while let Some((key, value)) = iter.next().transpose()? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
database_stats.total_key_size += key_size;
database_stats.total_value_size += value_size;
}
database_stats.number_of_entries = database.len(rtxn)?;
Ok(database_stats)
}
/// Recomputes the stats of the database and returns the new stats.
///
/// This function is used to update the stats of the database when some keys are modified.
/// It is more efficient than the `new` function because it does not iterate over the whole database, only over the modified keys, comparing the before and after states.
pub(crate) fn recompute<I, K>(
mut stats: Self,
database: Database<Bytes, Bytes>,
before_rtxn: &RoTxn<'_>,
after_rtxn: &RoTxn<'_>,
modified_keys: I,
) -> heed::Result<Self>
where
I: IntoIterator<Item = K>,
K: AsRef<[u8]>,
{
for key in modified_keys {
let key = key.as_ref();
if let Some(value) = database.get(after_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_add(key_size);
stats.total_value_size = stats.total_value_size.saturating_add(value_size);
}
if let Some(value) = database.get(before_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_sub(key_size);
stats.total_value_size = stats.total_value_size.saturating_sub(value_size);
}
}
stats.number_of_entries = database.len(after_rtxn)?;
Ok(stats)
}
pub fn average_key_size(&self) -> u64 {
self.total_key_size.checked_div(self.number_of_entries).unwrap_or(0)
}
pub fn average_value_size(&self) -> u64 {
self.total_value_size.checked_div(self.number_of_entries).unwrap_or(0)
}
pub fn number_of_entries(&self) -> u64 {
self.number_of_entries
}
pub fn total_key_size(&self) -> u64 {
self.total_key_size
}
pub fn total_value_size(&self) -> u64 {
self.total_value_size
}
}
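The add-new/subtract-old accounting in `recompute` can be checked without LMDB. Here is a minimal sketch of the same invariant over two in-memory snapshots; the `Stats` type and the `BTreeMap` setup are hypothetical stand-ins, not part of the patch:

use std::collections::BTreeMap;

// Hypothetical stand-in for DatabaseStats: same fields, same accounting.
#[derive(Default, Debug, PartialEq)]
struct Stats { entries: u64, key_bytes: u64, value_bytes: u64 }

// Equivalent of `DatabaseStats::new`: a full scan of the database.
fn full_scan(db: &BTreeMap<Vec<u8>, Vec<u8>>) -> Stats {
    Stats {
        entries: db.len() as u64,
        key_bytes: db.keys().map(|k| k.len() as u64).sum(),
        value_bytes: db.values().map(|v| v.len() as u64).sum(),
    }
}

// Equivalent of `DatabaseStats::recompute`: add the after-state sizes and
// subtract the before-state sizes, touching only the modified keys.
fn recompute(
    mut s: Stats,
    before: &BTreeMap<Vec<u8>, Vec<u8>>,
    after: &BTreeMap<Vec<u8>, Vec<u8>>,
    modified: &[Vec<u8>],
) -> Stats {
    for key in modified {
        if let Some(v) = after.get(key) {
            s.key_bytes += key.len() as u64;
            s.value_bytes += v.len() as u64;
        }
        if let Some(v) = before.get(key) {
            s.key_bytes = s.key_bytes.saturating_sub(key.len() as u64);
            s.value_bytes = s.value_bytes.saturating_sub(v.len() as u64);
        }
    }
    s.entries = after.len() as u64;
    s
}

fn main() {
    let before = BTreeMap::from([(b"a".to_vec(), b"one".to_vec())]);
    let mut after = before.clone();
    after.insert(b"a".to_vec(), b"ONE!".to_vec()); // modified entry
    after.insert(b"b".to_vec(), b"two".to_vec()); // added entry
    let modified = [b"a".to_vec(), b"b".to_vec()];
    // The incremental result matches a full rescan of the after state.
    assert_eq!(recompute(full_scan(&before), &before, &after, &modified), full_scan(&after));
}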

View File

@@ -11,6 +11,7 @@ use rstar::RTree;
use serde::{Deserialize, Serialize};
use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME};
use crate::database_stats::DatabaseStats;
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
use crate::fields_ids_map::FieldsIdsMap;
@@ -22,7 +23,7 @@ use crate::heed_codec::version::VersionCodec;
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
-use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
+use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -74,6 +75,7 @@ pub mod main_key {
pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
pub const FACET_SEARCH: &str = "facet_search";
pub const PREFIX_SEARCH: &str = "prefix_search";
pub const DOCUMENTS_STATS: &str = "documents_stats";
}
pub mod db_name {
@@ -403,6 +405,58 @@ impl Index {
Ok(count.unwrap_or_default())
}
/// Updates the stats of the documents database based on the previous stats and the modified docids.
pub fn update_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
let before_rtxn = self.read_txn()?;
let document_stats = match self.documents_stats(&before_rtxn)? {
Some(before_stats) => DatabaseStats::recompute(
before_stats,
self.documents.remap_types(),
&before_rtxn,
wtxn,
modified_docids.iter().map(|docid| docid.to_be_bytes()),
)?,
None => {
// This should never happen when there are already documents in the index: the documents stats should be present.
// If it happens, it means that the index was not properly initialized/upgraded.
debug_assert_eq!(
self.documents.len(&before_rtxn)?,
0,
"The documents stats should be present when there are documents in the index"
);
tracing::warn!("No documents stats found, creating new ones");
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
}
};
self.put_documents_stats(wtxn, document_stats)?;
Ok(())
}
/// Writes the stats of the documents database.
pub fn put_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
stats: DatabaseStats,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<DatabaseStats>>().put(
wtxn,
main_key::DOCUMENTS_STATS,
&stats,
)
}
/// Returns the stats of the documents database.
pub fn documents_stats(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<DatabaseStats>> {
self.main
.remap_types::<Str, SerdeJson<DatabaseStats>>()
.get(rtxn, main_key::DOCUMENTS_STATS)
}
/* primary key */
/// Writes the documents primary key, this is the field name that is used to store the id.
@@ -1731,6 +1785,18 @@ impl Index {
let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
}
pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
let mut stats = ArroyStats::default();
let embedding_configs = self.embedding_configs(rtxn)?;
for config in embedding_configs {
let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
let reader =
ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
reader.aggregate_stats(rtxn, &mut stats)?;
}
Ok(stats)
}
}
#[derive(Debug, Deserialize, Serialize)]
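A subtlety in `update_documents_stats` above: it opens a fresh read transaction while the write transaction is still live. LMDB readers only observe the last committed state, so `before_rtxn` serves as the "before" snapshot while `wtxn` already carries the "after" state. A minimal sketch of that property with heed (the scratch directory via the tempfile crate is an assumption, and `open` is unsafe in recent heed versions):

use heed::types::Bytes;
use heed::{Database, EnvOpenOptions};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let dir = tempfile::tempdir()?;
    let env = unsafe { EnvOpenOptions::new().open(dir.path())? };

    // Commit an initial value.
    let mut wtxn = env.write_txn()?;
    let db: Database<Bytes, Bytes> = env.create_database(&mut wtxn, None)?;
    db.put(&mut wtxn, b"doc-0", b"old")?;
    wtxn.commit()?;

    // Start an uncommitted write, then open a reader: the reader still sees
    // the committed "before" state, which is what `before_rtxn` relies on.
    let mut wtxn = env.write_txn()?;
    db.put(&mut wtxn, b"doc-0", b"new")?;
    let rtxn = env.read_txn()?;
    assert_eq!(db.get(&rtxn, b"doc-0")?, Some(&b"old"[..]));
    assert_eq!(db.get(&wtxn, b"doc-0")?, Some(&b"new"[..]));
    Ok(())
}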

View File

@@ -10,6 +10,7 @@ pub mod documents;
mod asc_desc;
mod criterion;
pub mod database_stats;
mod error;
mod external_documents_ids;
pub mod facet;

View File

@@ -215,7 +215,7 @@ pub fn partially_initialized_term_from_word(
let mut zero_typo = None;
let mut prefix_of = BTreeSet::new();
-if fst.contains(word) {
+if fst.contains(word) || ctx.index.exact_word_docids.get(ctx.txn, word)?.is_some() {
zero_typo = Some(word_interned);
}

View File

@@ -307,6 +307,7 @@ where
let current_span = tracing::Span::current();
// Run extraction pipeline in parallel.
let mut modified_docids = RoaringBitmap::new();
pool.install(|| {
let settings_diff_cloned = settings_diff.clone();
rayon::spawn(move || {
@@ -367,7 +368,7 @@ where
Err(status) => {
if let Some(typed_chunks) = chunk_accumulator.pop_longest() {
let (docids, is_merged_database) =
-write_typed_chunk_into_index(self.wtxn, self.index, &settings_diff, typed_chunks)?;
+write_typed_chunk_into_index(self.wtxn, self.index, &settings_diff, typed_chunks, &mut modified_docids)?;
if !docids.is_empty() {
final_documents_ids |= docids;
let documents_seen_count = final_documents_ids.len();
@@ -467,6 +468,10 @@ where
Ok(())
}).map_err(InternalError::from)??;
if !settings_diff.settings_update_only {
// Update the stats of the documents database when there is a document update.
self.index.update_documents_stats(self.wtxn, modified_docids)?;
}
// We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?;

View File

@@ -129,6 +129,7 @@ pub(crate) fn write_typed_chunk_into_index(
index: &Index,
settings_diff: &InnerIndexSettingsDiff,
typed_chunks: Vec<TypedChunk>,
modified_docids: &mut RoaringBitmap,
) -> Result<(RoaringBitmap, bool)> {
let mut is_merged_database = false;
match typed_chunks[0] {
@@ -214,6 +215,7 @@ pub(crate) fn write_typed_chunk_into_index(
kind: DocumentOperationKind::Create,
});
docids.insert(docid);
modified_docids.insert(docid);
} else {
db.delete(wtxn, &docid)?;
operations.push(DocumentOperation {
@@ -222,6 +224,7 @@ pub(crate) fn write_typed_chunk_into_index(
kind: DocumentOperationKind::Delete,
});
docids.remove(docid);
modified_docids.insert(docid);
}
}
let external_documents_docids = index.external_documents_ids();

View File

@@ -144,7 +144,7 @@ impl<'doc> Update<'doc> {
)?)
}
-pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> {
+pub fn only_changed_fields(&self) -> DocumentFromVersions<'_, 'doc> {
DocumentFromVersions::new(&self.new)
}
@@ -182,7 +182,7 @@ impl<'doc> Update<'doc> {
let mut cached_current = None;
let mut updated_selected_field_count = 0;
-for entry in self.updated().iter_top_level_fields() {
+for entry in self.only_changed_fields().iter_top_level_fields() {
let (key, updated_value) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
@@ -241,7 +241,7 @@ impl<'doc> Update<'doc> {
Ok(has_deleted_fields)
}
-pub fn updated_vectors(
+pub fn only_changed_vectors(
&self,
doc_alloc: &'doc Bump,
embedders: &'doc EmbeddingConfigs,

View File

@@ -711,15 +711,17 @@ impl DelAddRoaringBitmap {
DelAddRoaringBitmap { del, add }
}
-pub fn apply_to(&self, documents_ids: &mut RoaringBitmap) {
+pub fn apply_to(&self, documents_ids: &mut RoaringBitmap, modified_docids: &mut RoaringBitmap) {
let DelAddRoaringBitmap { del, add } = self;
if let Some(del) = del {
*documents_ids -= del;
*modified_docids |= del;
}
if let Some(add) = add {
*documents_ids |= add;
*modified_docids |= add;
}
}
}
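The widened `apply_to` signature records every deleted and added docid, which is what later feeds `update_documents_stats`. A quick hypothetical check of that contract using roaring directly, inlining the two branches above:

use roaring::RoaringBitmap;

fn main() {
    let del = RoaringBitmap::from_iter([0u32]);
    let add = RoaringBitmap::from_iter([2u32]);
    let mut documents_ids = RoaringBitmap::from_iter([0u32, 1]);
    let mut modified_docids = RoaringBitmap::new();
    // Same effect as apply_to: the net document set changes, while
    // modified_docids accumulates both the removed and the inserted ids.
    documents_ids -= &del;
    modified_docids |= &del;
    documents_ids |= &add;
    modified_docids |= &add;
    assert_eq!(documents_ids.iter().collect::<Vec<_>>(), vec![1, 2]);
    assert_eq!(modified_docids.iter().collect::<Vec<_>>(), vec![0, 2]);
}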

View File

@@ -199,7 +199,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
.transpose()?;
let updated_geo = update
-.updated()
+.merged(rtxn, index, db_fields_ids_map)?
.geo_field()?
.map(|geo| extract_geo_coordinates(external_id, geo))
.transpose()?;

View File

@@ -99,7 +99,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
context.db_fields_ids_map,
&context.doc_alloc,
)?;
-let new_vectors = update.updated_vectors(&context.doc_alloc, self.embedders)?;
+let new_vectors =
+update.only_changed_vectors(&context.doc_alloc, self.embedders)?;
if let Some(new_vectors) = &new_vectors {
unused_vectors_distribution.append(new_vectors)?;

View File

@@ -32,6 +32,7 @@ pub(super) fn extract_all<'pl, 'extractor, DC, MSP>(
field_distribution: &mut BTreeMap<String, u64>,
mut index_embeddings: Vec<IndexEmbeddingConfig>,
document_ids: &mut RoaringBitmap,
modified_docids: &mut RoaringBitmap,
) -> Result<(FacetFieldIdsDelta, Vec<IndexEmbeddingConfig>)>
where
DC: DocumentChanges<'pl>,
@@ -70,7 +71,7 @@ where
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
*current = current.saturating_add_signed(delta);
}
-document_extractor_data.docids_delta.apply_to(document_ids);
+document_extractor_data.docids_delta.apply_to(document_ids, modified_docids);
}
field_distribution.retain(|_, v| *v != 0);
@@ -256,7 +257,7 @@ where
let Some(deladd) = data.remove(&config.name) else {
continue 'data;
};
-deladd.apply_to(&mut config.user_provided);
+deladd.apply_to(&mut config.user_provided, modified_docids);
}
}
}

View File

@@ -129,6 +129,7 @@ where
let index_embeddings = index.embedding_configs(wtxn)?;
let mut field_distribution = index.field_distribution(wtxn)?;
let mut document_ids = index.documents_ids(wtxn)?;
let mut modified_docids = roaring::RoaringBitmap::new();
thread::scope(|s| -> Result<()> {
let indexer_span = tracing::Span::current();
@@ -137,6 +138,7 @@ where
// prevent moving the field_distribution and document_ids in the inner closure...
let field_distribution = &mut field_distribution;
let document_ids = &mut document_ids;
let modified_docids = &mut modified_docids;
let extractor_handle =
Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
pool.install(move || {
@@ -151,6 +153,7 @@ where
field_distribution,
index_embeddings,
document_ids,
modified_docids,
)
})
.unwrap()
@@ -225,6 +228,7 @@ where
embedders,
field_distribution,
document_ids,
modified_docids,
)?;
Ok(())

View File

@@ -113,6 +113,7 @@ where
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub(super) fn update_index(
index: &Index,
wtxn: &mut RwTxn<'_>,
@@ -121,6 +122,7 @@ pub(super) fn update_index(
embedders: EmbeddingConfigs,
field_distribution: std::collections::BTreeMap<String, u64>,
document_ids: roaring::RoaringBitmap,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
if let Some(new_primary_key) = new_primary_key {
@@ -132,6 +134,7 @@ pub(super) fn update_index(
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;
index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
index.update_documents_stats(wtxn, modified_docids)?;
Ok(())
}

View File

@@ -1,7 +1,9 @@
mod v1_12;
+mod v1_13;
use heed::RwTxn;
-use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3};
+use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
+use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Current};
use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};
@@ -26,13 +28,19 @@ pub fn upgrade(
progress: Progress,
) -> Result<bool> {
let from = index.get_version(wtxn)?.unwrap_or(db_version);
-let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()];
+let upgrade_functions: &[&dyn UpgradeIndex] = &[
+&V1_12_To_V1_12_3 {},
+&V1_12_3_To_V1_13_0 {},
+&V1_13_0_To_V1_13_1 {},
+&V1_13_1_To_Current {},
+];
let start = match from {
(1, 12, 0..=2) => 0,
(1, 12, 3..) => 1,
+(1, 13, 0) => 2,
// We must handle the current version in the match because, in case of a failure, some indexes may have been upgraded while others were not.
-(1, 13, _) => return Ok(false),
+(1, 13, _) => 3,
(major, minor, patch) => {
return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into())
}
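The `start` index picks the first pending step; the remainder of `upgrade` (not shown in this hunk) walks the rest of the chain. A hypothetical sketch of that loop, where `put_version` is an assumed helper that persists each intermediate version:

// Hypothetical continuation (names assumed, not shown in the hunk).
let mut must_regenerate = false;
for step in &upgrade_functions[start..] {
    // Each step reports whether follow-up work, such as regenerating the
    // index stats, will be needed once the chain completes.
    must_regenerate |= step.upgrade(wtxn, index, from, progress.clone())?;
    // Recording the reached version lets an interrupted upgrade resume at
    // the right step instead of starting over.
    index.put_version(wtxn, step.target_version())?;
}
Ok(must_regenerate)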

View File

@@ -1,7 +1,6 @@
use heed::RwTxn;
use super::UpgradeIndex;
-use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
@@ -32,9 +31,9 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
}
#[allow(non_camel_case_types)]
-pub(super) struct V1_12_3_To_Current();
+pub(super) struct V1_12_3_To_V1_13_0 {}
-impl UpgradeIndex for V1_12_3_To_Current {
+impl UpgradeIndex for V1_12_3_To_V1_13_0 {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
@@ -42,14 +41,11 @@ impl UpgradeIndex for V1_12_3_To_Current {
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
-Ok(false)
+// Recompute the index stats.
+Ok(true)
}
fn target_version(&self) -> (u32, u32, u32) {
-(
-VERSION_MAJOR.parse().unwrap(),
-VERSION_MINOR.parse().unwrap(),
-VERSION_PATCH.parse().unwrap(),
-)
+(1, 13, 0)
}
}

View File

@@ -0,0 +1,60 @@
use heed::RwTxn;
use super::UpgradeIndex;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::database_stats::DatabaseStats;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct V1_13_0_To_V1_13_1();
impl UpgradeIndex for V1_13_0_To_V1_13_1 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
make_enum_progress! {
enum DocumentsStats {
CreatingDocumentsStats,
}
};
// Create the new documents stats.
progress.update_progress(DocumentsStats::CreatingDocumentsStats);
let stats = DatabaseStats::new(index.documents.remap_types(), wtxn)?;
index.put_documents_stats(wtxn, stats)?;
Ok(true)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 13, 1)
}
}
#[allow(non_camel_case_types)]
pub(super) struct V1_13_1_To_Current();
impl UpgradeIndex for V1_13_1_To_Current {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(
VERSION_MAJOR.parse().unwrap(),
VERSION_MINOR.parse().unwrap(),
VERSION_PATCH.parse().unwrap(),
)
}
}

View File

@@ -410,8 +410,43 @@ impl ArroyWrapper {
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
self.database.remap_data_type()
}
pub fn aggregate_stats(
&self,
rtxn: &RoTxn,
stats: &mut ArroyStats,
) -> Result<(), arroy::Error> {
if self.quantized {
for reader in self.readers(rtxn, self.quantized_db()) {
let reader = reader?;
let documents = reader.item_ids();
if documents.is_empty() {
break;
}
stats.documents |= documents;
stats.number_of_embeddings += documents.len();
}
} else {
for reader in self.readers(rtxn, self.angular_db()) {
let reader = reader?;
let documents = reader.item_ids();
if documents.is_empty() {
break;
}
stats.documents |= documents;
stats.number_of_embeddings += documents.len();
}
}
Ok(())
}
}
#[derive(Debug, Default, Clone)]
pub struct ArroyStats {
pub number_of_embeddings: u64,
pub documents: RoaringBitmap,
}
/// One or multiple embeddings stored consecutively in a flat vector.
pub struct Embeddings<F> {
data: Vec<F>,
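`aggregate_stats` unions each reader's item ids and sums their cardinalities, which is where the `numberOfEmbeddings` and `numberOfEmbeddedDocuments` figures in the tests above come from. A hypothetical sketch of the glue (assumed, not shown in the diff):

use milli::{Index, Result};

// Hypothetical glue: derive the two new `/stats` fields from ArroyStats.
fn embedding_stats(index: &Index) -> Result<(u64, u64)> {
    let rtxn = index.read_txn()?;
    let stats = index.arroy_stats(&rtxn)?;
    // numberOfEmbeddings counts every stored vector (5 in the test above);
    // numberOfEmbeddedDocuments counts distinct docids (2 in the test above).
    Ok((stats.number_of_embeddings, stats.documents.len()))
}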

View File

@@ -130,6 +130,7 @@ impl Embedder {
let client = ureq::AgentBuilder::new()
.max_idle_connections(REQUEST_PARALLELISM * 2)
.max_idle_connections_per_host(REQUEST_PARALLELISM * 2)
.timeout(std::time::Duration::from_secs(30))
.build();
let request = Request::new(options.request)?;

View File

@@ -31,7 +31,7 @@
"hackernews-modified-number-filters.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson",
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
}
},
"precommands": [

View File

@@ -31,7 +31,7 @@
"hackernews-modified-string-filters.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson",
"sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c"
"sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802"
}
},
"precommands": [