Merge #5341

5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- force dumpless upgrade to recompute stats
- add tests

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-10-31 07:56:28 +00:00 · 2025-02-12 15:46:37 +00:00
parent fa763ca5dc c55fdad2c3
commit 885710a07b
12 changed files with 626 additions and 72 deletions
--- a/crates/index-scheduler/src/index_mapper/mod.rs
+++ b/crates/index-scheduler/src/index_mapper/mod.rs
@@ -106,6 +106,12 @@ pub struct IndexStats {
    /// are not returned to the disk after a deletion, this number is typically larger than
    /// `used_database_size` that only includes the size of the used pages.
    pub database_size: u64,
+    /// Number of embeddings in the index.
+    /// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
+    pub number_of_embeddings: Option<u64>,
+    /// Number of embedded documents in the index.
+    /// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
+    pub number_of_embedded_documents: Option<u64>,
    /// Size taken by the used pages of the index' DB, in bytes.
    ///
    /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
@@ -130,8 +136,11 @@ impl IndexStats {
    ///
    /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
    pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
+        let arroy_stats = index.arroy_stats(rtxn)?;
        Ok(IndexStats {
            number_of_documents: index.number_of_documents(rtxn)?,
+            number_of_embeddings: Some(arroy_stats.number_of_embeddings),
+            number_of_embedded_documents: Some(arroy_stats.documents.len()),
            database_size: index.on_disk_size()?,
            used_database_size: index.used_size()?,
            primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
--- a/crates/index-scheduler/src/scheduler/test.rs
+++ b/crates/index-scheduler/src/scheduler/test.rs
@@ -903,7 +903,7 @@ fn create_and_list_index() {

    index_scheduler.index("kefir").unwrap();
    let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
-    snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#"
+    snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
    [
      1,
      [
@@ -912,6 +912,8 @@ fn create_and_list_index() {
          {
            "number_of_documents": 0,
            "database_size": "[bytes]",
+            "number_of_embeddings": 0,
+            "number_of_embedded_documents": 0,
            "used_database_size": "[bytes]",
            "primary_key": null,
            "field_distribution": {},
@@ -921,5 +923,5 @@ fn create_and_list_index() {
        ]
      ]
    ]
-    "#);
+    "###);
 }
--- a/crates/meilisearch/src/routes/indexes/mod.rs
+++ b/crates/meilisearch/src/routes/indexes/mod.rs
@@ -496,6 +496,12 @@ pub struct IndexStats {
    pub number_of_documents: u64,
    /// Whether or not the index is currently ingesting document
    pub is_indexing: bool,
+    /// Number of embeddings in the index
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub number_of_embeddings: Option<u64>,
+    /// Number of embedded documents in the index
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub number_of_embedded_documents: Option<u64>,
    /// Association of every field name with the number of times it occurs in the documents.
    #[schema(value_type = HashMap<String, u64>)]
    pub field_distribution: FieldDistribution,
@@ -506,6 +512,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
        IndexStats {
            number_of_documents: stats.inner_stats.number_of_documents,
            is_indexing: stats.is_indexing,
+            number_of_embeddings: stats.inner_stats.number_of_embeddings,
+            number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents,
            field_distribution: stats.inner_stats.field_distribution,
        }
    }
@@ -524,6 +532,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
        (status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!(
            {
                "numberOfDocuments": 10,
+                "numberOfEmbeddings": 10,
+                "numberOfEmbeddedDocuments": 10,
                "isIndexing": true,
                "fieldDistribution": {
                    "genre": 10,
--- a/crates/meilisearch/tests/documents/delete_documents.rs
+++ b/crates/meilisearch/tests/documents/delete_documents.rs
@@ -161,6 +161,8 @@ async fn delete_document_by_filter() {
    {
      "numberOfDocuments": 4,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "color": 3,
        "id": 4
@@ -208,6 +210,8 @@ async fn delete_document_by_filter() {
    {
      "numberOfDocuments": 2,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "color": 1,
        "id": 2
@@ -274,6 +278,8 @@ async fn delete_document_by_filter() {
    {
      "numberOfDocuments": 1,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "color": 1,
        "id": 1
--- a/crates/meilisearch/tests/dumps/mod.rs
+++ b/crates/meilisearch/tests/dumps/mod.rs
@@ -27,9 +27,24 @@ async fn import_dump_v1_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -173,6 +188,8 @@ async fn import_dump_v1_movie_with_settings() {
    {
      "numberOfDocuments": 53,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "genres": 53,
        "id": 53,
@@ -333,9 +350,24 @@ async fn import_dump_v1_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -483,9 +515,24 @@ async fn import_dump_v2_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -623,9 +670,24 @@ async fn import_dump_v2_movie_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -773,9 +835,24 @@ async fn import_dump_v2_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -920,9 +997,24 @@ async fn import_dump_v3_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1060,9 +1152,24 @@ async fn import_dump_v3_movie_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1210,9 +1317,24 @@ async fn import_dump_v3_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1357,9 +1479,24 @@ async fn import_dump_v4_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1497,9 +1634,24 @@ async fn import_dump_v4_movie_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1647,9 +1799,24 @@ async fn import_dump_v4_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1798,33 +1965,35 @@ async fn import_dump_v5() {
        server.wait_task(task["uid"].as_u64().unwrap()).await;
    }

-    let expected_stats = json!({
-        "numberOfDocuments": 10,
-        "isIndexing": false,
-        "fieldDistribution": {
-            "cast": 10,
-            "director": 10,
-            "genres": 10,
-            "id": 10,
-            "overview": 10,
-            "popularity": 10,
-            "poster_path": 10,
-            "producer": 10,
-            "production_companies": 10,
-            "release_date": 10,
-            "tagline": 10,
-            "title": 10,
-            "vote_average": 10,
-            "vote_count": 10
-        }
-    });
-
    let index1 = server.index("test");
    let index2 = server.index("test2");

    let (stats, code) = index1.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(stats, expected_stats);
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 10,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "cast": 10,
+        "director": 10,
+        "genres": 10,
+        "id": 10,
+        "overview": 10,
+        "popularity": 10,
+        "poster_path": 10,
+        "producer": 10,
+        "production_companies": 10,
+        "release_date": 10,
+        "tagline": 10,
+        "title": 10,
+        "vote_average": 10,
+        "vote_count": 10
+      }
+    }
+    "###);

    let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await;
    snapshot!(code, @"200 OK");
@@ -1835,7 +2004,32 @@ async fn import_dump_v5() {

    let (stats, code) = index2.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(stats, expected_stats);
+    snapshot!(
+      json_string!(stats),
+      @r###"
+    {
+      "numberOfDocuments": 10,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "cast": 10,
+        "director": 10,
+        "genres": 10,
+        "id": 10,
+        "overview": 10,
+        "popularity": 10,
+        "poster_path": 10,
+        "producer": 10,
+        "production_companies": 10,
+        "release_date": 10,
+        "tagline": 10,
+        "title": 10,
+        "vote_average": 10,
+        "vote_count": 10
+      }
+    }
+    "###);

    let (keys, code) = server.list_api_keys("").await;
    snapshot!(code, @"200 OK");
--- a/crates/meilisearch/tests/stats/mod.rs
+++ b/crates/meilisearch/tests/stats/mod.rs
@@ -1,3 +1,4 @@
+use meili_snap::{json_string, snapshot};
 use time::format_description::well_known::Rfc3339;
 use time::OffsetDateTime;

@@ -74,3 +75,253 @@ async fn stats() {
    assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1);
    assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1);
 }
+
+#[actix_rt::test]
+async fn add_remove_embeddings() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+            "manual": {
+                "source": "userProvided",
+                "dimensions": 3,
+            },
+            "handcrafted": {
+                "source": "userProvided",
+                "dimensions": 3,
+            },
+
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // 2 embedded documents for 5 embeddings in total
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+      {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 5,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // 2 embedded documents for 3 embeddings in total
+    let documents = json!([
+      {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
+    ]);
+
+    let (response, code) = index.update_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 3,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // 2 embedded documents for 2 embeddings in total
+    let documents = json!([
+        {"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }},
+    ]);
+
+    let (response, code) = index.update_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 2,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // 1 embedded document for 2 embeddings in total
+    let documents = json!([
+        {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+        {"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }},
+    ]);
+
+    let (response, code) = index.update_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 2,
+      "numberOfEmbeddedDocuments": 1,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn add_remove_embedded_documents() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+            "manual": {
+                "source": "userProvided",
+                "dimensions": 3,
+            },
+            "handcrafted": {
+                "source": "userProvided",
+                "dimensions": 3,
+            },
+
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // 2 embedded documents for 5 embeddings in total
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+      {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 5,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // delete one embedded document, leaving 1 embedded document with 3 embeddings in total
+    let (response, code) = index.delete_document(0).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 1,
+      "isIndexing": false,
+      "numberOfEmbeddings": 3,
+      "numberOfEmbeddedDocuments": 1,
+      "fieldDistribution": {
+        "id": 1,
+        "name": 1
+      }
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn update_embedder_settings() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    // 2 embedded documents for 3 embeddings in total
+    // but no embedders are added in the settings yet so we expect 0 embedded documents for 0 embeddings in total
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+      {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // add embedders to the settings
+    // 2 embedded documents for 3 embeddings in total
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+            "manual": {
+                "source": "userProvided",
+                "dimensions": 3,
+            },
+            "handcrafted": {
+                "source": "userProvided",
+                "dimensions": 3,
+            },
+
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 3,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+}
--- a/crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
+++ b/crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
@@ -135,6 +135,8 @@ async fn check_the_index_scheduler(server: &Server) {
        "kefir": {
          "numberOfDocuments": 1,
          "isIndexing": false,
+          "numberOfEmbeddings": 0,
+          "numberOfEmbeddedDocuments": 0,
          "fieldDistribution": {
            "age": 1,
            "description": 1,
@@ -215,6 +217,8 @@ async fn check_the_index_scheduler(server: &Server) {
        "kefir": {
          "numberOfDocuments": 1,
          "isIndexing": false,
+          "numberOfEmbeddings": 0,
+          "numberOfEmbeddedDocuments": 0,
          "fieldDistribution": {
            "age": 1,
            "description": 1,
@@ -228,10 +232,12 @@ async fn check_the_index_scheduler(server: &Server) {
    "###);
    let index = server.index("kefir");
    let (stats, _) = index.stats().await;
-    snapshot!(stats, @r#"
+    snapshot!(stats, @r###"
    {
      "numberOfDocuments": 1,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "age": 1,
        "description": 1,
@@ -240,7 +246,7 @@ async fn check_the_index_scheduler(server: &Server) {
        "surname": 1
      }
    }
-    "#);
+    "###);

    // Delete all the tasks of a specific batch
    let (task, _) = server.delete_tasks("batchUids=10").await;
--- a/crates/milli/src/index.rs
+++ b/crates/milli/src/index.rs
@@ -22,7 +22,7 @@ use crate::heed_codec::version::VersionCodec;
 use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
-use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
+use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
 use crate::{
    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
    FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -1731,6 +1731,18 @@ impl Index {
        let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
        Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
    }
+
+    pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
+        let mut stats = ArroyStats::default();
+        let embedding_configs = self.embedding_configs(rtxn)?;
+        for config in embedding_configs {
+            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
+            let reader =
+                ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
+            reader.aggregate_stats(rtxn, &mut stats)?;
+        }
+        Ok(stats)
+    }
 }

 #[derive(Debug, Deserialize, Serialize)]
--- a/crates/milli/src/update/upgrade/mod.rs
+++ b/crates/milli/src/update/upgrade/mod.rs
@@ -1,7 +1,9 @@
 mod v1_12;
+mod v1_13;

 use heed::RwTxn;
-use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3};
+use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
+use v1_13::V1_13_0_To_Current;

 use crate::progress::{Progress, VariableNameStep};
 use crate::{Index, InternalError, Result};
@@ -26,11 +28,13 @@ pub fn upgrade(
    progress: Progress,
 ) -> Result<bool> {
    let from = index.get_version(wtxn)?.unwrap_or(db_version);
-    let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()];
+    let upgrade_functions: &[&dyn UpgradeIndex] =
+        &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_V1_13_0 {}, &V1_13_0_To_Current()];

    let start = match from {
        (1, 12, 0..=2) => 0,
        (1, 12, 3..) => 1,
+        (1, 13, 0) => 2,
        // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
        (1, 13, _) => return Ok(false),
        (major, minor, patch) => {
--- a/crates/milli/src/update/upgrade/v1_12.rs
+++ b/crates/milli/src/update/upgrade/v1_12.rs
@@ -1,7 +1,6 @@
 use heed::RwTxn;

 use super::UpgradeIndex;
-use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
 use crate::progress::Progress;
 use crate::{make_enum_progress, Index, Result};

@@ -32,9 +31,9 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
 }

 #[allow(non_camel_case_types)]
-pub(super) struct V1_12_3_To_Current();
+pub(super) struct V1_12_3_To_V1_13_0 {}

-impl UpgradeIndex for V1_12_3_To_Current {
+impl UpgradeIndex for V1_12_3_To_V1_13_0 {
    fn upgrade(
        &self,
        _wtxn: &mut RwTxn,
@@ -42,14 +41,11 @@ impl UpgradeIndex for V1_12_3_To_Current {
        _original: (u32, u32, u32),
        _progress: Progress,
    ) -> Result<bool> {
-        Ok(false)
+        // recompute the indexes stats
+        Ok(true)
    }

    fn target_version(&self) -> (u32, u32, u32) {
-        (
-            VERSION_MAJOR.parse().unwrap(),
-            VERSION_MINOR.parse().unwrap(),
-            VERSION_PATCH.parse().unwrap(),
-        )
+        (1, 13, 0)
    }
 }
--- a/crates/milli/src/update/upgrade/v1_13.rs
+++ b/crates/milli/src/update/upgrade/v1_13.rs
@@ -0,0 +1,29 @@
+use heed::RwTxn;
+
+use super::UpgradeIndex;
+use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
+use crate::progress::Progress;
+use crate::{Index, Result};
+
+#[allow(non_camel_case_types)]
+pub(super) struct V1_13_0_To_Current();
+
+impl UpgradeIndex for V1_13_0_To_Current {
+    fn upgrade(
+        &self,
+        _wtxn: &mut RwTxn,
+        _index: &Index,
+        _original: (u32, u32, u32),
+        _progress: Progress,
+    ) -> Result<bool> {
+        Ok(false)
+    }
+
+    fn target_version(&self) -> (u32, u32, u32) {
+        (
+            VERSION_MAJOR.parse().unwrap(),
+            VERSION_MINOR.parse().unwrap(),
+            VERSION_PATCH.parse().unwrap(),
+        )
+    }
+}
--- a/crates/milli/src/vector/mod.rs
+++ b/crates/milli/src/vector/mod.rs
@@ -410,8 +410,43 @@ impl ArroyWrapper {
    fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
        self.database.remap_data_type()
    }
+
+    pub fn aggregate_stats(
+        &self,
+        rtxn: &RoTxn,
+        stats: &mut ArroyStats,
+    ) -> Result<(), arroy::Error> {
+        if self.quantized {
+            for reader in self.readers(rtxn, self.quantized_db()) {
+                let reader = reader?;
+                let documents = reader.item_ids();
+                if documents.is_empty() {
+                    break;
+                }
+                stats.documents |= documents;
+                stats.number_of_embeddings += documents.len();
+            }
+        } else {
+            for reader in self.readers(rtxn, self.angular_db()) {
+                let reader = reader?;
+                let documents = reader.item_ids();
+                if documents.is_empty() {
+                    break;
+                }
+                stats.documents |= documents;
+                stats.number_of_embeddings += documents.len();
+            }
+        }
+
+        Ok(())
+    }
 }

+#[derive(Debug, Default, Clone)]
+pub struct ArroyStats {
+    pub number_of_embeddings: u64,
+    pub documents: RoaringBitmap,
+}
 /// One or multiple embeddings stored consecutively in a flat vector.
 pub struct Embeddings<F> {
    data: Vec<F>,