Send each chunk directly to the main thread instead of merging them at the end of the extraction

Remove append function
Compute chunk size based on the input data size and the number of indexing threads
2025-11-26 07:40:31 +00:00 · 2024-01-22 16:30:27 +01:00 · 2024-01-22 16:30:09 +01:00 · 2024-01-22 16:29:44 +01:00 · 2024-01-15 18:41:14 +00:00 · 2024-01-15 17:54:50 +00:00
46 changed files with 1299 additions and 1043 deletions
--- a/.github/workflows/sdks-tests.yml
+++ b/.github/workflows/sdks-tests.yml
@@ -22,7 +22,7 @@ jobs:
    outputs:
      docker-image: ${{ steps.define-image.outputs.docker-image }}
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Define the Docker image we need to use
        id: define-image
        run: |
@@ -46,11 +46,11 @@ jobs:
      MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}

    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-dotnet
      - name: Setup .NET Core
-        uses: actions/setup-dotnet@v3
+        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: "6.0.x"
      - name: Install dependencies
@@ -75,12 +75,12 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-dart
      - uses: dart-lang/setup-dart@v1
        with:
-          sdk: 3.1.1
+          sdk: 'latest'
      - name: Install dependencies
        run: dart pub get
      - name: Run integration tests
@@ -100,10 +100,10 @@ jobs:
          - '7700:7700'
    steps:
      - name: Set up Go
-        uses: actions/setup-go@v4
+        uses: actions/setup-go@v5
        with:
          go-version: stable
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-go
      - name: Get dependencies
@@ -129,11 +129,11 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-java
      - name: Set up Java
-        uses: actions/setup-java@v3
+        uses: actions/setup-java@v4
        with:
          java-version: 8
          distribution: 'zulu'
@@ -156,7 +156,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-js
      - name: Setup node
@@ -191,7 +191,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-php
      - name: Install PHP
@@ -220,11 +220,11 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-python
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
      - name: Install pipenv
        uses: dschep/install-pipenv-action@v1
      - name: Install dependencies
@@ -245,7 +245,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-ruby
      - name: Set up Ruby 3
@@ -270,7 +270,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-rust
      - name: Build
@@ -291,7 +291,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-swift
      - name: Run tests
@@ -314,7 +314,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-js-plugins
      - name: Setup node
@@ -345,7 +345,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-rails
      - name: Set up Ruby 3
@@ -369,7 +369,7 @@ jobs:
        ports:
          - '7700:7700'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          repository: meilisearch/meilisearch-symfony
      - name: Install PHP
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -383,7 +383,7 @@ dependencies = [
 [[package]]
 name = "arroy"
 version = "0.1.0"
-source = "git+https://github.com/meilisearch/arroy.git#4f193fd534acd357b65bfe9eec4b3fed8ece2007"
+source = "git+https://github.com/meilisearch/arroy.git#d372648212e561a4845077cdb9239423d78655a2"
 dependencies = [
 "bytemuck",
 "byteorder",
@@ -1677,9 +1677,9 @@ dependencies = [

 [[package]]
 name = "flate2"
-version = "1.0.26"
+version = "1.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
+checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
 dependencies = [
 "crc32fast",
 "miniz_oxide",
@@ -1701,9 +1701,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

 [[package]]
 name = "form_urlencoded"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
 dependencies = [
 "percent-encoding",
 ]
@@ -2753,9 +2753,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"

 [[package]]
 name = "idna"
-version = "0.4.0"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
 dependencies = [
 "unicode-bidi",
 "unicode-normalization",
@@ -2774,6 +2774,7 @@ dependencies = [
 "dump",
 "enum-iterator",
 "file-store",
+ "flate2",
 "insta",
 "log",
 "meili-snap",
@@ -2789,6 +2790,7 @@ dependencies = [
 "tempfile",
 "thiserror",
 "time",
+ "ureq",
 "uuid 1.5.0",
 ]

@@ -3559,6 +3561,7 @@ dependencies = [
 "tokio",
 "tokio-stream",
 "toml",
+ "url",
 "urlencoding",
 "uuid 1.5.0",
 "vergen",
@@ -4067,9 +4070,9 @@ dependencies = [

 [[package]]
 name = "percent-encoding"
-version = "2.3.0"
+version = "2.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
@@ -5597,13 +5600,14 @@ dependencies = [

 [[package]]
 name = "url"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb"
+checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
 dependencies = [
 "form_urlencoded",
 "idna",
 "percent-encoding",
+ "serde",
 ]

 [[package]]
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2019-2022 Meili SAS
+Copyright (c) 2019-2024 Meili SAS

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f

 - **Search-as-you-type:** find search results in less than 50 milliseconds
 - **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
+- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
 - **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
 - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@@ -18,6 +18,7 @@ derive_builder = "0.12.0"
 dump = { path = "../dump" }
 enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
+flate2 = "1.0.28"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
@@ -30,6 +31,7 @@ synchronoise = "1.0.1"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+ureq = "2.9.1"
 uuid = { version = "1.3.1", features = ["serde", "v4"] }

 [dev-dependencies]
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -60,7 +60,7 @@ pub(crate) enum Batch {
        /// The list of tasks that were processing when this task cancelation appeared.
        previous_processing_tasks: RoaringBitmap,
    },
-    TaskDeletion(Task),
+    TaskDeletions(Vec<Task>),
    SnapshotCreation(Vec<Task>),
    Dump(Task),
    IndexOperation {
@@ -146,13 +146,12 @@ impl Batch {
    pub fn ids(&self) -> Vec<TaskId> {
        match self {
            Batch::TaskCancelation { task, .. }
-            | Batch::TaskDeletion(task)
            | Batch::Dump(task)
            | Batch::IndexCreation { task, .. }
            | Batch::IndexUpdate { task, .. } => vec![task.uid],
-            Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
-                tasks.iter().map(|task| task.uid).collect()
-            }
+            Batch::SnapshotCreation(tasks)
+            | Batch::TaskDeletions(tasks)
+            | Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(),
            Batch::IndexOperation { op, .. } => match op {
                IndexOperation::DocumentOperation { tasks, .. }
                | IndexOperation::Settings { tasks, .. }
@@ -180,7 +179,7 @@ impl Batch {
        use Batch::*;
        match self {
            TaskCancelation { .. }
-            | TaskDeletion(_)
+            | TaskDeletions(_)
            | SnapshotCreation(_)
            | Dump(_)
            | IndexSwap { .. } => None,
@@ -199,7 +198,7 @@ impl fmt::Display for Batch {
        let tasks = self.ids();
        match self {
            Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
-            Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
+            Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
            Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
            Batch::Dump(_) => f.write_str("Dump")?,
            Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
@@ -539,9 +538,9 @@ impl IndexScheduler {

        // 2. we get the next task to delete
        let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
-        if let Some(task_id) = to_delete.min() {
-            let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
-            return Ok(Some(Batch::TaskDeletion(task)));
+        if !to_delete.is_empty() {
+            let tasks = self.get_existing_tasks(rtxn, to_delete)?;
+            return Ok(Some(Batch::TaskDeletions(tasks)));
        }

        // 3. we batch the snapshot.
@@ -681,31 +680,43 @@ impl IndexScheduler {

                Ok(vec![task])
            }
-            Batch::TaskDeletion(mut task) => {
+            Batch::TaskDeletions(mut tasks) => {
                // 1. Retrieve the tasks that matched the query at enqueue-time.
-                let matched_tasks =
+                let mut matched_tasks = RoaringBitmap::new();
+
+                for task in tasks.iter() {
                    if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
-                        tasks
+                        matched_tasks |= tasks;
                    } else {
                        unreachable!()
+                    }
+                }
+
+                let mut wtxn = self.env.write_txn()?;
+                let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
+                wtxn.commit()?;
+
+                for task in tasks.iter_mut() {
+                    task.status = Status::Succeeded;
+                    let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
+                        unreachable!()
                    };

-                let mut wtxn = self.env.write_txn()?;
-                let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?;
+                    let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
+                    deleted_tasks -= tasks;

-                task.status = Status::Succeeded;
-                match &mut task.details {
-                    Some(Details::TaskDeletion {
-                        matched_tasks: _,
-                        deleted_tasks,
-                        original_filter: _,
-                    }) => {
-                        *deleted_tasks = Some(deleted_tasks_count);
+                    match &mut task.details {
+                        Some(Details::TaskDeletion {
+                            matched_tasks: _,
+                            deleted_tasks,
+                            original_filter: _,
+                        }) => {
+                            *deleted_tasks = Some(deleted_tasks_count);
+                        }
+                        _ => unreachable!(),
                    }
-                    _ => unreachable!(),
                }
-                wtxn.commit()?;
-                Ok(vec![task])
+                Ok(tasks)
            }
            Batch::SnapshotCreation(mut tasks) => {
                fs::create_dir_all(&self.snapshots_path)?;
@@ -936,8 +947,8 @@ impl IndexScheduler {
                };

                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
-                *self.currently_updating_index.write().unwrap() =
-                    Some((index_uid.clone(), index.clone()));
+                self.index_mapper
+                    .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

                let mut index_wtxn = index.write_txn()?;
                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
@@ -1351,9 +1362,6 @@ impl IndexScheduler {

                for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
                    let checked_settings = settings.clone().check();
-                    if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
-                        self.features().check_vector("Passing `embedders` in settings")?
-                    }
                    task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
                    apply_settings_to_builder(&checked_settings, &mut builder);

@@ -1438,7 +1446,11 @@ impl IndexScheduler {
    /// Delete each given task from all the databases (if it is deleteable).
    ///
    /// Return the number of tasks that were actually deleted.
-    fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
+    fn delete_matched_tasks(
+        &self,
+        wtxn: &mut RwTxn,
+        matched_tasks: &RoaringBitmap,
+    ) -> Result<RoaringBitmap> {
        // 1. Remove from this list the tasks that we are not allowed to delete
        let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
        let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@@ -1503,7 +1515,7 @@ impl IndexScheduler {
            }
        }

-        Ok(to_delete_tasks.len())
+        Ok(to_delete_tasks)
    }

    /// Cancel each given task from all the databases (if it is cancelable).
--- a/index-scheduler/src/index_mapper/mod.rs
+++ b/index-scheduler/src/index_mapper/mod.rs
@@ -69,6 +69,10 @@ pub struct IndexMapper {
    /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
    enable_mdb_writemap: bool,
    pub indexer_config: Arc<IndexerConfig>,
+
+    /// A few types of long running batches of tasks that act on a single index set this field
+    /// so that a handle to the index is available from other threads (search) in an optimized manner.
+    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
 }

 /// Whether the index is available for use or is forbidden to be inserted back in the index map
@@ -151,6 +155,7 @@ impl IndexMapper {
            index_growth_amount,
            enable_mdb_writemap,
            indexer_config: Arc::new(indexer_config),
+            currently_updating_index: Default::default(),
        })
    }

@@ -303,6 +308,14 @@ impl IndexMapper {

    /// Return an index, may open it if it wasn't already opened.
    pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
+        if let Some((current_name, current_index)) =
+            self.currently_updating_index.read().unwrap().as_ref()
+        {
+            if current_name == name {
+                return Ok(current_index.clone());
+            }
+        }
+
        let uuid = self
            .index_mapping
            .get(rtxn, name)?
@@ -474,4 +487,8 @@ impl IndexMapper {
    pub fn indexer_config(&self) -> &IndexerConfig {
        &self.indexer_config
    }
+
+    pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
+        *self.currently_updating_index.write().unwrap() = index;
+    }
 }
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -37,10 +37,11 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        snapshots_path: _,
        auth_path: _,
        version_file_path: _,
+        webhook_url: _,
+        webhook_authorization_header: _,
        test_breakpoint_sdr: _,
        planned_failures: _,
        run_loop_iteration: _,
-        currently_updating_index: _,
        embedders: _,
    } = scheduler;

--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -34,6 +34,7 @@ pub type TaskId = u32;

 use std::collections::{BTreeMap, HashMap};
 use std::fs::File;
+use std::io::{self, BufReader, Read};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@@ -45,6 +46,8 @@ use dump::{KindDump, TaskDump, UpdateFile};
 pub use error::Error;
 pub use features::RoFeatures;
 use file_store::FileStore;
+use flate2::bufread::GzEncoder;
+use flate2::Compression;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::byteorder::BE;
@@ -54,6 +57,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
+use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use puffin::FrameView;
 use roaring::RoaringBitmap;
@@ -170,8 +174,8 @@ impl ProcessingTasks {
    }

    /// Set the processing tasks to an empty list
-    fn stop_processing(&mut self) {
-        self.processing = RoaringBitmap::new();
+    fn stop_processing(&mut self) -> RoaringBitmap {
+        std::mem::take(&mut self.processing)
    }

    /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
@@ -241,6 +245,10 @@ pub struct IndexSchedulerOptions {
    pub snapshots_path: PathBuf,
    /// The path to the folder containing the dumps.
    pub dumps_path: PathBuf,
+    /// The URL to which we must send the task statuses
+    pub webhook_url: Option<String>,
+    /// The value we will send into the Authorization HTTP header on the webhook URL
+    pub webhook_authorization_header: Option<String>,
    /// The maximum size, in bytes, of the task index.
    pub task_db_size: usize,
    /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -323,6 +331,11 @@ pub struct IndexScheduler {
    /// The maximum number of tasks that will be batched together.
    pub(crate) max_number_of_batched_tasks: usize,

+    /// The webhook URL we should send tasks to after processing every batch.
+    pub(crate) webhook_url: Option<String>,
+    /// The Authorization header to send to the webhook URL.
+    pub(crate) webhook_authorization_header: Option<String>,
+
    /// A frame to output the indexation profiling files to disk.
    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,

@@ -338,10 +351,6 @@ pub struct IndexScheduler {
    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

-    /// A few types of long running batches of tasks that act on a single index set this field
-    /// so that a handle to the index is available from other threads (search) in an optimized manner.
-    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
-
    embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,

    // ================= test
@@ -388,7 +397,8 @@ impl IndexScheduler {
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
            version_file_path: self.version_file_path.clone(),
-            currently_updating_index: self.currently_updating_index.clone(),
+            webhook_url: self.webhook_url.clone(),
+            webhook_authorization_header: self.webhook_authorization_header.clone(),
            embedders: self.embedders.clone(),
            #[cfg(test)]
            test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@@ -487,7 +497,8 @@ impl IndexScheduler {
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
            version_file_path: options.version_file_path,
-            currently_updating_index: Arc::new(RwLock::new(None)),
+            webhook_url: options.webhook_url,
+            webhook_authorization_header: options.webhook_authorization_header,
            embedders: Default::default(),

            #[cfg(test)]
@@ -671,13 +682,6 @@ impl IndexScheduler {
    /// If you need to fetch information from or perform an action on all indexes,
    /// see the `try_for_each_index` function.
    pub fn index(&self, name: &str) -> Result<Index> {
-        if let Some((current_name, current_index)) =
-            self.currently_updating_index.read().unwrap().as_ref()
-        {
-            if current_name == name {
-                return Ok(current_index.clone());
-            }
-        }
        let rtxn = self.env.read_txn()?;
        self.index_mapper.index(&rtxn, name)
    }
@@ -1158,7 +1162,7 @@ impl IndexScheduler {
        };

        // Reset the currently updating index to relinquish the index handle
-        *self.currently_updating_index.write().unwrap() = None;
+        self.index_mapper.set_currently_updating_index(None);

        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@@ -1251,19 +1255,99 @@ impl IndexScheduler {
            }
        }

-        self.processing_tasks.write().unwrap().stop_processing();
+        let processed = self.processing_tasks.write().unwrap().stop_processing();

        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;

        wtxn.commit().map_err(Error::HeedTransaction)?;

+        // We shouldn't crash the tick function if we can't send data to the webhook.
+        let _ = self.notify_webhook(&processed);
+
        #[cfg(test)]
        self.breakpoint(Breakpoint::AfterProcessing);

        Ok(TickOutcome::TickAgain(processed_tasks))
    }

+    /// Once the task changes have been committed, we must send all the tasks that were updated to our webhook, if there is one.
+    fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
+        if let Some(ref url) = self.webhook_url {
+            struct TaskReader<'a, 'b> {
+                rtxn: &'a RoTxn<'a>,
+                index_scheduler: &'a IndexScheduler,
+                tasks: &'b mut roaring::bitmap::Iter<'b>,
+                buffer: Vec<u8>,
+                written: usize,
+            }
+
+            impl<'a, 'b> Read for TaskReader<'a, 'b> {
+                fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
+                    if self.buffer.is_empty() {
+                        match self.tasks.next() {
+                            None => return Ok(0),
+                            Some(task_id) => {
+                                let task = self
+                                    .index_scheduler
+                                    .get_task(self.rtxn, task_id)
+                                    .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
+                                    .ok_or_else(|| {
+                                        io::Error::new(
+                                            io::ErrorKind::Other,
+                                            Error::CorruptedTaskQueue,
+                                        )
+                                    })?;
+
+                                serde_json::to_writer(
+                                    &mut self.buffer,
+                                    &TaskView::from_task(&task),
+                                )?;
+                                self.buffer.push(b'\n');
+                            }
+                        }
+                    }
+
+                    let mut to_write = &self.buffer[self.written..];
+                    let wrote = io::copy(&mut to_write, &mut buf)?;
+                    self.written += wrote as usize;
+
+                    // we wrote everything and must refresh our buffer on the next call
+                    if self.written == self.buffer.len() {
+                        self.written = 0;
+                        self.buffer.clear();
+                    }
+
+                    Ok(wrote as usize)
+                }
+            }
+
+            let rtxn = self.env.read_txn()?;
+
+            let task_reader = TaskReader {
+                rtxn: &rtxn,
+                index_scheduler: self,
+                tasks: &mut updated.into_iter(),
+                buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
+                written: 0,
+            };
+
+            // Gzip-compress the newline-delimited JSON produced by `task_reader` as it is read.
+            let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
+            let request = ureq::post(url).set("Content-Encoding", "gzip");
+            let request = match &self.webhook_authorization_header {
+                Some(header) => request.set("Authorization", header),
+                None => request,
+            };
+
+            if let Err(e) = request.send(reader) {
+                log::error!("While sending data to the webhook: {e}");
+            }
+        }
+
+        Ok(())
+    }
+
    /// Register a task to cleanup the task queue if needed
    fn cleanup_task_queue(&self) -> Result<()> {
        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1677,6 +1761,8 @@ mod tests {
                indexes_path: tempdir.path().join("indexes"),
                snapshots_path: tempdir.path().join("snapshots"),
                dumps_path: tempdir.path().join("dumps"),
+                webhook_url: None,
+                webhook_authorization_header: None,
                task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                enable_mdb_writemap: false,
@@ -2158,10 +2244,7 @@ mod tests {
                .unwrap();
            index_scheduler.assert_internally_consistent();
        }
-        for _ in 0..2 {
-            handle.advance_one_successful_batch();
-            index_scheduler.assert_internally_consistent();
-        }
+        handle.advance_one_successful_batch();

        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
    }
--- a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap
@@ -34,12 +34,10 @@ catto: { number_of_documents: 1, field_distribution: {"id": 1} }
 [timestamp] [3,]
 ----------------------------------------------------------------------
 ### Started At:
-[timestamp] [2,]
-[timestamp] [3,]
+[timestamp] [2,3,]
 ----------------------------------------------------------------------
 ### Finished At:
-[timestamp] [2,]
-[timestamp] [3,]
+[timestamp] [2,3,]
 ----------------------------------------------------------------------
 ### File Store:
 00000000-0000-0000-0000-000000000001
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -344,7 +344,10 @@ impl ErrorCode for milli::Error {
                        Code::InvalidDocumentId
                    }
                    UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
-                    UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
+                    UserError::InvalidFieldForSource { .. }
+                    | UserError::MissingFieldForSource { .. }
+                    | UserError::InvalidOpenAiModel { .. }
+                    | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
                    UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
                    UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
                    UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
--- a/meilisearch-types/src/lib.rs
+++ b/meilisearch-types/src/lib.rs
@@ -9,6 +9,7 @@ pub mod index_uid_pattern;
 pub mod keys;
 pub mod settings;
 pub mod star_or;
+pub mod task_view;
 pub mod tasks;
 pub mod versioning;
 pub use milli::{heed, Index};
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -318,6 +318,21 @@ impl Settings<Unchecked> {
            _kind: PhantomData,
        }
    }
+
+    pub fn validate(self) -> Result<Self, milli::Error> { // consumes the settings, returns them validated
+        self.validate_embedding_settings() // the only validation step performed here
+    }
+
+    fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> { // checks every embedder config
+        let Setting::Set(mut configs) = self.embedders else { return Ok(self) }; // not `Set`: returned as-is
+        for (name, config) in configs.iter_mut() {
+            let config_to_check = std::mem::take(config); // moves the config out, leaving a default in place
+            let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
+            *config = checked_config // overwrite the placeholder with the validated config
+        }
+        self.embedders = Setting::Set(configs); // put back the map that was moved out of `self` above
+        Ok(self)
+    }
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -585,11 +600,12 @@ pub fn settings(
        ),
    };

-    let embedders = index
+    let embedders: BTreeMap<_, _> = index
        .embedding_configs(rtxn)?
        .into_iter()
        .map(|(name, config)| (name, Setting::Set(config.into())))
        .collect();
+    let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };

    Ok(Settings {
        displayed_attributes: match displayed_attributes {
@@ -611,15 +627,12 @@ pub fn settings(
            Some(field) => Setting::Set(field),
            None => Setting::Reset,
        },
-        proximity_precision: match proximity_precision {
-            Some(precision) => Setting::Set(precision),
-            None => Setting::Reset,
-        },
+        proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
        synonyms: Setting::Set(synonyms),
        typo_tolerance: Setting::Set(typo_tolerance),
        faceting: Setting::Set(faceting),
        pagination: Setting::Set(pagination),
-        embedders: Setting::Set(embedders),
+        embedders,
        _kind: PhantomData,
    })
 }
@@ -720,10 +733,11 @@ impl From<RankingRuleView> for Criterion {
    }
 }

-#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
 pub enum ProximityPrecisionView {
+    #[default]
    ByWord,
    ByAttribute,
 }
--- a/meilisearch-types/src/task_view.rs
+++ b/meilisearch-types/src/task_view.rs
@@ -0,0 +1,139 @@
+use serde::Serialize;
+use time::{Duration, OffsetDateTime};
+
+use crate::error::ResponseError;
+use crate::settings::{Settings, Unchecked};
+use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskView { // serializable view of a `Task`; built by `TaskView::from_task`
+    pub uid: TaskId,
+    #[serde(default)]
+    pub index_uid: Option<String>,
+    pub status: Status,
+    #[serde(rename = "type")] // emitted under the key `type` instead of `kind`
+    pub kind: Kind,
+    pub canceled_by: Option<TaskId>,
+    #[serde(skip_serializing_if = "Option::is_none")] // key is omitted when there are no details
+    pub details: Option<DetailsView>,
+    pub error: Option<ResponseError>,
+    #[serde(serialize_with = "serialize_duration", default)]
+    pub duration: Option<Duration>, // `from_task` fills this with `finished_at - started_at`
+    #[serde(with = "time::serde::rfc3339")] // timestamps are serialized as RFC 3339 strings
+    pub enqueued_at: OffsetDateTime,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub started_at: Option<OffsetDateTime>,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub finished_at: Option<OffsetDateTime>,
+}
+
+impl TaskView {
+    pub fn from_task(task: &Task) -> TaskView { // builds the view by copying/cloning fields of `task`
+        TaskView {
+            uid: task.uid,
+            index_uid: task.index_uid().map(ToOwned::to_owned),
+            status: task.status,
+            kind: task.kind.as_kind(),
+            canceled_by: task.canceled_by,
+            details: task.details.clone().map(DetailsView::from),
+            error: task.error.clone(),
+            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start), // `None` unless both are set
+            enqueued_at: task.enqueued_at,
+            started_at: task.started_at,
+            finished_at: task.finished_at,
+        }
+    }
+}
+
+#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DetailsView { // flat view of `Details`; each variant fills only the fields it provides
+    #[serde(skip_serializing_if = "Option::is_none")] // on every field: `None` means the key is omitted
+    pub received_documents: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub indexed_documents: Option<Option<u64>>, // nested option: `Some(None)` is still serialized (as null)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub primary_key: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub provided_ids: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matched_tasks: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub canceled_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub original_filter: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub dump_uid: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)] // the settings' keys are inlined into this object rather than nested
+    pub settings: Option<Box<Settings<Unchecked>>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub swaps: Option<Vec<IndexSwap>>,
+}
+
+impl From<Details> for DetailsView {
+    fn from(details: Details) -> Self { // maps each `Details` variant onto the fields it provides
+        match details {
+            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
+                DetailsView {
+                    received_documents: Some(received_documents),
+                    indexed_documents: Some(indexed_documents),
+                    ..DetailsView::default() // every other field stays `None` and is not serialized
+                }
+            }
+            Details::SettingsUpdate { settings } => {
+                DetailsView { settings: Some(settings), ..DetailsView::default() }
+            }
+            Details::IndexInfo { primary_key } => {
+                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
+            }
+            Details::DocumentDeletion {
+                provided_ids: received_document_ids,
+                deleted_documents,
+            } => DetailsView {
+                provided_ids: Some(received_document_ids),
+                deleted_documents: Some(deleted_documents),
+                original_filter: Some(None), // no filter: the key is still emitted, with a null value
+                ..DetailsView::default()
+            },
+            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
+                DetailsView {
+                    provided_ids: Some(0), // deletion by filter: reported as zero provided ids
+                    original_filter: Some(Some(original_filter)),
+                    deleted_documents: Some(deleted_documents),
+                    ..DetailsView::default()
+                }
+            }
+            Details::ClearAll { deleted_documents } => {
+                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
+            }
+            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    canceled_tasks: Some(canceled_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    deleted_tasks: Some(deleted_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::Dump { dump_uid } => {
+                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
+            }
+            Details::IndexSwap { swaps } => {
+                DetailsView { swaps: Some(swaps), ..Default::default() }
+            }
+        }
+    }
+}
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -104,6 +104,7 @@ walkdir = "2.3.3"
 yaup = "0.2.1"
 serde_urlencoded = "0.7.1"
 termcolor = "1.2.0"
+url = { version = "2.5.0", features = ["serde"] }

 [dev-dependencies]
 actix-rt = "2.8.0"
@@ -153,5 +154,5 @@ greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
-sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.12/build.zip"
+sha1 = "acfe9a018c93eb0604ea87ee87bff7df5474e18e"
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -264,6 +264,8 @@ struct Infos {
    ignore_snapshot_if_db_exists: bool,
    http_addr: bool,
    http_payload_size_limit: Byte,
+    task_queue_webhook: bool,
+    task_webhook_authorization_header: bool,
    log_level: String,
    max_indexing_memory: MaxMemory,
    max_indexing_threads: MaxThreads,
@@ -290,6 +292,8 @@ impl From<Opt> for Infos {
            http_addr,
            master_key: _,
            env,
+            task_webhook_url,
+            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
@@ -343,6 +347,8 @@ impl From<Opt> for Infos {
            http_addr: http_addr != default_http_addr(),
            http_payload_size_limit,
            experimental_max_number_of_batched_tasks,
+            task_queue_webhook: task_webhook_url.is_some(),
+            task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
            log_level: log_level.to_string(),
            max_indexing_memory,
            max_indexing_threads,
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -228,6 +228,8 @@ fn open_or_create_database_unchecked(
            indexes_path: opt.db_path.join("indexes"),
            snapshots_path: opt.snapshot_dir.clone(),
            dumps_path: opt.dump_dir.clone(),
+            webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
+            webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
            task_db_size: opt.max_task_db_size.get_bytes() as usize,
            index_base_map_size: opt.max_index_size.get_bytes() as usize,
            enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -21,6 +21,7 @@ use rustls::RootCertStore;
 use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
 use serde::{Deserialize, Serialize};
 use sysinfo::{RefreshKind, System, SystemExt};
+use url::Url;

 const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

@@ -28,6 +29,8 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
 const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
 const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
 const MEILI_ENV: &str = "MEILI_ENV";
+const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
+const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
 #[cfg(feature = "analytics")]
 const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
 const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -156,6 +159,14 @@ pub struct Opt {
    #[serde(default = "default_env")]
    pub env: String,

+    /// Called whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
+    pub task_webhook_url: Option<Url>,
+
+    /// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
+    pub task_webhook_authorization_header: Option<String>,
+
    /// Deactivates Meilisearch's built-in telemetry when provided.
    ///
    /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -375,6 +386,8 @@ impl Opt {
            http_addr,
            master_key,
            env,
+            task_webhook_url,
+            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
@@ -409,6 +422,16 @@ impl Opt {
            export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
        }
        export_to_env_if_not_present(MEILI_ENV, env);
+        if let Some(task_webhook_url) = task_webhook_url {
+            export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
+        }
+        if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
+            export_to_env_if_not_present(
+                MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
+                task_webhook_authorization_header,
+            );
+        }
+
        #[cfg(feature = "analytics")]
        {
            export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -90,6 +90,11 @@ macro_rules! make_setting_route {
                    ..Default::default()
                };

+                let new_settings = $crate::routes::indexes::settings::validate_settings(
+                    new_settings,
+                    &index_scheduler,
+                )?;
+
                let allow_index_creation =
                    index_scheduler.filters().allow_index_creation(&index_uid);

@@ -453,7 +458,7 @@ make_setting_route!(
            json!({
                "proximity_precision": {
                    "set": precision.is_some(),
-                    "value": precision,
+                    "value": precision.unwrap_or_default(),
                }
            }),
            Some(req),
@@ -582,13 +587,13 @@ fn embedder_analytics(
        for source in s
            .values()
            .filter_map(|config| config.clone().set())
-            .filter_map(|config| config.embedder_options.set())
+            .filter_map(|config| config.source.set())
        {
-            use meilisearch_types::milli::vector::settings::EmbedderSettings;
+            use meilisearch_types::milli::vector::settings::EmbedderSource;
            match source {
-                EmbedderSettings::OpenAi(_) => sources.insert("openAi"),
-                EmbedderSettings::HuggingFace(_) => sources.insert("huggingFace"),
-                EmbedderSettings::UserProvided(_) => sources.insert("userProvided"),
+                EmbedderSource::OpenAi => sources.insert("openAi"),
+                EmbedderSource::HuggingFace => sources.insert("huggingFace"),
+                EmbedderSource::UserProvided => sources.insert("userProvided"),
            };
        }
    };
@@ -651,6 +656,7 @@ pub async fn update_all(
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let new_settings = body.into_inner();
+    let new_settings = validate_settings(new_settings, &index_scheduler)?;

    analytics.publish(
        "Settings Updated".to_string(),
@@ -684,7 +690,8 @@ pub async fn update_all(
                "set": new_settings.distinct_attribute.as_ref().set().is_some()
            },
            "proximity_precision": {
-                "set": new_settings.proximity_precision.as_ref().set().is_some()
+                "set": new_settings.proximity_precision.as_ref().set().is_some(),
+                "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
            },
            "typo_tolerance": {
                "enabled": new_settings.typo_tolerance
@@ -800,3 +807,13 @@ pub async fn delete_all(
    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
 }
+
+fn validate_settings( // shared pre-check for incoming settings, returns them validated
+    settings: Settings<Unchecked>,
+    index_scheduler: &IndexScheduler,
+) -> Result<Settings<Unchecked>, ResponseError> {
+    if matches!(settings.embedders, Setting::Set(_)) { // only explicitly set embedders are gated
+        index_scheduler.features().check_vector("Passing `embedders` in settings")? // error is propagated
+    }
+    Ok(settings.validate()?) // `?` converts the validation error into a `ResponseError`
+}
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
-use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
-use meilisearch_types::tasks::{
-    serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
-};
+use meilisearch_types::task_view::TaskView;
+use meilisearch_types::tasks::{Kind, KindWithContent, Status};
 use serde::Serialize;
 use serde_json::json;
 use time::format_description::well_known::Rfc3339;
@@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
    .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
    .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
 }
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct TaskView {
-    pub uid: TaskId,
-    #[serde(default)]
-    pub index_uid: Option<String>,
-    pub status: Status,
-    #[serde(rename = "type")]
-    pub kind: Kind,
-    pub canceled_by: Option<TaskId>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub details: Option<DetailsView>,
-    pub error: Option<ResponseError>,
-    #[serde(serialize_with = "serialize_duration", default)]
-    pub duration: Option<Duration>,
-    #[serde(with = "time::serde::rfc3339")]
-    pub enqueued_at: OffsetDateTime,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub started_at: Option<OffsetDateTime>,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub finished_at: Option<OffsetDateTime>,
-}
-
-impl TaskView {
-    pub fn from_task(task: &Task) -> TaskView {
-        TaskView {
-            uid: task.uid,
-            index_uid: task.index_uid().map(ToOwned::to_owned),
-            status: task.status,
-            kind: task.kind.as_kind(),
-            canceled_by: task.canceled_by,
-            details: task.details.clone().map(DetailsView::from),
-            error: task.error.clone(),
-            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
-            enqueued_at: task.enqueued_at,
-            started_at: task.started_at,
-            finished_at: task.finished_at,
-        }
-    }
-}
-
-#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct DetailsView {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub received_documents: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub indexed_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub primary_key: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub provided_ids: Option<usize>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub matched_tasks: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub canceled_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub original_filter: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub dump_uid: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(flatten)]
-    pub settings: Option<Box<Settings<Unchecked>>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub swaps: Option<Vec<IndexSwap>>,
-}
-
-impl From<Details> for DetailsView {
-    fn from(details: Details) -> Self {
-        match details {
-            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
-                DetailsView {
-                    received_documents: Some(received_documents),
-                    indexed_documents: Some(indexed_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::SettingsUpdate { settings } => {
-                DetailsView { settings: Some(settings), ..DetailsView::default() }
-            }
-            Details::IndexInfo { primary_key } => {
-                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
-            }
-            Details::DocumentDeletion {
-                provided_ids: received_document_ids,
-                deleted_documents,
-            } => DetailsView {
-                provided_ids: Some(received_document_ids),
-                deleted_documents: Some(deleted_documents),
-                original_filter: Some(None),
-                ..DetailsView::default()
-            },
-            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
-                DetailsView {
-                    provided_ids: Some(0),
-                    original_filter: Some(Some(original_filter)),
-                    deleted_documents: Some(deleted_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::ClearAll { deleted_documents } => {
-                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
-            }
-            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    canceled_tasks: Some(canceled_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    deleted_tasks: Some(deleted_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::Dump { dump_uid } => {
-                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
-            }
-            Details::IndexSwap { swaps } => {
-                DetailsView { swaps: Some(swaps), ..Default::default() }
-            }
-        }
-    }
-}
-
 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
 pub struct TasksFilterQuery {
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -735,6 +735,9 @@ pub fn perform_facet_search(
    if let Some(facet_query) = &facet_query {
        facet_search.query(facet_query);
    }
+    if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
+        facet_search.max_values(max_facets as usize);
+    }

    Ok(FacetSearchResult {
        facet_hits: facet_search.execute()?,
@@ -897,6 +900,14 @@ fn format_fields<'a>(
    let mut matches_position = compute_matches.then(BTreeMap::new);
    let mut document = document.clone();

+    // reduce the formatted option list to the attributes that should be formatted,
+    // instead of all the attributes to display.
+    let formatting_fields_options: Vec<_> = formatted_options
+        .iter()
+        .filter(|(_, option)| option.should_format())
+        .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
+        .collect();
+
    // select the attributes to retrieve
    let displayable_names =
        displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@@ -905,13 +916,15 @@ fn format_fields<'a>(
        // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
        // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
        // highlighted.
-        let format = formatted_options
+        // Warning: the time to compute the format list scales with the number of fields to format;
+        // combined with `map_leaf_values`, which iterates over all the nested fields, this gives quadratic complexity:
+        // d*f, where d is the total number of fields to display and f is the total number of fields to format.
+        let format = formatting_fields_options
            .iter()
-            .filter(|(field, _option)| {
-                let name = field_ids_map.name(**field).unwrap();
+            .filter(|(name, _option)| {
                milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
            })
-            .map(|(_, option)| *option)
+            .map(|(_, option)| **option)
            .reduce(|acc, option| acc.merge(option));
        let mut infos = Vec::new();

@@ -1008,7 +1021,7 @@ fn format_value<'a>(
                    let value = matcher.format(format_options);
                    Value::String(value.into_owned())
                }
-                None => Value::Number(number),
+                None => Value::String(s),
            }
        }
        value => value,
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@@ -59,7 +59,7 @@ async fn import_dump_v1_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -77,8 +77,7 @@ async fn import_dump_v1_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -221,7 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -239,8 +238,7 @@ async fn import_dump_v1_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -369,7 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -387,8 +385,7 @@ async fn import_dump_v1_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -503,7 +500,7 @@ async fn import_dump_v2_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -521,8 +518,7 @@ async fn import_dump_v2_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -649,7 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -667,8 +663,7 @@ async fn import_dump_v2_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -794,7 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -812,8 +807,7 @@ async fn import_dump_v2_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -928,7 +922,7 @@ async fn import_dump_v3_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -946,8 +940,7 @@ async fn import_dump_v3_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1074,7 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1092,8 +1085,7 @@ async fn import_dump_v3_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1219,7 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1237,8 +1229,7 @@ async fn import_dump_v3_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1353,7 +1344,7 @@ async fn import_dump_v4_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1371,8 +1362,7 @@ async fn import_dump_v4_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1499,7 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1517,8 +1507,7 @@ async fn import_dump_v4_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1644,7 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1662,8 +1651,7 @@ async fn import_dump_v4_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1907,8 +1895,7 @@ async fn import_dump_v6_containing_experimental_features() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###);

--- a/meilisearch/tests/search/facet_search.rs
+++ b/meilisearch/tests/search/facet_search.rs
@@ -105,6 +105,28 @@ async fn more_advanced_facet_search() {
    snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
 }

+/// The facet search route must honor the `maxValuesPerFacet` faceting setting:
+/// with a limit of 1, a facet query returns exactly one facet hit.
+#[actix_rt::test]
+async fn simple_facet_search_with_max_values() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
+    index.update_settings_filterable_attributes(json!(["genres"])).await;
+    index.add_documents(documents, None).await;
+    // The document addition is the third update sent to this index, hence the
+    // task uid 2 (assuming uids start at 0 on a fresh server).
+    index.wait_task(2).await;
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
+
+    assert_eq!(code, 200, "{}", response);
+    assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
+}
+
 #[actix_rt::test]
 async fn non_filterable_facet_search_error() {
    let server = Server::new().await;
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -21,9 +21,9 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
    "###);

    let (response, code) = index
-        .update_settings(
-            json!({ "embedders": {"default": {"source": {"userProvided": {"dimensions": 2}}}} }),
-        )
+        .update_settings(json!({ "embedders": {"default": {
+                "source": "userProvided",
+                "dimensions": 2}}} ))
        .await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;
@@ -56,6 +56,15 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    }])
 });

+static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
+    json!([{
+            "title": "Shazam!",
+            "desc": "a Captain Marvel ersatz",
+            "id": "1",
+            "_vectors": {"default": [1.0, 3.0]},
+    }])
+});
+
 #[actix_rt::test]
 async fn simple_search() {
    let server = Server::new().await;
@@ -149,3 +158,23 @@ async fn invalid_semantic_ratio() {
    }
    "###);
 }
+
+/// A pure semantic search (`semanticRatio` of 1.0) on an index holding a single
+/// document must return that document with a ranking and semantic score of 1.0.
+#[actix_rt::test]
+async fn single_document() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
+
+    // The query vector is the same as the `_vectors.default` of the only document.
+    let (response, code) = index
+        .search_post(json!({
+            "vector": [1.0, 3.0],
+            "hybrid": {"semanticRatio": 1.0},
+            "showRankingScore": true,
+        }))
+        .await;
+
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
+}
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -890,13 +890,21 @@ async fn experimental_feature_vector_store() {
    let (response, code) = index
        .update_settings(json!({"embedders": {
            "manual": {
-                "source": {
-                    "userProvided": {"dimensions": 3}
-                }
+                "source": "userProvided",
+                "dimensions": 3,
            }
        }}))
        .await;

+    meili_snap::snapshot!(response, @r###"
+    {
+      "taskUid": 1,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "settingsUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
    meili_snap::snapshot!(code, @"202 Accepted");
    let response = index.wait_task(response.uid()).await;

--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -54,7 +54,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 16);
+    assert_eq!(settings.keys().len(), 15);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@@ -83,7 +83,7 @@ async fn get_settings() {
            "maxTotalHits": 1000,
        })
    );
-    assert_eq!(settings["embedders"], json!({}));
+    assert_eq!(settings["proximityPrecision"], json!("byWord"));
 }

 #[actix_rt::test]
--- a/meilisearch/tests/tasks/mod.rs
+++ b/meilisearch/tests/tasks/mod.rs
@@ -1,4 +1,5 @@
 mod errors;
+mod webhook;

 use meili_snap::insta::assert_json_snapshot;
 use time::format_description::well_known::Rfc3339;
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -0,0 +1,140 @@
+//! To test the webhook, we need to spawn a new server with a URL listening for
+//! post requests. The webhook handle starts a server and forwards all the
+//! received requests into a channel for you to handle.
+
+use std::sync::Arc;
+
+use actix_http::body::MessageBody;
+use actix_web::dev::{ServiceFactory, ServiceResponse};
+use actix_web::web::{Bytes, Data};
+use actix_web::{post, App, HttpResponse, HttpServer};
+use meili_snap::{json_string, snapshot};
+use meilisearch::Opt;
+use tokio::sync::mpsc;
+use url::Url;
+
+use crate::common::{default_settings, Server};
+use crate::json;
+
+/// Forwards the raw body of every `POST /` request into the channel, then
+/// answers with `200 OK`.
+#[post("/")]
+async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
+    let body = body.to_vec();
+    sender.send(body).unwrap();
+    HttpResponse::Ok().into()
+}
+
+/// Builds the actix application of the fake webhook receiver: the
+/// `forward_body` service, with `sender` registered as application data.
+fn create_app(
+    sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
+) -> actix_web::App<
+    impl ServiceFactory<
+        actix_web::dev::ServiceRequest,
+        Config = (),
+        Response = ServiceResponse<impl MessageBody>,
+        Error = actix_web::Error,
+        InitError = (),
+    >,
+> {
+    App::new().service(forward_body).app_data(Data::from(sender))
+}
+
+/// Everything a test needs to interact with the fake webhook receiver.
+struct WebhookHandle {
+    /// Task running the HTTP server.
+    pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
+    /// URL the HTTP server is listening on.
+    pub url: String,
+    /// Receiving end of the channel fed with the body of each received request.
+    pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
+}
+
+/// Starts the fake webhook receiver on a port picked by the system and
+/// returns the handle to interact with it.
+async fn create_webhook_server() -> WebhookHandle {
+    // `try_init` rather than `init`: `init` panics when a global logger is
+    // already installed, which would happen as soon as this helper is called
+    // more than once in the same test binary. A failed install is harmless here.
+    let mut log_builder = env_logger::Builder::new();
+    log_builder.parse_filters("info");
+    let _ = log_builder.try_init();
+
+    let (sender, receiver) = mpsc::unbounded_channel();
+    let sender = Arc::new(sender);
+
+    // By listening on the port 0, the system will give us any available port.
+    let server =
+        HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
+    let (ip, scheme) = server.addrs_with_scheme()[0];
+    let url = format!("{scheme}://{ip}/");
+
+    let server_handle = tokio::spawn(server.run());
+    WebhookHandle { server_handle, url, receiver }
+}
+
+/// Sends five document additions and checks that each resulting task is
+/// delivered to the webhook, as one JSON object per line.
+#[actix_web::test]
+async fn test_basic_webhook() {
+    let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
+
+    let db_path = tempfile::tempdir().unwrap();
+    let server = Server::new_with_options(Opt {
+        task_webhook_url: Some(Url::parse(&url).unwrap()),
+        ..default_settings(db_path.path())
+    })
+    .await
+    .unwrap();
+
+    let index = server.index("tamo");
+    // May be flaky: we're relying on the fact that while the first document addition is processed, the other
+    // operations will be received and will be batched together. If it doesn't happen it's not a problem
+    // the rest of the test won't assume anything about the number of tasks per batch.
+    for i in 0..5 {
+        let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
+    }
+
+    let mut nb_tasks = 0;
+    // NOTE(review): the server keeps a sender alive, so `recv` is not expected to
+    // return `None`; if fewer than 5 tasks are delivered this loop waits forever.
+    while let Some(payload) = receiver.recv().await {
+        let payload = String::from_utf8(payload).unwrap();
+        let jsonl = payload.split('\n');
+        for json in jsonl {
+            if json.is_empty() {
+                break; // we reached EOF
+            }
+            nb_tasks += 1;
+            let json: serde_json::Value = serde_json::from_str(json).unwrap();
+            snapshot!(
+                json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+            @r###"
+            {
+              "uid": "[uid]",
+              "indexUid": "tamo",
+              "status": "succeeded",
+              "type": "documentAdditionOrUpdate",
+              "canceledBy": null,
+              "details": {
+                "receivedDocuments": 1,
+                "indexedDocuments": 1
+              },
+              "error": null,
+              "duration": "[duration]",
+              "enqueuedAt": "[date]",
+              "startedAt": "[date]",
+              "finishedAt": "[date]"
+            }
+            "###);
+        }
+        if nb_tasks == 5 {
+            break;
+        }
+    }
+
+    assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
+
+    server_handle.abort();
+}
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -192,7 +192,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
    #[error(transparent)]
    InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
-    #[error("Invalid prompt in for embeddings with name '{0}': {1}.")]
+    #[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
    InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
    #[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
    TooManyEmbedders(usize),
@@ -200,6 +200,33 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidEmbedder(String),
    #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
    TooManyVectors(String, usize),
+    #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
+        allowed_sources_for_field
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", "),
+        allowed_fields_for_source
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", ")
+    )]
+    InvalidFieldForSource {
+        embedder_name: String,
+        source_: crate::vector::settings::EmbedderSource,
+        field: &'static str,
+        allowed_fields_for_source: &'static [&'static str],
+        allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
+    },
+    #[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
+    InvalidOpenAiModel { embedder_name: String, model: String },
+    #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
+    MissingFieldForSource {
+        field: &'static str,
+        source_: crate::vector::settings::EmbedderSource,
+        embedder_name: String,
+    },
 }

 impl From<crate::vector::Error> for Error {
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -27,8 +27,8 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
 static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
 static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));

-/// The maximum number of facets returned by the facet search route.
-const MAX_NUMBER_OF_FACETS: usize = 100;
+/// The maximum number of values per facet returned by the facet search route.
+const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;

 pub mod facet;
 mod fst_utils;
@@ -306,6 +306,7 @@ pub struct SearchForFacetValues<'a> {
    query: Option<String>,
    facet: String,
    search_query: Search<'a>,
+    max_values: usize,
    is_hybrid: bool,
 }

@@ -315,7 +316,13 @@ impl<'a> SearchForFacetValues<'a> {
        search_query: Search<'a>,
        is_hybrid: bool,
    ) -> SearchForFacetValues<'a> {
-        SearchForFacetValues { query: None, facet, search_query, is_hybrid }
+        SearchForFacetValues {
+            query: None,
+            facet,
+            search_query,
+            max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
+            is_hybrid,
+        }
    }

    pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
@@ -323,6 +330,11 @@ impl<'a> SearchForFacetValues<'a> {
        self
    }

+    pub fn max_values(&mut self, max: usize) -> &mut Self {
+        self.max_values = max;
+        self
+    }
+
    fn one_original_value_of(
        &self,
        field_id: FieldId,
@@ -462,7 +474,7 @@ impl<'a> SearchForFacetValues<'a> {
                            .unwrap_or_else(|| left_bound.to_string());
                        results.push(FacetValueHit { value, count });
                    }
-                    if results.len() >= MAX_NUMBER_OF_FACETS {
+                    if results.len() >= self.max_values {
                        break;
                    }
                }
@@ -507,7 +519,7 @@ impl<'a> SearchForFacetValues<'a> {
                    .unwrap_or_else(|| query.to_string());
                results.push(FacetValueHit { value, count });
            }
-            if results.len() >= MAX_NUMBER_OF_FACETS {
+            if results.len() >= self.max_values {
                return Ok(ControlFlow::Break(()));
            }
        }
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -15,6 +15,7 @@ pub struct BucketSortOutput {

 // TODO: would probably be good to regroup some of these inside of a struct?
 #[allow(clippy::too_many_arguments)]
+#[logging_timer::time]
 pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    ctx: &mut SearchContext<'ctx>,
    mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> {
    }
 }

-#[derive(Copy, Clone, Default)]
+#[derive(Copy, Clone, Default, Debug)]
 pub struct FormatOptions {
    pub highlight: bool,
    pub crop: Option<usize>,
@@ -82,6 +82,10 @@ impl FormatOptions {
    pub fn merge(self, other: Self) -> Self {
        Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
    }
+
+    pub fn should_format(&self) -> bool {
+        self.highlight || self.crop.is_some()
+    }
 }

 #[derive(Clone, Debug)]
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -191,6 +191,7 @@ fn resolve_maximally_reduced_query_graph(
    Ok(docids)
 }

+#[logging_timer::time]
 fn resolve_universe(
    ctx: &mut SearchContext,
    initial_universe: &RoaringBitmap,
@@ -556,6 +557,7 @@ pub fn execute_vector_search(
 }

 #[allow(clippy::too_many_arguments)]
+#[logging_timer::time]
 pub fn execute_search(
    ctx: &mut SearchContext,
    query: Option<&str>,
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -5,6 +5,7 @@ use super::*;
 use crate::{Result, SearchContext, MAX_WORD_LENGTH};

 /// Convert the tokenised search query into a list of located query terms.
+#[logging_timer::time]
 pub fn located_query_terms_from_tokens(
    ctx: &mut SearchContext,
    query: NormalizedTokenIter,
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -26,7 +26,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
    searchable_fields: &Option<HashSet<FieldId>>,
-    stop_words: Option<&fst::Set<&[u8]>>,
+    stop_words: Option<&fst::Set<Vec<u8>>>,
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
@@ -181,11 +181,11 @@ fn searchable_fields_changed(

 /// Factorize tokenizer building.
 fn tokenizer_builder<'a>(
-    stop_words: Option<&'a fst::Set<&[u8]>>,
+    stop_words: Option<&'a fst::Set<Vec<u8>>>,
    allowed_separators: Option<&'a [&str]>,
    dictionary: Option<&'a [&str]>,
    script_language: Option<&'a HashMap<Script, Vec<Language>>>,
-) -> TokenizerBuilder<'a, &'a [u8]> {
+) -> TokenizerBuilder<'a, Vec<u8>> {
    let mut tokenizer_builder = TokenizerBuilder::new();
    if let Some(stop_words) = stop_words {
        tokenizer_builder.stop_words(stop_words);
@@ -211,7 +211,7 @@ fn lang_safe_tokens_from_document<'a>(
    obkv: &KvReader<FieldId>,
    searchable_fields: &Option<HashSet<FieldId>>,
    tokenizer: &Tokenizer,
-    stop_words: Option<&fst::Set<&[u8]>>,
+    stop_words: Option<&fst::Set<Vec<u8>>>,
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: u32,
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -14,7 +14,6 @@ use std::fs::File;
 use std::io::BufReader;

 use crossbeam_channel::Sender;
-use log::debug;
 use rayon::prelude::*;

 use self::extract_docid_word_positions::extract_docid_word_positions;
@@ -29,10 +28,7 @@ use self::extract_vector_points::{
 use self::extract_word_docids::extract_word_docids;
 use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
 use self::extract_word_position_docids::extract_word_position_docids;
-use super::helpers::{
-    as_cloneable_grenad, merge_deladd_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters,
-    MergeFn, MergeableReader,
-};
+use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
 use super::{helpers, TypedChunk};
 use crate::proximity::ProximityPrecision;
 use crate::vector::EmbeddingConfigs;
@@ -51,7 +47,7 @@ pub(crate) fn data_from_obkv_documents(
    primary_key_id: FieldId,
    geo_fields_ids: Option<(FieldId, FieldId)>,
    field_id_map: FieldsIdsMap,
-    stop_words: Option<fst::Set<&[u8]>>,
+    stop_words: Option<fst::Set<Vec<u8>>>,
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
@@ -61,218 +57,170 @@ pub(crate) fn data_from_obkv_documents(
 ) -> Result<()> {
    puffin::profile_function!();

-    original_obkv_chunks
-        .par_bridge()
-        .map(|original_documents_chunk| {
-            send_original_documents_data(
-                original_documents_chunk,
-                indexer,
-                lmdb_writer_sx.clone(),
-                field_id_map.clone(),
-                embedders.clone(),
-            )
-        })
-        .collect::<Result<()>>()?;
-
-    #[allow(clippy::type_complexity)]
-    let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))))> =
-        flattened_obkv_chunks
-            .par_bridge()
-            .map(|flattened_obkv_chunks| {
-                send_and_extract_flattened_documents_data(
-                    flattened_obkv_chunks,
-                    indexer,
-                    lmdb_writer_sx.clone(),
-                    &searchable_fields,
-                    &faceted_fields,
-                    primary_key_id,
-                    geo_fields_ids,
-                    &stop_words,
-                    &allowed_separators,
-                    &dictionary,
-                    max_positions_per_attributes,
-                )
-            })
-            .collect();
-
-    let (
-        docid_word_positions_chunks,
-        (
-            fid_docid_facet_numbers_chunks,
-            (
-                fid_docid_facet_strings_chunks,
-                (
-                    facet_is_null_docids_chunks,
-                    (facet_is_empty_docids_chunks, facet_exists_docids_chunks),
-                ),
-            ),
-        ),
-    ) = result?;
-
-    // merge facet_exists_docids and send them as a typed chunk
-    {
-        let lmdb_writer_sx = lmdb_writer_sx.clone();
-        rayon::spawn(move || {
-            debug!("merge {} database", "facet-id-exists-docids");
-            match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
-                Ok(reader) => {
-                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
-                }
-                Err(e) => {
-                    let _ = lmdb_writer_sx.send(Err(e));
-                }
-            }
-        });
-    }
-
-    // merge facet_is_null_docids and send them as a typed chunk
-    {
-        let lmdb_writer_sx = lmdb_writer_sx.clone();
-        rayon::spawn(move || {
-            debug!("merge {} database", "facet-id-is-null-docids");
-            match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
-                Ok(reader) => {
-                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
-                }
-                Err(e) => {
-                    let _ = lmdb_writer_sx.send(Err(e));
-                }
-            }
-        });
-    }
-
-    // merge facet_is_empty_docids and send them as a typed chunk
-    {
-        let lmdb_writer_sx = lmdb_writer_sx.clone();
-        rayon::spawn(move || {
-            debug!("merge {} database", "facet-id-is-empty-docids");
-            match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
-                Ok(reader) => {
-                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
-                }
-                Err(e) => {
-                    let _ = lmdb_writer_sx.send(Err(e));
-                }
-            }
-        });
-    }
-
-    if proximity_precision == ProximityPrecision::ByWord {
-        spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-            docid_word_positions_chunks.clone(),
-            indexer,
-            lmdb_writer_sx.clone(),
-            extract_word_pair_proximity_docids,
-            merge_deladd_cbo_roaring_bitmaps,
-            TypedChunk::WordPairProximityDocids,
-            "word-pair-proximity-docids",
-        );
-    }
-
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        docid_word_positions_chunks.clone(),
-        indexer,
-        lmdb_writer_sx.clone(),
-        extract_fid_word_count_docids,
-        merge_deladd_cbo_roaring_bitmaps,
-        TypedChunk::FieldIdWordCountDocids,
-        "field-id-wordcount-docids",
-    );
-
-    spawn_extraction_task::<
-        _,
-        _,
-        Vec<(
-            grenad::Reader<BufReader<File>>,
-            grenad::Reader<BufReader<File>>,
-            grenad::Reader<BufReader<File>>,
-        )>,
-    >(
-        docid_word_positions_chunks.clone(),
-        indexer,
-        lmdb_writer_sx.clone(),
-        move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
-        merge_deladd_cbo_roaring_bitmaps,
-        |(word_docids_reader, exact_word_docids_reader, word_fid_docids_reader)| {
-            TypedChunk::WordDocids {
-                word_docids_reader,
-                exact_word_docids_reader,
-                word_fid_docids_reader,
-            }
+    let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
+        || {
+            original_obkv_chunks
+                .par_bridge()
+                .map(|original_documents_chunk| {
+                    send_original_documents_data(
+                        original_documents_chunk,
+                        indexer,
+                        lmdb_writer_sx.clone(),
+                        field_id_map.clone(),
+                        embedders.clone(),
+                    )
+                })
+                .collect::<Result<()>>()
+        },
+        || {
+            flattened_obkv_chunks
+                .par_bridge()
+                .map(|flattened_obkv_chunks| {
+                    send_and_extract_flattened_documents_data(
+                        flattened_obkv_chunks,
+                        indexer,
+                        lmdb_writer_sx.clone(),
+                        &searchable_fields,
+                        &faceted_fields,
+                        primary_key_id,
+                        geo_fields_ids,
+                        &stop_words,
+                        &allowed_separators,
+                        &dictionary,
+                        max_positions_per_attributes,
+                    )
+                })
+                .inspect(|result| {
+                    if proximity_precision == ProximityPrecision::ByWord {
+                        if let Ok((docid_word_positions_chunk, _)) = result {
+                            run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
+                                docid_word_positions_chunk.clone(),
+                                indexer,
+                                lmdb_writer_sx.clone(),
+                                extract_word_pair_proximity_docids,
+                                TypedChunk::WordPairProximityDocids,
+                                "word-pair-proximity-docids",
+                            );
+                        }
+                    }
+                })
+                .inspect(|result| {
+                    if let Ok((docid_word_positions_chunk, _)) = result {
+                        run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
+                            docid_word_positions_chunk.clone(),
+                            indexer,
+                            lmdb_writer_sx.clone(),
+                            extract_fid_word_count_docids,
+                            TypedChunk::FieldIdWordCountDocids,
+                            "field-id-wordcount-docids",
+                        );
+                    }
+                })
+                .inspect(|result| {
+                    if let Ok((docid_word_positions_chunk, _)) = result {
+                        let exact_attributes = exact_attributes.clone();
+                        run_extraction_task::<
+                            _,
+                            _,
+                            (
+                                grenad::Reader<BufReader<File>>,
+                                grenad::Reader<BufReader<File>>,
+                                grenad::Reader<BufReader<File>>,
+                            ),
+                        >(
+                            docid_word_positions_chunk.clone(),
+                            indexer,
+                            lmdb_writer_sx.clone(),
+                            move |doc_word_pos, indexer| {
+                                extract_word_docids(doc_word_pos, indexer, &exact_attributes)
+                            },
+                            |(
+                                word_docids_reader,
+                                exact_word_docids_reader,
+                                word_fid_docids_reader,
+                            )| {
+                                TypedChunk::WordDocids {
+                                    word_docids_reader,
+                                    exact_word_docids_reader,
+                                    word_fid_docids_reader,
+                                }
+                            },
+                            "word-docids",
+                        );
+                    }
+                })
+                .inspect(|result| {
+                    if let Ok((docid_word_positions_chunk, _)) = result {
+                        run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
+                            docid_word_positions_chunk.clone(),
+                            indexer,
+                            lmdb_writer_sx.clone(),
+                            extract_word_position_docids,
+                            TypedChunk::WordPositionDocids,
+                            "word-position-docids",
+                        );
+                    }
+                })
+                .inspect(|result| {
+                    if let Ok((_, (_, fid_docid_facet_strings_chunk))) = result {
+                        run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
+                            fid_docid_facet_strings_chunk.clone(),
+                            indexer,
+                            lmdb_writer_sx.clone(),
+                            extract_facet_string_docids,
+                            TypedChunk::FieldIdFacetStringDocids,
+                            "field-id-facet-string-docids",
+                        );
+                    }
+                })
+                .inspect(|result| {
+                    if let Ok((_, (fid_docid_facet_numbers_chunk, _))) = result {
+                        run_extraction_task::<_, _, grenad::Reader<BufReader<File>>>(
+                            fid_docid_facet_numbers_chunk.clone(),
+                            indexer,
+                            lmdb_writer_sx.clone(),
+                            extract_facet_number_docids,
+                            TypedChunk::FieldIdFacetNumberDocids,
+                            "field-id-facet-number-docids",
+                        );
+                    }
+                })
+                .map(|r| r.map(|_| ()))
+                .collect::<Result<()>>()
        },
-        "word-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        docid_word_positions_chunks.clone(),
-        indexer,
-        lmdb_writer_sx.clone(),
-        extract_word_position_docids,
-        merge_deladd_cbo_roaring_bitmaps,
-        TypedChunk::WordPositionDocids,
-        "word-position-docids",
-    );
-
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        fid_docid_facet_strings_chunks,
-        indexer,
-        lmdb_writer_sx.clone(),
-        extract_facet_string_docids,
-        merge_deladd_cbo_roaring_bitmaps,
-        TypedChunk::FieldIdFacetStringDocids,
-        "field-id-facet-string-docids",
-    );
-
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        fid_docid_facet_numbers_chunks,
-        indexer,
-        lmdb_writer_sx,
-        extract_facet_number_docids,
-        merge_deladd_cbo_roaring_bitmaps,
-        TypedChunk::FieldIdFacetNumberDocids,
-        "field-id-facet-number-docids",
-    );
-
-    Ok(())
+    original_pipeline_result.and(flattened_pipeline_result)
 }

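-/// Spawn a new task to extract data for a specific DB using extract_fn.
-/// Generated grenad chunks are merged using the merge_fn.
-/// The result of merged chunks is serialized as TypedChunk using the serialize_fn
-/// and sent into lmdb_writer_sx.
+/// Extract data from a single chunk for a specific DB using extract_fn.
+/// The extraction runs directly in the caller: no task is spawned and no merge is done.
+/// The extracted data is serialized as TypedChunk using the serialize_fn
+/// and sent into lmdb_writer_sx; an extraction error is sent into it as is.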
-fn spawn_extraction_task<FE, FS, M>(
-    chunks: Vec<grenad::Reader<CursorClonableMmap>>,
+fn run_extraction_task<FE, FS, M>(
+    chunk: grenad::Reader<CursorClonableMmap>,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    extract_fn: FE,
-    merge_fn: MergeFn,
    serialize_fn: FS,
    name: &'static str,
 ) where
-    FE: Fn(grenad::Reader<CursorClonableMmap>, GrenadParameters) -> Result<M::Output>
+    FE: Fn(grenad::Reader<CursorClonableMmap>, GrenadParameters) -> Result<M>
        + Sync
        + Send
        + 'static,
-    FS: Fn(M::Output) -> TypedChunk + Sync + Send + 'static,
-    M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
-    M::Output: Send,
+    FS: Fn(M) -> TypedChunk + Sync + Send + 'static,
+    M: Send,
 {
-    rayon::spawn(move || {
-        puffin::profile_scope!("extract_multiple_chunks", name);
-        let chunks: Result<M> =
-            chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
-        rayon::spawn(move || match chunks {
-            Ok(chunks) => {
-                debug!("merge {} database", name);
-                puffin::profile_scope!("merge_multiple_chunks", name);
-                let reader = chunks.merge(merge_fn, &indexer);
-                let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
-            }
-            Err(e) => {
-                let _ = lmdb_writer_sx.send(Err(e));
-            }
-        })
-    });
+    puffin::profile_scope!("extract_chunk", name);
+    match extract_fn(chunk, indexer) {
+        Ok(chunk) => {
+            let _ = lmdb_writer_sx.send(Ok(serialize_fn(chunk)));
+        }
+        Err(e) => {
+            let _ = lmdb_writer_sx.send(Err(e));
+        }
+    }
 }

 /// Extract chunked data and send it into lmdb_writer_sx sender:
@@ -350,22 +298,13 @@ fn send_and_extract_flattened_documents_data(
    faceted_fields: &HashSet<FieldId>,
    primary_key_id: FieldId,
    geo_fields_ids: Option<(FieldId, FieldId)>,
-    stop_words: &Option<fst::Set<&[u8]>>,
+    stop_words: &Option<fst::Set<Vec<u8>>>,
    allowed_separators: &Option<&[&str]>,
    dictionary: &Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
 ) -> Result<(
    grenad::Reader<CursorClonableMmap>,
-    (
-        grenad::Reader<CursorClonableMmap>,
-        (
-            grenad::Reader<CursorClonableMmap>,
-            (
-                grenad::Reader<BufReader<File>>,
-                (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
-            ),
-        ),
-    ),
+    (grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>),
 )> {
    let flattened_documents_chunk =
        flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
@@ -436,16 +375,17 @@ fn send_and_extract_flattened_documents_data(
                    fid_docid_facet_strings_chunk.clone(),
                )));

-                Ok((
-                    fid_docid_facet_numbers_chunk,
-                    (
-                        fid_docid_facet_strings_chunk,
-                        (
-                            fid_facet_is_null_docids_chunk,
-                            (fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
-                        ),
-                    ),
-                ))
+                let _ = lmdb_writer_sx
+                    .send(Ok(TypedChunk::FieldIdFacetIsNullDocids(fid_facet_is_null_docids_chunk)));
+
+                let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(
+                    fid_facet_is_empty_docids_chunk,
+                )));
+
+                let _ = lmdb_writer_sx
+                    .send(Ok(TypedChunk::FieldIdFacetExistsDocids(fid_facet_exists_docids_chunk)));
+
+                Ok((fid_docid_facet_numbers_chunk, fid_docid_facet_strings_chunk))
            },
        );

--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -82,90 +82,6 @@ pub unsafe fn as_cloneable_grenad(
    Ok(reader)
 }

-pub trait MergeableReader
-where
-    Self: Sized,
-{
-    type Output;
-
-    fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
-}
-
-impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
-    type Output = grenad::Reader<BufReader<File>>;
-
-    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
-        let mut merger = MergerBuilder::new(merge_fn);
-        self.into_iter().try_for_each(|r| merger.push(r))?;
-        merger.finish(params)
-    }
-}
-
-impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
-    type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
-
-    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
-        let mut m1 = MergerBuilder::new(merge_fn);
-        let mut m2 = MergerBuilder::new(merge_fn);
-        for (r1, r2) in self.into_iter() {
-            m1.push(r1)?;
-            m2.push(r2)?;
-        }
-        Ok((m1.finish(params)?, m2.finish(params)?))
-    }
-}
-
-impl MergeableReader
-    for Vec<(
-        grenad::Reader<BufReader<File>>,
-        grenad::Reader<BufReader<File>>,
-        grenad::Reader<BufReader<File>>,
-    )>
-{
-    type Output = (
-        grenad::Reader<BufReader<File>>,
-        grenad::Reader<BufReader<File>>,
-        grenad::Reader<BufReader<File>>,
-    );
-
-    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
-        let mut m1 = MergerBuilder::new(merge_fn);
-        let mut m2 = MergerBuilder::new(merge_fn);
-        let mut m3 = MergerBuilder::new(merge_fn);
-        for (r1, r2, r3) in self.into_iter() {
-            m1.push(r1)?;
-            m2.push(r2)?;
-            m3.push(r3)?;
-        }
-        Ok((m1.finish(params)?, m2.finish(params)?, m3.finish(params)?))
-    }
-}
-
-struct MergerBuilder<R>(grenad::MergerBuilder<R, MergeFn>);
-
-impl<R: io::Read + io::Seek> MergerBuilder<R> {
-    fn new(merge_fn: MergeFn) -> Self {
-        Self(grenad::MergerBuilder::new(merge_fn))
-    }
-
-    fn push(&mut self, reader: grenad::Reader<R>) -> Result<()> {
-        self.0.push(reader.into_cursor()?);
-        Ok(())
-    }
-
-    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
-        let merger = self.0.build();
-        let mut writer = create_writer(
-            params.chunk_compression_type,
-            params.chunk_compression_level,
-            tempfile::tempfile()?,
-        );
-        merger.write_into_stream_writer(&mut writer)?;
-
-        writer_into_reader(writer)
-    }
-}
-
 #[derive(Debug, Clone, Copy)]
 pub struct GrenadParameters {
    pub chunk_compression_type: CompressionType,
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@@ -10,13 +10,13 @@ use fst::{IntoStreamer, Streamer};
 pub use grenad_helpers::{
    as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
    merge_ignore_values, sorter_into_reader, write_sorter_into_database, writer_into_reader,
-    GrenadParameters, MergeableReader,
+    GrenadParameters,
 };
 pub use merge_functions::{
    keep_first, keep_latest_obkv, merge_btreeset_string, merge_cbo_roaring_bitmaps,
    merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
    merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions,
-    obkvs_merge_additions_and_deletions, serialize_roaring_bitmap, MergeFn,
+    obkvs_merge_additions_and_deletions, MergeFn,
 };

 use crate::MAX_WORD_LENGTH;
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -5,12 +5,13 @@ mod transform;
 mod typed_chunk;

 use std::collections::{HashMap, HashSet};
-use std::io::{Cursor, Read, Seek};
+use std::io::{Read, Seek};
 use std::iter::FromIterator;
 use std::num::NonZeroU32;
 use std::result::Result as StdResult;

 use crossbeam_channel::{Receiver, Sender};
+use grenad::{Merger, MergerBuilder};
 use heed::types::Str;
 use heed::Database;
 use log::debug;
@@ -21,7 +22,7 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;
-pub use self::enrich::{extract_finite_float_from_value, validate_geo_from_json, DocumentId};
+pub use self::enrich::{extract_finite_float_from_value, DocumentId};
 pub use self::helpers::{
    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
    fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps,
@@ -313,9 +314,6 @@ where
            }
        };

-        let original_documents = grenad::Reader::new(original_documents)?;
-        let flattened_documents = grenad::Reader::new(flattened_documents)?;
-
        // create LMDB writer channel
        let (lmdb_writer_sx, lmdb_writer_rx): (
            Sender<Result<TypedChunk>>,
@@ -354,11 +352,7 @@ where

        let stop_words = self.index.stop_words(self.wtxn)?;
        let separators = self.index.allowed_separators(self.wtxn)?;
-        let separators: Option<Vec<_>> =
-            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let dictionary = self.index.dictionary(self.wtxn)?;
-        let dictionary: Option<Vec<_>> =
-            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
        let proximity_precision = self.index.proximity_precision(self.wtxn)?.unwrap_or_default();

@@ -368,55 +362,77 @@ where
            max_memory: self.indexer_config.max_memory,
            max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
        };
-        let documents_chunk_size =
-            self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4); // 4MiB
+        let documents_chunk_size = match self.indexer_config.documents_chunk_size {
+            Some(chunk_size) => chunk_size,
+            None => {
+                let default_chunk_size = 1024 * 1024 * 4; // 4MiB
+                let min_chunk_size = 1024 * 512; // 512KiB
+
+                // compute the chunk size from the number of available threads and the input data size.
+                let total_size = flattened_documents.metadata().map(|m| m.len());
+                let current_num_threads = pool.current_num_threads();
+                total_size
+                    .map_or(default_chunk_size, |size| (size as usize) / current_num_threads)
+                    .max(min_chunk_size)
+            }
+        };
+
+        let original_documents = grenad::Reader::new(original_documents)?;
+        let flattened_documents = grenad::Reader::new(flattened_documents)?;
+
        let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;

        let cloned_embedder = self.embedders.clone();

        // Run extraction pipeline in parallel.
        pool.install(|| {
-            puffin::profile_scope!("extract_and_send_grenad_chunks");
-            // split obkv file into several chunks
-            let original_chunk_iter =
-                grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
+            let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
+            rayon::spawn(move || {
+                puffin::profile_scope!("extract_and_send_grenad_chunks");
+                // split obkv file into several chunks
+                let original_chunk_iter =
+                    grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);

-            // split obkv file into several chunks
-            let flattened_chunk_iter =
-                grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size);
+                // split obkv file into several chunks
+                let flattened_chunk_iter =
+                    grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size);

-            let result = original_chunk_iter.and_then(|original_chunk| {
-                let flattened_chunk = flattened_chunk_iter?;
-                // extract all databases from the chunked obkv douments
-                extract::data_from_obkv_documents(
-                    original_chunk,
-                    flattened_chunk,
-                    pool_params,
-                    lmdb_writer_sx.clone(),
-                    searchable_fields,
-                    faceted_fields,
-                    primary_key_id,
-                    geo_fields_ids,
-                    field_id_map,
-                    stop_words,
-                    separators.as_deref(),
-                    dictionary.as_deref(),
-                    max_positions_per_attributes,
-                    exact_attributes,
-                    proximity_precision,
-                    cloned_embedder,
-                )
+                let separators: Option<Vec<_>> =
+                    separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
+                let dictionary: Option<Vec<_>> =
+                    dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
+                let result = original_chunk_iter.and_then(|original_chunk| {
+                    let flattened_chunk = flattened_chunk_iter?;
+                    // extract all databases from the chunked obkv documents
+                    extract::data_from_obkv_documents(
+                        original_chunk,
+                        flattened_chunk,
+                        pool_params,
+                        lmdb_writer_sx.clone(),
+                        searchable_fields,
+                        faceted_fields,
+                        primary_key_id,
+                        geo_fields_ids,
+                        field_id_map,
+                        stop_words,
+                        separators.as_deref(),
+                        dictionary.as_deref(),
+                        max_positions_per_attributes,
+                        exact_attributes,
+                        proximity_precision,
+                        cloned_embedder,
+                    )
+                });
+
+                if let Err(e) = result {
+                    let _ = lmdb_writer_sx.send(Err(e));
+                }
+
+                // needs to be dropped to avoid channel waiting lock.
+                drop(lmdb_writer_sx);
            });
-
-            if let Err(e) = result {
-                let _ = lmdb_writer_sx.send(Err(e));
-            }
-
-            // needs to be dropped to avoid channel waiting lock.
-            drop(lmdb_writer_sx);
        });

-        let index_is_empty = self.index.number_of_documents(self.wtxn)? == 0;
        let mut final_documents_ids = RoaringBitmap::new();

        let mut databases_seen = 0;
@@ -444,12 +460,21 @@ where
                    word_fid_docids_reader,
                } => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
-                    word_docids = Some(cloneable_chunk);
+                    let word_docids = word_docids.get_or_insert_with(|| {
+                        MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
+                    });
+                    word_docids.push(cloneable_chunk.into_cursor()?);
                    let cloneable_chunk =
                        unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
-                    exact_word_docids = Some(cloneable_chunk);
+                    let exact_word_docids = exact_word_docids.get_or_insert_with(|| {
+                        MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
+                    });
+                    exact_word_docids.push(cloneable_chunk.into_cursor()?);
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
-                    word_fid_docids = Some(cloneable_chunk);
+                    let word_fid_docids = word_fid_docids.get_or_insert_with(|| {
+                        MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
+                    });
+                    word_fid_docids.push(cloneable_chunk.into_cursor()?);
                    TypedChunk::WordDocids {
                        word_docids_reader,
                        exact_word_docids_reader,
@@ -458,7 +483,10 @@ where
                }
                TypedChunk::WordPositionDocids(chunk) => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
-                    word_position_docids = Some(cloneable_chunk);
+                    let word_position_docids = word_position_docids.get_or_insert_with(|| {
+                        MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn)
+                    });
+                    word_position_docids.push(cloneable_chunk.into_cursor()?);
                    TypedChunk::WordPositionDocids(chunk)
                }
                TypedChunk::VectorPoints {
@@ -481,7 +509,7 @@ where
            };

            let (docids, is_merged_database) =
-                write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn, index_is_empty)?;
+                write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn)?;
            if !docids.is_empty() {
                final_documents_ids |= docids;
                let documents_seen_count = final_documents_ids.len();
@@ -538,10 +566,10 @@ where
        }

        self.execute_prefix_databases(
-            word_docids,
-            exact_word_docids,
-            word_position_docids,
-            word_fid_docids,
+            word_docids.map(MergerBuilder::build),
+            exact_word_docids.map(MergerBuilder::build),
+            word_position_docids.map(MergerBuilder::build),
+            word_fid_docids.map(MergerBuilder::build),
        )?;

        Ok(number_of_documents)
@@ -550,10 +578,10 @@ where
    #[logging_timer::time("IndexDocuments::{}")]
    pub fn execute_prefix_databases(
        self,
-        word_docids: Option<grenad::Reader<CursorClonableMmap>>,
-        exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
-        word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
-        word_fid_docids: Option<grenad::Reader<CursorClonableMmap>>,
+        word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
+        exact_word_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
+        word_position_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
+        word_fid_docids: Option<Merger<CursorClonableMmap, MergeFn>>,
    ) -> Result<()>
    where
        FP: Fn(UpdateIndexingStep) + Sync,
@@ -728,7 +756,7 @@ where
 #[allow(clippy::too_many_arguments)]
 fn execute_word_prefix_docids(
    txn: &mut heed::RwTxn,
-    reader: grenad::Reader<Cursor<ClonableMmap>>,
+    merger: Merger<CursorClonableMmap, MergeFn>,
    word_docids_db: Database<Str, CboRoaringBitmapCodec>,
    word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
    indexer_config: &IndexerConfig,
@@ -738,13 +766,12 @@ fn execute_word_prefix_docids(
 ) -> Result<()> {
    puffin::profile_function!();

-    let cursor = reader.into_cursor()?;
    let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
    builder.chunk_compression_type = indexer_config.chunk_compression_type;
    builder.chunk_compression_level = indexer_config.chunk_compression_level;
    builder.max_nb_chunks = indexer_config.max_nb_chunks;
    builder.max_memory = indexer_config.max_memory;
-    builder.execute(cursor, new_prefix_fst_words, common_prefix_fst_words, del_prefix_fst_words)?;
+    builder.execute(merger, new_prefix_fst_words, common_prefix_fst_words, del_prefix_fst_words)?;
    Ok(())
 }

@@ -2553,7 +2580,7 @@ mod tests {
    /// Vectors must be of the same length.
    #[test]
    fn test_multiple_vectors() {
-        use crate::vector::settings::{EmbedderSettings, EmbeddingSettings};
+        use crate::vector::settings::EmbeddingSettings;
        let index = TempIndex::new();

        index
@@ -2562,9 +2589,11 @@ mod tests {
                embedders.insert(
                    "manual".to_string(),
                    Setting::Set(EmbeddingSettings {
-                        embedder_options: Setting::Set(EmbedderSettings::UserProvided(
-                            crate::vector::settings::UserProvidedSettings { dimensions: 3 },
-                        )),
+                        source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided),
+                        model: Setting::NotSet,
+                        revision: Setting::NotSet,
+                        api_key: Setting::NotSet,
+                        dimensions: Setting::Set(3),
                        document_template: Setting::NotSet,
                    }),
                );
@@ -2579,10 +2608,10 @@ mod tests {
            .unwrap();
        index.add_documents(documents!([{"id": 1, "_vectors": { "manual": [6, 7, 8] }}])).unwrap();
        index
-            .add_documents(
-                documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
-            )
-            .unwrap();
+               .add_documents(
+                   documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
+               )
+               .unwrap();

        let rtxn = index.read_txn().unwrap();
        let res = index.search(&rtxn).vector([0.0, 1.0, 2.0].to_vec()).execute().unwrap();
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
 use grenad::MergerBuilder;
 use heed::types::Bytes;
-use heed::{PutFlags, RwTxn};
+use heed::RwTxn;
 use obkv::{KvReader, KvWriter};
 use roaring::RoaringBitmap;

@@ -119,7 +119,6 @@ pub(crate) fn write_typed_chunk_into_index(
    typed_chunk: TypedChunk,
    index: &Index,
    wtxn: &mut RwTxn,
-    index_is_empty: bool,
 ) -> Result<(RoaringBitmap, bool)> {
    puffin::profile_function!(typed_chunk.to_debug_string());

@@ -172,11 +171,10 @@ pub(crate) fn write_typed_chunk_into_index(
            index.put_documents_ids(wtxn, &docids)?;
        }
        TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
-            append_entries_into_database(
+            write_entries_into_database(
                fid_word_count_docids_iter,
                &index.field_id_word_count_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
@@ -188,31 +186,28 @@ pub(crate) fn write_typed_chunk_into_index(
            word_fid_docids_reader,
        } => {
            let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
-            append_entries_into_database(
+            write_entries_into_database(
                word_docids_iter.clone(),
                &index.word_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;

            let exact_word_docids_iter = unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
-            append_entries_into_database(
+            write_entries_into_database(
                exact_word_docids_iter.clone(),
                &index.exact_word_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;

            let word_fid_docids_iter = unsafe { as_cloneable_grenad(&word_fid_docids_reader) }?;
-            append_entries_into_database(
+            write_entries_into_database(
                word_fid_docids_iter,
                &index.word_fid_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
@@ -230,11 +225,10 @@ pub(crate) fn write_typed_chunk_into_index(
            is_merged_database = true;
        }
        TypedChunk::WordPositionDocids(word_position_docids_iter) => {
-            append_entries_into_database(
+            write_entries_into_database(
                word_position_docids_iter,
                &index.word_position_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
@@ -251,44 +245,40 @@ pub(crate) fn write_typed_chunk_into_index(
            is_merged_database = true;
        }
        TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {
-            append_entries_into_database(
+            write_entries_into_database(
                facet_id_exists_docids,
                &index.facet_id_exists_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
        TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
-            append_entries_into_database(
+            write_entries_into_database(
                facet_id_is_null_docids,
                &index.facet_id_is_null_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
        TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
-            append_entries_into_database(
+            write_entries_into_database(
                facet_id_is_empty_docids,
                &index.facet_id_is_empty_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
        TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
-            append_entries_into_database(
+            write_entries_into_database(
                word_pair_proximity_docids_iter,
                &index.word_pair_proximity_docids,
                wtxn,
-                index_is_empty,
                deladd_serialize_add_side,
                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
@@ -541,7 +531,6 @@ fn write_entries_into_database<R, K, V, FS, FM>(
    data: grenad::Reader<R>,
    database: &heed::Database<K, V>,
    wtxn: &mut RwTxn,
-    index_is_empty: bool,
    serialize_value: FS,
    merge_values: FM,
 ) -> Result<()>
@@ -559,13 +548,9 @@ where
    while let Some((key, value)) = cursor.move_on_next()? {
        if valid_lmdb_key(key) {
            buffer.clear();
-            let value = if index_is_empty {
-                Some(serialize_value(value, &mut buffer)?)
-            } else {
-                match database.get(wtxn, key)? {
-                    Some(prev_value) => merge_values(value, prev_value, &mut buffer)?,
-                    None => Some(serialize_value(value, &mut buffer)?),
-                }
+            let value = match database.get(wtxn, key)? {
+                Some(prev_value) => merge_values(value, prev_value, &mut buffer)?,
+                None => Some(serialize_value(value, &mut buffer)?),
            };
            match value {
                Some(value) => database.put(wtxn, key, value)?,
@@ -578,58 +563,3 @@ where

    Ok(())
 }
-
-/// Write provided entries in database using serialize_value function.
-/// merge_values function is used if an entry already exist in the database.
-/// All provided entries must be ordered.
-/// If the index is not empty, write_entries_into_database is called instead.
-fn append_entries_into_database<R, K, V, FS, FM>(
-    data: grenad::Reader<R>,
-    database: &heed::Database<K, V>,
-    wtxn: &mut RwTxn,
-    index_is_empty: bool,
-    serialize_value: FS,
-    merge_values: FM,
-) -> Result<()>
-where
-    R: io::Read + io::Seek,
-    FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
-    FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
-    K: for<'a> heed::BytesDecode<'a>,
-{
-    puffin::profile_function!(format!("number of entries: {}", data.len()));
-
-    if !index_is_empty {
-        return write_entries_into_database(
-            data,
-            database,
-            wtxn,
-            false,
-            serialize_value,
-            merge_values,
-        );
-    }
-
-    let mut buffer = Vec::new();
-    let mut database = database.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
-
-    let mut cursor = data.into_cursor()?;
-    while let Some((key, value)) = cursor.move_on_next()? {
-        if valid_lmdb_key(key) {
-            debug_assert!(
-                K::bytes_decode(key).is_ok(),
-                "Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
-                key.len(),
-                &key
-            );
-            buffer.clear();
-            let value = serialize_value(value, &mut buffer)?;
-            unsafe {
-                // safety: We do not keep a reference to anything that lives inside the database
-                database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, value)?
-            };
-        }
-    }
-
-    Ok(())
-}
--- a/milli/src/update/mod.rs
+++ b/milli/src/update/mod.rs
@@ -8,7 +8,7 @@ pub use self::index_documents::{
    MergeFn,
 };
 pub use self::indexer_config::IndexerConfig;
-pub use self::settings::{Setting, Settings};
+pub use self::settings::{validate_embedding_settings, Setting, Settings};
 pub use self::update_step::UpdateIndexingStep;
 pub use self::word_prefix_docids::WordPrefixDocids;
 pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids;
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -17,7 +17,7 @@ use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS
 use crate::proximity::ProximityPrecision;
 use crate::update::index_documents::IndexDocumentsMethod;
 use crate::update::{IndexDocuments, UpdateIndexingStep};
-use crate::vector::settings::{EmbeddingSettings, PromptSettings};
+use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
 use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
 use crate::{FieldsIdsMap, Index, OrderBy, Result};

@@ -78,11 +78,19 @@ impl<T> Setting<T> {
        }
    }

-    pub fn apply(&mut self, new: Self) {
+    /// Returns `true` if applying the new setting changed this setting
+    pub fn apply(&mut self, new: Self) -> bool
+    where
+        T: PartialEq + Eq,
+    {
        if let Setting::NotSet = new {
-            return;
+            return false;
+        }
+        if self == &new {
+            return false;
        }
        *self = new;
+        true
    }
 }

@@ -950,17 +958,23 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                    .merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right))
                {
                    match joined {
+                        // updated config
                        EitherOrBoth::Both((name, mut old), (_, new)) => {
-                            old.apply(new);
-                            let new = validate_prompt(&name, old)?;
-                            changed = true;
+                            changed |= old.apply(new);
+                            let new = validate_embedding_settings(old, &name)?;
                            new_configs.insert(name, new);
                        }
+                        // unchanged config
                        EitherOrBoth::Left((name, setting)) => {
                            new_configs.insert(name, setting);
                        }
-                        EitherOrBoth::Right((name, setting)) => {
-                            let setting = validate_prompt(&name, setting)?;
+                        // new config
+                        EitherOrBoth::Right((name, mut setting)) => {
+                            // apply the default source in case the source was not set so that it gets validated
+                            crate::vector::settings::EmbeddingSettings::apply_default_source(
+                                &mut setting,
+                            );
+                            let setting = validate_embedding_settings(setting, &name)?;
                            changed = true;
                            new_configs.insert(name, setting);
                        }
@@ -1072,8 +1086,12 @@ fn validate_prompt(
 ) -> Result<Setting<EmbeddingSettings>> {
    match new {
        Setting::Set(EmbeddingSettings {
-            embedder_options,
-            document_template: Setting::Set(PromptSettings { template: Setting::Set(template) }),
+            source,
+            model,
+            revision,
+            api_key,
+            dimensions,
+            document_template: Setting::Set(template),
        }) => {
            // validate
            let template = crate::prompt::Prompt::new(template)
@@ -1081,16 +1099,71 @@ fn validate_prompt(
                .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;

            Ok(Setting::Set(EmbeddingSettings {
-                embedder_options,
-                document_template: Setting::Set(PromptSettings {
-                    template: Setting::Set(template),
-                }),
+                source,
+                model,
+                revision,
+                api_key,
+                dimensions,
+                document_template: Setting::Set(template),
            }))
        }
        new => Ok(new),
    }
 }

+pub fn validate_embedding_settings(
+    settings: Setting<EmbeddingSettings>,
+    name: &str,
+) -> Result<Setting<EmbeddingSettings>> {
+    let settings = validate_prompt(name, settings)?;
+    let Setting::Set(settings) = settings else { return Ok(settings) };
+    let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+        settings;
+    let Some(inferred_source) = source.set() else {
+        return Ok(Setting::Set(EmbeddingSettings {
+            source,
+            model,
+            revision,
+            api_key,
+            dimensions,
+            document_template,
+        }));
+    };
+    match inferred_source {
+        EmbedderSource::OpenAi => {
+            check_unset(&revision, "revision", inferred_source, name)?;
+            check_unset(&dimensions, "dimensions", inferred_source, name)?;
+            if let Setting::Set(model) = &model {
+                crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or(
+                    crate::error::UserError::InvalidOpenAiModel {
+                        embedder_name: name.to_owned(),
+                        model: model.clone(),
+                    },
+                )?;
+            }
+        }
+        EmbedderSource::HuggingFace => {
+            check_unset(&api_key, "apiKey", inferred_source, name)?;
+            check_unset(&dimensions, "dimensions", inferred_source, name)?;
+        }
+        EmbedderSource::UserProvided => {
+            check_unset(&model, "model", inferred_source, name)?;
+            check_unset(&revision, "revision", inferred_source, name)?;
+            check_unset(&api_key, "apiKey", inferred_source, name)?;
+            check_unset(&document_template, "documentTemplate", inferred_source, name)?;
+            check_set(&dimensions, "dimensions", inferred_source, name)?;
+        }
+    }
+    Ok(Setting::Set(EmbeddingSettings {
+        source,
+        model,
+        revision,
+        api_key,
+        dimensions,
+        document_template,
+    }))
+}
+
 #[cfg(test)]
 mod tests {
    use big_s::S;
--- a/milli/src/update/word_prefix_docids.rs
+++ b/milli/src/update/word_prefix_docids.rs
@@ -42,7 +42,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
    #[logging_timer::time("WordPrefixDocids::{}")]
    pub fn execute(
        self,
-        mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,
+        new_word_docids: grenad::Merger<CursorClonableMmap, MergeFn>,
        new_prefix_fst_words: &[String],
        common_prefix_fst_words: &[&[String]],
        del_prefix_fst_words: &HashSet<Vec<u8>>,
@@ -63,7 +63,8 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
        if !common_prefix_fst_words.is_empty() {
            let mut current_prefixes: Option<&&[String]> = None;
            let mut prefixes_cache = HashMap::new();
-            while let Some((word, data)) = new_word_docids_iter.move_on_next()? {
+            let mut new_word_docids_iter = new_word_docids.into_stream_merger_iter()?;
+            while let Some((word, data)) = new_word_docids_iter.next()? {
                current_prefixes = match current_prefixes.take() {
                    Some(prefixes) if word.starts_with(prefixes[0].as_bytes()) => Some(prefixes),
                    _otherwise => {
--- a/milli/src/update/words_prefix_integer_docids.rs
+++ b/milli/src/update/words_prefix_integer_docids.rs
@@ -47,7 +47,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
    #[logging_timer::time("WordPrefixIntegerDocids::{}")]
    pub fn execute(
        self,
-        new_word_integer_docids: grenad::Reader<CursorClonableMmap>,
+        new_word_integer_docids: grenad::Merger<CursorClonableMmap, MergeFn>,
        new_prefix_fst_words: &[String],
        common_prefix_fst_words: &[&[String]],
        del_prefix_fst_words: &HashSet<Vec<u8>>,
@@ -64,14 +64,14 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
            self.max_memory,
        );

-        let mut new_word_integer_docids_iter = new_word_integer_docids.into_cursor()?;
-
        if !common_prefix_fst_words.is_empty() {
            // We fetch all the new common prefixes between the previous and new prefix fst.
            let mut buffer = Vec::new();
            let mut current_prefixes: Option<&&[String]> = None;
            let mut prefixes_cache = HashMap::new();
-            while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? {
+            let mut new_word_integer_docids_iter =
+                new_word_integer_docids.into_stream_merger_iter()?;
+            while let Some((key, data)) = new_word_integer_docids_iter.next()? {
                let (word, pos) =
                    StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?;

--- a/milli/src/vector/openai.rs
+++ b/milli/src/vector/openai.rs
@@ -34,6 +34,9 @@ pub struct EmbedderOptions {
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(rename_all = camelCase, deny_unknown_fields)]
 pub enum EmbeddingModel {
+    // # WARNING
+    //
+    // If ever adding a model, make sure to add it to the list of supported models below.
    #[default]
    #[serde(rename = "text-embedding-ada-002")]
    #[deserr(rename = "text-embedding-ada-002")]
@@ -41,6 +44,10 @@ pub enum EmbeddingModel {
 }

 impl EmbeddingModel {
+    pub fn supported_models() -> &'static [&'static str] {
+        &["text-embedding-ada-002"]
+    }
+
    pub fn max_token(&self) -> usize {
        match self {
            EmbeddingModel::TextEmbeddingAda002 => 8191,
@@ -59,7 +66,7 @@ impl EmbeddingModel {
        }
    }

-    pub fn from_name(name: &'static str) -> Option<Self> {
+    pub fn from_name(name: &str) -> Option<Self> {
        match name {
            "text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002),
            _ => None,
--- a/milli/src/vector/settings.rs
+++ b/milli/src/vector/settings.rs
@@ -4,32 +4,189 @@ use serde::{Deserialize, Serialize};
 use crate::prompt::PromptData;
 use crate::update::Setting;
 use crate::vector::EmbeddingConfig;
+use crate::UserError;

 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(rename_all = camelCase, deny_unknown_fields)]
 pub struct EmbeddingSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "source")]
-    #[deserr(default, rename = "source")]
-    pub embedder_options: Setting<EmbedderSettings>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
-    pub document_template: Setting<PromptSettings>,
+    pub source: Setting<EmbedderSource>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub model: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub revision: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub api_key: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub dimensions: Setting<usize>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub document_template: Setting<String>,
+}
+
+pub fn check_unset<T>(
+    key: &Setting<T>,
+    field: &'static str,
+    source: EmbedderSource,
+    embedder_name: &str,
+) -> Result<(), UserError> {
+    if matches!(key, Setting::NotSet) {
+        Ok(())
+    } else {
+        Err(UserError::InvalidFieldForSource {
+            embedder_name: embedder_name.to_owned(),
+            source_: source,
+            field,
+            allowed_fields_for_source: EmbeddingSettings::allowed_fields_for_source(source),
+            allowed_sources_for_field: EmbeddingSettings::allowed_sources_for_field(field),
+        })
+    }
+}
+
+pub fn check_set<T>(
+    key: &Setting<T>,
+    field: &'static str,
+    source: EmbedderSource,
+    embedder_name: &str,
+) -> Result<(), UserError> {
+    if matches!(key, Setting::Set(_)) {
+        Ok(())
+    } else {
+        Err(UserError::MissingFieldForSource {
+            field,
+            source_: source,
+            embedder_name: embedder_name.to_owned(),
+        })
+    }
+}
+
+impl EmbeddingSettings {
+    pub const SOURCE: &'static str = "source";
+    pub const MODEL: &'static str = "model";
+    pub const REVISION: &'static str = "revision";
+    pub const API_KEY: &'static str = "apiKey";
+    pub const DIMENSIONS: &'static str = "dimensions";
+    pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
+
+    pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] {
+        match field {
+            Self::SOURCE => {
+                &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
+            }
+            Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
+            Self::REVISION => &[EmbedderSource::HuggingFace],
+            Self::API_KEY => &[EmbedderSource::OpenAi],
+            Self::DIMENSIONS => &[EmbedderSource::UserProvided],
+            Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
+            _other => unreachable!("unknown field"),
+        }
+    }
+
+    pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] {
+        match source {
+            EmbedderSource::OpenAi => {
+                &[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE]
+            }
+            EmbedderSource::HuggingFace => {
+                &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
+            }
+            EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
+        }
+    }
+
+    pub(crate) fn apply_default_source(setting: &mut Setting<EmbeddingSettings>) {
+        if let Setting::Set(EmbeddingSettings {
+            source: source @ (Setting::NotSet | Setting::Reset),
+            ..
+        }) = setting
+        {
+            *source = Setting::Set(EmbedderSource::default())
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[deserr(rename_all = camelCase, deny_unknown_fields)]
+pub enum EmbedderSource {
+    #[default]
+    OpenAi,
+    HuggingFace,
+    UserProvided,
+}
+
+impl std::fmt::Display for EmbedderSource {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let s = match self {
+            EmbedderSource::OpenAi => "openAi",
+            EmbedderSource::HuggingFace => "huggingFace",
+            EmbedderSource::UserProvided => "userProvided",
+        };
+        f.write_str(s)
+    }
 }

 impl EmbeddingSettings {
    pub fn apply(&mut self, new: Self) {
-        let EmbeddingSettings { embedder_options, document_template: prompt } = new;
-        self.embedder_options.apply(embedder_options);
-        self.document_template.apply(prompt);
+        let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+            new;
+        let old_source = self.source;
+        self.source.apply(source);
+        // Reinitialize the whole setting object on a source change
+        if old_source != self.source {
+            *self = EmbeddingSettings {
+                source,
+                model,
+                revision,
+                api_key,
+                dimensions,
+                document_template,
+            };
+            return;
+        }
+
+        self.model.apply(model);
+        self.revision.apply(revision);
+        self.api_key.apply(api_key);
+        self.dimensions.apply(dimensions);
+        self.document_template.apply(document_template);
    }
 }

 impl From<EmbeddingConfig> for EmbeddingSettings {
    fn from(value: EmbeddingConfig) -> Self {
-        Self {
-            embedder_options: Setting::Set(value.embedder_options.into()),
-            document_template: Setting::Set(value.prompt.into()),
+        let EmbeddingConfig { embedder_options, prompt } = value;
+        match embedder_options {
+            super::EmbedderOptions::HuggingFace(options) => Self {
+                source: Setting::Set(EmbedderSource::HuggingFace),
+                model: Setting::Set(options.model),
+                revision: options.revision.map(Setting::Set).unwrap_or_default(),
+                api_key: Setting::NotSet,
+                dimensions: Setting::NotSet,
+                document_template: Setting::Set(prompt.template),
+            },
+            super::EmbedderOptions::OpenAi(options) => Self {
+                source: Setting::Set(EmbedderSource::OpenAi),
+                model: Setting::Set(options.embedding_model.name().to_owned()),
+                revision: Setting::NotSet,
+                api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
+                dimensions: Setting::NotSet,
+                document_template: Setting::Set(prompt.template),
+            },
+            super::EmbedderOptions::UserProvided(options) => Self {
+                source: Setting::Set(EmbedderSource::UserProvided),
+                model: Setting::NotSet,
+                revision: Setting::NotSet,
+                api_key: Setting::NotSet,
+                dimensions: Setting::Set(options.dimensions),
+                document_template: Setting::NotSet,
+            },
        }
    }
 }
@@ -37,256 +194,51 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
 impl From<EmbeddingSettings> for EmbeddingConfig {
    fn from(value: EmbeddingSettings) -> Self {
        let mut this = Self::default();
-        let EmbeddingSettings { embedder_options, document_template: prompt } = value;
-        if let Some(embedder_options) = embedder_options.set() {
-            this.embedder_options = embedder_options.into();
-        }
-        if let Some(prompt) = prompt.set() {
-            this.prompt = prompt.into();
-        }
-        this
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct PromptSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub template: Setting<String>,
-}
-
-impl PromptSettings {
-    pub fn apply(&mut self, new: Self) {
-        let PromptSettings { template } = new;
-        self.template.apply(template);
-    }
-}
-
-impl From<PromptData> for PromptSettings {
-    fn from(value: PromptData) -> Self {
-        Self { template: Setting::Set(value.template) }
-    }
-}
-
-impl From<PromptSettings> for PromptData {
-    fn from(value: PromptSettings) -> Self {
-        let mut this = PromptData::default();
-        let PromptSettings { template } = value;
-        if let Some(template) = template.set() {
-            this.template = template;
-        }
-        this
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-pub enum EmbedderSettings {
-    HuggingFace(Setting<HfEmbedderSettings>),
-    OpenAi(Setting<OpenAiEmbedderSettings>),
-    UserProvided(UserProvidedSettings),
-}
-
-impl<E> Deserr<E> for EmbedderSettings
-where
-    E: deserr::DeserializeError,
-{
-    fn deserialize_from_value<V: deserr::IntoValue>(
-        value: deserr::Value<V>,
-        location: deserr::ValuePointerRef,
-    ) -> Result<Self, E> {
-        match value {
-            deserr::Value::Map(map) => {
-                if deserr::Map::len(&map) != 1 {
-                    return Err(deserr::take_cf_content(E::error::<V>(
-                        None,
-                        deserr::ErrorKind::Unexpected {
-                            msg: format!(
-                                "Expected a single field, got {} fields",
-                                deserr::Map::len(&map)
-                            ),
-                        },
-                        location,
-                    )));
+        let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+            value;
+        if let Some(source) = source.set() {
+            match source {
+                EmbedderSource::OpenAi => {
+                    let mut options = super::openai::EmbedderOptions::with_default_model(None);
+                    if let Some(model) = model.set() {
+                        if let Some(model) = super::openai::EmbeddingModel::from_name(&model) {
+                            options.embedding_model = model;
+                        }
+                    }
+                    if let Some(api_key) = api_key.set() {
+                        options.api_key = Some(api_key);
+                    }
+                    this.embedder_options = super::EmbedderOptions::OpenAi(options);
                }
-                let mut it = deserr::Map::into_iter(map);
-                let (k, v) = it.next().unwrap();
-
-                match k.as_str() {
-                    "huggingFace" => Ok(EmbedderSettings::HuggingFace(Setting::Set(
-                        HfEmbedderSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    ))),
-                    "openAi" => Ok(EmbedderSettings::OpenAi(Setting::Set(
-                        OpenAiEmbedderSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    ))),
-                    "userProvided" => Ok(EmbedderSettings::UserProvided(
-                        UserProvidedSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    )),
-                    other => Err(deserr::take_cf_content(E::error::<V>(
-                        None,
-                        deserr::ErrorKind::UnknownKey {
-                            key: other,
-                            accepted: &["huggingFace", "openAi", "userProvided"],
-                        },
-                        location,
-                    ))),
+                EmbedderSource::HuggingFace => {
+                    let mut options = super::hf::EmbedderOptions::default();
+                    if let Some(model) = model.set() {
+                        options.model = model;
+                        // Reset the revision if we are setting the model.
+                        // This allows the following:
+                        // "huggingFace": {} -> default model with default revision
+                        // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision
+                        // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision
+                        options.revision = None;
+                    }
+                    if let Some(revision) = revision.set() {
+                        options.revision = Some(revision);
+                    }
+                    this.embedder_options = super::EmbedderOptions::HuggingFace(options);
+                }
+                EmbedderSource::UserProvided => {
+                    this.embedder_options =
+                        super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
+                            dimensions: dimensions.set().unwrap(),
+                        });
                }
            }
-            _ => Err(deserr::take_cf_content(E::error::<V>(
-                None,
-                deserr::ErrorKind::IncorrectValueKind {
-                    actual: value,
-                    accepted: &[deserr::ValueKind::Map],
-                },
-                location,
-            ))),
        }
-    }
-}

-impl Default for EmbedderSettings {
-    fn default() -> Self {
-        Self::OpenAi(Default::default())
-    }
-}
-
-impl From<crate::vector::EmbedderOptions> for EmbedderSettings {
-    fn from(value: crate::vector::EmbedderOptions) -> Self {
-        match value {
-            crate::vector::EmbedderOptions::HuggingFace(hf) => {
-                Self::HuggingFace(Setting::Set(hf.into()))
-            }
-            crate::vector::EmbedderOptions::OpenAi(openai) => {
-                Self::OpenAi(Setting::Set(openai.into()))
-            }
-            crate::vector::EmbedderOptions::UserProvided(user_provided) => {
-                Self::UserProvided(user_provided.into())
-            }
+        if let Setting::Set(template) = document_template {
+            this.prompt = PromptData { template }
        }
-    }
-}

-impl From<EmbedderSettings> for crate::vector::EmbedderOptions {
-    fn from(value: EmbedderSettings) -> Self {
-        match value {
-            EmbedderSettings::HuggingFace(Setting::Set(hf)) => Self::HuggingFace(hf.into()),
-            EmbedderSettings::HuggingFace(_setting) => Self::HuggingFace(Default::default()),
-            EmbedderSettings::OpenAi(Setting::Set(ai)) => Self::OpenAi(ai.into()),
-            EmbedderSettings::OpenAi(_setting) => {
-                Self::OpenAi(crate::vector::openai::EmbedderOptions::with_default_model(None))
-            }
-            EmbedderSettings::UserProvided(user_provided) => {
-                Self::UserProvided(user_provided.into())
-            }
-        }
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct HfEmbedderSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub model: Setting<String>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub revision: Setting<String>,
-}
-
-impl HfEmbedderSettings {
-    pub fn apply(&mut self, new: Self) {
-        let HfEmbedderSettings { model, revision } = new;
-        self.model.apply(model);
-        self.revision.apply(revision);
-    }
-}
-
-impl From<crate::vector::hf::EmbedderOptions> for HfEmbedderSettings {
-    fn from(value: crate::vector::hf::EmbedderOptions) -> Self {
-        Self {
-            model: Setting::Set(value.model),
-            revision: value.revision.map(Setting::Set).unwrap_or(Setting::NotSet),
-        }
-    }
-}
-
-impl From<HfEmbedderSettings> for crate::vector::hf::EmbedderOptions {
-    fn from(value: HfEmbedderSettings) -> Self {
-        let HfEmbedderSettings { model, revision } = value;
-        let mut this = Self::default();
-        if let Some(model) = model.set() {
-            this.model = model;
-        }
-        if let Some(revision) = revision.set() {
-            this.revision = Some(revision);
-        }
        this
    }
 }
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct OpenAiEmbedderSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub api_key: Setting<String>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "model")]
-    #[deserr(default, rename = "model")]
-    pub embedding_model: Setting<crate::vector::openai::EmbeddingModel>,
-}
-
-impl OpenAiEmbedderSettings {
-    pub fn apply(&mut self, new: Self) {
-        let Self { api_key, embedding_model: embedding_mode } = new;
-        self.api_key.apply(api_key);
-        self.embedding_model.apply(embedding_mode);
-    }
-}
-
-impl From<crate::vector::openai::EmbedderOptions> for OpenAiEmbedderSettings {
-    fn from(value: crate::vector::openai::EmbedderOptions) -> Self {
-        Self {
-            api_key: value.api_key.map(Setting::Set).unwrap_or(Setting::Reset),
-            embedding_model: Setting::Set(value.embedding_model),
-        }
-    }
-}
-
-impl From<OpenAiEmbedderSettings> for crate::vector::openai::EmbedderOptions {
-    fn from(value: OpenAiEmbedderSettings) -> Self {
-        let OpenAiEmbedderSettings { api_key, embedding_model } = value;
-        Self { api_key: api_key.set(), embedding_model: embedding_model.set().unwrap_or_default() }
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct UserProvidedSettings {
-    pub dimensions: usize,
-}
-
-impl From<UserProvidedSettings> for crate::vector::manual::EmbedderOptions {
-    fn from(value: UserProvidedSettings) -> Self {
-        Self { dimensions: value.dimensions }
-    }
-}
-
-impl From<crate::vector::manual::EmbedderOptions> for UserProvidedSettings {
-    fn from(value: crate::vector::manual::EmbedderOptions) -> Self {
-        Self { dimensions: value.dimensions }
-    }
-}
Author	SHA1	Message	Date
ManyTheFish	60bfd3aef1	Send directly each chunk to the main thread instead of merging them at the end of the extraction	2024-01-22 16:30:27 +01:00
ManyTheFish	5027eea1a8	Remove append function	2024-01-22 16:30:09 +01:00
ManyTheFish	5079fb4b14	Compute chunk size based on the input data size and the number of indexing threads	2024-01-22 16:29:44 +01:00
meili-bors[bot]	8e016fbfeb	Merge #4319 4319: Update README r=curquiza a=codesmith-emmy # Pull Request ## Related issue Fixes #<issue_number> ## What does this PR do? - ... ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [ ] Have you read the contributing guidelines? - [ ] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: emmanuel <154705254+codesmith-emmy@users.noreply.github.com>	2024-01-15 18:41:14 +00:00
meili-bors[bot]	1ccde9bf0b	Merge #4316 4316: Autobatch the task deletions r=curquiza a=irevoire # Pull Request ## Related issue Fix part of https://github.com/meilisearch/meilisearch-support/issues/69 Fix #4315 ## What does this PR do? - Autobatch the task deletions Co-authored-by: Tamo <tamo@meilisearch.com>	2024-01-15 17:54:50 +00:00
meili-bors[bot]	34e814f400	Merge #4327 4327: Bring back changes from `release-v1.6.0` to `main` r=dureuill a=curquiza Co-authored-by: Paul Sanders <psanders1@gmail.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>	2024-01-15 16:52:05 +00:00
meili-bors[bot]	a6fa0b97ec	Merge #4318 4318: Hide embedders r=ManyTheFish a=dureuill Hides `embedders` when it is an empty dictionary. Manual tests: - getting settings with empty embedders: not displayed - getting settings with non-empty embedders: displayed like before - dump with empty embedders: can be imported - dump with non-empty embedders: can be imported Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-01-15 09:37:31 +00:00
emmanuel	552127021f	Update	2024-01-12 16:03:23 +01:00
Louis Dureuil	38abfec611	Fix tests	2024-01-11 21:35:30 +01:00
Louis Dureuil	84a5c304fc	Don't display the embedders setting when it is an empty dict	2024-01-11 21:35:06 +01:00
meili-bors[bot]	e93d36d5b9	Merge #4313 4313: Fix document formatting performances r=Kerollmops a=ManyTheFish reduce the formatted option list to the attributes that should be formatted, instead of all the attributes to display. The time to compute the `format` list scales with the number of fields to format; cumulated with `map_leaf_values` that iterates over all the nested fields, it gives a quadratic complexity: `d*f` where `d` is the total number of fields to display and `f` is the total number of fields to format. Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-01-11 14:19:44 +00:00
ManyTheFish	95f8e21533	fix typos	2024-01-11 15:07:08 +01:00
Tamo	b4d7d80ad9	autobatch the task deletions	2024-01-11 14:58:07 +01:00
meili-bors[bot]	68f197624e	Merge #4314 4314: Fix proximity precision telemetry r=Kerollmops a=ManyTheFish The proximity precision telemetry was partially missing in the global setting route. This PR adds the missing field and returns the default value when the value is not set. Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-01-11 13:50:03 +00:00
ManyTheFish	b79b03d4e2	Fix proximity precision telemetry	2024-01-11 13:24:26 +01:00
ManyTheFish	86270e6878	Transform fields contained into _format into strings	2024-01-11 12:44:56 +01:00
ManyTheFish	81b6128b29	Update tests	2024-01-11 12:28:32 +01:00
ManyTheFish	5f5a486895	Reduce formatting time	2024-01-11 11:36:41 +01:00
ManyTheFish	5f4fc6c955	Add timer logs	2024-01-11 09:44:16 +01:00
meili-bors[bot]	1f5e8fc072	Merge #4311 4311: Limit the number of values returned by the facet search r=dureuill a=Kerollmops This PR fixes a bug where the number of values per facet returned by the `indexes/{index}/facet-search` route was not taking the `faceting.maxValuePerFacet` setting into account. It also adds a test. Co-authored-by: Clément Renault <clement@meilisearch.com>	2024-01-10 16:04:06 +00:00
Clément Renault	3f3462ab62	Limit the number of values returned by the facet search	2024-01-10 16:54:08 +01:00
meili-bors[bot]	93363b0201	Merge #4308 4308: Fix hang on `/indexes` and `/stats` routes r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #4218 ## Context - A previous fix added a field to the `IndexScheduler` to memorize the `currently_updating_index`, so that accessing it through the search would return the handle without trying to open it. This resolved a hang on the search, but #4218 reported further hangs on the `/indexes` and `/stats` routes - These routes were shunting the `IndexScheduler` and using internal `IndexMapper` logic to access the indexes, again trying to reopen the updating index. ## What does this PR do? - Moves the logic relative to the `currently_updating_index` from the `IndexScheduler` to the `IndexMapper`, so that any index request to the `IndexMapper` can benefit from it. ## Test 1. Follow reproducer from #4218 2. Before this PR, notice a hang on `/stats` and `/indexes`, but not on `/indexes/<updating_index>/search` 3. After this PR, notice no hang on either of `/stats`, `/indexes` or `/indexes/<updating_index>/search` Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-01-10 10:46:20 +00:00
Louis Dureuil	97bb1ff9e2	Move `currently_updating_index` to IndexMapper	2024-01-09 15:37:27 +01:00
meili-bors[bot]	5ee1378856	Merge #4303 4303: Display default value when proximityPrecision is not set r=dureuill a=ManyTheFish # Pull Request ## Related Issue: #4187 Spec change requests: https://github.com/meilisearch/specifications/pull/261#discussion_r1441725272 ## What does this PR do? - Display default value when proximityPrecision is not set instead of Null Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-01-08 14:29:57 +00:00
ManyTheFish	e27b850b09	move the default display strategy on setting getter function	2024-01-08 14:03:47 +01:00
ManyTheFish	f75f22e026	Display default value when proximityPrecision is not set	2024-01-08 11:09:37 +01:00
meili-bors[bot]	6203f4acef	Merge #4296 4296: Fix single element search r=irevoire a=dureuill # Pull Request Before this PR, indexing a single vector in a single document would result in the vector not being found by the vector search. This PR adds a test case for this condition, and resolves it by bumping arroy to a version containing the fix. # Test case Output of the test before and after this PR: ```diff diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 2cd4b83e7..79819cab2 100644 --- a/meilisearch/tests/search/hybrid.rs on release-v1.6.0 +++ b/meilisearch/tests/search/hybrid.rs on fix-single-element-search `@@` -171,5 +171,5 `@@` async fn single_document() { .await; snapshot!(code, `@"200` OK"); - snapshot!(response["hits"][0], `@r###"{"title":"Shazam!","desc":"a` Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.0}"###); + snapshot!(response["hits"][0], `@r###"{"title":"Shazam!","desc":"a` Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###); } ``` Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-01-03 15:01:43 +00:00
Louis Dureuil	12edc2c20a	Update arroy to a fixed version	2024-01-03 15:59:37 +01:00
Louis Dureuil	94b9f3b310	Add test	2024-01-03 15:56:20 +01:00
meili-bors[bot]	5204c0b60b	Merge #4297 4297: Update license for 2024 r=curquiza a=meili-bot _This PR is auto-generated._ Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>	2024-01-03 13:54:19 +00:00
meili-bot	e73cd692db	Update LICENSE	2024-01-03 14:32:41 +01:00
meili-bors[bot]	29b453346b	Merge #4293 4293: Update SDK test dependencies r=curquiza a=curquiza Replace dependabot updates The changes are really un-impactful for the engine team velocity because it is about a CI - that does not run during release deployment - that does not run to merge a PR It's only a weekly scheduled CI to check the breaking changes we introduced in the integrations. I updated the dependencies based on what we do on the integration CIs For example for dart, I looked at what we have in the [Dart CI](`63fd758882/.github/workflows/tests.yml (L16-L54)`) and I updated our CI in this repo accordingly. I did the same for each repository. This ensures we test the same things. Co-authored-by: curquiza <clementine@meilisearch.com>	2024-01-03 13:26:50 +00:00
meili-bors[bot]	c4bb435374	Merge #4295 4295: fix compilation warnings on main r=curquiza a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4292 ## What does this PR do? - Removed unused imports #4294 fixes the issue for the release v1.6 Co-authored-by: Tamo <tamo@meilisearch.com>	2024-01-02 15:33:06 +00:00
meili-bors[bot]	da99a04eb3	Merge #4294 4294: fix compilation warnings for release v1.6 r=curquiza a=irevoire # Pull Request ## Related issue Fixes #4292 ## What does this PR do? - Removed unused imports #4295 fixes the issue on main Co-authored-by: Tamo <tamo@meilisearch.com>	2024-01-02 15:00:40 +00:00
Tamo	54ae6951eb	fix warning	2024-01-02 15:19:30 +01:00
Tamo	2bcff2ea46	fix warning	2024-01-02 15:19:00 +01:00
curquiza	1275e72e0b	Update SDK test dependencies	2024-01-02 09:59:46 +01:00
meili-bors[bot]	658ec6e0a4	Merge #4279 4279: Check experimental feature on setting update query rather than in the task. r=ManyTheFish a=dureuill Improve the UX by checking for the vector store feature and returning an error synchronously when sending a setting update, rather than in the indexing task. Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-12-22 11:36:12 +00:00
meili-bors[bot]	43e822e802	Merge #4238 4238: Task queue webhook r=dureuill a=irevoire # Prototype `prototype-task-queue-webhook-1` The prototype is available through Docker by using the following command: ```bash docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-task-queue-webhook-1 ``` # Pull Request Implements the task queue webhook. ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4236 ## What does this PR do? - Provide a new cli and env var for the webhook, respectively called `--task-webhook-url` and `MEILI_TASK_WEBHOOK_URL` - Also supports sending the requests with a custom `Authorization` header by specifying the optional `--task-webhook-authorization-header` CLI parameter or `MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER` env variable. - Throw an error if the specified URL is invalid - Every time a batch is processed, send all the finished tasks into the webhook with our public `TaskView` type as a JSON Line GZIPed body. - Add one test. ## PR checklist ### Before becoming ready to review - [x] Add a test - [x] Compress the data we send - [x] Chunk and stream the data we send - [x] Remove the unwrap in the index-scheduler when sending the data fails - [x] The analytics are missing ### Before merging - [x] Release a prototype Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>	2023-12-21 14:43:46 +00:00
Louis Dureuil	ee54d3171e	Check experimental feature at query time	2023-12-21 15:26:12 +01:00
meili-bors[bot]	a0e713c4e7	Merge #4277 4277: Update mini-dashboard to v0.2.12 r=curquiza a=mdubus # Pull Request ## Related issue Fixes #4276 ## What does this PR do? Upgrade mini-dashboard to version 0.2.12 ([see changes](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.12)) ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>	2023-12-21 11:03:46 +00:00
meili-bors[bot]	d4cb0a885b	Merge #4275 4275: Flatten settings r=dureuill a=dureuill # Pull Request ## Related issue Initial internal feedback seems to indicate that the current shape of the `embedders` setting is undesirable: it has too much depth. This PR changes this by flattening the structure of the embedders to the following: ```json5 // NEW structure "embedders": { // still starts with the embedder name "default": { "source": "huggingFace", // now a string // properties of the source are all at the same level as the source "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd", "documentTemplate": "A product titled '{{doc.title}}'" // now a string } } ``` By comparison, the old structure was: ```json5 // PREVIOUS version, no longer working with this PR "embedders": { // still starts with the embedder name "default": { "source": { "huggingFace": { "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd" }, "documentTemplate": { "template": "A product titled '{{doc.title}}'" // now a string } } } ``` The fields that are accepted in the new version of the `embedders` setting are depending on the value of the `source` field: ```json5 // huggingFace "embedders": { "default": { "source": "huggingFace", "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd", "documentTemplate": "A product titled '{{doc.title}}'" } } // openAi "embedders": { "default": { "source": "openAi", "model": "text-embedding-ada-002", "apiKey": "open_ai_api_key", "documentTemplate": "A product titled '{{doc.title}}'" } } // userProvided "embedders": { "default": { "source": "userProvided", "dimensions": 42, // mandatory } } ``` ## What does this PR do? 
- Flatten the settings structure - Validate the prompt earlier to return a synchronous error on setting change rather than in the failing task - Make it an error to pass a field for the wrong source (see above for allowed fields for each source) - Not changed: It is still an error not to pass `dimensions` to the `userProvided` embedder - If `source` was specified in the settings, validate the setting early to return a synchronous error in case of a missing mandatory field for the userProvided source (dimensions) or a forbidden field for the specified source. - If `source` was not specified in the settings, still validate the setting, but only at indexing time, by using the source stored in the DB. - Resets all values if the source changes, even if the user did not reset them explicitly. ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Change the public facing guide for using the API - [ ] Change examples of use in the changelog Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-12-21 09:58:01 +00:00
Morgane Dubus	f52dee2b3b	Update Cargo.toml Update mini-dashboard with v0.2.12	2023-12-21 09:53:13 +01:00
Louis Dureuil	0bf879fb88	Fix warning on rust stable	2023-12-20 17:48:09 +01:00
Louis Dureuil	6ff81de401	Fix tests	2023-12-20 17:16:46 +01:00
Louis Dureuil	2e4c9651df	Validate settings in route	2023-12-20 17:16:46 +01:00
Louis Dureuil	ec9649c922	Add function to validate settings in Meilisearch, to be used in the routes	2023-12-20 17:16:46 +01:00
Louis Dureuil	9123370e90	Validate fused settings in settings task after fusing with existing setting	2023-12-20 17:16:46 +01:00
Louis Dureuil	14b396d302	Add new errors	2023-12-20 17:16:45 +01:00
Louis Dureuil	393216bf30	Flatten embedders settings	2023-12-20 17:16:43 +01:00
Louis Dureuil	e249e4db7b	Change Setting::apply function signature	2023-12-20 17:15:24 +01:00
meili-bors[bot]	de2ca7006e	Merge #4272 4272: Don't pass default revision when the model is explicitly set in config r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #4271 ## What does this PR do? - When the `model` is explicitly set in the `embedders` setting, we reset the `revision` to `None`, such that if the user doesn't specify a revision, the head of the model repository is chosen. - Not changed: If the user specifies a revision, it applies, like previously. - Not changed: If the user doesn't specify a model, the default model with the default revision applies, like previously. ## Manual testing on a fresh DB 1. Enable experimental feature: ```sh curl \ -X PATCH 'http://localhost:7700/experimental-features/' \ -H 'Content-Type: application/json' -H 'Authorization: Bearer foo' \ --data-binary '{ "vectorStore": true }' ``` 2. Send settings with a specified model but no specified revision: ```sh curl \ -X PATCH 'http://localhost:7700/indexes/products/settings' \ -H 'Content-Type: application/json' --data-binary \ '{ "embedders": { "default": { "source": { "huggingFace": { "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" } }, "documentTemplate": { "template": "A product titled '{{doc.title}}'"} } } }' ``` 3. Check that the task was successful: ```sh curl 'http://localhost:7700/tasks/0' {"uid":0,"indexUid":"products","status":"succeeded","type":"settingsUpdate","canceledBy":null,"details":{"embedders":{"default":{"source":{"huggingFace":{"model":"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"}},"documentTemplate":{"template":"A product titled {{doc.title}}"}}}},"error":null,"duration":"PT0.001892S","enqueuedAt":"2023-12-20T09:17:01.73789Z","startedAt":"2023-12-20T09:17:01.73854Z","finishedAt":"2023-12-20T09:17:01.740432Z"} ``` 4. 
Send documents to index: ```sh curl 'https://localhost:7700/indexes/products/documents' -H 'Content-Type: application/json' --data-binary '{"id": 0, "title": "Best product"}' ``` Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-12-20 14:27:51 +00:00
Louis Dureuil	333ce12eb2	Fixed issue where the default revision is always the one we picked for the default model	2023-12-20 10:17:49 +01:00
Clément Renault	fa2b96b9a5	Add an Authorization Header along with the webhook calls	2023-12-19 12:18:45 +01:00
Tamo	19736cefe8	add the analytics	2023-12-19 10:36:04 +01:00
Tamo	4fb25b8782	fix clippy	2023-12-19 10:35:51 +01:00
Tamo	c83a33017e	stream and chunk the data	2023-12-19 10:35:51 +01:00
Tamo	be72326c0a	gzip the tasks	2023-12-19 10:35:51 +01:00
Tamo	547379abb0	parse the url correctly	2023-12-19 10:35:51 +01:00
Tamo	0b2fff27f2	update and fix the test	2023-12-19 10:35:51 +01:00
Tamo	3adbc2b942	return a task view instead of a task	2023-12-19 10:35:51 +01:00
Tamo	fbea721378	add a first working test with actixweb	2023-12-19 10:35:51 +01:00
Tamo	391eb72137	start writing a test with actix but it doesn't work	2023-12-19 10:35:50 +01:00
Tamo	d78ad51082	Implement the webhook	2023-12-19 10:35:50 +01:00
Tamo	1956045a06	add the option	2023-12-19 10:23:56 +01:00