Only insert facets in bulks no more incrementally

Add more logging to the received chunks
Remove unused imports
2025-12-02 10:45:36 +00:00 · 2024-01-26 11:07:48 +01:00 · 2024-01-25 20:16:59 +01:00 · 2024-01-25 17:58:47 +01:00 · 2024-01-25 17:48:31 +01:00 · 2024-01-25 17:47:33 +01:00
39 changed files with 1064 additions and 638 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -383,7 +383,7 @@ dependencies = [
 [[package]]
 name = "arroy"
 version = "0.1.0"
-source = "git+https://github.com/meilisearch/arroy.git#4f193fd534acd357b65bfe9eec4b3fed8ece2007"
+source = "git+https://github.com/meilisearch/arroy.git#d372648212e561a4845077cdb9239423d78655a2"
 dependencies = [
 "bytemuck",
 "byteorder",
@@ -1677,9 +1677,9 @@ dependencies = [

 [[package]]
 name = "flate2"
-version = "1.0.26"
+version = "1.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
+checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
 dependencies = [
 "crc32fast",
 "miniz_oxide",
@@ -1701,9 +1701,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

 [[package]]
 name = "form_urlencoded"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
 dependencies = [
 "percent-encoding",
 ]
@@ -2753,9 +2753,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"

 [[package]]
 name = "idna"
-version = "0.4.0"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
 dependencies = [
 "unicode-bidi",
 "unicode-normalization",
@@ -2774,6 +2774,7 @@ dependencies = [
 "dump",
 "enum-iterator",
 "file-store",
+ "flate2",
 "insta",
 "log",
 "meili-snap",
@@ -2789,6 +2790,7 @@ dependencies = [
 "tempfile",
 "thiserror",
 "time",
+ "ureq",
 "uuid 1.5.0",
 ]

@@ -3559,6 +3561,7 @@ dependencies = [
 "tokio",
 "tokio-stream",
 "toml",
+ "url",
 "urlencoding",
 "uuid 1.5.0",
 "vergen",
@@ -4067,9 +4070,9 @@ dependencies = [

 [[package]]
 name = "percent-encoding"
-version = "2.3.0"
+version = "2.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
@@ -5597,13 +5600,14 @@ dependencies = [

 [[package]]
 name = "url"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb"
+checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
 dependencies = [
 "form_urlencoded",
 "idna",
 "percent-encoding",
+ "serde",
 ]

 [[package]]
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@@ -18,6 +18,7 @@ derive_builder = "0.12.0"
 dump = { path = "../dump" }
 enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
+flate2 = "1.0.28"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
@@ -30,6 +31,7 @@ synchronoise = "1.0.1"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+ureq = "2.9.1"
 uuid = { version = "1.3.1", features = ["serde", "v4"] }

 [dev-dependencies]
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -936,8 +936,8 @@ impl IndexScheduler {
                };

                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
-                *self.currently_updating_index.write().unwrap() =
-                    Some((index_uid.clone(), index.clone()));
+                self.index_mapper
+                    .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

                let mut index_wtxn = index.write_txn()?;
                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
@@ -1351,9 +1351,6 @@ impl IndexScheduler {

                for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
                    let checked_settings = settings.clone().check();
-                    if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
-                        self.features().check_vector("Passing `embedders` in settings")?
-                    }
                    task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
                    apply_settings_to_builder(&checked_settings, &mut builder);

--- a/index-scheduler/src/index_mapper/mod.rs
+++ b/index-scheduler/src/index_mapper/mod.rs
@@ -69,6 +69,10 @@ pub struct IndexMapper {
    /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
    enable_mdb_writemap: bool,
    pub indexer_config: Arc<IndexerConfig>,
+
+    /// A few types of long running batches of tasks that act on a single index set this field
+    /// so that a handle to the index is available from other threads (search) in an optimized manner.
+    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
 }

 /// Whether the index is available for use or is forbidden to be inserted back in the index map
@@ -151,6 +155,7 @@ impl IndexMapper {
            index_growth_amount,
            enable_mdb_writemap,
            indexer_config: Arc::new(indexer_config),
+            currently_updating_index: Default::default(),
        })
    }

@@ -303,6 +308,14 @@ impl IndexMapper {

    /// Return an index, may open it if it wasn't already opened.
    pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
+        if let Some((current_name, current_index)) =
+            self.currently_updating_index.read().unwrap().as_ref()
+        {
+            if current_name == name {
+                return Ok(current_index.clone());
+            }
+        }
+
        let uuid = self
            .index_mapping
            .get(rtxn, name)?
@@ -474,4 +487,8 @@ impl IndexMapper {
    pub fn indexer_config(&self) -> &IndexerConfig {
        &self.indexer_config
    }
+
+    pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
+        *self.currently_updating_index.write().unwrap() = index;
+    }
 }
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -37,10 +37,11 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        snapshots_path: _,
        auth_path: _,
        version_file_path: _,
+        webhook_url: _,
+        webhook_authorization_header: _,
        test_breakpoint_sdr: _,
        planned_failures: _,
        run_loop_iteration: _,
-        currently_updating_index: _,
        embedders: _,
    } = scheduler;

--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -34,6 +34,7 @@ pub type TaskId = u32;

 use std::collections::{BTreeMap, HashMap};
 use std::fs::File;
+use std::io::{self, BufReader, Read};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@@ -45,6 +46,8 @@ use dump::{KindDump, TaskDump, UpdateFile};
 pub use error::Error;
 pub use features::RoFeatures;
 use file_store::FileStore;
+use flate2::bufread::GzEncoder;
+use flate2::Compression;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::byteorder::BE;
@@ -54,6 +57,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
+use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use puffin::FrameView;
 use roaring::RoaringBitmap;
@@ -170,8 +174,8 @@ impl ProcessingTasks {
    }

    /// Set the processing tasks to an empty list
-    fn stop_processing(&mut self) {
-        self.processing = RoaringBitmap::new();
+    fn stop_processing(&mut self) -> RoaringBitmap {
+        std::mem::take(&mut self.processing)
    }

    /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
@@ -241,6 +245,10 @@ pub struct IndexSchedulerOptions {
    pub snapshots_path: PathBuf,
    /// The path to the folder containing the dumps.
    pub dumps_path: PathBuf,
+    /// The URL on which we must send the tasks statuses
+    pub webhook_url: Option<String>,
+    /// The value we will send into the Authorization HTTP header on the webhook URL
+    pub webhook_authorization_header: Option<String>,
    /// The maximum size, in bytes, of the task index.
    pub task_db_size: usize,
    /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -323,6 +331,11 @@ pub struct IndexScheduler {
    /// The maximum number of tasks that will be batched together.
    pub(crate) max_number_of_batched_tasks: usize,

+    /// The webhook url we should send tasks to after processing every batches.
+    pub(crate) webhook_url: Option<String>,
+    /// The Authorization header to send to the webhook URL.
+    pub(crate) webhook_authorization_header: Option<String>,
+
    /// A frame to output the indexation profiling files to disk.
    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,

@@ -338,10 +351,6 @@ pub struct IndexScheduler {
    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

-    /// A few types of long running batches of tasks that act on a single index set this field
-    /// so that a handle to the index is available from other threads (search) in an optimized manner.
-    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
-
    embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,

    // ================= test
@@ -388,7 +397,8 @@ impl IndexScheduler {
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
            version_file_path: self.version_file_path.clone(),
-            currently_updating_index: self.currently_updating_index.clone(),
+            webhook_url: self.webhook_url.clone(),
+            webhook_authorization_header: self.webhook_authorization_header.clone(),
            embedders: self.embedders.clone(),
            #[cfg(test)]
            test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@@ -487,7 +497,8 @@ impl IndexScheduler {
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
            version_file_path: options.version_file_path,
-            currently_updating_index: Arc::new(RwLock::new(None)),
+            webhook_url: options.webhook_url,
+            webhook_authorization_header: options.webhook_authorization_header,
            embedders: Default::default(),

            #[cfg(test)]
@@ -671,13 +682,6 @@ impl IndexScheduler {
    /// If you need to fetch information from or perform an action on all indexes,
    /// see the `try_for_each_index` function.
    pub fn index(&self, name: &str) -> Result<Index> {
-        if let Some((current_name, current_index)) =
-            self.currently_updating_index.read().unwrap().as_ref()
-        {
-            if current_name == name {
-                return Ok(current_index.clone());
-            }
-        }
        let rtxn = self.env.read_txn()?;
        self.index_mapper.index(&rtxn, name)
    }
@@ -1158,7 +1162,7 @@ impl IndexScheduler {
        };

        // Reset the currently updating index to relinquish the index handle
-        *self.currently_updating_index.write().unwrap() = None;
+        self.index_mapper.set_currently_updating_index(None);

        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@@ -1251,19 +1255,99 @@ impl IndexScheduler {
            }
        }

-        self.processing_tasks.write().unwrap().stop_processing();
+        let processed = self.processing_tasks.write().unwrap().stop_processing();

        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;

        wtxn.commit().map_err(Error::HeedTransaction)?;

+        // We shouldn't crash the tick function if we can't send data to the webhook.
+        let _ = self.notify_webhook(&processed);
+
        #[cfg(test)]
        self.breakpoint(Breakpoint::AfterProcessing);

        Ok(TickOutcome::TickAgain(processed_tasks))
    }

+    /// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
+    fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
+        if let Some(ref url) = self.webhook_url {
+            struct TaskReader<'a, 'b> {
+                rtxn: &'a RoTxn<'a>,
+                index_scheduler: &'a IndexScheduler,
+                tasks: &'b mut roaring::bitmap::Iter<'b>,
+                buffer: Vec<u8>,
+                written: usize,
+            }
+
+            impl<'a, 'b> Read for TaskReader<'a, 'b> {
+                fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
+                    if self.buffer.is_empty() {
+                        match self.tasks.next() {
+                            None => return Ok(0),
+                            Some(task_id) => {
+                                let task = self
+                                    .index_scheduler
+                                    .get_task(self.rtxn, task_id)
+                                    .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
+                                    .ok_or_else(|| {
+                                        io::Error::new(
+                                            io::ErrorKind::Other,
+                                            Error::CorruptedTaskQueue,
+                                        )
+                                    })?;
+
+                                serde_json::to_writer(
+                                    &mut self.buffer,
+                                    &TaskView::from_task(&task),
+                                )?;
+                                self.buffer.push(b'\n');
+                            }
+                        }
+                    }
+
+                    let mut to_write = &self.buffer[self.written..];
+                    let wrote = io::copy(&mut to_write, &mut buf)?;
+                    self.written += wrote as usize;
+
+                    // we wrote everything and must refresh our buffer on the next call
+                    if self.written == self.buffer.len() {
+                        self.written = 0;
+                        self.buffer.clear();
+                    }
+
+                    Ok(wrote as usize)
+                }
+            }
+
+            let rtxn = self.env.read_txn()?;
+
+            let task_reader = TaskReader {
+                rtxn: &rtxn,
+                index_scheduler: self,
+                tasks: &mut updated.into_iter(),
+                buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
+                written: 0,
+            };
+
+            // let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
+            let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
+            let request = ureq::post(url).set("Content-Encoding", "gzip");
+            let request = match &self.webhook_authorization_header {
+                Some(header) => request.set("Authorization", header),
+                None => request,
+            };
+
+            if let Err(e) = request.send(reader) {
+                log::error!("While sending data to the webhook: {e}");
+            }
+        }
+
+        Ok(())
+    }
+
    /// Register a task to cleanup the task queue if needed
    fn cleanup_task_queue(&self) -> Result<()> {
        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1677,6 +1761,8 @@ mod tests {
                indexes_path: tempdir.path().join("indexes"),
                snapshots_path: tempdir.path().join("snapshots"),
                dumps_path: tempdir.path().join("dumps"),
+                webhook_url: None,
+                webhook_authorization_header: None,
                task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                enable_mdb_writemap: false,
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -344,7 +344,10 @@ impl ErrorCode for milli::Error {
                        Code::InvalidDocumentId
                    }
                    UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
-                    UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
+                    UserError::InvalidFieldForSource { .. }
+                    | UserError::MissingFieldForSource { .. }
+                    | UserError::InvalidOpenAiModel { .. }
+                    | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
                    UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
                    UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
                    UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
--- a/meilisearch-types/src/lib.rs
+++ b/meilisearch-types/src/lib.rs
@@ -9,6 +9,7 @@ pub mod index_uid_pattern;
 pub mod keys;
 pub mod settings;
 pub mod star_or;
+pub mod task_view;
 pub mod tasks;
 pub mod versioning;
 pub use milli::{heed, Index};
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -318,6 +318,21 @@ impl Settings<Unchecked> {
            _kind: PhantomData,
        }
    }
+
+    pub fn validate(self) -> Result<Self, milli::Error> {
+        self.validate_embedding_settings()
+    }
+
+    fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
+        let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
+        for (name, config) in configs.iter_mut() {
+            let config_to_check = std::mem::take(config);
+            let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
+            *config = checked_config
+        }
+        self.embedders = Setting::Set(configs);
+        Ok(self)
+    }
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -585,11 +600,12 @@ pub fn settings(
        ),
    };

-    let embedders = index
+    let embedders: BTreeMap<_, _> = index
        .embedding_configs(rtxn)?
        .into_iter()
        .map(|(name, config)| (name, Setting::Set(config.into())))
        .collect();
+    let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };

    Ok(Settings {
        displayed_attributes: match displayed_attributes {
@@ -611,15 +627,12 @@ pub fn settings(
            Some(field) => Setting::Set(field),
            None => Setting::Reset,
        },
-        proximity_precision: match proximity_precision {
-            Some(precision) => Setting::Set(precision),
-            None => Setting::Reset,
-        },
+        proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
        synonyms: Setting::Set(synonyms),
        typo_tolerance: Setting::Set(typo_tolerance),
        faceting: Setting::Set(faceting),
        pagination: Setting::Set(pagination),
-        embedders: Setting::Set(embedders),
+        embedders,
        _kind: PhantomData,
    })
 }
@@ -720,10 +733,11 @@ impl From<RankingRuleView> for Criterion {
    }
 }

-#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
 pub enum ProximityPrecisionView {
+    #[default]
    ByWord,
    ByAttribute,
 }
--- a/meilisearch-types/src/task_view.rs
+++ b/meilisearch-types/src/task_view.rs
@@ -0,0 +1,139 @@
+use serde::Serialize;
+use time::{Duration, OffsetDateTime};
+
+use crate::error::ResponseError;
+use crate::settings::{Settings, Unchecked};
+use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskView {
+    pub uid: TaskId,
+    #[serde(default)]
+    pub index_uid: Option<String>,
+    pub status: Status,
+    #[serde(rename = "type")]
+    pub kind: Kind,
+    pub canceled_by: Option<TaskId>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub details: Option<DetailsView>,
+    pub error: Option<ResponseError>,
+    #[serde(serialize_with = "serialize_duration", default)]
+    pub duration: Option<Duration>,
+    #[serde(with = "time::serde::rfc3339")]
+    pub enqueued_at: OffsetDateTime,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub started_at: Option<OffsetDateTime>,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub finished_at: Option<OffsetDateTime>,
+}
+
+impl TaskView {
+    pub fn from_task(task: &Task) -> TaskView {
+        TaskView {
+            uid: task.uid,
+            index_uid: task.index_uid().map(ToOwned::to_owned),
+            status: task.status,
+            kind: task.kind.as_kind(),
+            canceled_by: task.canceled_by,
+            details: task.details.clone().map(DetailsView::from),
+            error: task.error.clone(),
+            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
+            enqueued_at: task.enqueued_at,
+            started_at: task.started_at,
+            finished_at: task.finished_at,
+        }
+    }
+}
+
+#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DetailsView {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub received_documents: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub indexed_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub primary_key: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub provided_ids: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matched_tasks: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub canceled_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub original_filter: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub dump_uid: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub settings: Option<Box<Settings<Unchecked>>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub swaps: Option<Vec<IndexSwap>>,
+}
+
+impl From<Details> for DetailsView {
+    fn from(details: Details) -> Self {
+        match details {
+            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
+                DetailsView {
+                    received_documents: Some(received_documents),
+                    indexed_documents: Some(indexed_documents),
+                    ..DetailsView::default()
+                }
+            }
+            Details::SettingsUpdate { settings } => {
+                DetailsView { settings: Some(settings), ..DetailsView::default() }
+            }
+            Details::IndexInfo { primary_key } => {
+                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
+            }
+            Details::DocumentDeletion {
+                provided_ids: received_document_ids,
+                deleted_documents,
+            } => DetailsView {
+                provided_ids: Some(received_document_ids),
+                deleted_documents: Some(deleted_documents),
+                original_filter: Some(None),
+                ..DetailsView::default()
+            },
+            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
+                DetailsView {
+                    provided_ids: Some(0),
+                    original_filter: Some(Some(original_filter)),
+                    deleted_documents: Some(deleted_documents),
+                    ..DetailsView::default()
+                }
+            }
+            Details::ClearAll { deleted_documents } => {
+                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
+            }
+            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    canceled_tasks: Some(canceled_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    deleted_tasks: Some(deleted_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::Dump { dump_uid } => {
+                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
+            }
+            Details::IndexSwap { swaps } => {
+                DetailsView { swaps: Some(swaps), ..Default::default() }
+            }
+        }
+    }
+}
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -104,6 +104,7 @@ walkdir = "2.3.3"
 yaup = "0.2.1"
 serde_urlencoded = "0.7.1"
 termcolor = "1.2.0"
+url = { version = "2.5.0", features = ["serde"] }

 [dev-dependencies]
 actix-rt = "2.8.0"
@@ -153,5 +154,5 @@ greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
-sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.12/build.zip"
+sha1 = "acfe9a018c93eb0604ea87ee87bff7df5474e18e"
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -264,6 +264,8 @@ struct Infos {
    ignore_snapshot_if_db_exists: bool,
    http_addr: bool,
    http_payload_size_limit: Byte,
+    task_queue_webhook: bool,
+    task_webhook_authorization_header: bool,
    log_level: String,
    max_indexing_memory: MaxMemory,
    max_indexing_threads: MaxThreads,
@@ -290,6 +292,8 @@ impl From<Opt> for Infos {
            http_addr,
            master_key: _,
            env,
+            task_webhook_url,
+            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
@@ -343,6 +347,8 @@ impl From<Opt> for Infos {
            http_addr: http_addr != default_http_addr(),
            http_payload_size_limit,
            experimental_max_number_of_batched_tasks,
+            task_queue_webhook: task_webhook_url.is_some(),
+            task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
            log_level: log_level.to_string(),
            max_indexing_memory,
            max_indexing_threads,
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -228,6 +228,8 @@ fn open_or_create_database_unchecked(
            indexes_path: opt.db_path.join("indexes"),
            snapshots_path: opt.snapshot_dir.clone(),
            dumps_path: opt.dump_dir.clone(),
+            webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
+            webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
            task_db_size: opt.max_task_db_size.get_bytes() as usize,
            index_base_map_size: opt.max_index_size.get_bytes() as usize,
            enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -21,6 +21,7 @@ use rustls::RootCertStore;
 use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
 use serde::{Deserialize, Serialize};
 use sysinfo::{RefreshKind, System, SystemExt};
+use url::Url;

 const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

@@ -28,6 +29,8 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
 const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
 const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
 const MEILI_ENV: &str = "MEILI_ENV";
+const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
+const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
 #[cfg(feature = "analytics")]
 const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
 const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -156,6 +159,14 @@ pub struct Opt {
    #[serde(default = "default_env")]
    pub env: String,

+    /// Called whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
+    pub task_webhook_url: Option<Url>,
+
+    /// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
+    pub task_webhook_authorization_header: Option<String>,
+
    /// Deactivates Meilisearch's built-in telemetry when provided.
    ///
    /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -375,6 +386,8 @@ impl Opt {
            http_addr,
            master_key,
            env,
+            task_webhook_url,
+            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
@@ -409,6 +422,16 @@ impl Opt {
            export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
        }
        export_to_env_if_not_present(MEILI_ENV, env);
+        if let Some(task_webhook_url) = task_webhook_url {
+            export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
+        }
+        if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
+            export_to_env_if_not_present(
+                MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
+                task_webhook_authorization_header,
+            );
+        }
+
        #[cfg(feature = "analytics")]
        {
            export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -90,6 +90,11 @@ macro_rules! make_setting_route {
                    ..Default::default()
                };

+                let new_settings = $crate::routes::indexes::settings::validate_settings(
+                    new_settings,
+                    &index_scheduler,
+                )?;
+
                let allow_index_creation =
                    index_scheduler.filters().allow_index_creation(&index_uid);

@@ -453,7 +458,7 @@ make_setting_route!(
            json!({
                "proximity_precision": {
                    "set": precision.is_some(),
-                    "value": precision,
+                    "value": precision.unwrap_or_default(),
                }
            }),
            Some(req),
@@ -582,13 +587,13 @@ fn embedder_analytics(
        for source in s
            .values()
            .filter_map(|config| config.clone().set())
-            .filter_map(|config| config.embedder_options.set())
+            .filter_map(|config| config.source.set())
        {
-            use meilisearch_types::milli::vector::settings::EmbedderSettings;
+            use meilisearch_types::milli::vector::settings::EmbedderSource;
            match source {
-                EmbedderSettings::OpenAi(_) => sources.insert("openAi"),
-                EmbedderSettings::HuggingFace(_) => sources.insert("huggingFace"),
-                EmbedderSettings::UserProvided(_) => sources.insert("userProvided"),
+                EmbedderSource::OpenAi => sources.insert("openAi"),
+                EmbedderSource::HuggingFace => sources.insert("huggingFace"),
+                EmbedderSource::UserProvided => sources.insert("userProvided"),
            };
        }
    };
@@ -651,6 +656,7 @@ pub async fn update_all(
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let new_settings = body.into_inner();
+    let new_settings = validate_settings(new_settings, &index_scheduler)?;

    analytics.publish(
        "Settings Updated".to_string(),
@@ -684,7 +690,8 @@ pub async fn update_all(
                "set": new_settings.distinct_attribute.as_ref().set().is_some()
            },
            "proximity_precision": {
-                "set": new_settings.proximity_precision.as_ref().set().is_some()
+                "set": new_settings.proximity_precision.as_ref().set().is_some(),
+                "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
            },
            "typo_tolerance": {
                "enabled": new_settings.typo_tolerance
@@ -800,3 +807,13 @@ pub async fn delete_all(
    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
 }
+
+fn validate_settings(
+    settings: Settings<Unchecked>,
+    index_scheduler: &IndexScheduler,
+) -> Result<Settings<Unchecked>, ResponseError> {
+    if matches!(settings.embedders, Setting::Set(_)) {
+        index_scheduler.features().check_vector("Passing `embedders` in settings")?
+    }
+    Ok(settings.validate()?)
+}
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
-use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
-use meilisearch_types::tasks::{
-    serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
-};
+use meilisearch_types::task_view::TaskView;
+use meilisearch_types::tasks::{Kind, KindWithContent, Status};
 use serde::Serialize;
 use serde_json::json;
 use time::format_description::well_known::Rfc3339;
@@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
    .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
    .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
 }
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct TaskView {
-    pub uid: TaskId,
-    #[serde(default)]
-    pub index_uid: Option<String>,
-    pub status: Status,
-    #[serde(rename = "type")]
-    pub kind: Kind,
-    pub canceled_by: Option<TaskId>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub details: Option<DetailsView>,
-    pub error: Option<ResponseError>,
-    #[serde(serialize_with = "serialize_duration", default)]
-    pub duration: Option<Duration>,
-    #[serde(with = "time::serde::rfc3339")]
-    pub enqueued_at: OffsetDateTime,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub started_at: Option<OffsetDateTime>,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub finished_at: Option<OffsetDateTime>,
-}
-
-impl TaskView {
-    pub fn from_task(task: &Task) -> TaskView {
-        TaskView {
-            uid: task.uid,
-            index_uid: task.index_uid().map(ToOwned::to_owned),
-            status: task.status,
-            kind: task.kind.as_kind(),
-            canceled_by: task.canceled_by,
-            details: task.details.clone().map(DetailsView::from),
-            error: task.error.clone(),
-            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
-            enqueued_at: task.enqueued_at,
-            started_at: task.started_at,
-            finished_at: task.finished_at,
-        }
-    }
-}
-
-#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct DetailsView {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub received_documents: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub indexed_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub primary_key: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub provided_ids: Option<usize>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub matched_tasks: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub canceled_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub original_filter: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub dump_uid: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(flatten)]
-    pub settings: Option<Box<Settings<Unchecked>>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub swaps: Option<Vec<IndexSwap>>,
-}
-
-impl From<Details> for DetailsView {
-    fn from(details: Details) -> Self {
-        match details {
-            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
-                DetailsView {
-                    received_documents: Some(received_documents),
-                    indexed_documents: Some(indexed_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::SettingsUpdate { settings } => {
-                DetailsView { settings: Some(settings), ..DetailsView::default() }
-            }
-            Details::IndexInfo { primary_key } => {
-                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
-            }
-            Details::DocumentDeletion {
-                provided_ids: received_document_ids,
-                deleted_documents,
-            } => DetailsView {
-                provided_ids: Some(received_document_ids),
-                deleted_documents: Some(deleted_documents),
-                original_filter: Some(None),
-                ..DetailsView::default()
-            },
-            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
-                DetailsView {
-                    provided_ids: Some(0),
-                    original_filter: Some(Some(original_filter)),
-                    deleted_documents: Some(deleted_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::ClearAll { deleted_documents } => {
-                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
-            }
-            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    canceled_tasks: Some(canceled_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    deleted_tasks: Some(deleted_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::Dump { dump_uid } => {
-                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
-            }
-            Details::IndexSwap { swaps } => {
-                DetailsView { swaps: Some(swaps), ..Default::default() }
-            }
-        }
-    }
-}
-
 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
 pub struct TasksFilterQuery {
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -735,6 +735,9 @@ pub fn perform_facet_search(
    if let Some(facet_query) = &facet_query {
        facet_search.query(facet_query);
    }
+    if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
+        facet_search.max_values(max_facets as usize);
+    }

    Ok(FacetSearchResult {
        facet_hits: facet_search.execute()?,
@@ -897,6 +900,14 @@ fn format_fields<'a>(
    let mut matches_position = compute_matches.then(BTreeMap::new);
    let mut document = document.clone();

+    // reduce the formatted option list to the attributes that should be formatted,
+    // instead of all the attributes to display.
+    let formatting_fields_options: Vec<_> = formatted_options
+        .iter()
+        .filter(|(_, option)| option.should_format())
+        .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
+        .collect();
+
    // select the attributes to retrieve
    let displayable_names =
        displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@@ -905,13 +916,15 @@ fn format_fields<'a>(
        // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
        // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
        // highlighted.
-        let format = formatted_options
+        // Warn: The time to compute the format list scales with the number of fields to format;
+        // cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
+        // d*f where d is the total number of fields to display and f is the total number of fields to format.
+        let format = formatting_fields_options
            .iter()
-            .filter(|(field, _option)| {
-                let name = field_ids_map.name(**field).unwrap();
+            .filter(|(name, _option)| {
                milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
            })
-            .map(|(_, option)| *option)
+            .map(|(_, option)| **option)
            .reduce(|acc, option| acc.merge(option));
        let mut infos = Vec::new();

@@ -1008,7 +1021,7 @@ fn format_value<'a>(
                    let value = matcher.format(format_options);
                    Value::String(value.into_owned())
                }
-                None => Value::Number(number),
+                None => Value::String(s),
            }
        }
        value => value,
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@@ -59,7 +59,7 @@ async fn import_dump_v1_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -77,8 +77,7 @@ async fn import_dump_v1_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -221,7 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -239,8 +238,7 @@ async fn import_dump_v1_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -369,7 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -387,8 +385,7 @@ async fn import_dump_v1_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -503,7 +500,7 @@ async fn import_dump_v2_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -521,8 +518,7 @@ async fn import_dump_v2_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -649,7 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -667,8 +663,7 @@ async fn import_dump_v2_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -794,7 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -812,8 +807,7 @@ async fn import_dump_v2_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -928,7 +922,7 @@ async fn import_dump_v3_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -946,8 +940,7 @@ async fn import_dump_v3_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1074,7 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1092,8 +1085,7 @@ async fn import_dump_v3_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1219,7 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1237,8 +1229,7 @@ async fn import_dump_v3_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1353,7 +1344,7 @@ async fn import_dump_v4_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1371,8 +1362,7 @@ async fn import_dump_v4_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1499,7 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1517,8 +1507,7 @@ async fn import_dump_v4_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1644,7 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@@ -1662,8 +1651,7 @@ async fn import_dump_v4_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1907,8 +1895,7 @@ async fn import_dump_v6_containing_experimental_features() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###);

--- a/meilisearch/tests/search/facet_search.rs
+++ b/meilisearch/tests/search/facet_search.rs
@@ -105,6 +105,24 @@ async fn more_advanced_facet_search() {
    snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
 }

+#[actix_rt::test]
+async fn simple_facet_search_with_max_values() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
+    index.update_settings_filterable_attributes(json!(["genres"])).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(2).await;
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
+
+    assert_eq!(code, 200, "{}", response);
+    assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
+}
+
 #[actix_rt::test]
 async fn non_filterable_facet_search_error() {
    let server = Server::new().await;
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -21,9 +21,9 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
    "###);

    let (response, code) = index
-        .update_settings(
-            json!({ "embedders": {"default": {"source": {"userProvided": {"dimensions": 2}}}} }),
-        )
+        .update_settings(json!({ "embedders": {"default": {
+                "source": "userProvided",
+                "dimensions": 2}}} ))
        .await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;
@@ -56,6 +56,15 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    }])
 });

+static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
+    json!([{
+            "title": "Shazam!",
+            "desc": "a Captain Marvel ersatz",
+            "id": "1",
+            "_vectors": {"default": [1.0, 3.0]},
+    }])
+});
+
 #[actix_rt::test]
 async fn simple_search() {
    let server = Server::new().await;
@@ -149,3 +158,18 @@ async fn invalid_semantic_ratio() {
    }
    "###);
 }
+
+#[actix_rt::test]
+async fn single_document() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
+
+    let (response, code) = index
+    .search_post(
+        json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
+    )
+    .await;
+
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
+}
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -890,13 +890,21 @@ async fn experimental_feature_vector_store() {
    let (response, code) = index
        .update_settings(json!({"embedders": {
            "manual": {
-                "source": {
-                    "userProvided": {"dimensions": 3}
-                }
+                "source": "userProvided",
+                "dimensions": 3,
            }
        }}))
        .await;

+    meili_snap::snapshot!(response, @r###"
+    {
+      "taskUid": 1,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "settingsUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
    meili_snap::snapshot!(code, @"202 Accepted");
    let response = index.wait_task(response.uid()).await;

--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -54,7 +54,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 16);
+    assert_eq!(settings.keys().len(), 15);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@@ -83,7 +83,7 @@ async fn get_settings() {
            "maxTotalHits": 1000,
        })
    );
-    assert_eq!(settings["embedders"], json!({}));
+    assert_eq!(settings["proximityPrecision"], json!("byWord"));
 }

 #[actix_rt::test]
--- a/meilisearch/tests/tasks/mod.rs
+++ b/meilisearch/tests/tasks/mod.rs
@@ -1,4 +1,5 @@
 mod errors;
+mod webhook;

 use meili_snap::insta::assert_json_snapshot;
 use time::format_description::well_known::Rfc3339;
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -0,0 +1,123 @@
+//! To test the webhook, we need to spawn a new server with a URL listening for
+//! post requests. The webhook handle starts a server and forwards all the
+//! received requests into a channel for you to handle.
+
+use std::sync::Arc;
+
+use actix_http::body::MessageBody;
+use actix_web::dev::{ServiceFactory, ServiceResponse};
+use actix_web::web::{Bytes, Data};
+use actix_web::{post, App, HttpResponse, HttpServer};
+use meili_snap::{json_string, snapshot};
+use meilisearch::Opt;
+use tokio::sync::mpsc;
+use url::Url;
+
+use crate::common::{default_settings, Server};
+use crate::json;
+
+#[post("/")]
+async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
+    let body = body.to_vec();
+    sender.send(body).unwrap();
+    HttpResponse::Ok().into()
+}
+
+fn create_app(
+    sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
+) -> actix_web::App<
+    impl ServiceFactory<
+        actix_web::dev::ServiceRequest,
+        Config = (),
+        Response = ServiceResponse<impl MessageBody>,
+        Error = actix_web::Error,
+        InitError = (),
+    >,
+> {
+    App::new().service(forward_body).app_data(Data::from(sender))
+}
+
+struct WebhookHandle {
+    pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
+    pub url: String,
+    pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
+}
+
+async fn create_webhook_server() -> WebhookHandle {
+    let mut log_builder = env_logger::Builder::new();
+    log_builder.parse_filters("info");
+    log_builder.init();
+
+    let (sender, receiver) = mpsc::unbounded_channel();
+    let sender = Arc::new(sender);
+
+    // By listening on the port 0, the system will give us any available port.
+    let server =
+        HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
+    let (ip, scheme) = server.addrs_with_scheme()[0];
+    let url = format!("{scheme}://{ip}/");
+
+    let server_handle = tokio::spawn(server.run());
+    WebhookHandle { server_handle, url, receiver }
+}
+
+#[actix_web::test]
+async fn test_basic_webhook() {
+    let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
+
+    let db_path = tempfile::tempdir().unwrap();
+    let server = Server::new_with_options(Opt {
+        task_webhook_url: Some(Url::parse(&url).unwrap()),
+        ..default_settings(db_path.path())
+    })
+    .await
+    .unwrap();
+
+    let index = server.index("tamo");
+    // May be flaky: we're relying on the fact that while the first document addition is processed, the other
+    // operations will be received and will be batched together. If it doesn't happen it's not a problem
+    // the rest of the test won't assume anything about the number of tasks per batch.
+    for i in 0..5 {
+        let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
+    }
+
+    let mut nb_tasks = 0;
+    while let Some(payload) = receiver.recv().await {
+        let payload = String::from_utf8(payload).unwrap();
+        let jsonl = payload.split('\n');
+        for json in jsonl {
+            if json.is_empty() {
+                break; // we reached EOF
+            }
+            nb_tasks += 1;
+            let json: serde_json::Value = serde_json::from_str(json).unwrap();
+            snapshot!(
+                json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+            @r###"
+            {
+              "uid": "[uid]",
+              "indexUid": "tamo",
+              "status": "succeeded",
+              "type": "documentAdditionOrUpdate",
+              "canceledBy": null,
+              "details": {
+                "receivedDocuments": 1,
+                "indexedDocuments": 1
+              },
+              "error": null,
+              "duration": "[duration]",
+              "enqueuedAt": "[date]",
+              "startedAt": "[date]",
+              "finishedAt": "[date]"
+            }
+            "###);
+        }
+        if nb_tasks == 5 {
+            break;
+        }
+    }
+
+    assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
+
+    server_handle.abort();
+}
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -192,7 +192,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
    #[error(transparent)]
    InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
-    #[error("Invalid prompt in for embeddings with name '{0}': {1}.")]
+    #[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
    InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
    #[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
    TooManyEmbedders(usize),
@@ -200,6 +200,33 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidEmbedder(String),
    #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
    TooManyVectors(String, usize),
+    #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
+        allowed_sources_for_field
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", "),
+        allowed_fields_for_source
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", ")
+    )]
+    InvalidFieldForSource {
+        embedder_name: String,
+        source_: crate::vector::settings::EmbedderSource,
+        field: &'static str,
+        allowed_fields_for_source: &'static [&'static str],
+        allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
+    },
+    #[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
+    InvalidOpenAiModel { embedder_name: String, model: String },
+    #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
+    MissingFieldForSource {
+        field: &'static str,
+        source_: crate::vector::settings::EmbedderSource,
+        embedder_name: String,
+    },
 }

 impl From<crate::vector::Error> for Error {
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -27,8 +27,8 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
 static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
 static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));

-/// The maximum number of facets returned by the facet search route.
-const MAX_NUMBER_OF_FACETS: usize = 100;
+/// The maximum number of values per facet returned by the facet search route.
+const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;

 pub mod facet;
 mod fst_utils;
@@ -306,6 +306,7 @@ pub struct SearchForFacetValues<'a> {
    query: Option<String>,
    facet: String,
    search_query: Search<'a>,
+    max_values: usize,
    is_hybrid: bool,
 }

@@ -315,7 +316,13 @@ impl<'a> SearchForFacetValues<'a> {
        search_query: Search<'a>,
        is_hybrid: bool,
    ) -> SearchForFacetValues<'a> {
-        SearchForFacetValues { query: None, facet, search_query, is_hybrid }
+        SearchForFacetValues {
+            query: None,
+            facet,
+            search_query,
+            max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
+            is_hybrid,
+        }
    }

    pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
@@ -323,6 +330,11 @@ impl<'a> SearchForFacetValues<'a> {
        self
    }

+    pub fn max_values(&mut self, max: usize) -> &mut Self {
+        self.max_values = max;
+        self
+    }
+
    fn one_original_value_of(
        &self,
        field_id: FieldId,
@@ -462,7 +474,7 @@ impl<'a> SearchForFacetValues<'a> {
                            .unwrap_or_else(|| left_bound.to_string());
                        results.push(FacetValueHit { value, count });
                    }
-                    if results.len() >= MAX_NUMBER_OF_FACETS {
+                    if results.len() >= self.max_values {
                        break;
                    }
                }
@@ -507,7 +519,7 @@ impl<'a> SearchForFacetValues<'a> {
                    .unwrap_or_else(|| query.to_string());
                results.push(FacetValueHit { value, count });
            }
-            if results.len() >= MAX_NUMBER_OF_FACETS {
+            if results.len() >= self.max_values {
                return Ok(ControlFlow::Break(()));
            }
        }
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -15,6 +15,7 @@ pub struct BucketSortOutput {

 // TODO: would probably be good to regroup some of these inside of a struct?
 #[allow(clippy::too_many_arguments)]
+#[logging_timer::time]
 pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    ctx: &mut SearchContext<'ctx>,
    mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> {
    }
 }

-#[derive(Copy, Clone, Default)]
+#[derive(Copy, Clone, Default, Debug)]
 pub struct FormatOptions {
    pub highlight: bool,
    pub crop: Option<usize>,
@@ -82,6 +82,10 @@ impl FormatOptions {
    pub fn merge(self, other: Self) -> Self {
        Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
    }
+
+    pub fn should_format(&self) -> bool {
+        self.highlight || self.crop.is_some()
+    }
 }

 #[derive(Clone, Debug)]
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -191,6 +191,7 @@ fn resolve_maximally_reduced_query_graph(
    Ok(docids)
 }

+#[logging_timer::time]
 fn resolve_universe(
    ctx: &mut SearchContext,
    initial_universe: &RoaringBitmap,
@@ -556,6 +557,7 @@ pub fn execute_vector_search(
 }

 #[allow(clippy::too_many_arguments)]
+#[logging_timer::time]
 pub fn execute_search(
    ctx: &mut SearchContext,
    query: Option<&str>,
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -5,6 +5,7 @@ use super::*;
 use crate::{Result, SearchContext, MAX_WORD_LENGTH};

 /// Convert the tokenised search query into a list of located query terms.
+#[logging_timer::time]
 pub fn located_query_terms_from_tokens(
    ctx: &mut SearchContext,
    query: NormalizedTokenIter,
--- a/milli/src/update/facet/incremental.rs
+++ b/milli/src/update/facet/incremental.rs
@@ -61,6 +61,7 @@ impl FacetsUpdateIncremental {
        }
    }

+    #[logging_timer::time("FacetsUpdateIncremental::{}")]
    pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
        let mut cursor = self.delta_data.into_cursor()?;
        while let Some((key, value)) = cursor.move_on_next()? {
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@@ -76,26 +76,18 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
 pub const FACET_GROUP_SIZE: u8 = 4;
 pub const FACET_MIN_LEVEL_SIZE: u8 = 5;

-use std::collections::BTreeSet;
 use std::fs::File;
 use std::io::BufReader;
-use std::iter::FromIterator;

-use charabia::normalizer::{Normalize, NormalizerOption};
-use grenad::{CompressionType, SortAlgorithm};
-use heed::types::{Bytes, DecodeIgnore, SerdeJson};
-use heed::BytesEncode;
 use log::debug;
 use time::OffsetDateTime;

 use self::incremental::FacetsUpdateIncremental;
 use super::FacetsUpdateBulk;
 use crate::facet::FacetType;
-use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
+use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
 use crate::heed_codec::BytesRefCodec;
-use crate::update::index_documents::create_sorter;
-use crate::update::merge_btreeset_string;
-use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
+use crate::{Index, Result};

 pub mod bulk;
 pub mod incremental;
@@ -146,115 +138,114 @@ impl<'i> FacetsUpdate<'i> {
        self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;

        // See self::comparison_bench::benchmark_facet_indexing
-        if self.delta_data.len() >= (self.database.len(wtxn)? / 50) {
-            let field_ids =
-                self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
-            let bulk_update = FacetsUpdateBulk::new(
-                self.index,
-                field_ids,
-                self.facet_type,
-                self.delta_data,
-                self.group_size,
-                self.min_level_size,
-            );
-            bulk_update.execute(wtxn)?;
-        } else {
-            let incremental_update = FacetsUpdateIncremental::new(
-                self.index,
-                self.facet_type,
-                self.delta_data,
-                self.group_size,
-                self.min_level_size,
-                self.max_group_size,
-            );
-            incremental_update.execute(wtxn)?;
-        }
-
-        // We clear the list of normalized-for-search facets
-        // and the previous FSTs to compute everything from scratch
-        self.index.facet_id_normalized_string_strings.clear(wtxn)?;
-        self.index.facet_id_string_fst.clear(wtxn)?;
-
-        // As we can't use the same write transaction to read and write in two different databases
-        // we must create a temporary sorter that we will write into LMDB afterward.
-        // As multiple unnormalized facet values can become the same normalized facet value
-        // we must merge them together.
-        let mut sorter = create_sorter(
-            SortAlgorithm::Unstable,
-            merge_btreeset_string,
-            CompressionType::None,
-            None,
-            None,
-            None,
+        // if self.delta_data.len() >= (self.database.len(wtxn)? / 50) {
+        let field_ids = self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
+        let bulk_update = FacetsUpdateBulk::new(
+            self.index,
+            field_ids,
+            self.facet_type,
+            self.delta_data,
+            self.group_size,
+            self.min_level_size,
        );
+        bulk_update.execute(wtxn)?;
+        // } else {
+        //     let incremental_update = FacetsUpdateIncremental::new(
+        //         self.index,
+        //         self.facet_type,
+        //         self.delta_data,
+        //         self.group_size,
+        //         self.min_level_size,
+        //         self.max_group_size,
+        //     );
+        //     incremental_update.execute(wtxn)?;
+        // }

-        // We iterate on the list of original, semi-normalized, facet values
-        // and normalize them for search, inserting them in LMDB in any given order.
-        let options = NormalizerOption { lossy: true, ..Default::default() };
-        let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
-        for result in database.iter(wtxn)? {
-            let (facet_group_key, ()) = result?;
-            if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
-                let mut normalized_facet = left_bound.normalize(&options);
-                let normalized_truncated_facet: String;
-                if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
-                    normalized_truncated_facet = normalized_facet
-                        .char_indices()
-                        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
-                        .map(|(_, c)| c)
-                        .collect();
-                    normalized_facet = normalized_truncated_facet.into();
-                }
-                let set = BTreeSet::from_iter(std::iter::once(left_bound));
-                let key = (field_id, normalized_facet.as_ref());
-                let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
-                let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
-                sorter.insert(key, val)?;
-            }
-        }
+        // // We clear the list of normalized-for-search facets
+        // // and the previous FSTs to compute everything from scratch
+        // self.index.facet_id_normalized_string_strings.clear(wtxn)?;
+        // self.index.facet_id_string_fst.clear(wtxn)?;

-        // In this loop we don't need to take care of merging bitmaps
-        // as the grenad sorter already merged them for us.
-        let mut merger_iter = sorter.into_stream_merger_iter()?;
-        while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
-            self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
-                wtxn,
-                key_bytes,
-                btreeset_bytes,
-            )?;
-        }
+        // // As we can't use the same write transaction to read and write in two different databases
+        // // we must create a temporary sorter that we will write into LMDB afterward.
+        // // As multiple unnormalized facet values can become the same normalized facet value
+        // // we must merge them together.
+        // let mut sorter = create_sorter(
+        //     SortAlgorithm::Unstable,
+        //     merge_btreeset_string,
+        //     CompressionType::None,
+        //     None,
+        //     None,
+        //     None,
+        // );

-        // We compute one FST by string facet
-        let mut text_fsts = vec![];
-        let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
-        let database =
-            self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
-        for result in database.iter(wtxn)? {
-            let ((field_id, normalized_facet), _) = result?;
-            current_fst = match current_fst.take() {
-                Some((fid, fst_builder)) if fid != field_id => {
-                    let fst = fst_builder.into_set();
-                    text_fsts.push((fid, fst));
-                    Some((field_id, fst::SetBuilder::memory()))
-                }
-                Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
-                None => Some((field_id, fst::SetBuilder::memory())),
-            };
+        // // We iterate on the list of original, semi-normalized, facet values
+        // // and normalize them for search, inserting them in LMDB in any given order.
+        // let options = NormalizerOption { lossy: true, ..Default::default() };
+        // let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
+        // for result in database.iter(wtxn)? {
+        //     let (facet_group_key, ()) = result?;
+        //     if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
+        //         let mut normalized_facet = left_bound.normalize(&options);
+        //         let normalized_truncated_facet: String;
+        //         if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
+        //             normalized_truncated_facet = normalized_facet
+        //                 .char_indices()
+        //                 .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+        //                 .map(|(_, c)| c)
+        //                 .collect();
+        //             normalized_facet = normalized_truncated_facet.into();
+        //         }
+        //         let set = BTreeSet::from_iter(std::iter::once(left_bound));
+        //         let key = (field_id, normalized_facet.as_ref());
+        //         let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+        //         let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+        //         sorter.insert(key, val)?;
+        //     }
+        // }

-            if let Some((_, fst_builder)) = current_fst.as_mut() {
-                fst_builder.insert(normalized_facet)?;
-            }
-        }
+        // // In this loop we don't need to take care of merging bitmaps
+        // // as the grenad sorter already merged them for us.
+        // let mut merger_iter = sorter.into_stream_merger_iter()?;
+        // while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
+        //     self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
+        //         wtxn,
+        //         key_bytes,
+        //         btreeset_bytes,
+        //     )?;
+        // }

-        if let Some((field_id, fst_builder)) = current_fst {
-            let fst = fst_builder.into_set();
-            text_fsts.push((field_id, fst));
-        }
+        // // We compute one FST by string facet
+        // let mut text_fsts = vec![];
+        // let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
+        // let database =
+        //     self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
+        // for result in database.iter(wtxn)? {
+        //     let ((field_id, normalized_facet), _) = result?;
+        //     current_fst = match current_fst.take() {
+        //         Some((fid, fst_builder)) if fid != field_id => {
+        //             let fst = fst_builder.into_set();
+        //             text_fsts.push((fid, fst));
+        //             Some((field_id, fst::SetBuilder::memory()))
+        //         }
+        //         Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
+        //         None => Some((field_id, fst::SetBuilder::memory())),
+        //     };

-        // We write those FSTs in LMDB now
-        for (field_id, fst) in text_fsts {
-            self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
-        }
+        //     if let Some((_, fst_builder)) = current_fst.as_mut() {
+        //         fst_builder.insert(normalized_facet)?;
+        //     }
+        // }
+
+        // if let Some((field_id, fst_builder)) = current_fst {
+        //     let fst = fst_builder.into_set();
+        //     text_fsts.push((field_id, fst));
+        // }
+
+        // // We write those FSTs in LMDB now
+        // for (field_id, fst) in text_fsts {
+        //     self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
+        // }

        Ok(())
    }
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@@ -16,7 +16,7 @@ pub use merge_functions::{
    keep_first, keep_latest_obkv, merge_btreeset_string, merge_cbo_roaring_bitmaps,
    merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
    merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions,
-    obkvs_merge_additions_and_deletions, serialize_roaring_bitmap, MergeFn,
+    obkvs_merge_additions_and_deletions, MergeFn,
 };

 use crate::MAX_WORD_LENGTH;
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -21,7 +21,7 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;
-pub use self::enrich::{extract_finite_float_from_value, validate_geo_from_json, DocumentId};
+pub use self::enrich::{extract_finite_float_from_value, DocumentId};
 pub use self::helpers::{
    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
    fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps,
@@ -2553,7 +2553,7 @@ mod tests {
    /// Vectors must be of the same length.
    #[test]
    fn test_multiple_vectors() {
-        use crate::vector::settings::{EmbedderSettings, EmbeddingSettings};
+        use crate::vector::settings::EmbeddingSettings;
        let index = TempIndex::new();

        index
@@ -2562,9 +2562,11 @@ mod tests {
                embedders.insert(
                    "manual".to_string(),
                    Setting::Set(EmbeddingSettings {
-                        embedder_options: Setting::Set(EmbedderSettings::UserProvided(
-                            crate::vector::settings::UserProvidedSettings { dimensions: 3 },
-                        )),
+                        source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided),
+                        model: Setting::NotSet,
+                        revision: Setting::NotSet,
+                        api_key: Setting::NotSet,
+                        dimensions: Setting::Set(3),
                        document_template: Setting::NotSet,
                    }),
                );
@@ -2579,10 +2581,10 @@ mod tests {
            .unwrap();
        index.add_documents(documents!([{"id": 1, "_vectors": { "manual": [6, 7, 8] }}])).unwrap();
        index
-            .add_documents(
-                documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
-            )
-            .unwrap();
+               .add_documents(
+                   documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
+               )
+               .unwrap();

        let rtxn = index.read_txn().unwrap();
        let res = index.search(&rtxn).vector([0.0, 1.0, 2.0].to_vec()).execute().unwrap();
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -123,6 +123,8 @@ pub(crate) fn write_typed_chunk_into_index(
 ) -> Result<(RoaringBitmap, bool)> {
    puffin::profile_function!(typed_chunk.to_debug_string());

+    log::debug!("Received a chunk to process: {}", typed_chunk.to_debug_string());
+
    let mut is_merged_database = false;
    match typed_chunk {
        TypedChunk::Documents(obkv_documents_iter) => {
--- a/milli/src/update/mod.rs
+++ b/milli/src/update/mod.rs
@@ -8,7 +8,7 @@ pub use self::index_documents::{
    MergeFn,
 };
 pub use self::indexer_config::IndexerConfig;
-pub use self::settings::{Setting, Settings};
+pub use self::settings::{validate_embedding_settings, Setting, Settings};
 pub use self::update_step::UpdateIndexingStep;
 pub use self::word_prefix_docids::WordPrefixDocids;
 pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids;
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -17,7 +17,7 @@ use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS
 use crate::proximity::ProximityPrecision;
 use crate::update::index_documents::IndexDocumentsMethod;
 use crate::update::{IndexDocuments, UpdateIndexingStep};
-use crate::vector::settings::{EmbeddingSettings, PromptSettings};
+use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
 use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
 use crate::{FieldsIdsMap, Index, OrderBy, Result};

@@ -78,11 +78,19 @@ impl<T> Setting<T> {
        }
    }

-    pub fn apply(&mut self, new: Self) {
+    /// Returns `true` if applying the new setting changed this setting
+    pub fn apply(&mut self, new: Self) -> bool
+    where
+        T: PartialEq + Eq,
+    {
        if let Setting::NotSet = new {
-            return;
+            return false;
+        }
+        if self == &new {
+            return false;
        }
        *self = new;
+        true
    }
 }

@@ -950,17 +958,23 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                    .merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right))
                {
                    match joined {
+                        // updated config
                        EitherOrBoth::Both((name, mut old), (_, new)) => {
-                            old.apply(new);
-                            let new = validate_prompt(&name, old)?;
-                            changed = true;
+                            changed |= old.apply(new);
+                            let new = validate_embedding_settings(old, &name)?;
                            new_configs.insert(name, new);
                        }
+                        // unchanged config
                        EitherOrBoth::Left((name, setting)) => {
                            new_configs.insert(name, setting);
                        }
-                        EitherOrBoth::Right((name, setting)) => {
-                            let setting = validate_prompt(&name, setting)?;
+                        // new config
+                        EitherOrBoth::Right((name, mut setting)) => {
+                            // apply the default source in case the source was not set so that it gets validated
+                            crate::vector::settings::EmbeddingSettings::apply_default_source(
+                                &mut setting,
+                            );
+                            let setting = validate_embedding_settings(setting, &name)?;
                            changed = true;
                            new_configs.insert(name, setting);
                        }
@@ -1072,8 +1086,12 @@ fn validate_prompt(
 ) -> Result<Setting<EmbeddingSettings>> {
    match new {
        Setting::Set(EmbeddingSettings {
-            embedder_options,
-            document_template: Setting::Set(PromptSettings { template: Setting::Set(template) }),
+            source,
+            model,
+            revision,
+            api_key,
+            dimensions,
+            document_template: Setting::Set(template),
        }) => {
            // validate
            let template = crate::prompt::Prompt::new(template)
@@ -1081,16 +1099,71 @@ fn validate_prompt(
                .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;

            Ok(Setting::Set(EmbeddingSettings {
-                embedder_options,
-                document_template: Setting::Set(PromptSettings {
-                    template: Setting::Set(template),
-                }),
+                source,
+                model,
+                revision,
+                api_key,
+                dimensions,
+                document_template: Setting::Set(template),
            }))
        }
        new => Ok(new),
    }
 }

+pub fn validate_embedding_settings(
+    settings: Setting<EmbeddingSettings>,
+    name: &str,
+) -> Result<Setting<EmbeddingSettings>> {
+    let settings = validate_prompt(name, settings)?;
+    let Setting::Set(settings) = settings else { return Ok(settings) };
+    let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+        settings;
+    let Some(inferred_source) = source.set() else {
+        return Ok(Setting::Set(EmbeddingSettings {
+            source,
+            model,
+            revision,
+            api_key,
+            dimensions,
+            document_template,
+        }));
+    };
+    match inferred_source {
+        EmbedderSource::OpenAi => {
+            check_unset(&revision, "revision", inferred_source, name)?;
+            check_unset(&dimensions, "dimensions", inferred_source, name)?;
+            if let Setting::Set(model) = &model {
+                crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or(
+                    crate::error::UserError::InvalidOpenAiModel {
+                        embedder_name: name.to_owned(),
+                        model: model.clone(),
+                    },
+                )?;
+            }
+        }
+        EmbedderSource::HuggingFace => {
+            check_unset(&api_key, "apiKey", inferred_source, name)?;
+            check_unset(&dimensions, "dimensions", inferred_source, name)?;
+        }
+        EmbedderSource::UserProvided => {
+            check_unset(&model, "model", inferred_source, name)?;
+            check_unset(&revision, "revision", inferred_source, name)?;
+            check_unset(&api_key, "apiKey", inferred_source, name)?;
+            check_unset(&document_template, "documentTemplate", inferred_source, name)?;
+            check_set(&dimensions, "dimensions", inferred_source, name)?;
+        }
+    }
+    Ok(Setting::Set(EmbeddingSettings {
+        source,
+        model,
+        revision,
+        api_key,
+        dimensions,
+        document_template,
+    }))
+}
+
 #[cfg(test)]
 mod tests {
    use big_s::S;
--- a/milli/src/vector/openai.rs
+++ b/milli/src/vector/openai.rs
@@ -34,6 +34,9 @@ pub struct EmbedderOptions {
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(rename_all = camelCase, deny_unknown_fields)]
 pub enum EmbeddingModel {
+    // # WARNING
+    //
+    // If ever adding a model, make sure to add it to the list of supported models below.
    #[default]
    #[serde(rename = "text-embedding-ada-002")]
    #[deserr(rename = "text-embedding-ada-002")]
@@ -41,6 +44,10 @@ pub enum EmbeddingModel {
 }

 impl EmbeddingModel {
+    pub fn supported_models() -> &'static [&'static str] {
+        &["text-embedding-ada-002"]
+    }
+
    pub fn max_token(&self) -> usize {
        match self {
            EmbeddingModel::TextEmbeddingAda002 => 8191,
@@ -59,7 +66,7 @@ impl EmbeddingModel {
        }
    }

-    pub fn from_name(name: &'static str) -> Option<Self> {
+    pub fn from_name(name: &str) -> Option<Self> {
        match name {
            "text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002),
            _ => None,
--- a/milli/src/vector/settings.rs
+++ b/milli/src/vector/settings.rs
@@ -4,32 +4,189 @@ use serde::{Deserialize, Serialize};
 use crate::prompt::PromptData;
 use crate::update::Setting;
 use crate::vector::EmbeddingConfig;
+use crate::UserError;

 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(rename_all = camelCase, deny_unknown_fields)]
 pub struct EmbeddingSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "source")]
-    #[deserr(default, rename = "source")]
-    pub embedder_options: Setting<EmbedderSettings>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default)]
-    pub document_template: Setting<PromptSettings>,
+    pub source: Setting<EmbedderSource>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub model: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub revision: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub api_key: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub dimensions: Setting<usize>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub document_template: Setting<String>,
+}
+
+pub fn check_unset<T>(
+    key: &Setting<T>,
+    field: &'static str,
+    source: EmbedderSource,
+    embedder_name: &str,
+) -> Result<(), UserError> {
+    if matches!(key, Setting::NotSet) {
+        Ok(())
+    } else {
+        Err(UserError::InvalidFieldForSource {
+            embedder_name: embedder_name.to_owned(),
+            source_: source,
+            field,
+            allowed_fields_for_source: EmbeddingSettings::allowed_fields_for_source(source),
+            allowed_sources_for_field: EmbeddingSettings::allowed_sources_for_field(field),
+        })
+    }
+}
+
+pub fn check_set<T>(
+    key: &Setting<T>,
+    field: &'static str,
+    source: EmbedderSource,
+    embedder_name: &str,
+) -> Result<(), UserError> {
+    if matches!(key, Setting::Set(_)) {
+        Ok(())
+    } else {
+        Err(UserError::MissingFieldForSource {
+            field,
+            source_: source,
+            embedder_name: embedder_name.to_owned(),
+        })
+    }
+}
+
+impl EmbeddingSettings {
+    pub const SOURCE: &'static str = "source";
+    pub const MODEL: &'static str = "model";
+    pub const REVISION: &'static str = "revision";
+    pub const API_KEY: &'static str = "apiKey";
+    pub const DIMENSIONS: &'static str = "dimensions";
+    pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
+
+    pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] {
+        match field {
+            Self::SOURCE => {
+                &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
+            }
+            Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
+            Self::REVISION => &[EmbedderSource::HuggingFace],
+            Self::API_KEY => &[EmbedderSource::OpenAi],
+            Self::DIMENSIONS => &[EmbedderSource::UserProvided],
+            Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
+            _other => unreachable!("unknown field"),
+        }
+    }
+
+    pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] {
+        match source {
+            EmbedderSource::OpenAi => {
+                &[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE]
+            }
+            EmbedderSource::HuggingFace => {
+                &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
+            }
+            EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
+        }
+    }
+
+    pub(crate) fn apply_default_source(setting: &mut Setting<EmbeddingSettings>) {
+        if let Setting::Set(EmbeddingSettings {
+            source: source @ (Setting::NotSet | Setting::Reset),
+            ..
+        }) = setting
+        {
+            *source = Setting::Set(EmbedderSource::default())
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[deserr(rename_all = camelCase, deny_unknown_fields)]
+pub enum EmbedderSource {
+    #[default]
+    OpenAi,
+    HuggingFace,
+    UserProvided,
+}
+
+impl std::fmt::Display for EmbedderSource {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let s = match self {
+            EmbedderSource::OpenAi => "openAi",
+            EmbedderSource::HuggingFace => "huggingFace",
+            EmbedderSource::UserProvided => "userProvided",
+        };
+        f.write_str(s)
+    }
 }

 impl EmbeddingSettings {
    pub fn apply(&mut self, new: Self) {
-        let EmbeddingSettings { embedder_options, document_template: prompt } = new;
-        self.embedder_options.apply(embedder_options);
-        self.document_template.apply(prompt);
+        let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+            new;
+        let old_source = self.source;
+        self.source.apply(source);
+        // Reinitialize the whole setting object on a source change
+        if old_source != self.source {
+            *self = EmbeddingSettings {
+                source,
+                model,
+                revision,
+                api_key,
+                dimensions,
+                document_template,
+            };
+            return;
+        }
+
+        self.model.apply(model);
+        self.revision.apply(revision);
+        self.api_key.apply(api_key);
+        self.dimensions.apply(dimensions);
+        self.document_template.apply(document_template);
    }
 }

 impl From<EmbeddingConfig> for EmbeddingSettings {
    fn from(value: EmbeddingConfig) -> Self {
-        Self {
-            embedder_options: Setting::Set(value.embedder_options.into()),
-            document_template: Setting::Set(value.prompt.into()),
+        let EmbeddingConfig { embedder_options, prompt } = value;
+        match embedder_options {
+            super::EmbedderOptions::HuggingFace(options) => Self {
+                source: Setting::Set(EmbedderSource::HuggingFace),
+                model: Setting::Set(options.model),
+                revision: options.revision.map(Setting::Set).unwrap_or_default(),
+                api_key: Setting::NotSet,
+                dimensions: Setting::NotSet,
+                document_template: Setting::Set(prompt.template),
+            },
+            super::EmbedderOptions::OpenAi(options) => Self {
+                source: Setting::Set(EmbedderSource::OpenAi),
+                model: Setting::Set(options.embedding_model.name().to_owned()),
+                revision: Setting::NotSet,
+                api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
+                dimensions: Setting::NotSet,
+                document_template: Setting::Set(prompt.template),
+            },
+            super::EmbedderOptions::UserProvided(options) => Self {
+                source: Setting::Set(EmbedderSource::UserProvided),
+                model: Setting::NotSet,
+                revision: Setting::NotSet,
+                api_key: Setting::NotSet,
+                dimensions: Setting::Set(options.dimensions),
+                document_template: Setting::NotSet,
+            },
        }
    }
 }
@@ -37,256 +194,51 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
 impl From<EmbeddingSettings> for EmbeddingConfig {
    fn from(value: EmbeddingSettings) -> Self {
        let mut this = Self::default();
-        let EmbeddingSettings { embedder_options, document_template: prompt } = value;
-        if let Some(embedder_options) = embedder_options.set() {
-            this.embedder_options = embedder_options.into();
-        }
-        if let Some(prompt) = prompt.set() {
-            this.prompt = prompt.into();
-        }
-        this
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct PromptSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub template: Setting<String>,
-}
-
-impl PromptSettings {
-    pub fn apply(&mut self, new: Self) {
-        let PromptSettings { template } = new;
-        self.template.apply(template);
-    }
-}
-
-impl From<PromptData> for PromptSettings {
-    fn from(value: PromptData) -> Self {
-        Self { template: Setting::Set(value.template) }
-    }
-}
-
-impl From<PromptSettings> for PromptData {
-    fn from(value: PromptSettings) -> Self {
-        let mut this = PromptData::default();
-        let PromptSettings { template } = value;
-        if let Some(template) = template.set() {
-            this.template = template;
-        }
-        this
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-pub enum EmbedderSettings {
-    HuggingFace(Setting<HfEmbedderSettings>),
-    OpenAi(Setting<OpenAiEmbedderSettings>),
-    UserProvided(UserProvidedSettings),
-}
-
-impl<E> Deserr<E> for EmbedderSettings
-where
-    E: deserr::DeserializeError,
-{
-    fn deserialize_from_value<V: deserr::IntoValue>(
-        value: deserr::Value<V>,
-        location: deserr::ValuePointerRef,
-    ) -> Result<Self, E> {
-        match value {
-            deserr::Value::Map(map) => {
-                if deserr::Map::len(&map) != 1 {
-                    return Err(deserr::take_cf_content(E::error::<V>(
-                        None,
-                        deserr::ErrorKind::Unexpected {
-                            msg: format!(
-                                "Expected a single field, got {} fields",
-                                deserr::Map::len(&map)
-                            ),
-                        },
-                        location,
-                    )));
+        let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+            value;
+        if let Some(source) = source.set() {
+            match source {
+                EmbedderSource::OpenAi => {
+                    let mut options = super::openai::EmbedderOptions::with_default_model(None);
+                    if let Some(model) = model.set() {
+                        if let Some(model) = super::openai::EmbeddingModel::from_name(&model) {
+                            options.embedding_model = model;
+                        }
+                    }
+                    if let Some(api_key) = api_key.set() {
+                        options.api_key = Some(api_key);
+                    }
+                    this.embedder_options = super::EmbedderOptions::OpenAi(options);
                }
-                let mut it = deserr::Map::into_iter(map);
-                let (k, v) = it.next().unwrap();
-
-                match k.as_str() {
-                    "huggingFace" => Ok(EmbedderSettings::HuggingFace(Setting::Set(
-                        HfEmbedderSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    ))),
-                    "openAi" => Ok(EmbedderSettings::OpenAi(Setting::Set(
-                        OpenAiEmbedderSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    ))),
-                    "userProvided" => Ok(EmbedderSettings::UserProvided(
-                        UserProvidedSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    )),
-                    other => Err(deserr::take_cf_content(E::error::<V>(
-                        None,
-                        deserr::ErrorKind::UnknownKey {
-                            key: other,
-                            accepted: &["huggingFace", "openAi", "userProvided"],
-                        },
-                        location,
-                    ))),
+                EmbedderSource::HuggingFace => {
+                    let mut options = super::hf::EmbedderOptions::default();
+                    if let Some(model) = model.set() {
+                        options.model = model;
+                        // Reset the revision if we are setting the model.
+                        // This allows the following:
+                        // "huggingFace": {} -> default model with default revision
+                        // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision
+                        // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision
+                        options.revision = None;
+                    }
+                    if let Some(revision) = revision.set() {
+                        options.revision = Some(revision);
+                    }
+                    this.embedder_options = super::EmbedderOptions::HuggingFace(options);
+                }
+                EmbedderSource::UserProvided => {
+                    this.embedder_options =
+                        super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
+                            dimensions: dimensions.set().unwrap(),
+                        });
                }
            }
-            _ => Err(deserr::take_cf_content(E::error::<V>(
-                None,
-                deserr::ErrorKind::IncorrectValueKind {
-                    actual: value,
-                    accepted: &[deserr::ValueKind::Map],
-                },
-                location,
-            ))),
        }
-    }
-}

-impl Default for EmbedderSettings {
-    fn default() -> Self {
-        Self::OpenAi(Default::default())
-    }
-}
-
-impl From<crate::vector::EmbedderOptions> for EmbedderSettings {
-    fn from(value: crate::vector::EmbedderOptions) -> Self {
-        match value {
-            crate::vector::EmbedderOptions::HuggingFace(hf) => {
-                Self::HuggingFace(Setting::Set(hf.into()))
-            }
-            crate::vector::EmbedderOptions::OpenAi(openai) => {
-                Self::OpenAi(Setting::Set(openai.into()))
-            }
-            crate::vector::EmbedderOptions::UserProvided(user_provided) => {
-                Self::UserProvided(user_provided.into())
-            }
+        if let Setting::Set(template) = document_template {
+            this.prompt = PromptData { template }
        }
-    }
-}

-impl From<EmbedderSettings> for crate::vector::EmbedderOptions {
-    fn from(value: EmbedderSettings) -> Self {
-        match value {
-            EmbedderSettings::HuggingFace(Setting::Set(hf)) => Self::HuggingFace(hf.into()),
-            EmbedderSettings::HuggingFace(_setting) => Self::HuggingFace(Default::default()),
-            EmbedderSettings::OpenAi(Setting::Set(ai)) => Self::OpenAi(ai.into()),
-            EmbedderSettings::OpenAi(_setting) => {
-                Self::OpenAi(crate::vector::openai::EmbedderOptions::with_default_model(None))
-            }
-            EmbedderSettings::UserProvided(user_provided) => {
-                Self::UserProvided(user_provided.into())
-            }
-        }
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct HfEmbedderSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub model: Setting<String>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub revision: Setting<String>,
-}
-
-impl HfEmbedderSettings {
-    pub fn apply(&mut self, new: Self) {
-        let HfEmbedderSettings { model, revision } = new;
-        self.model.apply(model);
-        self.revision.apply(revision);
-    }
-}
-
-impl From<crate::vector::hf::EmbedderOptions> for HfEmbedderSettings {
-    fn from(value: crate::vector::hf::EmbedderOptions) -> Self {
-        Self {
-            model: Setting::Set(value.model),
-            revision: value.revision.map(Setting::Set).unwrap_or(Setting::NotSet),
-        }
-    }
-}
-
-impl From<HfEmbedderSettings> for crate::vector::hf::EmbedderOptions {
-    fn from(value: HfEmbedderSettings) -> Self {
-        let HfEmbedderSettings { model, revision } = value;
-        let mut this = Self::default();
-        if let Some(model) = model.set() {
-            this.model = model;
-        }
-        if let Some(revision) = revision.set() {
-            this.revision = Some(revision);
-        }
        this
    }
 }
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct OpenAiEmbedderSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub api_key: Setting<String>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "model")]
-    #[deserr(default, rename = "model")]
-    pub embedding_model: Setting<crate::vector::openai::EmbeddingModel>,
-}
-
-impl OpenAiEmbedderSettings {
-    pub fn apply(&mut self, new: Self) {
-        let Self { api_key, embedding_model: embedding_mode } = new;
-        self.api_key.apply(api_key);
-        self.embedding_model.apply(embedding_mode);
-    }
-}
-
-impl From<crate::vector::openai::EmbedderOptions> for OpenAiEmbedderSettings {
-    fn from(value: crate::vector::openai::EmbedderOptions) -> Self {
-        Self {
-            api_key: value.api_key.map(Setting::Set).unwrap_or(Setting::Reset),
-            embedding_model: Setting::Set(value.embedding_model),
-        }
-    }
-}
-
-impl From<OpenAiEmbedderSettings> for crate::vector::openai::EmbedderOptions {
-    fn from(value: OpenAiEmbedderSettings) -> Self {
-        let OpenAiEmbedderSettings { api_key, embedding_model } = value;
-        Self { api_key: api_key.set(), embedding_model: embedding_model.set().unwrap_or_default() }
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct UserProvidedSettings {
-    pub dimensions: usize,
-}
-
-impl From<UserProvidedSettings> for crate::vector::manual::EmbedderOptions {
-    fn from(value: UserProvidedSettings) -> Self {
-        Self { dimensions: value.dimensions }
-    }
-}
-
-impl From<crate::vector::manual::EmbedderOptions> for UserProvidedSettings {
-    fn from(value: crate::vector::manual::EmbedderOptions) -> Self {
-        Self { dimensions: value.dimensions }
-    }
-}
Author	SHA1	Message	Date
Clément Renault	d7bd3217d6	Only insert facets in bulks no more incrementally	2024-01-26 11:07:48 +01:00
Clément Renault	6210377b90	Add more logging to the received chunks	2024-01-25 20:16:59 +01:00
Clément Renault	9e3d1e1bbd	Remove unused imports	2024-01-25 17:58:47 +01:00
Clément Renault	bceaf4f981	Add a log on the time taken by the incremental facet updating	2024-01-25 17:48:31 +01:00
Clément Renault	d29b301618	Disable the facet search	2024-01-25 17:47:33 +01:00
meili-bors[bot]	a6fa0b97ec	Merge #4318 4318: Hide embedders r=ManyTheFish a=dureuill Hides `embedders` when it is an empty dictionary. Manual tests: - getting settings with empty embedders: not displayed - getting settings with non-empty embedders: displayed like before - dump with empty embedders: can be imported - dump with non-empty embedders: can be imported Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-01-15 09:37:31 +00:00
Louis Dureuil	38abfec611	Fix tests	2024-01-11 21:35:30 +01:00
Louis Dureuil	84a5c304fc	Don't display the embedders setting when it is an empty dict	2024-01-11 21:35:06 +01:00
meili-bors[bot]	e93d36d5b9	Merge #4313 4313: Fix document formatting performances r=Kerollmops a=ManyTheFish reduce the formatted option list to the attributes that should be formatted, instead of all the attributes to display. The time to compute the `format` list scales with the number of fields to format; cumulated with `map_leaf_values` that iterates over all the nested fields, it gives a quadratic complexity: `d*f` where `d` is the total number of fields to display and `f` is the total number of fields to format. Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-01-11 14:19:44 +00:00
ManyTheFish	95f8e21533	fix typos	2024-01-11 15:07:08 +01:00
meili-bors[bot]	68f197624e	Merge #4314 4314: Fix proximity precision telemetry r=Kerollmops a=ManyTheFish The proximity precision telemetry was partially missing in the global setting route. This PR adds the missing field and return the default value when the value is not set. Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-01-11 13:50:03 +00:00
ManyTheFish	b79b03d4e2	Fix proximity precision telemetry	2024-01-11 13:24:26 +01:00
ManyTheFish	86270e6878	Transform fields contained into _format into strings	2024-01-11 12:44:56 +01:00
ManyTheFish	81b6128b29	Update tests	2024-01-11 12:28:32 +01:00
ManyTheFish	5f5a486895	Reduce formatting time	2024-01-11 11:36:41 +01:00
ManyTheFish	5f4fc6c955	Add timer logs	2024-01-11 09:44:16 +01:00
meili-bors[bot]	1f5e8fc072	Merge #4311 4311: Limit the number of values returned by the facet search r=dureuill a=Kerollmops This PR fixes a bug where the number of values per facet returned by the `indexes/{index}/facet-search` route was not tacking the `faceting.maxValuePerFacet` setting. It also adds a test. Co-authored-by: Clément Renault <clement@meilisearch.com>	2024-01-10 16:04:06 +00:00
Clément Renault	3f3462ab62	Limit the number of values returned by the facet search	2024-01-10 16:54:08 +01:00
meili-bors[bot]	93363b0201	Merge #4308 4308: Fix hang on `/indexes` and `/stats` routes r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #4218 ## Context - A previous fix added a field to the `IndexScheduler` to memorize the `currently_updating_index`, so that accessing it through the search would return the handle without trying to open it. This resolved a hang on the search, but #4218 reported further hangs on the `/indexes` and `/stats` routes - These routes were shunting the `IndexScheduler` and using internal `IndexMapper` logic to access the indexes, again trying to reopen the updating index. ## What does this PR do? - Moves the logic relative to the `currently_updating_index` from the `IndexScheduler` to the `IndexMapper`, so that any index request to the `IndexMapper` can benefit from it. ## Test 1. Follow reproducer from #4218 2. Before this PR, notice a hang on `/stats` and `/indexes`, but not on `/indexes/<updating_index>/search` 3. After this PR, notice no hang on either of `/stats`, `/indexes` or `/indexes/<updating_index>/search` Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-01-10 10:46:20 +00:00
Louis Dureuil	97bb1ff9e2	Move `currently_updating_index` to IndexMapper	2024-01-09 15:37:27 +01:00
meili-bors[bot]	5ee1378856	Merge #4303 4303: Display default value when proximityPrecision is not set r=dureuill a=ManyTheFish # Pull Request ## Related Issue: #4187 Spec change requests: https://github.com/meilisearch/specifications/pull/261#discussion_r1441725272 ## What does this PR do? - Display default value when proximityPrecision is not set instead of Null Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-01-08 14:29:57 +00:00
ManyTheFish	e27b850b09	move the default display strategy on setting getter function	2024-01-08 14:03:47 +01:00
ManyTheFish	f75f22e026	Display default value when proximityPrecision is not set	2024-01-08 11:09:37 +01:00
meili-bors[bot]	6203f4acef	Merge #4296 4296: Fix single element search r=irevoire a=dureuill # Pull Request Before this PR, indexing a single vector in a single document would result in the vector not being found by the vector search. This PR adds a test case for this condition, and resolves it by bumping arroy to a version containing the fix. # Test case Output of the test before and after this PR: ```diff diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 2cd4b83e7..79819cab2 100644 --- a/meilisearch/tests/search/hybrid.rs on release-v1.6.0 +++ b/meilisearch/tests/search/hybrid.rs on fix-single-element-search `@@` -171,5 +171,5 `@@` async fn single_document() { .await; snapshot!(code, `@"200` OK"); - snapshot!(response["hits"][0], `@r###"{"title":"Shazam!","desc":"a` Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.0}"###); + snapshot!(response["hits"][0], `@r###"{"title":"Shazam!","desc":"a` Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###); } ``` Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-01-03 15:01:43 +00:00
Louis Dureuil	12edc2c20a	Update arroy to a fixed version	2024-01-03 15:59:37 +01:00
Louis Dureuil	94b9f3b310	Add test	2024-01-03 15:56:20 +01:00
meili-bors[bot]	da99a04eb3	Merge #4294 4294: fix compilation warnings for release v1.6 r=curquiza a=irevoire # Pull Request ## Related issue Fixes #4292 ## What does this PR do? - Removed unused imports #4295 fixes the issue no main Co-authored-by: Tamo <tamo@meilisearch.com>	2024-01-02 15:00:40 +00:00
Tamo	54ae6951eb	fix warning	2024-01-02 15:19:30 +01:00
meili-bors[bot]	658ec6e0a4	Merge #4279 4279: Check experimental feature on setting update query rather than in the task. r=ManyTheFish a=dureuill Improve the UX by checking for the vector store feature and returning an error synchronously when sending a setting update, rather than in the indexing task. Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-12-22 11:36:12 +00:00
meili-bors[bot]	43e822e802	Merge #4238 4238: Task queue webhook r=dureuill a=irevoire # Prototype `prototype-task-queue-webhook-1` The prototype is available through Docker by using the following command: ```bash docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-task-queue-webhook-1 ``` # Pull Request Implements the task queue webhook. ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4236 ## What does this PR do? - Provide a new cli and env var for the webhook, respectively called `--task-webhook-url` and `MEILI_TASK_WEBHOOK_URL` - Also supports sending the requests with a custom `Authorization` header by specifying the optional `--task-webhook-authorization-header` CLI parameter or `MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER` env variable. - Throw an error if the specified URL is invalid - Every time a batch is processed, send all the finished tasks into the webhook with our public `TaskView` type as a JSON Line GZIPed body. - Add one test. ## PR checklist ### Before becoming ready to review - [x] Add a test - [x] Compress the data we send - [x] Chunk and stream the data we send - [x] Remove the unwrap in the index-scheduler when sending the data fails - [x] The analytics are missing ### Before merging - [x] Release a prototype Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>	2023-12-21 14:43:46 +00:00
Louis Dureuil	ee54d3171e	Check experimental feature at query time	2023-12-21 15:26:12 +01:00
meili-bors[bot]	a0e713c4e7	Merge #4277 4277: Update mini-dashboard to v0.2.12 r=curquiza a=mdubus # Pull Request ## Related issue Fixes #4276 ## What does this PR do? Upgrade mini-dashboard to version 0.2.12 ([see changes](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.12)) ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>	2023-12-21 11:03:46 +00:00
meili-bors[bot]	d4cb0a885b	Merge #4275 4275: Flatten settings r=dureuill a=dureuill # Pull Request ## Related issue Initial internal feedback seems to indicate that the current shape of the `embedders` setting is undesirable: it has too much depth. This PR changes this by flattening the structure of the embedders to the following: ```json5 // NEW structure "embedders": { // still starts with the embedder name "default": { "source": "huggingFace", // now a string // properties of the source are all at the same level as the source "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd", "documentTemplate": "A product titled '{{doc.title}}'" // now a string } } ``` By comparison, the old structure was: ```json5 // PREVIOUS version, no longer working with this PR "embedders": { // still starts with the embedder name "default": { "source": { "huggingFace": { "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd" }, "documentTemplate": { "template": "A product titled '{{doc.title}}'" // now a string } } } ``` The fields that are accepted in the new version of the `embedders` setting are depending on the value of the `source` field: ```json5 // huggingFace "embedders": { "default": { "source": "huggingFace", "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd", "documentTemplate": "A product titled '{{doc.title}}'" } } // openAi "embedders": { "default": { "source": "openAi", "model": "text-embedding-ada-002", "apiKey": "open_ai_api_key", "documentTemplate": "A product titled '{{doc.title}}'" } } // userProvided "embedders": { "default": { "source": "userProvided", "dimensions": 42, // mandatory } } ``` ## What does this PR do? - Flatten the settings structure - Validate the prompt earlier to return a synchronous error on setting change rather than in the failing task - Make it an error to pass a field for the wrong source (see above for allowed fields for each source) - Not changed: It is still an error not to pass `dimensions` to the `userProvided` embedder - If `source` was specified in the settings, validate the setting early to return a synchronous error in case of a missing mandatory field for the userProvided source (dimensions) or a forbidden field for the specified source. - If `source` was not specified in the settings, still validate the setting, but only at indexing time, by using the source stored in the DB. - Resets all values if the source changes, even if the user did not reset them explicitly. ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Change the public facing guide for using the API - [ ] Change examples of use in the changelog Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-12-21 09:58:01 +00:00
Morgane Dubus	f52dee2b3b	Update Cargo.toml Update mini-dashboard with v0.2.12	2023-12-21 09:53:13 +01:00
Louis Dureuil	0bf879fb88	Fix warning on rust stable	2023-12-20 17:48:09 +01:00
Louis Dureuil	6ff81de401	Fix tests	2023-12-20 17:16:46 +01:00
Louis Dureuil	2e4c9651df	Validate settings in route	2023-12-20 17:16:46 +01:00
Louis Dureuil	ec9649c922	Add function to validate settings in Meilisearch, to be used in the routes	2023-12-20 17:16:46 +01:00
Louis Dureuil	9123370e90	Validate fused settings in settings task after fusing with existing setting	2023-12-20 17:16:46 +01:00
Louis Dureuil	14b396d302	Add new errors	2023-12-20 17:16:45 +01:00
Louis Dureuil	393216bf30	Flatten embedders settings	2023-12-20 17:16:43 +01:00
Louis Dureuil	e249e4db7b	Change Setting::apply function signature	2023-12-20 17:15:24 +01:00
meili-bors[bot]	de2ca7006e	Merge #4272 4272: Don't pass default revision when the model is explicitly set in config r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #4271 ## What does this PR do? - When the `model` is explicitly set in the `embedders` setting, we reset the `revision` to `None`, such that if the user doesn't specify a revision, the head of the model repository is chosen. - Not changed: If the user specifies a revision, it applies, like previously. - Not changed: If the user doesn't specify a model, the default model with the default revision applies, like previously. ## Manual testing on a fresh DB 1. Enable experimental feature: ```sh curl \ -X PATCH 'http://localhost:7700/experimental-features/' \ -H 'Content-Type: application/json' -H 'Authorization: Bearer foo' \ --data-binary '{ "vectorStore": true }' ``` 2. Send settings with a specified model but no specified revision: ```sh curl \ -X PATCH 'http://localhost:7700/indexes/products/settings' \ -H 'Content-Type: application/json' --data-binary \ '{ "embedders": { "default": { "source": { "huggingFace": { "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" } }, "documentTemplate": { "template": "A product titled '{{doc.title}}'"} } } }' ``` 3. Check that the task was successful: ```sh curl 'http://localhost:7700/tasks/0' {"uid":0,"indexUid":"products","status":"succeeded","type":"settingsUpdate","canceledBy":null,"details":{"embedders":{"default":{"source":{"huggingFace":{"model":"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"}},"documentTemplate":{"template":"A product titled {{doc.title}}"}}}},"error":null,"duration":"PT0.001892S","enqueuedAt":"2023-12-20T09:17:01.73789Z","startedAt":"2023-12-20T09:17:01.73854Z","finishedAt":"2023-12-20T09:17:01.740432Z"} ``` 4. Send documents to index: ```sh curl 'https://localhost:7700/indexes/products/documents' -H 'Content-Type: application/json' --data-binary '{"id": 0, "title": "Best product"}' ``` Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-12-20 14:27:51 +00:00
Louis Dureuil	333ce12eb2	Fixed issue where the default revision is always the one we picked for the default model	2023-12-20 10:17:49 +01:00
Clément Renault	fa2b96b9a5	Add an Authorization Header along with the webhook calls	2023-12-19 12:18:45 +01:00
Tamo	19736cefe8	add the analytics	2023-12-19 10:36:04 +01:00
Tamo	4fb25b8782	fix clippy	2023-12-19 10:35:51 +01:00
Tamo	c83a33017e	stream and chunk the data	2023-12-19 10:35:51 +01:00
Tamo	be72326c0a	gzip the tasks	2023-12-19 10:35:51 +01:00
Tamo	547379abb0	parse the url correctly	2023-12-19 10:35:51 +01:00
Tamo	0b2fff27f2	update and fix the test	2023-12-19 10:35:51 +01:00
Tamo	3adbc2b942	return a task view instead of a task	2023-12-19 10:35:51 +01:00
Tamo	fbea721378	add a first working test with actixweb	2023-12-19 10:35:51 +01:00
Tamo	391eb72137	start writing a test with actix but it doesn't works	2023-12-19 10:35:50 +01:00
Tamo	d78ad51082	Implement the webhook	2023-12-19 10:35:50 +01:00
Tamo	1956045a06	add the option	2023-12-19 10:23:56 +01:00