Compare commits


33 Commits

Author SHA1 Message Date
Louis Dureuil
036251dad5 Revert mimalloc to 0.1.37 2024-06-05 14:21:01 +02:00
meili-bors[bot]
ba75d23bfe Merge #4648
4648: Update version for the next release (v1.8.1) in Cargo.toml r=ManyTheFish a=meili-bot

⚠️ This PR is automatically generated. Check that the new version is the expected one and that Cargo.lock has been updated before merging.

Co-authored-by: ManyTheFish <ManyTheFish@users.noreply.github.com>
2024-05-21 16:38:36 +00:00
ManyTheFish
7fbb3bf8e8 Update version for the next release (v1.8.1) in Cargo.toml 2024-05-21 15:13:03 +00:00
meili-bors[bot]
9066a446a3 Merge #4642
4642: Index the _geo fields when changing the setting while there is already documents in the DB r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4640
Fixes https://github.com/meilisearch/meilisearch/issues/4628

## What does this PR do?
- Add an integration test that first indexes the document and then changes the settings
- Fix `extract_geo_point` by detecting whether the `_geo` field has been faceted in this settings change and, if so, indexing all documents again (see the sketch below)
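
A minimal sketch of the idea behind the fix, using hypothetical types and field names rather than the actual milli code: when the settings change turns `_geo` into a faceted (filterable/sortable) field and it was not one before, the documents already stored must go through geo extraction again.

```rust
use std::collections::HashSet;

/// Hypothetical stand-in for the old/new settings compared during a settings task.
struct SettingsDiff {
    old_faceted_fields: HashSet<String>,
    new_faceted_fields: HashSet<String>,
}

impl SettingsDiff {
    /// `_geo` was not faceted before but is now: every existing document
    /// must be re-run through the geo point extraction.
    fn geo_needs_reindex(&self) -> bool {
        !self.old_faceted_fields.contains("_geo") && self.new_faceted_fields.contains("_geo")
    }
}

fn main() {
    let diff = SettingsDiff {
        old_faceted_fields: HashSet::new(),
        new_faceted_fields: HashSet::from(["_geo".to_string()]),
    };
    assert!(diff.geo_needs_reindex());
}
```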

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-05-21 13:16:11 +00:00
ManyTheFish
f762307838 Fix clippy 2024-05-21 13:44:20 +02:00
ManyTheFish
3e94a90722 Fixes 2024-05-21 13:39:46 +02:00
ManyTheFish
fc7e817221 Index geo points based on the settings differences 2024-05-20 12:27:26 +02:00
Tamo
0f78703b85 add a test reproducing the bug 2024-05-20 10:58:08 +02:00
meili-bors[bot]
c668043c4f Merge #4617
4617: Destructure `EmbedderOptions` so we don't miss some options r=dureuill a=dureuill

# Pull Request

## Related issue
#4595 was caused by the code not destructuring the embedder options.


## What does this PR do?
This PR adds the missing `url` parameter for ollama and makes sure a similar issue cannot happen in the future (see the sketch below).
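
A minimal sketch of the guard this relies on, with illustrative field names rather than the real `EmbedderOptions`: destructuring the struct exhaustively (no `..` rest pattern) turns a forgotten field into a compile error instead of a silently dropped option.

```rust
// Illustrative only: exhaustive destructuring as a guard against forgotten fields.
struct EmbedderOptions {
    model: String,
    url: Option<String>,
    api_key: Option<String>,
}

fn describe(options: &EmbedderOptions) -> String {
    // No `..` here: adding a field to `EmbedderOptions` without handling it
    // in this function becomes a compile error.
    let EmbedderOptions { model, url, api_key } = options;
    format!(
        "model={model}, url={}, api_key_set={}",
        url.as_deref().unwrap_or("<default>"),
        api_key.is_some()
    )
}

fn main() {
    let opts = EmbedderOptions { model: "nomic-embed-text".into(), url: None, api_key: None };
    println!("{}", describe(&opts));
}
```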



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-05-02 14:55:32 +00:00
Louis Dureuil
5a305bfdea Remove unused struct 2024-05-02 16:14:37 +02:00
Louis Dureuil
f4dd73ec8c Destructure EmbedderOptions so we don't miss some options 2024-05-02 15:39:36 +02:00
meili-bors[bot]
66dce4600d Merge #4603
4603: Update charabia v0.8.10 r=Kerollmops a=ManyTheFish

- Update Charabia v0.8.10
- Add `swedish-recomposition` as an optional feature flag

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-30 13:04:02 +00:00
ManyTheFish
fe51ceca6d Update lock file 2024-04-30 14:33:37 +02:00
ManyTheFish
88174b8ae4 Update charabia v0.8.10 2024-04-30 14:30:23 +02:00
meili-bors[bot]
ebca29f3de Merge #4597
4597: Fix embeddings settings update r=ManyTheFish a=ManyTheFish

# Pull Request
- add some conditions that reduce the work done when changing the settings (see the sketch after this list)
- add some benchmarks on embedders
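
A rough sketch of the kind of short-circuit described above, with hypothetical types rather than the PR's actual code: embeddings are only regenerated when the parts of the embedder configuration that influence the produced vectors actually changed.

```rust
/// Hypothetical, simplified view of an embedder configuration.
#[derive(PartialEq)]
struct EmbedderConfig {
    source: String,
    model: String,
    document_template: String,
}

/// Re-embedding is expensive, so only do it when the settings change
/// actually affects the vectors that would be produced.
fn must_regenerate_embeddings(old: &EmbedderConfig, new: &EmbedderConfig) -> bool {
    old != new
}

fn main() {
    let old = EmbedderConfig {
        source: "rest".into(),
        model: "all-MiniLM-L6-v2".into(),
        document_template: "{{doc.title}}".into(),
    };
    let unchanged = EmbedderConfig {
        source: "rest".into(),
        model: "all-MiniLM-L6-v2".into(),
        document_template: "{{doc.title}}".into(),
    };
    assert!(!must_regenerate_embeddings(&old, &unchanged));
}
```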

## Related issue
Fixes #4585


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-25 16:37:28 +00:00
meili-bors[bot]
c793b6ef6d Merge #4600
4600: Fix embedders api r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #4594
Fixes #4595


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-25 13:16:33 +00:00
ManyTheFish
cbbfff3594 Remove debuging prints 2024-04-25 10:37:18 +02:00
ManyTheFish
dbcf50589b Fix clippy 2024-04-25 10:36:10 +02:00
meili-bors[bot]
3e5cd027a5 Merge #4593
4593: Stop crashing when panic occurs in thread pool r=ManyTheFish a=Kerollmops

This PR fixes #4362 by introducing a new boolean to catch panics in the rayon thread pool. The boolean is read once the operations performed in rayon have finished, and if it is set the indexation process is stopped. This first version doesn't expose the panic message but marks the task as failed.

The current implementation exposes a `ThreadPoolNoAbort` wrapper: the `rayon::ThreadPool` is wrapped so that, after running the `ThreadPool::install` function, we can check that nothing went wrong. An atomic boolean and some `store`/`load` logic keep this check cheap.
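
A rough usage sketch, assuming the wrapper is exposed from milli as the diff below suggests (`ThreadPoolNoAbortBuilder`, `PanicCatched`); the surrounding task plumbing is simplified:

```rust
use milli::{PanicCatched, ThreadPoolNoAbortBuilder};

// The indexing work runs inside `install`; a panic in any rayon worker
// surfaces here as a `PanicCatched` error instead of aborting the process,
// so the caller can mark the task as failed.
fn run_indexing_step() -> Result<(), PanicCatched> {
    let pool = ThreadPoolNoAbortBuilder::new()
        .thread_name(|index| format!("indexing-thread:{index}"))
        .num_threads(4)
        .build()
        .expect("failed to build the indexing thread pool");

    pool.install(|| {
        // ... parallel indexing work that may panic ...
    })?;

    Ok(())
}
```

The error is then reported on the task instead of taking the whole engine down.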

Before, Meilisearch was completely crashing...

<img width="1563" alt="Capture d’écran 2024-04-22 à 15 49 02" src="https://github.com/meilisearch/meilisearch/assets/3610253/ce114917-a881-4fbb-85df-c195fcf0c7cb">

Now, it handles the panics correctly and marks the task as failed.

<img width="1558" alt="Capture d’écran 2024-04-22 à 15 42 14" src="https://github.com/meilisearch/meilisearch/assets/3610253/8bd031ef-5e8f-4a12-a91e-c823597a2344">


Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-04-24 16:27:08 +00:00
ManyTheFish
7468c1cf8d Introduce WildcardSetting that are serialized as wildcards by default 2024-04-24 18:15:03 +02:00
Clément Renault
d4aeff92d0 Introduce the ThreadPoolNoAbort wrapper 2024-04-24 16:40:12 +02:00
ManyTheFish
e87cb373de Avoid intermediate serializing when displaying settings 2024-04-24 12:33:07 +02:00
ManyTheFish
9b76501875 Display set API key for Ollama embedder 2024-04-24 12:33:07 +02:00
ManyTheFish
6247e95dc3 Add benchmark for embeddings 2024-04-23 17:42:20 +02:00
Clément Renault
b3173d0423 Remove useless dots in the error messages 2024-04-22 18:09:33 +02:00
Clément Renault
96cc5319c8 Introduce a new internal error type to categorize panics 2024-04-22 18:09:33 +02:00
Clément Renault
0c7003c5df Introduce an atomic to catch panics in thread pools 2024-04-22 18:09:33 +02:00
ManyTheFish
a1aa999026 Add conditions reducing wrok 2024-04-22 14:18:35 +02:00
meili-bors[bot]
aa0bbbb246 Merge #4578
4578: Remove useless analytics r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes #4577

## What does this PR do?
Remove the following analytics:
- `Health Seen`
- `Stats Seen`
- `Task Seen`
- `Version Seen`


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-04-18 13:30:42 +00:00
Tamo
2dd9dd6d0a remove the Health Seen analytic 2024-04-17 11:43:40 +02:00
Tamo
e1f27de51a remove the Stats Seen analytic 2024-04-16 18:49:41 +02:00
Tamo
abae31aee0 remove the Task Seen analytic 2024-04-16 18:48:10 +02:00
Tamo
70ce0095ea remove the Version Seen analytic 2024-04-16 18:48:03 +02:00
39 changed files with 596 additions and 453 deletions

Cargo.lock generated
View File

@@ -494,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"anyhow",
"bytes",
@@ -639,7 +639,7 @@ dependencies = [
[[package]]
name = "build-info"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"anyhow",
"time",
@@ -889,9 +889,9 @@ dependencies = [
[[package]]
name = "charabia"
-version = "0.8.9"
+version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6a65052f308636e5d5e1777f0dbc07919f5fbac24b6c8ad3e140472e5520de9"
+checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
dependencies = [
"aho-corasick",
"cow-utils",
@@ -1539,7 +1539,7 @@ dependencies = [
[[package]]
name = "dump"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"anyhow",
"big_s",
@@ -1787,7 +1787,7 @@ dependencies = [
[[package]]
name = "file-store"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"faux",
"tempfile",
@@ -1810,7 +1810,7 @@ dependencies = [
[[package]]
name = "filter-parser"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"insta",
"nom",
@@ -1830,7 +1830,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"criterion",
"serde_json",
@@ -1948,7 +1948,7 @@ dependencies = [
[[package]]
name = "fuzzers"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"arbitrary",
"clap",
@@ -2442,7 +2442,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"anyhow",
"big_s",
@@ -2638,7 +2638,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"criterion",
"serde_json",
@@ -3275,7 +3275,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"insta",
"md5",
@@ -3284,7 +3284,7 @@ dependencies = [
[[package]]
name = "meilisearch"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"actix-cors",
"actix-http",
@@ -3377,7 +3377,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"base64 0.21.7",
"enum-iterator",
@@ -3396,7 +3396,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"actix-web",
"anyhow",
@@ -3426,7 +3426,7 @@ dependencies = [
[[package]]
name = "meilitool"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"anyhow",
"clap",
@@ -3465,7 +3465,7 @@ dependencies = [
[[package]]
name = "milli"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"arroy",
"big_s",
@@ -3906,7 +3906,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"big_s",
"serde_json",
@@ -6074,7 +6074,7 @@ dependencies = [
[[package]]
name = "xtask"
-version = "1.8.0"
+version = "1.8.1"
dependencies = [
"anyhow",
"build-info",

View File

@@ -22,7 +22,7 @@ members = [
]
[workspace.package]
-version = "1.8.0"
+version = "1.8.1"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -14,7 +14,7 @@ license.workspace = true
anyhow = "1.0.79"
csv = "1.3.0"
milli = { path = "../milli" }
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.37", default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
[dev-dependencies]

View File

@@ -256,8 +256,8 @@ pub(crate) mod test {
pub fn create_test_settings() -> Settings<Checked> {
let settings = Settings {
-displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
-searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
+displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
+searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,

View File

@@ -315,8 +315,8 @@ impl From<v5::ResponseError> for v6::ResponseError {
impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
fn from(settings: v5::Settings<T>) -> Self {
v6::Settings {
-displayed_attributes: settings.displayed_attributes.into(),
-searchable_attributes: settings.searchable_attributes.into(),
+displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
+searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: {

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -57,3 +57,5 @@ greek = ["milli/greek"]
khmer = ["milli/khmer"]
# allow vietnamese specialized tokenization
vietnamese = ["milli/vietnamese"]
+# force swedish character recomposition
+swedish-recomposition = ["milli/swedish-recomposition"]

View File

@@ -3,7 +3,7 @@ use std::convert::Infallible;
use std::fmt;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
-use std::ops::ControlFlow;
+use std::ops::{ControlFlow, Deref};
use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
@@ -143,21 +143,13 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct Settings<T> {
-#[serde(
-default,
-serialize_with = "serialize_with_wildcard",
-skip_serializing_if = "Setting::is_not_set"
-)]
+#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDisplayedAttributes>)]
-pub displayed_attributes: Setting<Vec<String>>,
+pub displayed_attributes: WildcardSetting,
-#[serde(
-default,
-serialize_with = "serialize_with_wildcard",
-skip_serializing_if = "Setting::is_not_set"
-)]
+#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchableAttributes>)]
-pub searchable_attributes: Setting<Vec<String>>,
+pub searchable_attributes: WildcardSetting,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
@@ -251,8 +243,8 @@ impl<T> Settings<T> {
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
-displayed_attributes: Setting::Reset,
-searchable_attributes: Setting::Reset,
+displayed_attributes: Setting::Reset.into(),
+searchable_attributes: Setting::Reset.into(),
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
@@ -319,7 +311,7 @@ impl Settings<Checked> {
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
-let displayed_attributes = match self.displayed_attributes {
+let displayed_attributes = match self.displayed_attributes.0 {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
@@ -330,7 +322,7 @@ impl Settings<Unchecked> {
otherwise => otherwise,
};
-let searchable_attributes = match self.searchable_attributes {
+let searchable_attributes = match self.searchable_attributes.0 {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
@@ -342,8 +334,8 @@ impl Settings<Unchecked> {
};
Settings {
-displayed_attributes,
-searchable_attributes,
+displayed_attributes: displayed_attributes.into(),
+searchable_attributes: searchable_attributes.into(),
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
@@ -412,13 +404,13 @@ pub fn apply_settings_to_builder(
_kind,
} = settings;
-match searchable_attributes {
+match searchable_attributes.deref() {
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
Setting::Reset => builder.reset_searchable_fields(),
Setting::NotSet => (),
}
-match displayed_attributes {
+match displayed_attributes.deref() {
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
Setting::Reset => builder.reset_displayed_fields(),
Setting::NotSet => (),
@@ -690,11 +682,13 @@ pub fn settings(
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
-},
+}
+.into(),
searchable_attributes: match searchable_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
-},
+}
+.into(),
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
@@ -848,6 +842,41 @@ impl From<ProximityPrecisionView> for ProximityPrecision {
}
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)]
pub struct WildcardSetting(Setting<Vec<String>>);
impl From<Setting<Vec<String>>> for WildcardSetting {
fn from(setting: Setting<Vec<String>>) -> Self {
Self(setting)
}
}
impl Serialize for WildcardSetting {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serialize_with_wildcard(&self.0, serializer)
}
}
impl<E: deserr::DeserializeError> Deserr<E> for WildcardSetting {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: ValuePointerRef<'_>,
) -> Result<Self, E> {
Ok(Self(Setting::deserialize_from_value(value, location)?))
}
}
impl std::ops::Deref for WildcardSetting {
type Target = Setting<Vec<String>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[cfg(test)]
pub(crate) mod test {
use super::*;
@@ -856,8 +885,8 @@ pub(crate) mod test {
fn test_setting_check() {
// test no changes
let settings = Settings {
-displayed_attributes: Setting::Set(vec![String::from("hello")]),
-searchable_attributes: Setting::Set(vec![String::from("hello")]),
+displayed_attributes: Setting::Set(vec![String::from("hello")]).into(),
+searchable_attributes: Setting::Set(vec![String::from("hello")]).into(),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
@@ -883,8 +912,9 @@ pub(crate) mod test {
// test wildcard
// test no changes
let settings = Settings {
-displayed_attributes: Setting::Set(vec![String::from("*")]),
-searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
+displayed_attributes: Setting::Set(vec![String::from("*")]).into(),
+searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")])
+.into(),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
@@ -904,7 +934,7 @@ pub(crate) mod test {
};
let checked = settings.check();
-assert_eq!(checked.displayed_attributes, Setting::Reset);
-assert_eq!(checked.searchable_attributes, Setting::Reset);
+assert_eq!(checked.displayed_attributes, Setting::Reset.into());
+assert_eq!(checked.searchable_attributes, Setting::Reset.into());
}
}

View File

@@ -56,7 +56,7 @@ jsonwebtoken = "9.2.0"
lazy_static = "1.4.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.37", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.1"
@@ -156,6 +156,7 @@ thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"]
+swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"

View File

@@ -7,7 +7,6 @@ use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
-use crate::routes::tasks::TasksFilterQuery;
use crate::Opt;
pub struct MockAnalytics {
@@ -86,6 +85,4 @@ impl Analytics for MockAnalytics {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
-fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {}
-fn health_seen(&self, _request: &HttpRequest) {}
}

View File

@@ -14,7 +14,6 @@ use platform_dirs::AppDirs;
use serde_json::Value;
use crate::routes::indexes::documents::UpdateDocumentsQuery;
-use crate::routes::tasks::TasksFilterQuery;
// if the analytics feature is disabled
// the `SegmentAnalytics` point to the mock instead of the real analytics
@@ -117,10 +116,4 @@ pub trait Analytics: Sync + Send {
index_creation: bool,
request: &HttpRequest,
);
-// this method should be called to aggregate the get tasks requests.
-fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest);
-// this method should be called to aggregate a add documents request
-fn health_seen(&self, request: &HttpRequest);
}

View File

@@ -33,7 +33,6 @@ use crate::option::{
};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
-use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
@@ -81,8 +80,6 @@ pub enum AnalyticsMsg {
AggregateUpdateDocuments(DocumentsAggregator),
AggregateGetFetchDocuments(DocumentsFetchAggregator),
AggregatePostFetchDocuments(DocumentsFetchAggregator),
-AggregateTasks(TasksAggregator),
-AggregateHealth(HealthAggregator),
}
pub struct SegmentAnalytics {
@@ -152,8 +149,6 @@ impl SegmentAnalytics {
update_documents_aggregator: DocumentsAggregator::default(),
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
-get_tasks_aggregator: TasksAggregator::default(),
-health_aggregator: HealthAggregator::default(),
});
tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));
@@ -231,16 +226,6 @@ impl super::Analytics for SegmentAnalytics {
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
}
-fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) {
-let aggregate = TasksAggregator::from_query(query, request);
-let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
-}
-fn health_seen(&self, request: &HttpRequest) {
-let aggregate = HealthAggregator::from_query(request);
-let _ = self.sender.try_send(AnalyticsMsg::AggregateHealth(aggregate));
-}
}
/// This structure represent the `infos` field we send in the analytics.
@@ -394,8 +379,6 @@ pub struct Segment {
update_documents_aggregator: DocumentsAggregator,
get_fetch_documents_aggregator: DocumentsFetchAggregator,
post_fetch_documents_aggregator: DocumentsFetchAggregator,
-get_tasks_aggregator: TasksAggregator,
-health_aggregator: HealthAggregator,
}
impl Segment {
@@ -458,8 +441,6 @@ impl Segment {
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
-Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
-Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
None => (),
}
}
@@ -513,8 +494,6 @@ impl Segment {
update_documents_aggregator,
get_fetch_documents_aggregator,
post_fetch_documents_aggregator,
-get_tasks_aggregator,
-health_aggregator,
} = self;
if let Some(get_search) =
@@ -562,12 +541,6 @@ impl Segment {
{
let _ = self.batcher.push(post_fetch_documents).await;
}
-if let Some(get_tasks) = take(get_tasks_aggregator).into_event(user, "Tasks Seen") {
-let _ = self.batcher.push(get_tasks).await;
-}
-if let Some(health) = take(health_aggregator).into_event(user, "Health Seen") {
-let _ = self.batcher.push(health).await;
-}
let _ = self.batcher.flush().await;
}
}
@@ -1503,176 +1476,6 @@ impl DocumentsDeletionAggregator {
}
}
#[derive(Default, Serialize)]
pub struct TasksAggregator {
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
filtered_by_uid: bool,
filtered_by_index_uid: bool,
filtered_by_type: bool,
filtered_by_status: bool,
filtered_by_canceled_by: bool,
filtered_by_before_enqueued_at: bool,
filtered_by_after_enqueued_at: bool,
filtered_by_before_started_at: bool,
filtered_by_after_started_at: bool,
filtered_by_before_finished_at: bool,
filtered_by_after_finished_at: bool,
total_received: usize,
}
impl TasksAggregator {
pub fn from_query(query: &TasksFilterQuery, request: &HttpRequest) -> Self {
let TasksFilterQuery {
limit: _,
from: _,
uids,
index_uids,
types,
statuses,
canceled_by,
before_enqueued_at,
after_enqueued_at,
before_started_at,
after_started_at,
before_finished_at,
after_finished_at,
} = query;
Self {
timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(),
filtered_by_uid: uids.is_some(),
filtered_by_index_uid: index_uids.is_some(),
filtered_by_type: types.is_some(),
filtered_by_status: statuses.is_some(),
filtered_by_canceled_by: canceled_by.is_some(),
filtered_by_before_enqueued_at: before_enqueued_at.is_some(),
filtered_by_after_enqueued_at: after_enqueued_at.is_some(),
filtered_by_before_started_at: before_started_at.is_some(),
filtered_by_after_started_at: after_started_at.is_some(),
filtered_by_before_finished_at: before_finished_at.is_some(),
filtered_by_after_finished_at: after_finished_at.is_some(),
total_received: 1,
}
}
/// Aggregate one [TasksAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
let Self {
timestamp,
user_agents,
total_received,
filtered_by_uid,
filtered_by_index_uid,
filtered_by_type,
filtered_by_status,
filtered_by_canceled_by,
filtered_by_before_enqueued_at,
filtered_by_after_enqueued_at,
filtered_by_before_started_at,
filtered_by_after_started_at,
filtered_by_before_finished_at,
filtered_by_after_finished_at,
} = other;
if self.timestamp.is_none() {
self.timestamp = timestamp;
}
// we can't create a union because there is no `into_union` method
for user_agent in user_agents {
self.user_agents.insert(user_agent);
}
self.filtered_by_uid |= filtered_by_uid;
self.filtered_by_index_uid |= filtered_by_index_uid;
self.filtered_by_type |= filtered_by_type;
self.filtered_by_status |= filtered_by_status;
self.filtered_by_canceled_by |= filtered_by_canceled_by;
self.filtered_by_before_enqueued_at |= filtered_by_before_enqueued_at;
self.filtered_by_after_enqueued_at |= filtered_by_after_enqueued_at;
self.filtered_by_before_started_at |= filtered_by_before_started_at;
self.filtered_by_after_started_at |= filtered_by_after_started_at;
self.filtered_by_before_finished_at |= filtered_by_before_finished_at;
self.filtered_by_after_finished_at |= filtered_by_after_finished_at;
self.filtered_by_after_finished_at |= filtered_by_after_finished_at;
self.total_received = self.total_received.saturating_add(total_received);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
// if we had no timestamp it means we never encountered any events and
// thus we don't need to send this event.
let timestamp = self.timestamp?;
Some(Track {
timestamp: Some(timestamp),
user: user.clone(),
event: event_name.to_string(),
properties: serde_json::to_value(self).ok()?,
..Default::default()
})
}
}
#[derive(Default, Serialize)]
pub struct HealthAggregator {
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
#[serde(rename = "requests.total_received")]
total_received: usize,
}
impl HealthAggregator {
pub fn from_query(request: &HttpRequest) -> Self {
Self {
timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(),
total_received: 1,
}
}
/// Aggregate one [HealthAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
let Self { timestamp, user_agents, total_received } = other;
if self.timestamp.is_none() {
self.timestamp = timestamp;
}
// we can't create a union because there is no `into_union` method
for user_agent in user_agents {
self.user_agents.insert(user_agent);
}
self.total_received = self.total_received.saturating_add(total_received);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
// if we had no timestamp it means we never encountered any events and
// thus we don't need to send this event.
let timestamp = self.timestamp?;
Some(Track {
timestamp: Some(timestamp),
user: user.clone(),
event: event_name.to_string(),
properties: serde_json::to_value(self).ok()?,
..Default::default()
})
}
}
#[derive(Default, Serialize)]
pub struct DocumentsFetchAggregator {
#[serde(skip)]

View File

@@ -13,6 +13,7 @@ use byte_unit::{Byte, ByteError};
use clap::Parser;
use meilisearch_types::features::InstanceTogglableFeatures;
use meilisearch_types::milli::update::IndexerConfig;
+use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
use rustls::server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
};
@@ -666,7 +667,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
-let thread_pool = rayon::ThreadPoolBuilder::new()
+let thread_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|index| format!("indexing-thread:{index}"))
.num_threads(*other.max_indexing_threads)
.build()?;

View File

@@ -269,12 +269,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
pub async fn get_index_stats(
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
-req: HttpRequest,
-analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": true }), Some(&req));
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
debug!(returns = ?stats, "Get index stats");

View File

@@ -137,10 +137,8 @@ macro_rules! make_setting_route {
let settings = settings(&index, &rtxn, meilisearch_types::settings::SecretPolicy::HideSecrets)?;
debug!(returns = ?settings, "Update settings");
-let mut json = serde_json::json!(&settings);
-let val = json[$camelcase_attr].take();
-Ok(HttpResponse::Ok().json(val))
+Ok(HttpResponse::Ok().json(settings.$attr))
}
pub fn resources() -> Resource {

View File

@@ -8,11 +8,9 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
-use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
-use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search_queue::SearchQueue;
@@ -296,10 +294,7 @@ pub struct Stats {
async fn get_stats(
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
-req: HttpRequest,
-analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
let filters = index_scheduler.filters();
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
@@ -355,11 +350,7 @@ struct VersionResponse {
async fn get_version(
_index_scheduler: GuardedData<ActionPolicy<{ actions::VERSION }>, Data<IndexScheduler>>,
-req: HttpRequest,
-analytics: web::Data<dyn Analytics>,
) -> HttpResponse {
-analytics.publish("Version Seen".to_string(), json!(null), Some(&req));
let build_info = build_info::BuildInfo::from_build();
HttpResponse::Ok().json(VersionResponse {
@@ -376,21 +367,11 @@ async fn get_version(
})
}
-#[derive(Serialize)]
-struct KeysResponse {
-private: Option<String>,
-public: Option<String>,
-}
pub async fn get_health(
-req: HttpRequest,
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
search_queue: Data<SearchQueue>,
-analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
-analytics.health_seen(&req);
search_queue.health().unwrap();
index_scheduler.health().unwrap();
auth_controller.health().unwrap();

View File

@@ -270,12 +270,8 @@ pub struct AllTasks {
async fn get_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TasksFilterQuery, DeserrQueryParamError>,
-req: HttpRequest,
-analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let mut params = params.into_inner();
-analytics.get_tasks(&params, &req);
// We +1 just to know if there is more after this "page" or not.
params.limit.0 = params.limit.0.saturating_add(1);
let limit = params.limit.0;
@@ -298,8 +294,6 @@ async fn get_tasks(
async fn get_task(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
task_uid: web::Path<String>,
-req: HttpRequest,
-analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let task_uid_string = task_uid.into_inner();
@@ -310,8 +304,6 @@ async fn get_task(
}
};
-analytics.publish("Tasks Seen".to_string(), json!({ "per_task_uid": true }), Some(&req));
let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
let filters = index_scheduler.filters();
let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?;

View File

@@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() {
)
.await;
}
#[actix_rt::test]
async fn bug_4640() {
// https://github.com/meilisearch/meilisearch/issues/4640
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.update_settings_filterable_attributes(json!(["_geo"])).await;
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.wait_task(ret.uid()).await;
// Sort the document with the second one first
index
.search(
json!({
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
},
"_geoDistance": 9714063
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
-charabia = { version = "0.8.9", default-features = false }
+charabia = { version = "0.8.10", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"
@@ -89,7 +89,7 @@ ureq = { version = "2.9.6", features = ["json"] }
url = "2.5.0"
[dev-dependencies]
-mimalloc = { version = "0.1.39", default-features = false }
+mimalloc = { version = "0.1.37", default-features = false }
big_s = "1.0.2"
insta = "1.34.0"
maplit = "1.0.2"
@@ -136,7 +136,11 @@ greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]
# allow vietnamese specialized tokenization
vietnamese = ["charabia/vietnamese"]
+# force swedish character recomposition
+swedish-recomposition = ["charabia/swedish-recomposition"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]

View File

@@ -9,6 +9,7 @@ use serde_json::Value;
use thiserror::Error;
use crate::documents::{self, DocumentsBatchCursorError};
+use crate::thread_pool_no_abort::PanicCatched;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
@@ -39,17 +40,19 @@ pub enum InternalError {
Fst(#[from] fst::Error),
#[error(transparent)]
DocumentsError(#[from] documents::Error),
-#[error("Invalid compression type have been specified to grenad.")]
+#[error("Invalid compression type have been specified to grenad")]
GrenadInvalidCompressionType,
-#[error("Invalid grenad file with an invalid version format.")]
+#[error("Invalid grenad file with an invalid version format")]
GrenadInvalidFormatVersion,
-#[error("Invalid merge while processing {process}.")]
+#[error("Invalid merge while processing {process}")]
IndexingMergingKeys { process: &'static str },
#[error("{}", HeedError::InvalidDatabaseTyping)]
InvalidDatabaseTyping,
#[error(transparent)]
RayonThreadPool(#[from] ThreadPoolBuildError),
#[error(transparent)]
+PanicInThreadPool(#[from] PanicCatched),
+#[error(transparent)]
SerdeJson(#[from] serde_json::Error),
#[error(transparent)]
Serialization(#[from] SerializationError),
@@ -57,9 +60,9 @@ pub enum InternalError {
Store(#[from] MdbError),
#[error(transparent)]
Utf8(#[from] str::Utf8Error),
-#[error("An indexation process was explicitly aborted.")]
+#[error("An indexation process was explicitly aborted")]
AbortedIndexation,
-#[error("The matching words list contains at least one invalid member.")]
+#[error("The matching words list contains at least one invalid member")]
InvalidMatchingWords,
#[error(transparent)]
ArroyError(#[from] arroy::Error),

View File

@@ -21,6 +21,7 @@ pub mod prompt;
pub mod proximity;
pub mod score_details;
mod search;
+mod thread_pool_no_abort;
pub mod update;
pub mod vector;
@@ -42,6 +43,7 @@ pub use search::new::{
SearchLogger, VisualSearchLogger,
};
use serde_json::Value;
+pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {charabia as tokenizer, heed};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};


@@ -0,0 +1,69 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use rayon::{ThreadPool, ThreadPoolBuilder};
use thiserror::Error;
/// A rayon ThreadPool wrapper that can catch panics in the pool
/// and modifies the install function accordingly.
#[derive(Debug)]
pub struct ThreadPoolNoAbort {
thread_pool: ThreadPool,
/// Set to true if the thread pool catched a panic.
pool_catched_panic: Arc<AtomicBool>,
}
impl ThreadPoolNoAbort {
pub fn install<OP, R>(&self, op: OP) -> Result<R, PanicCatched>
where
OP: FnOnce() -> R + Send,
R: Send,
{
let output = self.thread_pool.install(op);
// While reseting the pool panic catcher we return an error if we catched one.
if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
Err(PanicCatched)
} else {
Ok(output)
}
}
pub fn current_num_threads(&self) -> usize {
self.thread_pool.current_num_threads()
}
}
#[derive(Error, Debug)]
#[error("A panic occured. Read the logs to find more information about it")]
pub struct PanicCatched;
#[derive(Default)]
pub struct ThreadPoolNoAbortBuilder(ThreadPoolBuilder);
impl ThreadPoolNoAbortBuilder {
pub fn new() -> ThreadPoolNoAbortBuilder {
ThreadPoolNoAbortBuilder::default()
}
pub fn thread_name<F>(mut self, closure: F) -> Self
where
F: FnMut(usize) -> String + 'static,
{
self.0 = self.0.thread_name(closure);
self
}
pub fn num_threads(mut self, num_threads: usize) -> ThreadPoolNoAbortBuilder {
self.0 = self.0.num_threads(num_threads);
self
}
pub fn build(mut self) -> Result<ThreadPoolNoAbort, rayon::ThreadPoolBuildError> {
let pool_catched_panic = Arc::new(AtomicBool::new(false));
self.0 = self.0.panic_handler({
let catched_panic = pool_catched_panic.clone();
move |_result| catched_panic.store(true, Ordering::SeqCst)
});
Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic })
}
}
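
For orientation, here is a minimal sketch of how this wrapper is meant to be used, relying only on the API shown above and on the crate-root re-exports added in lib.rs earlier; the crate path, pool name and thread count are illustrative assumptions, not taken from the diff.

use milli::{PanicCatched, ThreadPoolNoAbortBuilder}; // assumed crate-root re-exports, see lib.rs above

fn example() -> Result<(), Box<dyn std::error::Error>> {
    let pool = ThreadPoolNoAbortBuilder::new()
        .thread_name(|i| format!("example-thread-{i}")) // illustrative name
        .num_threads(2) // illustrative count
        .build()?;

    // A panic inside the pool no longer aborts the process: `install` reports it
    // as a `PanicCatched` error once the rayon call returns.
    let sum: Result<u32, PanicCatched> = pool.install(|| (1..=10).sum());
    assert_eq!(sum?, 55);
    Ok(())
}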


@@ -45,7 +45,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
-geo_fields_ids: Option<(FieldId, FieldId)>,
) -> Result<ExtractedFacetValues> {
puffin::profile_function!();
@@ -127,12 +126,18 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_exists.insert(document);
}
-let geo_support =
-geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+let del_geo_support = settings_diff
+.old
+.geo_fields_ids
+.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+let add_geo_support = settings_diff
+.new
+.geo_fields_ids
+.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
-del_value.map(|value| extract_facet_values(&value, geo_support));
+del_value.map(|value| extract_facet_values(&value, del_geo_support));
let add_filterable_values =
-add_value.map(|value| extract_facet_values(&value, geo_support));
+add_value.map(|value| extract_facet_values(&value, add_geo_support));
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {


@@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
+use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, InternalError, Result};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
@@ -18,7 +19,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
-(lat_fid, lng_fid): (FieldId, FieldId),
+settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
@@ -40,23 +41,12 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
serde_json::from_slice(document_id).unwrap()
};
-// first we get the two fields
-match (obkv.get(lat_fid), obkv.get(lng_fid)) {
-(Some(lat), Some(lng)) => {
-let deladd_lat_obkv = KvReaderDelAdd::new(lat);
-let deladd_lng_obkv = KvReaderDelAdd::new(lng);
-// then we extract the values
-let del_lat_lng = deladd_lat_obkv
-.get(DelAdd::Deletion)
-.zip(deladd_lng_obkv.get(DelAdd::Deletion))
-.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-.transpose()?;
-let add_lat_lng = deladd_lat_obkv
-.get(DelAdd::Addition)
-.zip(deladd_lng_obkv.get(DelAdd::Addition))
-.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-.transpose()?;
+// extract old version
+let del_lat_lng =
+extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
+// extract new version
+let add_lat_lng =
+extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
@@ -74,21 +64,31 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
writer.insert(docid_bytes, bytes)?;
}
}
-(None, Some(_)) => {
-return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
-}
-(Some(_), None) => {
-return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
-}
-(None, None) => (),
-}
-}
writer_into_reader(writer)
}
/// Extract the finite floats lat and lng from two bytes slices.
-fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
+fn extract_lat_lng(
+document: &obkv::KvReader<FieldId>,
+settings: &InnerIndexSettings,
+deladd: DelAdd,
+document_id: impl Fn() -> Value,
+) -> Result<Option<[f64; 2]>> {
+match settings.geo_fields_ids {
+Some((lat_fid, lng_fid)) => {
+let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+let (lat, lng) = match (lat, lng) {
+(Some(lat), Some(lng)) => (lat, lng),
+(Some(_), None) => {
+return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+}
+(None, Some(_)) => {
+return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+}
+(None, None) => return Ok(None),
+};
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
@@ -98,6 +98,8 @@ fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> R
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
-Ok([lat, lng])
+Ok(Some([lat, lng]))
+}
+None => Ok(None),
+}
}
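
As a reminder of the input this extractor works on: documents only reach the geo pipeline once `_geo` is filterable or sortable (that is what makes `geo_fields_ids` `Some` in the settings diff further down), and the coordinates come from the flattened `_geo.lat` and `_geo.lng` fields of documents shaped roughly like this illustrative one:

// Illustrative document only; the ids of the flattened `_geo.lat` and `_geo.lng`
// fields are the ones resolved into `InnerIndexSettings::geo_fields_ids`.
let doc = serde_json::json!({
    "id": 1,
    "_geo": { "lat": 48.8566, "lng": 2.3522 }
});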


@@ -19,7 +19,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::try_split_at;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::Embedder;
-use crate::{DocumentId, InternalError, Result, VectorOrArrayOfVectors};
+use crate::{DocumentId, InternalError, Result, ThreadPoolNoAbort, VectorOrArrayOfVectors};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
@@ -198,11 +198,16 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
if document_is_kept {
// Don't give up if the old prompt was failing
-let old_prompt = prompt
-.render(obkv, DelAdd::Deletion, old_fields_ids_map)
-.unwrap_or_default();
+let old_prompt = Some(prompt)
+// TODO: this filter works because we erase the vec database when a embedding setting changes.
+// When vector pipeline will be optimized, this should be removed.
+.filter(|_| !settings_diff.reindex_vectors())
+.map(|p| {
+p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
+});
let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
-if old_prompt != new_prompt {
+if old_prompt.as_ref() != Some(&new_prompt) {
+let old_prompt = old_prompt.unwrap_or_default();
tracing::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
);
@@ -224,6 +229,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
&mut manual_vectors_writer,
&mut key_buffer,
delta,
+settings_diff,
)?;
}
@@ -264,10 +270,15 @@ fn push_vectors_diff(
manual_vectors_writer: &mut Writer<BufWriter<File>>,
key_buffer: &mut Vec<u8>,
delta: VectorStateDelta,
+settings_diff: &InnerIndexSettingsDiff,
) -> Result<()> {
puffin::profile_function!();
let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
-if must_remove {
+if must_remove
+// TODO: the below condition works because we erase the vec database when a embedding setting changes.
+// When vector pipeline will be optimized, this should be removed.
+&& !settings_diff.reindex_vectors()
+{
key_buffer.truncate(TRUNCATE_SIZE);
remove_vectors_writer.insert(&key_buffer, [])?;
}
@@ -295,6 +306,9 @@ fn push_vectors_diff(
match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left(vector) => {
+// TODO: the below condition works because we erase the vec database when a embedding setting changes.
+// When vector pipeline will be optimized, this should be removed.
+if !settings_diff.reindex_vectors() {
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those vectors.
let mut obkv = KvWriterDelAdd::memory();
@@ -302,6 +316,7 @@ fn push_vectors_diff(
let bytes = obkv.into_inner()?;
manual_vectors_writer.insert(&key_buffer, bytes)?;
}
+}
EitherOrBoth::Right(vector) => {
// We insert only the Add part of the Obkv to inform
// that we only want to remove all those vectors.
@@ -347,7 +362,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
prompt_reader: grenad::Reader<R>,
indexer: GrenadParameters,
embedder: Arc<Embedder>,
-request_threads: &rayon::ThreadPool,
+request_threads: &ThreadPoolNoAbort,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism


@@ -31,7 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
use super::{helpers, TypedChunk};
use crate::update::settings::InnerIndexSettingsDiff;
-use crate::{FieldId, Result};
+use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
/// Extract data for each databases from obkv documents in parallel.
/// Send data in grenad file over provided Sender.
@@ -43,7 +43,6 @@ pub(crate) fn data_from_obkv_documents(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
-geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<()> {
@@ -72,7 +71,6 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
primary_key_id,
-geo_fields_ids,
settings_diff.clone(),
max_positions_per_attributes,
)
@@ -229,7 +227,7 @@ fn send_original_documents_data(
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
-let request_threads = rayon::ThreadPoolBuilder::new()
+let request_threads = ThreadPoolNoAbortBuilder::new()
.num_threads(crate::vector::REQUEST_PARALLELISM)
.thread_name(|index| format!("embedding-request-{index}"))
.build()?;
@@ -300,7 +298,6 @@ fn send_and_extract_flattened_documents_data(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
-geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<(
@@ -310,12 +307,13 @@ fn send_and_extract_flattened_documents_data(
let flattened_documents_chunk =
flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
-if let Some(geo_fields_ids) = geo_fields_ids {
+if settings_diff.run_geo_indexing() {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+let settings_diff = settings_diff.clone();
rayon::spawn(move || {
let result =
-extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
+extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
let _ = match result {
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
@@ -354,7 +352,6 @@ fn send_and_extract_flattened_documents_data(
flattened_documents_chunk.clone(),
indexer,
&settings_diff,
-geo_fields_ids,
)?;
// send fid_docid_facet_numbers_chunk to DB writer


@@ -33,6 +33,7 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
+use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
@@ -298,18 +299,18 @@ where
let backup_pool;
let pool = match self.indexer_config.thread_pool {
Some(ref pool) => pool,
-#[cfg(not(test))]
None => {
-// We initialize a bakcup pool with the default
+// We initialize a backup pool with the default
// settings if none have already been set.
-backup_pool = rayon::ThreadPoolBuilder::new().build()?;
-&backup_pool
-}
+#[allow(unused_mut)]
+let mut pool_builder = ThreadPoolNoAbortBuilder::new();
#[cfg(test)]
-None => {
-// We initialize a bakcup pool with the default
-// settings if none have already been set.
-backup_pool = rayon::ThreadPoolBuilder::new().num_threads(1).build()?;
+{
+pool_builder = pool_builder.num_threads(1);
+}
+backup_pool = pool_builder.build()?;
&backup_pool
}
};
@@ -323,28 +324,6 @@ where
// get the primary key field id
let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();
-// get the fid of the `_geo.lat` and `_geo.lng` fields.
-let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
-// self.index.fields_ids_map($a)? ==>> field_id_map
-let geo_fields_ids = match field_id_map.id("_geo") {
-Some(gfid) => {
-let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
-let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
-// if `_geo` is faceted then we get the `lat` and `lng`
-if is_sortable || is_filterable {
-let field_ids = field_id_map
-.insert("_geo.lat")
-.zip(field_id_map.insert("_geo.lng"))
-.ok_or(UserError::AttributeLimitReached)?;
-Some(field_ids)
-} else {
-None
-}
-}
-None => None,
-};
let pool_params = GrenadParameters {
chunk_compression_type: self.indexer_config.chunk_compression_type,
chunk_compression_level: self.indexer_config.chunk_compression_level,
@@ -411,7 +390,6 @@ where
pool_params,
lmdb_writer_sx.clone(),
primary_key_id,
-geo_fields_ids,
settings_diff.clone(),
max_positions_per_attributes,
)
@@ -533,7 +511,7 @@ where
}
Ok(())
-})?;
+}).map_err(InternalError::from)??;
// We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?;
@@ -562,7 +540,8 @@ where
writer.build(wtxn, &mut rng, None)?;
}
Result::Ok(())
-})?;
+})
+.map_err(InternalError::from)??;
}
self.execute_prefix_databases(
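
The `.map_err(InternalError::from)??` lines read a little oddly at first: `install` now returns the closure's own `Result` wrapped in a `Result` whose error is the caught pool panic. A rough sketch of the shape being unwrapped, with simplified types and assuming `Error: From<InternalError>` as elsewhere in milli:

// Sketch only: the outer Result says whether the thread pool caught a panic,
// the inner Result is whatever the indexing closure itself returned.
let outcome: Result<Result<(), Error>, PanicCatched> = pool.install(|| {
    // ... indexing work ...
    Ok(())
});
outcome.map_err(InternalError::from)??; // first `?` surfaces the panic, second `?` the closure's error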


@@ -1,5 +1,6 @@
use grenad::CompressionType;
-use rayon::ThreadPool;
+use crate::thread_pool_no_abort::ThreadPoolNoAbort;
#[derive(Debug)]
pub struct IndexerConfig {
@@ -9,7 +10,7 @@ pub struct IndexerConfig {
pub max_memory: Option<usize>,
pub chunk_compression_type: CompressionType,
pub chunk_compression_level: Option<u32>,
-pub thread_pool: Option<ThreadPool>,
+pub thread_pool: Option<ThreadPoolNoAbort>,
pub max_positions_per_attributes: Option<u32>,
pub skip_index_budget: bool,
}


@@ -1161,6 +1161,11 @@ impl InnerIndexSettingsDiff {
pub fn settings_update_only(&self) -> bool {
self.settings_update_only
}
+pub fn run_geo_indexing(&self) -> bool {
+self.old.geo_fields_ids != self.new.geo_fields_ids
+|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
+}
}
#[derive(Clone)]
@@ -1177,6 +1182,7 @@ pub(crate) struct InnerIndexSettings {
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub existing_fields: HashSet<String>,
+pub geo_fields_ids: Option<(FieldId, FieldId)>,
}
impl InnerIndexSettings {
@@ -1185,7 +1191,7 @@ impl InnerIndexSettings {
let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
let allowed_separators = index.allowed_separators(rtxn)?;
let dictionary = index.dictionary(rtxn)?;
-let fields_ids_map = index.fields_ids_map(rtxn)?;
+let mut fields_ids_map = index.fields_ids_map(rtxn)?;
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
let user_defined_searchable_fields =
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
@@ -1200,6 +1206,24 @@ impl InnerIndexSettings {
.into_iter()
.filter_map(|(field, count)| (count != 0).then_some(field))
.collect();
+// index.fields_ids_map($a)? ==>> fields_ids_map
+let geo_fields_ids = match fields_ids_map.id("_geo") {
+Some(gfid) => {
+let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
+let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
+// if `_geo` is faceted then we get the `lat` and `lng`
+if is_sortable || is_filterable {
+let field_ids = fields_ids_map
+.insert("_geo.lat")
+.zip(fields_ids_map.insert("_geo.lng"))
+.ok_or(UserError::AttributeLimitReached)?;
+Some(field_ids)
+} else {
+None
+}
+}
+None => None,
+};
Ok(Self {
stop_words,
@@ -1214,6 +1238,7 @@ impl InnerIndexSettings {
proximity_precision,
embedding_configs,
existing_fields,
+geo_fields_ids,
})
}
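
Taken together, the two additions above mean geo extraction runs whenever the resolved `_geo.lat`/`_geo.lng` field ids differ between the old and new settings, or whenever documents are being indexed while `_geo` is currently faceted; a settings update that leaves geo untouched skips it. A self-contained sketch of that condition, with made-up field ids rather than the real types:

// Mirrors the body of `run_geo_indexing` above; illustrative only.
fn run_geo_indexing(old: Option<(u16, u16)>, new: Option<(u16, u16)>, settings_update_only: bool) -> bool {
    old != new || (!settings_update_only && new.is_some())
}

fn main() {
    assert!(run_geo_indexing(None, Some((1, 2)), true));          // `_geo` just became filterable/sortable
    assert!(!run_geo_indexing(Some((1, 2)), Some((1, 2)), true)); // unrelated settings change: skip
    assert!(run_geo_indexing(Some((1, 2)), Some((1, 2)), false)); // new documents while `_geo` is faceted
}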


@@ -3,6 +3,7 @@ use std::path::PathBuf;
use hf_hub::api::sync::ApiError;
use crate::error::FaultSource;
+use crate::PanicCatched;
#[derive(Debug, thiserror::Error)]
#[error("Error while generating embeddings: {inner}")]
@@ -80,6 +81,8 @@ pub enum EmbedErrorKind {
OpenAiUnexpectedDimension(usize, usize),
#[error("no embedding was produced")]
MissingEmbedding,
+#[error(transparent)]
+PanicInThreadPool(#[from] PanicCatched),
}
impl EmbedError { impl EmbedError {


@@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};
use self::error::{EmbedError, NewEmbedderError};
use crate::prompt::{Prompt, PromptData};
+use crate::ThreadPoolNoAbort;
pub mod error;
pub mod hf;
@@ -254,7 +255,7 @@ impl Embedder {
pub fn embed_chunks(
&self,
text_chunks: Vec<Vec<String>>,
-threads: &rayon::ThreadPool,
+threads: &ThreadPoolNoAbort,
) -> std::result::Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
match self {
Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),


@@ -3,6 +3,8 @@ use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
use super::{DistributionShift, Embeddings};
+use crate::error::FaultSource;
+use crate::ThreadPoolNoAbort;
#[derive(Debug)]
pub struct Embedder {
@@ -71,11 +73,16 @@ impl Embedder {
pub fn embed_chunks(
&self,
text_chunks: Vec<Vec<String>>,
-threads: &rayon::ThreadPool,
+threads: &ThreadPoolNoAbort,
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-threads.install(move || {
+threads
+.install(move || {
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
})
+.map_err(|error| EmbedError {
+kind: EmbedErrorKind::PanicInThreadPool(error),
+fault: FaultSource::Bug,
+})?
}
pub fn chunk_count_hint(&self) -> usize {


@@ -4,7 +4,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
use super::error::{EmbedError, NewEmbedderError};
use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
use super::{DistributionShift, Embeddings};
+use crate::error::FaultSource;
use crate::vector::error::EmbedErrorKind;
+use crate::ThreadPoolNoAbort;
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
@@ -241,11 +243,16 @@ impl Embedder {
pub fn embed_chunks(
&self,
text_chunks: Vec<Vec<String>>,
-threads: &rayon::ThreadPool,
+threads: &ThreadPoolNoAbort,
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-threads.install(move || {
+threads
+.install(move || {
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
})
+.map_err(|error| EmbedError {
+kind: EmbedErrorKind::PanicInThreadPool(error),
+fault: FaultSource::Bug,
+})?
}
pub fn chunk_count_hint(&self) -> usize {


@@ -2,9 +2,12 @@ use deserr::Deserr;
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
use serde::{Deserialize, Serialize};
+use super::error::EmbedErrorKind;
use super::{
DistributionShift, EmbedError, Embedding, Embeddings, NewEmbedderError, REQUEST_PARALLELISM,
};
+use crate::error::FaultSource;
+use crate::ThreadPoolNoAbort;
// retrying in case of failure
@@ -158,11 +161,16 @@ impl Embedder {
pub fn embed_chunks(
&self,
text_chunks: Vec<Vec<String>>,
-threads: &rayon::ThreadPool,
+threads: &ThreadPoolNoAbort,
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-threads.install(move || {
+threads
+.install(move || {
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
})
+.map_err(|error| EmbedError {
+kind: EmbedErrorKind::PanicInThreadPool(error),
+fault: FaultSource::Bug,
+})?
}
pub fn chunk_count_hint(&self) -> usize {


@@ -301,10 +301,14 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
fn from(value: EmbeddingConfig) -> Self {
let EmbeddingConfig { embedder_options, prompt } = value;
match embedder_options {
-super::EmbedderOptions::HuggingFace(options) => Self {
+super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
+model,
+revision,
+distribution,
+}) => Self {
source: Setting::Set(EmbedderSource::HuggingFace),
-model: Setting::Set(options.model),
-revision: options.revision.map(Setting::Set).unwrap_or_default(),
+model: Setting::Set(model),
+revision: revision.map(Setting::Set).unwrap_or_default(),
api_key: Setting::NotSet,
dimensions: Setting::NotSet,
document_template: Setting::Set(prompt.template),
@@ -314,14 +318,19 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet,
input_type: Setting::NotSet,
-distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+distribution: distribution.map(Setting::Set).unwrap_or_default(),
},
-super::EmbedderOptions::OpenAi(options) => Self {
+super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
+api_key,
+embedding_model,
+dimensions,
+distribution,
+}) => Self {
source: Setting::Set(EmbedderSource::OpenAi),
-model: Setting::Set(options.embedding_model.name().to_owned()),
+model: Setting::Set(embedding_model.name().to_owned()),
revision: Setting::NotSet,
-api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
-dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
+api_key: api_key.map(Setting::Set).unwrap_or_default(),
+dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
document_template: Setting::Set(prompt.template),
url: Setting::NotSet,
query: Setting::NotSet,
@@ -329,29 +338,37 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet,
input_type: Setting::NotSet,
-distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+distribution: distribution.map(Setting::Set).unwrap_or_default(),
},
-super::EmbedderOptions::Ollama(options) => Self {
+super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
+embedding_model,
+url,
+api_key,
+distribution,
+}) => Self {
source: Setting::Set(EmbedderSource::Ollama),
-model: Setting::Set(options.embedding_model.to_owned()),
+model: Setting::Set(embedding_model),
revision: Setting::NotSet,
-api_key: Setting::NotSet,
+api_key: api_key.map(Setting::Set).unwrap_or_default(),
dimensions: Setting::NotSet,
document_template: Setting::Set(prompt.template),
-url: Setting::NotSet,
+url: url.map(Setting::Set).unwrap_or_default(),
query: Setting::NotSet,
input_field: Setting::NotSet,
path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet,
input_type: Setting::NotSet,
-distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+distribution: distribution.map(Setting::Set).unwrap_or_default(),
},
-super::EmbedderOptions::UserProvided(options) => Self {
+super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
+dimensions,
+distribution,
+}) => Self {
source: Setting::Set(EmbedderSource::UserProvided),
model: Setting::NotSet,
revision: Setting::NotSet,
api_key: Setting::NotSet,
-dimensions: Setting::Set(options.dimensions),
+dimensions: Setting::Set(dimensions),
document_template: Setting::NotSet,
url: Setting::NotSet,
query: Setting::NotSet,
@@ -359,7 +376,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet,
input_type: Setting::NotSet,
-distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+distribution: distribution.map(Setting::Set).unwrap_or_default(),
},
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
api_key,
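
The reason for matching on the full option structs instead of an `options` binding is exhaustiveness: once every field is destructured, adding a field to one of these structs makes this `From` impl fail to compile until the new field is mapped, which is how the Ollama `url` could stay stuck at `Setting::NotSet` before the change above. A toy illustration of the pattern, using made-up types rather than the real ones:

struct EmbedderOptions { url: Option<String>, api_key: Option<String> }

fn to_settings(options: EmbedderOptions) -> (Option<String>, Option<String>) {
    // Destructure every field: if `EmbedderOptions` later gains a `dimensions`
    // field, this pattern stops compiling until the new field is handled too.
    let EmbedderOptions { url, api_key } = options;
    (url, api_key)
}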


@@ -217,9 +217,7 @@ fn add_memory_samples(
memory_counters: &mut Option<MemoryCounterHandles>,
last_memory: &mut MemoryStats,
) -> Option<MemoryStats> {
-let Some(stats) = memory else {
-return None;
-};
+let stats = memory?;
let memory_counters =
memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main));


@@ -0,0 +1,68 @@
{
"name": "movies-subset-hf-embeddings",
"run_count": 5,
"extra_cli_args": [
"--max-indexing-threads=4"
],
"assets": {
"movies-100.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
}
},
"commands": [
{
"route": "experimental-features",
"method": "PATCH",
"body": {
"inline": {
"vectorStore": true
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
]
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"embedders": {
"default": {
"source": "huggingFace"
}
}
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies-100.json"
},
"synchronous": "WaitForTask"
}
]
}


@@ -0,0 +1,72 @@
{
"name": "settings-add-embeddings-hf",
"run_count": 5,
"extra_cli_args": [
"--max-indexing-threads=4"
],
"assets": {
"movies-100.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
}
},
"commands": [
{
"route": "experimental-features",
"method": "PATCH",
"body": {
"inline": {
"vectorStore": true
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies-100.json"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"embedders": {
"default": {
"source": "huggingFace",
"model": null,
"revision": null,
"documentTemplate": null,
"distribution": null
}
}
}
},
"synchronous": "WaitForTask"
}
]
}
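
Read together, these two new workloads bracket the embedder work from both sides: "movies-subset-hf-embeddings" configures the default huggingFace embedder before pushing the 100-movie subset, so it measures document indexing with embedding generation, while "settings-add-embeddings-hf" indexes the documents first and only then enables the embedder, so it measures the re-embedding triggered by a settings change.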