Enable swedish recomposition

Merge #4798
4798: Update version for the next release (v1.8.4) in Cargo.toml r=dureuill a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2025-12-08 05:35:42 +00:00 · 2024-07-17 17:03:25 +02:00 · 2024-07-15 14:09:11 +00:00 · 2024-07-15 13:26:30 +00:00 · 2024-07-15 10:24:00 +00:00 · 2024-07-15 12:04:14 +02:00
47 changed files with 572 additions and 233 deletions
--- a/.github/workflows/flaky-tests.yml
+++ b/.github/workflows/flaky-tests.yml
@@ -1,4 +1,6 @@
 name: Look for flaky tests
+env:
+  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 on:
  workflow_dispatch:
  schedule:
--- a/.github/workflows/fuzzer-indexing.yml
+++ b/.github/workflows/fuzzer-indexing.yml
@@ -1,5 +1,6 @@
 name: Run the indexing fuzzer
-
+env:
+  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 on:
  push:
    branches:
--- a/.github/workflows/publish-apt-brew-pkg.yml
+++ b/.github/workflows/publish-apt-brew-pkg.yml
@@ -15,6 +15,8 @@ jobs:

  debian:
    name: Publish debian packagge
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    runs-on: ubuntu-latest
    needs: check-version
    container:
--- a/.github/workflows/publish-binaries.yml
+++ b/.github/workflows/publish-binaries.yml
@@ -35,6 +35,8 @@ jobs:
  publish-linux:
    name: Publish binary for Linux
    runs-on: ubuntu-latest
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    needs: check-version
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27
@@ -132,6 +134,8 @@ jobs:
    name: Publish binary for aarch64
    runs-on: ubuntu-latest
    needs: check-version
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27
      image: ubuntu:18.04
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -21,6 +21,8 @@ jobs:
  test-linux:
    name: Tests on ubuntu-18.04
    runs-on: ubuntu-latest
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
      image: ubuntu:18.04
@@ -77,6 +79,8 @@ jobs:
  test-all-features:
    name: Tests almost all features
    runs-on: ubuntu-latest
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
      image: ubuntu:18.04
@@ -100,6 +104,8 @@ jobs:

  test-disabled-tokenization:
    name: Test disabled tokenization
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    runs-on: ubuntu-latest
    container:
      image: ubuntu:18.04
@@ -127,6 +133,8 @@ jobs:
  # We run tests in debug also, to make sure that the debug_assertions are hit
  test-debug:
    name: Run tests in debug
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -494,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"

 [[package]]
 name = "benchmarks"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "anyhow",
 "bytes",
@@ -639,7 +639,7 @@ dependencies = [

 [[package]]
 name = "build-info"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "anyhow",
 "time",
@@ -889,9 +889,9 @@ dependencies = [

 [[package]]
 name = "charabia"
-version = "0.8.9"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6a65052f308636e5d5e1777f0dbc07919f5fbac24b6c8ad3e140472e5520de9"
+checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
 dependencies = [
 "aho-corasick",
 "cow-utils",
@@ -1539,7 +1539,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "anyhow",
 "big_s",
@@ -1787,7 +1787,7 @@ dependencies = [

 [[package]]
 name = "file-store"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "faux",
 "tempfile",
@@ -1810,7 +1810,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "insta",
 "nom",
@@ -1830,7 +1830,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "criterion",
 "serde_json",
@@ -1948,7 +1948,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "arbitrary",
 "clap",
@@ -2442,7 +2442,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

 [[package]]
 name = "index-scheduler"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "anyhow",
 "big_s",
@@ -2638,7 +2638,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "criterion",
 "serde_json",
@@ -3275,7 +3275,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "insta",
 "md5",
@@ -3284,7 +3284,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "actix-cors",
 "actix-http",
@@ -3377,7 +3377,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "base64 0.21.7",
 "enum-iterator",
@@ -3396,7 +3396,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "actix-web",
 "anyhow",
@@ -3426,7 +3426,7 @@ dependencies = [

 [[package]]
 name = "meilitool"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "anyhow",
 "clap",
@@ -3465,7 +3465,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "arroy",
 "big_s",
@@ -3906,7 +3906,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "big_s",
 "serde_json",
@@ -6074,7 +6074,7 @@ dependencies = [

 [[package]]
 name = "xtask"
-version = "1.8.0"
+version = "1.8.4"
 dependencies = [
 "anyhow",
 "build-info",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,7 +22,7 @@ members = [
 ]

 [workspace.package]
-version = "1.8.0"
+version = "1.8.4"
 authors = [
    "Quentin de Quelen <quentin@dequelen.me>",
    "Clément Renault <clement@meilisearch.com>",
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,7 +17,7 @@ RUN     set -eux; \
        if [ "$apkArch" = "aarch64" ]; then \
            export JEMALLOC_SYS_WITH_LG_PAGE=16; \
        fi && \
-        cargo build --release -p meilisearch -p meilitool
+        cargo build --release -p meilisearch -p meilitool --features "swedish-recomposition"

 # Run
 FROM    alpine:3.16
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@@ -256,8 +256,8 @@ pub(crate) mod test {

    pub fn create_test_settings() -> Settings<Checked> {
        let settings = Settings {
-            displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
-            searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
+            displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
+            searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
            filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
            sortable_attributes: Setting::Set(btreeset! { S("age") }),
            ranking_rules: Setting::NotSet,
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@@ -315,8 +315,8 @@ impl From<v5::ResponseError> for v6::ResponseError {
 impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
    fn from(settings: v5::Settings<T>) -> Self {
        v6::Settings {
-            displayed_attributes: settings.displayed_attributes.into(),
-            searchable_attributes: settings.searchable_attributes.into(),
+            displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
+            searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
            filterable_attributes: settings.filterable_attributes.into(),
            sortable_attributes: settings.sortable_attributes.into(),
            ranking_rules: {
--- a/dump/src/reader/v3/settings.rs
+++ b/dump/src/reader/v3/settings.rs
@@ -152,6 +152,7 @@ impl Settings<Unchecked> {
 }

 #[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)] // otherwise rustc complains that the fields go unused
 #[cfg_attr(test, derive(serde::Serialize))]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
--- a/dump/src/reader/v4/settings.rs
+++ b/dump/src/reader/v4/settings.rs
@@ -182,6 +182,7 @@ impl Settings<Unchecked> {
    }
 }

+#[allow(dead_code)] // otherwise rustc complains that the fields go unused
 #[derive(Debug, Clone, Deserialize)]
 #[cfg_attr(test, derive(serde::Serialize))]
 #[serde(deny_unknown_fields)]
--- a/dump/src/reader/v5/tasks.rs
+++ b/dump/src/reader/v5/tasks.rs
@@ -200,6 +200,7 @@ impl std::ops::Deref for IndexUid {
    }
 }

+#[allow(dead_code)] // otherwise rustc complains that the fields go unused
 #[derive(Debug)]
 #[cfg_attr(test, derive(serde::Serialize))]
 #[cfg_attr(test, serde(rename_all = "camelCase"))]
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -914,8 +914,34 @@ impl IndexScheduler {
                        if self.must_stop_processing.get() {
                            return Err(Error::AbortedTask);
                        }
-                        let (_id, doc) = ret?;
-                        let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
+                        let (id, doc) = ret?;
+                        let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
+
+                        'inject_vectors: {
+                            let embeddings = index.embeddings(&rtxn, id)?;
+
+                            if embeddings.is_empty() {
+                                break 'inject_vectors;
+                            }
+
+                            let vectors = document
+                                .entry("_vectors".to_owned())
+                                .or_insert(serde_json::Value::Object(Default::default()));
+
+                            let serde_json::Value::Object(vectors) = vectors else {
+                                break 'inject_vectors;
+                            };
+
+                            for (embedder_name, embeddings) in embeddings {
+                                vectors.entry(embedder_name).or_insert_with(|| {
+                                    serde_json::json!({
+                                        "embeddings": embeddings,
+                                        "regenerate": true
+                                    })
+                                });
+                            }
+                        }
+
                        index_dumper.push_document(&document)?;
                    }

--- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
--- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap
@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@@ -57,3 +57,5 @@ greek = ["milli/greek"]
 khmer = ["milli/khmer"]
 # allow vietnamese specialized tokenization
 vietnamese = ["milli/vietnamese"]
+# force swedish character recomposition
+swedish-recomposition = ["milli/swedish-recomposition"]
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -3,7 +3,7 @@ use std::convert::Infallible;
 use std::fmt;
 use std::marker::PhantomData;
 use std::num::NonZeroUsize;
-use std::ops::ControlFlow;
+use std::ops::{ControlFlow, Deref};
 use std::str::FromStr;

 use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
@@ -143,21 +143,13 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
 )]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct Settings<T> {
-    #[serde(
-        default,
-        serialize_with = "serialize_with_wildcard",
-        skip_serializing_if = "Setting::is_not_set"
-    )]
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsDisplayedAttributes>)]
-    pub displayed_attributes: Setting<Vec<String>>,
+    pub displayed_attributes: WildcardSetting,

-    #[serde(
-        default,
-        serialize_with = "serialize_with_wildcard",
-        skip_serializing_if = "Setting::is_not_set"
-    )]
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsSearchableAttributes>)]
-    pub searchable_attributes: Setting<Vec<String>>,
+    pub searchable_attributes: WildcardSetting,

    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
@@ -251,8 +243,8 @@ impl<T> Settings<T> {
 impl Settings<Checked> {
    pub fn cleared() -> Settings<Checked> {
        Settings {
-            displayed_attributes: Setting::Reset,
-            searchable_attributes: Setting::Reset,
+            displayed_attributes: Setting::Reset.into(),
+            searchable_attributes: Setting::Reset.into(),
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
@@ -319,7 +311,7 @@ impl Settings<Checked> {

 impl Settings<Unchecked> {
    pub fn check(self) -> Settings<Checked> {
-        let displayed_attributes = match self.displayed_attributes {
+        let displayed_attributes = match self.displayed_attributes.0 {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
@@ -330,7 +322,7 @@ impl Settings<Unchecked> {
            otherwise => otherwise,
        };

-        let searchable_attributes = match self.searchable_attributes {
+        let searchable_attributes = match self.searchable_attributes.0 {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
@@ -342,8 +334,8 @@ impl Settings<Unchecked> {
        };

        Settings {
-            displayed_attributes,
-            searchable_attributes,
+            displayed_attributes: displayed_attributes.into(),
+            searchable_attributes: searchable_attributes.into(),
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
@@ -412,13 +404,13 @@ pub fn apply_settings_to_builder(
        _kind,
    } = settings;

-    match searchable_attributes {
+    match searchable_attributes.deref() {
        Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
        Setting::Reset => builder.reset_searchable_fields(),
        Setting::NotSet => (),
    }

-    match displayed_attributes {
+    match displayed_attributes.deref() {
        Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
        Setting::Reset => builder.reset_displayed_fields(),
        Setting::NotSet => (),
@@ -690,11 +682,13 @@ pub fn settings(
        displayed_attributes: match displayed_attributes {
            Some(attrs) => Setting::Set(attrs),
            None => Setting::Reset,
-        },
+        }
+        .into(),
        searchable_attributes: match searchable_attributes {
            Some(attrs) => Setting::Set(attrs),
            None => Setting::Reset,
-        },
+        }
+        .into(),
        filterable_attributes: Setting::Set(filterable_attributes),
        sortable_attributes: Setting::Set(sortable_attributes),
        ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
@@ -848,6 +842,41 @@ impl From<ProximityPrecisionView> for ProximityPrecision {
    }
 }

+#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)]
+pub struct WildcardSetting(Setting<Vec<String>>);
+
+impl From<Setting<Vec<String>>> for WildcardSetting {
+    fn from(setting: Setting<Vec<String>>) -> Self {
+        Self(setting)
+    }
+}
+
+impl Serialize for WildcardSetting {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serialize_with_wildcard(&self.0, serializer)
+    }
+}
+
+impl<E: deserr::DeserializeError> Deserr<E> for WildcardSetting {
+    fn deserialize_from_value<V: deserr::IntoValue>(
+        value: deserr::Value<V>,
+        location: ValuePointerRef<'_>,
+    ) -> Result<Self, E> {
+        Ok(Self(Setting::deserialize_from_value(value, location)?))
+    }
+}
+
+impl std::ops::Deref for WildcardSetting {
+    type Target = Setting<Vec<String>>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
 #[cfg(test)]
 pub(crate) mod test {
    use super::*;
@@ -856,8 +885,8 @@ pub(crate) mod test {
    fn test_setting_check() {
        // test no changes
        let settings = Settings {
-            displayed_attributes: Setting::Set(vec![String::from("hello")]),
-            searchable_attributes: Setting::Set(vec![String::from("hello")]),
+            displayed_attributes: Setting::Set(vec![String::from("hello")]).into(),
+            searchable_attributes: Setting::Set(vec![String::from("hello")]).into(),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
@@ -883,8 +912,9 @@ pub(crate) mod test {
        // test wildcard
        // test no changes
        let settings = Settings {
-            displayed_attributes: Setting::Set(vec![String::from("*")]),
-            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
+            displayed_attributes: Setting::Set(vec![String::from("*")]).into(),
+            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")])
+                .into(),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
@@ -904,7 +934,7 @@ pub(crate) mod test {
        };

        let checked = settings.check();
-        assert_eq!(checked.displayed_attributes, Setting::Reset);
-        assert_eq!(checked.searchable_attributes, Setting::Reset);
+        assert_eq!(checked.displayed_attributes, Setting::Reset.into());
+        assert_eq!(checked.searchable_attributes, Setting::Reset.into());
    }
 }
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -156,6 +156,7 @@ thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]
 vietnamese = ["meilisearch-types/vietnamese"]
+swedish-recomposition = ["meilisearch-types/swedish-recomposition"]

 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -419,7 +419,41 @@ fn import_dump(
        let file = tempfile::tempfile()?;
        let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
        for document in index_reader.documents()? {
-            builder.append_json_object(&document?)?;
+            let mut document = document?;
+
+            'remove_injected_vectors: {
+                let Some(vectors) = document.get_mut("_vectors") else {
+                    break 'remove_injected_vectors;
+                };
+
+                let Some(vectors) = vectors.as_object_mut() else { break 'remove_injected_vectors };
+
+                vectors.retain(|_embedder, embedding_object| {
+                    // don't touch values that aren't objects
+                    let Some(embedding_object) = embedding_object.as_object() else {
+                        return true;
+                    };
+
+                    let mut has_regenerate_true = false;
+                    for (field, value) in embedding_object {
+                        match (field.as_str(), value) {
+                            // detected regenerate : true
+                            // if we don't have any superfluous field, we'll remove the entire entry
+                            ("regenerate", serde_json::Value::Bool(true)) => {
+                                has_regenerate_true = true;
+                            }
+                            // ignore embeddings
+                            ("embeddings", _) => continue,
+                            // any other field: immediately retain the entry
+                            _ => return true,
+                        }
+                    }
+                    // retain the entry unless it has regenerate: true
+                    !has_regenerate_true
+                })
+            }
+
+            builder.append_json_object(&document)?;
        }

        // This flush the content of the batch builder.
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -13,6 +13,7 @@ use byte_unit::{Byte, ByteError};
 use clap::Parser;
 use meilisearch_types::features::InstanceTogglableFeatures;
 use meilisearch_types::milli::update::IndexerConfig;
+use meilisearch_types::milli::ThreadPoolNoAbortBuilder;
 use rustls::server::{
    AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
 };
@@ -666,7 +667,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
    type Error = anyhow::Error;

    fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
-        let thread_pool = rayon::ThreadPoolBuilder::new()
+        let thread_pool = ThreadPoolNoAbortBuilder::new()
            .thread_name(|index| format!("indexing-thread:{index}"))
            .num_threads(*other.max_indexing_threads)
            .build()?;
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -137,10 +137,8 @@ macro_rules! make_setting_route {
                let settings = settings(&index, &rtxn, meilisearch_types::settings::SecretPolicy::HideSecrets)?;

                debug!(returns = ?settings, "Update settings");
-                let mut json = serde_json::json!(&settings);
-                let val = json[$camelcase_attr].take();

-                Ok(HttpResponse::Ok().json(val))
+                Ok(HttpResponse::Ok().json(settings.$attr))
            }

            pub fn resources() -> Resource {
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -367,12 +367,6 @@ async fn get_version(
    })
 }

-#[derive(Serialize)]
-struct KeysResponse {
-    private: Option<String>,
-    public: Option<String>,
-}
-
 pub async fn get_health(
    index_scheduler: Data<IndexScheduler>,
    auth_controller: Data<AuthController>,
--- a/meilisearch/src/search_queue.rs
+++ b/meilisearch/src/search_queue.rs
@@ -40,8 +40,9 @@ pub struct Permit {

 impl Drop for Permit {
    fn drop(&mut self) {
+        let sender = self.sender.clone();
        // if the channel is closed then the whole instance is down
-        let _ = futures::executor::block_on(self.sender.send(()));
+        std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
    }
 }

--- a/meilisearch/tests/search/geo.rs
+++ b/meilisearch/tests/search/geo.rs
@@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() {
        )
        .await;
 }
+
+#[actix_rt::test]
+async fn bug_4640() {
+    // Regression test for https://github.com/meilisearch/meilisearch/issues/4640
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.update_settings_filterable_attributes(json!(["_geo"])).await;
+    let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
+    index.wait_task(ret.uid()).await; // only this last settings task is waited on explicitly
+
+    // Sort by distance to the coordinates of document 2, so it is expected first
+    index
+        .search(
+            json!({
+                "sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
+                {
+                  "hits": [
+                    {
+                      "id": 2,
+                      "name": "La Bella Italia",
+                      "address": "456 Elm Street, Townsville",
+                      "type": "Italian",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": "45.4777599",
+                        "lng": "9.1967508"
+                      }
+                    },
+                    {
+                      "id": 1,
+                      "name": "Taco Truck",
+                      "address": "444 Salsa Street, Burritoville",
+                      "type": "Mexican",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": 34.0522,
+                        "lng": -118.2437
+                      },
+                      "_geoDistance": 9714063
+                    },
+                    {
+                      "id": 3,
+                      "name": "Crêpe Truck",
+                      "address": "2 Billig Avenue, Rouenville",
+                      "type": "French",
+                      "rating": 10
+                    }
+                  ],
+                  "query": "",
+                  "processingTimeMs": "[time]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 3
+                }
+                "###);
+            },
+        )
+        .await;
+}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.9.0"
 bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
 byteorder = "1.5.0"
-charabia = { version = "0.8.9", default-features = false }
+charabia = { version = "0.8.10", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.11"
 deserr = "0.6.1"
@@ -74,10 +74,10 @@ csv = "1.3.0"
 candle-core = { version = "0.4.1" }
 candle-transformers = { version = "0.4.1" }
 candle-nn = { version = "0.4.1" }
-tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default_features = false, features = [
+tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
    "onig",
 ] }
-hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
+hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
    "online",
 ] }
 tiktoken-rs = "0.5.8"
@@ -136,7 +136,11 @@ greek = ["charabia/greek"]
 # allow khmer specialized tokenization
 khmer = ["charabia/khmer"]

+# allow vietnamese specialized tokenization
 vietnamese = ["charabia/vietnamese"]

+# force swedish character recomposition
+swedish-recomposition = ["charabia/swedish-recomposition"]
+
 # allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
 cuda = ["candle-core/cuda"]
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -9,6 +9,7 @@ use serde_json::Value;
 use thiserror::Error;

 use crate::documents::{self, DocumentsBatchCursorError};
+use crate::thread_pool_no_abort::PanicCatched;
 use crate::{CriterionError, DocumentId, FieldId, Object, SortError};

 pub fn is_reserved_keyword(keyword: &str) -> bool {
@@ -39,17 +40,19 @@ pub enum InternalError {
    Fst(#[from] fst::Error),
    #[error(transparent)]
    DocumentsError(#[from] documents::Error),
-    #[error("Invalid compression type have been specified to grenad.")]
+    #[error("Invalid compression type have been specified to grenad")]
    GrenadInvalidCompressionType,
-    #[error("Invalid grenad file with an invalid version format.")]
+    #[error("Invalid grenad file with an invalid version format")]
    GrenadInvalidFormatVersion,
-    #[error("Invalid merge while processing {process}.")]
+    #[error("Invalid merge while processing {process}")]
    IndexingMergingKeys { process: &'static str },
    #[error("{}", HeedError::InvalidDatabaseTyping)]
    InvalidDatabaseTyping,
    #[error(transparent)]
    RayonThreadPool(#[from] ThreadPoolBuildError),
    #[error(transparent)]
+    PanicInThreadPool(#[from] PanicCatched),
+    #[error(transparent)]
    SerdeJson(#[from] serde_json::Error),
    #[error(transparent)]
    Serialization(#[from] SerializationError),
@@ -57,9 +60,9 @@ pub enum InternalError {
    Store(#[from] MdbError),
    #[error(transparent)]
    Utf8(#[from] str::Utf8Error),
-    #[error("An indexation process was explicitly aborted.")]
+    #[error("An indexation process was explicitly aborted")]
    AbortedIndexation,
-    #[error("The matching words list contains at least one invalid member.")]
+    #[error("The matching words list contains at least one invalid member")]
    InvalidMatchingWords,
    #[error(transparent)]
    ArroyError(#[from] arroy::Error),
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -22,7 +22,7 @@ use crate::heed_codec::{
 };
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
-use crate::vector::EmbeddingConfig;
+use crate::vector::{Embedding, EmbeddingConfig};
 use crate::{
    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
    FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
@@ -1516,6 +1516,42 @@ impl Index {
            .unwrap_or_default())
    }

+    pub fn embeddings(
+        &self,
+        rtxn: &RoTxn<'_>,
+        docid: DocumentId,
+    ) -> Result<BTreeMap<String, Vec<Embedding>>> {
+        let mut res = BTreeMap::new(); // embedder name -> embeddings found for `docid`
+        for row in self.embedder_category_id.iter(rtxn)? {
+            let (embedder_name, embedder_id) = row?;
+            let embedder_id = (embedder_id as u16) << 8; // embedder id goes in the high byte; the low byte is filled with `i` below
+            let mut embeddings = Vec::new();
+            'vectors: for i in 0..=u8::MAX {
+                let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
+                    .map(Some)
+                    .or_else(|e| match e {
+                        arroy::Error::MissingMetadata => Ok(None), // missing metadata is not treated as an error
+                        e => Err(e),
+                    })
+                    .transpose(); // Option<Result<_>>: `None` means the metadata was missing
+
+                let Some(reader) = reader else {
+                    break 'vectors; // first slot without metadata ends the scan for this embedder
+                };
+
+                let embedding = reader?.item_vector(rtxn, docid)?; // any other arroy error is propagated here
+                if let Some(embedding) = embedding {
+                    embeddings.push(embedding)
+                } else {
+                    break 'vectors; // no vector for `docid` in this slot: stop scanning
+                }
+            }
+
+            res.insert(embedder_name.to_owned(), embeddings); // inserted even when `embeddings` is empty
+        }
+        Ok(res)
+    }
+
    pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
        self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
    }
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@@ -21,6 +21,7 @@ pub mod prompt;
 pub mod proximity;
 pub mod score_details;
 mod search;
+mod thread_pool_no_abort;
 pub mod update;
 pub mod vector;

@@ -42,6 +43,7 @@ pub use search::new::{
    SearchLogger, VisualSearchLogger,
 };
 use serde_json::Value;
+pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
 pub use {charabia as tokenizer, heed};

 pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
--- a/milli/src/search/new/logger/visual.rs
+++ b/milli/src/search/new/logger/visual.rs
@@ -22,7 +22,7 @@ pub enum SearchEvents {
    RankingRuleStartIteration { ranking_rule_idx: usize, universe_len: u64 },
    RankingRuleNextBucket { ranking_rule_idx: usize, universe_len: u64, bucket_len: u64 },
    RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
-    RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
+    RankingRuleEndIteration { ranking_rule_idx: usize },
    ExtendResults { new: Vec<u32> },
    ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
    ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
@@ -123,12 +123,9 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
        &mut self,
        ranking_rule_idx: usize,
        _ranking_rule: &dyn RankingRule<QueryGraph>,
-        universe: &RoaringBitmap,
+        _universe: &RoaringBitmap,
    ) {
-        self.events.push(SearchEvents::RankingRuleEndIteration {
-            ranking_rule_idx,
-            universe_len: universe.len(),
-        });
+        self.events.push(SearchEvents::RankingRuleEndIteration { ranking_rule_idx });
        self.location.pop();
    }
    fn add_to_results(&mut self, docids: &[u32]) {
@@ -326,7 +323,7 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
                assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
                self.write_skip_bucket(bucket_len)?;
            }
-            SearchEvents::RankingRuleEndIteration { ranking_rule_idx, universe_len: _ } => {
+            SearchEvents::RankingRuleEndIteration { ranking_rule_idx } => {
                assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
                self.write_end_iteration()?;
            }
--- a/milli/src/thread_pool_no_abort.rs
+++ b/milli/src/thread_pool_no_abort.rs
@@ -0,0 +1,69 @@
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+
+use rayon::{ThreadPool, ThreadPoolBuilder};
+use thiserror::Error;
+
+/// A rayon `ThreadPool` wrapper that records whether the pool's panic
+/// handler fired and makes `install` return an error when it did.
+#[derive(Debug)]
+pub struct ThreadPoolNoAbort {
+    thread_pool: ThreadPool,
+    /// Set to true when the pool's panic handler has been invoked.
+    pool_catched_panic: Arc<AtomicBool>,
+}
+
+impl ThreadPoolNoAbort {
+    pub fn install<OP, R>(&self, op: OP) -> Result<R, PanicCatched>
+    where
+        OP: FnOnce() -> R + Send,
+        R: Send,
+    {
+        let output = self.thread_pool.install(op); // runs `op` inside the wrapped rayon pool
+        // Read and reset the panic flag in one atomic swap; if it was set, discard the output and return an error.
+        if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
+            Err(PanicCatched)
+        } else {
+            Ok(output)
+        }
+    }
+
+    pub fn current_num_threads(&self) -> usize {
+        self.thread_pool.current_num_threads() // forwarded to the wrapped pool
+    }
+}
+
+#[derive(Error, Debug)]
+#[error("A panic occured. Read the logs to find more information about it")]
+pub struct PanicCatched; // error returned by `ThreadPoolNoAbort::install` when the pool's panic flag was set
+
+#[derive(Default)]
+pub struct ThreadPoolNoAbortBuilder(ThreadPoolBuilder); // newtype over rayon's builder; `build` installs a panic handler
+
+impl ThreadPoolNoAbortBuilder {
+    pub fn new() -> ThreadPoolNoAbortBuilder {
+        ThreadPoolNoAbortBuilder::default()
+    }
+
+    pub fn thread_name<F>(mut self, closure: F) -> Self
+    where
+        F: FnMut(usize) -> String + 'static,
+    {
+        self.0 = self.0.thread_name(closure); // forwarded to the wrapped rayon builder
+        self
+    }
+
+    pub fn num_threads(mut self, num_threads: usize) -> ThreadPoolNoAbortBuilder {
+        self.0 = self.0.num_threads(num_threads); // forwarded to the wrapped rayon builder
+        self
+    }
+
+    pub fn build(mut self) -> Result<ThreadPoolNoAbort, rayon::ThreadPoolBuildError> {
+        let pool_catched_panic = Arc::new(AtomicBool::new(false)); // flag shared between the handler and the returned pool
+        self.0 = self.0.panic_handler({
+            let catched_panic = pool_catched_panic.clone();
+            move |_result| catched_panic.store(true, Ordering::SeqCst) // the handler's argument is ignored; only the flag is set
+        });
+        Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic })
+    }
+}
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -45,7 +45,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
    settings_diff: &InnerIndexSettingsDiff,
-    geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
    puffin::profile_function!();

@@ -127,12 +126,18 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
                    add_exists.insert(document);
                }

-                let geo_support =
-                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+                let del_geo_support = settings_diff
+                    .old
+                    .geo_fields_ids
+                    .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+                let add_geo_support = settings_diff
+                    .new
+                    .geo_fields_ids
+                    .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
                let del_filterable_values =
-                    del_value.map(|value| extract_facet_values(&value, geo_support));
+                    del_value.map(|value| extract_facet_values(&value, del_geo_support));
                let add_filterable_values =
-                    add_value.map(|value| extract_facet_values(&value, geo_support));
+                    add_value.map(|value| extract_facet_values(&value, add_geo_support));

                // Those closures are just here to simplify things a bit.
                let mut insert_numbers_diff = |del_numbers, add_numbers| {
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
 use crate::error::GeoError;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::extract_finite_float_from_value;
+use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::{FieldId, InternalError, Result};

 /// Extracts the geographical coordinates contained in each document under the `_geo` field.
@@ -18,7 +19,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
    primary_key_id: FieldId,
-    (lat_fid, lng_fid): (FieldId, FieldId),
+    settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();

@@ -40,47 +41,27 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
            serde_json::from_slice(document_id).unwrap()
        };

-        // first we get the two fields
-        match (obkv.get(lat_fid), obkv.get(lng_fid)) {
-            (Some(lat), Some(lng)) => {
-                let deladd_lat_obkv = KvReaderDelAdd::new(lat);
-                let deladd_lng_obkv = KvReaderDelAdd::new(lng);
+        // extract old version
+        let del_lat_lng =
+            extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
+        // extract new version
+        let add_lat_lng =
+            extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;

-                // then we extract the values
-                let del_lat_lng = deladd_lat_obkv
-                    .get(DelAdd::Deletion)
-                    .zip(deladd_lng_obkv.get(DelAdd::Deletion))
-                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-                    .transpose()?;
-                let add_lat_lng = deladd_lat_obkv
-                    .get(DelAdd::Addition)
-                    .zip(deladd_lng_obkv.get(DelAdd::Addition))
-                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
-                    .transpose()?;
-
-                if del_lat_lng != add_lat_lng {
-                    let mut obkv = KvWriterDelAdd::memory();
-                    if let Some([lat, lng]) = del_lat_lng {
-                        #[allow(clippy::drop_non_drop)]
-                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-                        obkv.insert(DelAdd::Deletion, bytes)?;
-                    }
-                    if let Some([lat, lng]) = add_lat_lng {
-                        #[allow(clippy::drop_non_drop)]
-                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-                        obkv.insert(DelAdd::Addition, bytes)?;
-                    }
-                    let bytes = obkv.into_inner()?;
-                    writer.insert(docid_bytes, bytes)?;
-                }
+        if del_lat_lng != add_lat_lng {
+            let mut obkv = KvWriterDelAdd::memory();
+            if let Some([lat, lng]) = del_lat_lng {
+                #[allow(clippy::drop_non_drop)]
+                let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                obkv.insert(DelAdd::Deletion, bytes)?;
            }
-            (None, Some(_)) => {
-                return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+            if let Some([lat, lng]) = add_lat_lng {
+                #[allow(clippy::drop_non_drop)]
+                let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                obkv.insert(DelAdd::Addition, bytes)?;
            }
-            (Some(_), None) => {
-                return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
-            }
-            (None, None) => (),
+            let bytes = obkv.into_inner()?;
+            writer.insert(docid_bytes, bytes)?;
        }
    }

@@ -88,16 +69,37 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
 }

 /// Extract the finite floats lat and lng from two bytes slices.
-fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
-    let lat = extract_finite_float_from_value(
-        serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
-    )
-    .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+fn extract_lat_lng(
+    document: &obkv::KvReader<FieldId>,
+    settings: &InnerIndexSettings,
+    deladd: DelAdd,
+    document_id: impl Fn() -> Value,
+) -> Result<Option<[f64; 2]>> {
+    match settings.geo_fields_ids { // `None` here means nothing is extracted for this side
+        Some((lat_fid, lng_fid)) => {
+            let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+            let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
+            let (lat, lng) = match (lat, lng) {
+                (Some(lat), Some(lng)) => (lat, lng),
+                (None, Some(_)) => { // longitude present but latitude absent
+                    return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+                }
+                (Some(_), None) => { // latitude present but longitude absent
+                    return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+                }
+                (None, None) => return Ok(None), // neither coordinate on the `deladd` side: no geo point
+            };
+            let lat = extract_finite_float_from_value(
+                serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
+            )
+            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;

-    let lng = extract_finite_float_from_value(
-        serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
-    )
-    .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
-
-    Ok([lat, lng])
+            let lng = extract_finite_float_from_value(
+                serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
+            )
+            .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
+            Ok(Some([lat, lng]))
+        }
+        None => Ok(None),
+    }
 }
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -19,7 +19,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::helpers::try_split_at;
 use crate::update::settings::InnerIndexSettingsDiff;
 use crate::vector::Embedder;
-use crate::{DocumentId, InternalError, Result, VectorOrArrayOfVectors};
+use crate::{DocumentId, InternalError, Result, ThreadPoolNoAbort, VectorOrArrayOfVectors};

 /// The length of the elements that are always in the buffer when inserting new values.
 const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
@@ -362,7 +362,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
    prompt_reader: grenad::Reader<R>,
    indexer: GrenadParameters,
    embedder: Arc<Embedder>,
-    request_threads: &rayon::ThreadPool,
+    request_threads: &ThreadPoolNoAbort,
 ) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();
    let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -11,7 +11,7 @@ mod extract_word_position_docids;

 use std::fs::File;
 use std::io::BufReader;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};

 use crossbeam_channel::Sender;
 use rayon::prelude::*;
@@ -31,7 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
 use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
 use super::{helpers, TypedChunk};
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::{FieldId, Result};
+use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};

 /// Extract data for each databases from obkv documents in parallel.
 /// Send data in grenad file over provided Sender.
@@ -43,7 +43,6 @@ pub(crate) fn data_from_obkv_documents(
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    primary_key_id: FieldId,
-    geo_fields_ids: Option<(FieldId, FieldId)>,
    settings_diff: Arc<InnerIndexSettingsDiff>,
    max_positions_per_attributes: Option<u32>,
 ) -> Result<()> {
@@ -72,7 +71,6 @@ pub(crate) fn data_from_obkv_documents(
                        indexer,
                        lmdb_writer_sx.clone(),
                        primary_key_id,
-                        geo_fields_ids,
                        settings_diff.clone(),
                        max_positions_per_attributes,
                    )
@@ -215,6 +213,18 @@ fn run_extraction_task<FE, FS, M>(
    })
 }

+fn request_threads() -> &'static ThreadPoolNoAbort {
+    static REQUEST_THREADS: OnceLock<ThreadPoolNoAbort> = OnceLock::new(); // built on first call, then reused by every later call
+
+    REQUEST_THREADS.get_or_init(|| {
+        ThreadPoolNoAbortBuilder::new()
+            .num_threads(crate::vector::REQUEST_PARALLELISM)
+            .thread_name(|index| format!("embedding-request-{index}"))
+            .build()
+            .unwrap() // panics if the pool cannot be built
+    })
+}
+
 /// Extract chunked data and send it into lmdb_writer_sx sender:
 /// - documents
 fn send_original_documents_data(
@@ -229,11 +239,6 @@ fn send_original_documents_data(
    let documents_chunk_cloned = original_documents_chunk.clone();
    let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();

-    let request_threads = rayon::ThreadPoolBuilder::new()
-        .num_threads(crate::vector::REQUEST_PARALLELISM)
-        .thread_name(|index| format!("embedding-request-{index}"))
-        .build()?;
-
    if settings_diff.reindex_vectors() || !settings_diff.settings_update_only() {
        let settings_diff = settings_diff.clone();
        rayon::spawn(move || {
@@ -251,7 +256,7 @@ fn send_original_documents_data(
                            prompts,
                            indexer,
                            embedder.clone(),
-                            &request_threads,
+                            request_threads(),
                        ) {
                            Ok(results) => Some(results),
                            Err(error) => {
@@ -300,7 +305,6 @@ fn send_and_extract_flattened_documents_data(
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    primary_key_id: FieldId,
-    geo_fields_ids: Option<(FieldId, FieldId)>,
    settings_diff: Arc<InnerIndexSettingsDiff>,
    max_positions_per_attributes: Option<u32>,
 ) -> Result<(
@@ -310,12 +314,13 @@ fn send_and_extract_flattened_documents_data(
    let flattened_documents_chunk =
        flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;

-    if let Some(geo_fields_ids) = geo_fields_ids {
+    if settings_diff.run_geo_indexing() {
        let documents_chunk_cloned = flattened_documents_chunk.clone();
        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        let settings_diff = settings_diff.clone();
        rayon::spawn(move || {
            let result =
-                extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
+                extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
            let _ = match result {
                Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
@@ -354,7 +359,6 @@ fn send_and_extract_flattened_documents_data(
                    flattened_documents_chunk.clone(),
                    indexer,
                    &settings_diff,
-                    geo_fields_ids,
                )?;

                // send fid_docid_facet_numbers_chunk to DB writer
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -33,6 +33,7 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
 pub use self::transform::{Transform, TransformOutput};
 use crate::documents::{obkv_to_object, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
+use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
 pub use crate::update::index_documents::helpers::CursorClonableMmap;
 use crate::update::{
    IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
@@ -298,18 +299,18 @@ where
        let backup_pool;
        let pool = match self.indexer_config.thread_pool {
            Some(ref pool) => pool,
-            #[cfg(not(test))]
            None => {
-                // We initialize a bakcup pool with the default
+                // We initialize a backup pool with the default
                // settings if none have already been set.
-                backup_pool = rayon::ThreadPoolBuilder::new().build()?;
-                &backup_pool
-            }
-            #[cfg(test)]
-            None => {
-                // We initialize a bakcup pool with the default
-                // settings if none have already been set.
-                backup_pool = rayon::ThreadPoolBuilder::new().num_threads(1).build()?;
+                #[allow(unused_mut)]
+                let mut pool_builder = ThreadPoolNoAbortBuilder::new();
+
+                #[cfg(test)]
+                {
+                    pool_builder = pool_builder.num_threads(1);
+                }
+
+                backup_pool = pool_builder.build()?;
                &backup_pool
            }
        };
@@ -323,28 +324,6 @@ where
        // get the primary key field id
        let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();

-        // get the fid of the `_geo.lat` and `_geo.lng` fields.
-        let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
-
-        // self.index.fields_ids_map($a)? ==>> field_id_map
-        let geo_fields_ids = match field_id_map.id("_geo") {
-            Some(gfid) => {
-                let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
-                let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
-                // if `_geo` is faceted then we get the `lat` and `lng`
-                if is_sortable || is_filterable {
-                    let field_ids = field_id_map
-                        .insert("_geo.lat")
-                        .zip(field_id_map.insert("_geo.lng"))
-                        .ok_or(UserError::AttributeLimitReached)?;
-                    Some(field_ids)
-                } else {
-                    None
-                }
-            }
-            None => None,
-        };
-
        let pool_params = GrenadParameters {
            chunk_compression_type: self.indexer_config.chunk_compression_type,
            chunk_compression_level: self.indexer_config.chunk_compression_level,
@@ -411,7 +390,6 @@ where
                        pool_params,
                        lmdb_writer_sx.clone(),
                        primary_key_id,
-                        geo_fields_ids,
                        settings_diff.clone(),
                        max_positions_per_attributes,
                    )
@@ -533,7 +511,7 @@ where
            }

            Ok(())
-        })?;
+        }).map_err(InternalError::from)??;

        // We write the field distribution into the main database
        self.index.put_field_distribution(self.wtxn, &field_distribution)?;
@@ -562,7 +540,8 @@ where
                    writer.build(wtxn, &mut rng, None)?;
                }
                Result::Ok(())
-            })?;
+            })
+            .map_err(InternalError::from)??;
        }

        self.execute_prefix_databases(
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -48,7 +48,6 @@ pub struct Transform<'a, 'i> {
    fields_ids_map: FieldsIdsMap,

    indexer_settings: &'a IndexerConfig,
-    pub autogenerate_docids: bool,
    pub index_documents_method: IndexDocumentsMethod,
    available_documents_ids: AvailableDocumentsIds,

@@ -102,7 +101,7 @@ impl<'a, 'i> Transform<'a, 'i> {
        index: &'i Index,
        indexer_settings: &'a IndexerConfig,
        index_documents_method: IndexDocumentsMethod,
-        autogenerate_docids: bool,
+        _autogenerate_docids: bool,
    ) -> Result<Self> {
        // We must choose the appropriate merge function for when two or more documents
        // with the same user id must be merged or fully replaced in the same batch.
@@ -136,7 +135,6 @@ impl<'a, 'i> Transform<'a, 'i> {
            index,
            fields_ids_map: index.fields_ids_map(wtxn)?,
            indexer_settings,
-            autogenerate_docids,
            available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids),
            original_sorter,
            flattened_sorter,
--- a/milli/src/update/indexer_config.rs
+++ b/milli/src/update/indexer_config.rs
@@ -1,5 +1,6 @@
 use grenad::CompressionType;
-use rayon::ThreadPool;
+
+use crate::thread_pool_no_abort::ThreadPoolNoAbort;

 #[derive(Debug)]
 pub struct IndexerConfig {
@@ -9,7 +10,7 @@ pub struct IndexerConfig {
    pub max_memory: Option<usize>,
    pub chunk_compression_type: CompressionType,
    pub chunk_compression_level: Option<u32>,
-    pub thread_pool: Option<ThreadPool>,
+    pub thread_pool: Option<ThreadPoolNoAbort>,
    pub max_positions_per_attributes: Option<u32>,
    pub skip_index_budget: bool,
 }
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -1161,6 +1161,11 @@ impl InnerIndexSettingsDiff {
    pub fn settings_update_only(&self) -> bool {
        self.settings_update_only
    }
+
+    pub fn run_geo_indexing(&self) -> bool {
+        self.old.geo_fields_ids != self.new.geo_fields_ids
+            || (!self.settings_update_only && self.new.geo_fields_ids.is_some())
+    }
 }

 #[derive(Clone)]
@@ -1177,6 +1182,7 @@ pub(crate) struct InnerIndexSettings {
    pub proximity_precision: ProximityPrecision,
    pub embedding_configs: EmbeddingConfigs,
    pub existing_fields: HashSet<String>,
+    pub geo_fields_ids: Option<(FieldId, FieldId)>,
 }

 impl InnerIndexSettings {
@@ -1185,7 +1191,7 @@ impl InnerIndexSettings {
        let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
        let allowed_separators = index.allowed_separators(rtxn)?;
        let dictionary = index.dictionary(rtxn)?;
-        let fields_ids_map = index.fields_ids_map(rtxn)?;
+        let mut fields_ids_map = index.fields_ids_map(rtxn)?;
        let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
        let user_defined_searchable_fields =
            user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
@@ -1200,6 +1206,24 @@ impl InnerIndexSettings {
            .into_iter()
            .filter_map(|(field, count)| (count != 0).then_some(field))
            .collect();
+        // Use the `fields_ids_map` loaded above instead of re-reading it from the index.
+        let geo_fields_ids = match fields_ids_map.id("_geo") {
+            Some(gfid) => {
+                let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
+                let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
+                // If `_geo` is sortable or filterable, register `_geo.lat` and `_geo.lng` and keep their ids.
+                if is_sortable || is_filterable {
+                    let field_ids = fields_ids_map
+                        .insert("_geo.lat")
+                        .zip(fields_ids_map.insert("_geo.lng"))
+                        .ok_or(UserError::AttributeLimitReached)?;
+                    Some(field_ids)
+                } else {
+                    None
+                }
+            }
+            None => None,
+        };

        Ok(Self {
            stop_words,
@@ -1214,6 +1238,7 @@ impl InnerIndexSettings {
            proximity_precision,
            embedding_configs,
            existing_fields,
+            geo_fields_ids,
        })
    }

--- a/milli/src/vector/error.rs
+++ b/milli/src/vector/error.rs
@@ -3,6 +3,7 @@ use std::path::PathBuf;
 use hf_hub::api::sync::ApiError;

 use crate::error::FaultSource;
+use crate::PanicCatched;

 #[derive(Debug, thiserror::Error)]
 #[error("Error while generating embeddings: {inner}")]
@@ -80,6 +81,8 @@ pub enum EmbedErrorKind {
    OpenAiUnexpectedDimension(usize, usize),
    #[error("no embedding was produced")]
    MissingEmbedding,
+    #[error(transparent)]
+    PanicInThreadPool(#[from] PanicCatched),
 }

 impl EmbedError {
--- a/milli/src/vector/mod.rs
+++ b/milli/src/vector/mod.rs
@@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};

 use self::error::{EmbedError, NewEmbedderError};
 use crate::prompt::{Prompt, PromptData};
+use crate::ThreadPoolNoAbort;

 pub mod error;
 pub mod hf;
@@ -254,7 +255,7 @@ impl Embedder {
    pub fn embed_chunks(
        &self,
        text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
    ) -> std::result::Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
        match self {
            Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),
--- a/milli/src/vector/ollama.rs
+++ b/milli/src/vector/ollama.rs
@@ -3,6 +3,8 @@ use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
 use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
 use super::{DistributionShift, Embeddings};
+use crate::error::FaultSource;
+use crate::ThreadPoolNoAbort;

 #[derive(Debug)]
 pub struct Embedder {
@@ -71,11 +73,16 @@ impl Embedder {
    pub fn embed_chunks(
        &self,
        text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-        threads.install(move || {
-            text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
-        })
+        threads
+            .install(move || {
+                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
+            })
+            .map_err(|error| EmbedError {
+                kind: EmbedErrorKind::PanicInThreadPool(error),
+                fault: FaultSource::Bug,
+            })?
    }

    pub fn chunk_count_hint(&self) -> usize {
--- a/milli/src/vector/openai.rs
+++ b/milli/src/vector/openai.rs
@@ -4,7 +4,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
 use super::error::{EmbedError, NewEmbedderError};
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
 use super::{DistributionShift, Embeddings};
+use crate::error::FaultSource;
 use crate::vector::error::EmbedErrorKind;
+use crate::ThreadPoolNoAbort;

 #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
 pub struct EmbedderOptions {
@@ -241,11 +243,16 @@ impl Embedder {
    pub fn embed_chunks(
        &self,
        text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-        threads.install(move || {
-            text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
-        })
+        threads
+            .install(move || {
+                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
+            })
+            .map_err(|error| EmbedError {
+                kind: EmbedErrorKind::PanicInThreadPool(error),
+                fault: FaultSource::Bug,
+            })?
    }

    pub fn chunk_count_hint(&self) -> usize {
--- a/milli/src/vector/rest.rs
+++ b/milli/src/vector/rest.rs
@@ -2,9 +2,12 @@ use deserr::Deserr;
 use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
 use serde::{Deserialize, Serialize};

+use super::error::EmbedErrorKind;
 use super::{
    DistributionShift, EmbedError, Embedding, Embeddings, NewEmbedderError, REQUEST_PARALLELISM,
 };
+use crate::error::FaultSource;
+use crate::ThreadPoolNoAbort;

 // retrying in case of failure

@@ -158,11 +161,16 @@ impl Embedder {
    pub fn embed_chunks(
        &self,
        text_chunks: Vec<Vec<String>>,
-        threads: &rayon::ThreadPool,
+        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
-        threads.install(move || {
-            text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
-        })
+        threads
+            .install(move || {
+                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
+            })
+            .map_err(|error| EmbedError {
+                kind: EmbedErrorKind::PanicInThreadPool(error),
+                fault: FaultSource::Bug,
+            })?
    }

    pub fn chunk_count_hint(&self) -> usize {
--- a/milli/src/vector/settings.rs
+++ b/milli/src/vector/settings.rs
@@ -301,10 +301,14 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
    fn from(value: EmbeddingConfig) -> Self {
        let EmbeddingConfig { embedder_options, prompt } = value;
        match embedder_options {
-            super::EmbedderOptions::HuggingFace(options) => Self {
+            super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
+                model,
+                revision,
+                distribution,
+            }) => Self {
                source: Setting::Set(EmbedderSource::HuggingFace),
-                model: Setting::Set(options.model),
-                revision: options.revision.map(Setting::Set).unwrap_or_default(),
+                model: Setting::Set(model),
+                revision: revision.map(Setting::Set).unwrap_or_default(),
                api_key: Setting::NotSet,
                dimensions: Setting::NotSet,
                document_template: Setting::Set(prompt.template),
@@ -314,14 +318,19 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                path_to_embeddings: Setting::NotSet,
                embedding_object: Setting::NotSet,
                input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
            },
-            super::EmbedderOptions::OpenAi(options) => Self {
+            super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
+                api_key,
+                embedding_model,
+                dimensions,
+                distribution,
+            }) => Self {
                source: Setting::Set(EmbedderSource::OpenAi),
-                model: Setting::Set(options.embedding_model.name().to_owned()),
+                model: Setting::Set(embedding_model.name().to_owned()),
                revision: Setting::NotSet,
-                api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
-                dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
+                api_key: api_key.map(Setting::Set).unwrap_or_default(),
+                dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
                document_template: Setting::Set(prompt.template),
                url: Setting::NotSet,
                query: Setting::NotSet,
@@ -329,29 +338,37 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                path_to_embeddings: Setting::NotSet,
                embedding_object: Setting::NotSet,
                input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
            },
-            super::EmbedderOptions::Ollama(options) => Self {
+            super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
+                embedding_model,
+                url,
+                api_key,
+                distribution,
+            }) => Self {
                source: Setting::Set(EmbedderSource::Ollama),
-                model: Setting::Set(options.embedding_model.to_owned()),
+                model: Setting::Set(embedding_model),
                revision: Setting::NotSet,
-                api_key: Setting::NotSet,
+                api_key: api_key.map(Setting::Set).unwrap_or_default(),
                dimensions: Setting::NotSet,
                document_template: Setting::Set(prompt.template),
-                url: Setting::NotSet,
+                url: url.map(Setting::Set).unwrap_or_default(),
                query: Setting::NotSet,
                input_field: Setting::NotSet,
                path_to_embeddings: Setting::NotSet,
                embedding_object: Setting::NotSet,
                input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
            },
-            super::EmbedderOptions::UserProvided(options) => Self {
+            super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
+                dimensions,
+                distribution,
+            }) => Self {
                source: Setting::Set(EmbedderSource::UserProvided),
                model: Setting::NotSet,
                revision: Setting::NotSet,
                api_key: Setting::NotSet,
-                dimensions: Setting::Set(options.dimensions),
+                dimensions: Setting::Set(dimensions),
                document_template: Setting::NotSet,
                url: Setting::NotSet,
                query: Setting::NotSet,
@@ -359,7 +376,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
                path_to_embeddings: Setting::NotSet,
                embedding_object: Setting::NotSet,
                input_type: Setting::NotSet,
-                distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
+                distribution: distribution.map(Setting::Set).unwrap_or_default(),
            },
            super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
                api_key,
--- a/tracing-trace/src/processor/firefox_profiler.rs
+++ b/tracing-trace/src/processor/firefox_profiler.rs
@@ -217,9 +217,7 @@ fn add_memory_samples(
    memory_counters: &mut Option<MemoryCounterHandles>,
    last_memory: &mut MemoryStats,
 ) -> Option<MemoryStats> {
-    let Some(stats) = memory else {
-        return None;
-    };
+    let stats = memory?;

    let memory_counters =
        memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main));
--- a/xtask/Cargo.toml
+++ b/xtask/Cargo.toml
@@ -21,7 +21,7 @@ reqwest = { version = "0.11.23", features = [
    "stream",
    "json",
    "rustls-tls",
-], default_features = false }
+], default-features = false }
 serde = { version = "1.0.195", features = ["derive"] }
 serde_json = "1.0.111"
 sha2 = "0.10.8"
Author	SHA1	Message	Date
ManyTheFish	98bacb5067	Enable swedish recomposition	2024-07-17 17:03:25 +02:00
meili-bors[bot]	50c6854964	Merge #4798 4798: Update version for the next release (v1.8.4) in Cargo.toml r=dureuill a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: dureuill <dureuill@users.noreply.github.com>	2024-07-15 14:09:11 +00:00
meili-bors[bot]	f0f02e6412	Merge #4796 4796: Generate vectors in dumps r=dureuill a=dureuill # Pull Request ## What does this PR do? 1. Add an Index::embeddings method to compute the embeddings of a document 2. Write generated vectors in dumps 3. Remove generated vectors when importing dumps 4. Cherry pick the `ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION` workaround so that the older CI can still build ## Manual Tests (TODO) - [ ] Import a dump from a v1.8.3 into a v1.8.4 successfully - [x] Import a dump from a v1.8.4 into a v1.8.4 successfully - [x] Import a dump from a v1.8.4 into a v1.9.0 successfully - [x] generated vectors are not regenerated - [x] user provided vectors are still available - [x] generated vectors still have the correct value - [x] updating a document with generated vectors attempts to regenerate Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-07-15 13:26:30 +00:00
dureuill	43bf3ff4e0	Update version for the next release (v1.8.4) in Cargo.toml	2024-07-15 10:24:00 +00:00
Louis Dureuil	8fe6d31e01	CI: Add ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION workaround to keep using Ubuntu 18.04	2024-07-15 12:04:14 +02:00
Louis Dureuil	9ec209bbf4	When importing dumps, remove regenerate: true vectors items	2024-07-15 11:57:11 +02:00
Louis Dureuil	9375b7bba5	Inject generated vectors in dumps	2024-07-15 11:56:39 +02:00
Louis Dureuil	363a5cc590	Retrieve function from v1.9 to get embeddings in documents	2024-07-15 11:56:18 +02:00
meili-bors[bot]	7d69953267	Merge #4709 4709: Update version for the next release (v1.8.3) in Cargo.toml r=dureuill a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: dureuill <dureuill@users.noreply.github.com>	2024-06-19 15:25:38 +00:00
meili-bors[bot]	7bd1b7ac43	Merge #4707 4707: Only spawn thread pool once r=irevoire a=dureuill # Pull Request ## Related issue Fixes #4692 ## What does this PR do? - There was a rayon thread pool of 40 threads that would be spawned multiple times per indexing operation. - Perhaps due to the sheer number of spawned threads, or to a leak in rayon thread pools, the system was unable to reclaim all the spawned threads at a sufficient rate. - As a result, the stack for the threads would accumulate and consume virtual memory, and eventually physical memory too. - Fortunately, the pool can actually be created once and then always reused. This PR performs this change. Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-06-19 14:51:06 +00:00
Louis Dureuil	1ff860e0a8	Fixes for Rust v1.79	2024-06-19 15:52:00 +02:00
dureuill	5d2b172e79	Update version for the next release (v1.8.3) in Cargo.toml	2024-06-19 13:32:04 +00:00
Louis Dureuil	e64d0a206e	Don't bind request_threads() to a local variable	2024-06-19 15:23:11 +02:00
Louis Dureuil	6254c7cee1	Only spawn the pool once	2024-06-19 15:17:46 +02:00
meili-bors[bot]	6c6c4732a1	Merge #4681 4681: Fix concurrency issue r=irevoire a=dureuill # Pull Request ## Related issue Fixes #4654 ## What does this PR do? - Asynchronously drop permits Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-06-10 09:36:08 +00:00
meili-bors[bot]	3976fe660e	Merge #4688 4688: Update version for the next release (v1.8.2) in Cargo.toml r=dureuill a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: dureuill <dureuill@users.noreply.github.com>	2024-06-10 08:28:34 +00:00
Louis Dureuil	50f8218a5d	Asynchronously drop permits	2024-06-10 10:19:57 +02:00
dureuill	19585f1a4f	Update version for the next release (v1.8.2) in Cargo.toml	2024-06-10 07:59:36 +00:00
meili-bors[bot]	ba75d23bfe	Merge #4648 4648: Update version for the next release (v1.8.1) in Cargo.toml r=ManyTheFish a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: ManyTheFish <ManyTheFish@users.noreply.github.com>	2024-05-21 16:38:36 +00:00
ManyTheFish	7fbb3bf8e8	Update version for the next release (v1.8.1) in Cargo.toml	2024-05-21 15:13:03 +00:00
meili-bors[bot]	9066a446a3	Merge #4642 4642: Index the _geo fields when changing the setting while there is already documents in the DB r=ManyTheFish a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4640 Fixes https://github.com/meilisearch/meilisearch/issues/4628 ## What does this PR do? - Add an integration test that first indexes the document and then changes the settings - Fix `extract_geo_point` by detecting if the `_geo` field has been faceted in this setting change and index all documents Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-05-21 13:16:11 +00:00
ManyTheFish	f762307838	Fix clippy	2024-05-21 13:44:20 +02:00
ManyTheFish	3e94a90722	Fixes	2024-05-21 13:39:46 +02:00
ManyTheFish	fc7e817221	Index geo points based on the settings differences	2024-05-20 12:27:26 +02:00
Tamo	0f78703b85	add a test reproducing the bug	2024-05-20 10:58:08 +02:00
meili-bors[bot]	c668043c4f	Merge #4617 4617: Destructure `EmbedderOptions` so we don't miss some options r=dureuill a=dureuill # Pull Request ## Related issue #4595 was caused by the code not destructuring the embedder options. ## What does this PR do? This PR adds the missing `url` parameter for ollama, and makes sure similar issue cannot happen in the future Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-05-02 14:55:32 +00:00
Louis Dureuil	5a305bfdea	Remove unused struct	2024-05-02 16:14:37 +02:00
Louis Dureuil	f4dd73ec8c	Destructure EmbedderOptions so we don't miss some options	2024-05-02 15:39:36 +02:00
meili-bors[bot]	66dce4600d	Merge #4603 4603: Update charabia v0.8.10 r=Kerollmops a=ManyTheFish - Update Charabia v0.8.10 - Add `swedish-recomposition` as an optional feature flag Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-04-30 13:04:02 +00:00
ManyTheFish	fe51ceca6d	Update lock file	2024-04-30 14:33:37 +02:00
ManyTheFish	88174b8ae4	Update charabia v0.8.10	2024-04-30 14:30:23 +02:00
meili-bors[bot]	ebca29f3de	Merge #4597 4597: Fix embeddings settings update r=ManyTheFish a=ManyTheFish # Pull Request - add some conditions reducing the work done when changing the settings - add some benchmarks on embedders ## Related issue Fixes #4585 Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-04-25 16:37:28 +00:00
meili-bors[bot]	c793b6ef6d	Merge #4600 4600: Fix embedders api r=ManyTheFish a=ManyTheFish # Pull Request ## Related issue Fixes #4594 Fixes #4595 Co-authored-by: ManyTheFish <many@meilisearch.com>	2024-04-25 13:16:33 +00:00
ManyTheFish	cbbfff3594	Remove debuging prints	2024-04-25 10:37:18 +02:00
ManyTheFish	dbcf50589b	Fix clippy	2024-04-25 10:36:10 +02:00
meili-bors[bot]	3e5cd027a5	Merge #4593 4593: Stop crashing when panic occurs in thread pool r=ManyTheFish a=Kerollmops This PR fixes #4362 by introducing a new boolean to catch panics in the rayon thread pool. The boolean is read after performing the operations in rayon, and the indexation process is stopped. This first version doesn't expose the panic message but marks the task as failed. The current implementation exposes a `ThreadPoolNoAbort` wrapper. The `rayon::ThreadPool` has been wrapped to check that nothing went wrong after running the `ThreadPool::install` function. An atomic boolean and some `store/load` logic make the system work efficiently. Before, Meilisearch was completely crashing... <img width="1563" alt="Capture d’écran 2024-04-22 à 15 49 02" src="https://github.com/meilisearch/meilisearch/assets/3610253/ce114917-a881-4fbb-85df-c195fcf0c7cb"> Now, it handles the panics correctly and marks the task as failed. <img width="1558" alt="Capture d’écran 2024-04-22 à 15 42 14" src="https://github.com/meilisearch/meilisearch/assets/3610253/8bd031ef-5e8f-4a12-a91e-c823597a2344"> Co-authored-by: Clément Renault <clement@meilisearch.com>	2024-04-24 16:27:08 +00:00
ManyTheFish	7468c1cf8d	Introduce WildcardSetting that are serialized as wildcards by default	2024-04-24 18:15:03 +02:00
Clément Renault	d4aeff92d0	Introduce the ThreadPoolNoAbort wrapper	2024-04-24 16:40:12 +02:00
ManyTheFish	e87cb373de	Avoid intermediate serializing when displaying settings	2024-04-24 12:33:07 +02:00
ManyTheFish	9b76501875	Display set API key for Ollama embedder	2024-04-24 12:33:07 +02:00
Clément Renault	b3173d0423	Remove useless dots in the error messages	2024-04-22 18:09:33 +02:00
Clément Renault	96cc5319c8	Introduce a new internal error type to categorize panics	2024-04-22 18:09:33 +02:00
Clément Renault	0c7003c5df	Introduce an atomic to catch panics in thread pools	2024-04-22 18:09:33 +02:00