Compare commits

29 Commits

Author SHA1 Message Date
Clément Renault
32e23ba1b2 Fix a test trying to change settings with a wtxn 2025-11-24 13:41:12 +01:00
Clément Renault
1517a3dd29 Make sure the embedders support changing searchables 2025-11-24 13:41:11 +01:00
Clément Renault
6d2198d678 Make sure we don't crash on unreferenced fields 2025-11-24 13:41:11 +01:00
Clément Renault
28ae67501c Make clippy happy 2025-11-24 13:41:11 +01:00
Clément Renault
7ec1152068 Introduce new progress steps when deleting fid-based entries 2025-11-24 13:41:11 +01:00
Clément Renault
6d1a58da84 Delete entries from fid-based databases when searchables are deleted 2025-11-24 13:41:11 +01:00
Clément Renault
5feae8d858 Support exact attributes in the settings delta 2025-11-24 13:41:11 +01:00
Clément Renault
bf175c3ed3 Call the post processing in the new settings indexer 2025-11-24 13:41:11 +01:00
Clément Renault
f3d7595e5f Support exact attributes in the field metadata 2025-11-24 13:41:11 +01:00
Clément Renault
9a9061267b Call the new searchable extractor 2025-11-24 13:41:11 +01:00
Clément Renault
92bc52cd60 Introduce the new searchable extractor 2025-11-24 13:41:11 +01:00
Clément Renault
d5511db234 Enable the new settings indexer when the searchable or exact attributes are updated 2025-11-24 13:41:11 +01:00
Clément Renault
cf62af13e8 Merge pull request #6005 from meilisearch/clamp-max-batch-size
Clamp max batch size to 10 GiB
2025-11-20 10:45:23 +00:00
Many the fish
91cf94c196 Merge pull request #5999 from meilisearch/fix-document-fetch-sort
Fix the Document Fetch pagination bug when Sort is applied
2025-11-20 10:15:04 +00:00
Clément Renault
753ba39199 Update the documentation of the batch size 2025-11-20 10:33:02 +01:00
Clément Renault
3944c25853 Clamp the maximum batch size to 10 GiB 2025-11-20 10:29:50 +01:00
ManyTheFish
925bce5fbd Modify the test to cover all the sort branches and fix the untested branch 2025-11-20 10:27:24 +01:00
ManyTheFish
62065ed30d Fix the pagination bug
where the last document of the previous page was duplicated as the first
document of the current page. This was due to a bug in the custom `nth`
function of the sort ranking rule, which skipped `n-1` documents instead of `n`
2025-11-20 10:27:24 +01:00
Clément Renault
97e6ae1957 Merge pull request #5994 from meilisearch/improve-s3-error-messages
Improve S3 upload by showing errors in the task queue
2025-11-19 16:58:02 +00:00
Clément Renault
5ed9be0789 Merge pull request #5990 from meilisearch/default-max-batch-size
Make the limit batched tasks total size default to half of the max indexing memory
2025-11-19 16:56:34 +00:00
Clément Renault
7597b1049f Merge pull request #6001 from meilisearch/update-windows-macos-ci
Update the macOS platform version in the CI
2025-11-19 16:12:52 +00:00
Clément Renault
d99150f21b Improve error message extraction
Co-authored-by: Many the fish <many@meilisearch.com>
2025-11-19 17:09:15 +01:00
Kerollmops
c9726674a0 Make the limit batched tasks total size default to half of max indexing
memory
2025-11-19 17:04:45 +01:00
Clément Renault
205f40b3b8 Update the macOS platform version to use version 14 2025-11-19 16:10:41 +01:00
Clément Renault
3d013cdebe Merge pull request #5995 from meilisearch/fix-embedding-skip
Fix embedding skip
2025-11-18 10:02:53 +00:00
Louis Dureuil
ddeff5678f Clippy happy 2025-11-17 14:48:40 +01:00
Louis Dureuil
a235434910 Add test 2025-11-17 13:52:23 +01:00
Louis Dureuil
a376525348 Do not skip embedding request for the document that exceeds capacity 2025-11-17 13:18:58 +01:00
Kerollmops
361580f451 Display the error message on failure 2025-11-17 09:21:18 +01:00
23 changed files with 999 additions and 148 deletions

View File

@@ -65,9 +65,9 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
os: [macos-14, windows-2022]
include:
- os: macos-13
- os: macos-14
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-2022
@@ -90,7 +90,7 @@ jobs:
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
runs-on: macos-13
runs-on: macos-14
needs: check-version
strategy:
matrix:

View File

@@ -47,7 +47,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
os: [macos-14, windows-2022]
steps:
- uses: actions/checkout@v5
- name: Cache dependencies

View File

@@ -438,12 +438,15 @@ async fn multipart_stream_to_s3(
db_name: String,
reader: std::io::PipeReader,
) -> Result<(), Error> {
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
use std::collections::VecDeque;
use std::io;
use std::os::fd::OwnedFd;
use std::path::PathBuf;
use bytes::{Bytes, BytesMut};
use reqwest::{Client, Response};
use rusty_s3::S3Action as _;
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
use rusty_s3::actions::CreateMultipartUpload;
use rusty_s3::{Bucket, BucketError, Credentials, S3Action as _, UrlStyle};
use tokio::task::JoinHandle;
let reader = OwnedFd::from(reader);
@@ -517,7 +520,6 @@ async fn multipart_stream_to_s3(
while buffer.len() < (s3_multipart_part_size as usize / 2) {
// Wait for the pipe to be readable
use std::io;
reader.readable().await?;
match reader.try_read_buf(&mut buffer) {
@@ -581,15 +583,17 @@ async fn multipart_stream_to_s3(
async move {
match client.post(url).body(body).send().await {
Ok(resp) if resp.status().is_client_error() => {
resp.error_for_status().map_err(backoff::Error::Permanent)
Err(backoff::Error::Permanent(Error::S3Error {
status: resp.status(),
body: resp.text().await.unwrap_or_default(),
}))
}
Ok(resp) => Ok(resp),
Err(e) => Err(backoff::Error::transient(e)),
Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
}
}
})
.await
.map_err(Error::S3HttpError)?;
.await?;
let status = resp.status();
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
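
The hunk above classifies upload failures for the retry loop: a 4xx response will never succeed on retry, so it becomes a permanent `Error::S3Error` carrying the status and body (which then surfaces in the task queue), while network-level failures stay transient and are retried. A self-contained sketch of the same classification, assuming the `backoff` 0.4 crate and an `Error` type mirroring the project's two variants (the function name is illustrative):

use backoff::ExponentialBackoff;
use bytes::Bytes;
use reqwest::{Client, Response, Url};

enum Error {
    S3Error { status: reqwest::StatusCode, body: String },
    S3HttpError(reqwest::Error),
}

// Retry transient network failures with exponential backoff; report 4xx
// responses immediately, keeping the S3 response body for the error message.
async fn send_part(client: &Client, url: Url, body: Bytes) -> Result<Response, Error> {
    let client = client.clone();
    backoff::future::retry(ExponentialBackoff::default(), move || {
        let (client, url, body) = (client.clone(), url.clone(), body.clone());
        async move {
            match client.post(url).body(body).send().await {
                // Client errors (4xx) will not succeed on retry: fail permanently.
                Ok(resp) if resp.status().is_client_error() => {
                    Err(backoff::Error::Permanent(Error::S3Error {
                        status: resp.status(),
                        body: resp.text().await.unwrap_or_default(),
                    }))
                }
                Ok(resp) => Ok(resp),
                // Connection resets, timeouts, etc. are worth retrying.
                Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
            }
        }
    })
    .await
}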

View File

@@ -195,7 +195,7 @@ struct Infos {
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
experimental_limit_batched_tasks_total_size: u64,
experimental_limit_batched_tasks_total_size: Option<u64>,
experimental_network: bool,
experimental_multimodal: bool,
experimental_chat_completions: bool,
@@ -359,7 +359,7 @@ impl Infos {
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size:
experimental_limit_batched_tasks_total_size.into(),
experimental_limit_batched_tasks_total_size.map(|size| size.as_u64()),
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(),

View File

@@ -230,7 +230,17 @@ pub fn setup_meilisearch(
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.map_or_else(
|| {
opt.indexer_options
.max_indexing_memory
// By default, we use half of the available memory to determine the size of batched tasks
.map_or(u64::MAX, |mem| mem.as_u64() / 2)
// And never exceed 10 GiB when we infer the limit
.min(10 * 1024 * 1024 * 1024)
},
|size| size.as_u64(),
),
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features: opt.to_instance_features(),
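
Spelled out, the fallback in this hunk computes half of the max indexing memory when it is set, `u64::MAX` otherwise, then clamps the inferred value to 10 GiB; an explicitly configured limit is taken as-is, without clamping. A runnable sketch of that computation in isolation (the function name is illustrative):

fn batched_tasks_size_limit(explicit: Option<u64>, max_indexing_memory: Option<u64>) -> u64 {
    const TEN_GIB: u64 = 10 * 1024 * 1024 * 1024;
    explicit.map_or_else(
        || {
            max_indexing_memory
                // By default, infer half of the available indexing memory...
                .map_or(u64::MAX, |mem| mem / 2)
                // ...and never exceed 10 GiB when inferring.
                .min(TEN_GIB)
        },
        // An explicit limit bypasses the clamp.
        |size| size,
    )
}

fn main() {
    const GIB: u64 = 1024 * 1024 * 1024;
    assert_eq!(batched_tasks_size_limit(None, Some(6 * GIB)), 3 * GIB);
    assert_eq!(batched_tasks_size_limit(None, None), 10 * GIB);
    assert_eq!(batched_tasks_size_limit(Some(20 * GIB), Some(6 * GIB)), 20 * GIB);
}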

View File

@@ -473,11 +473,14 @@ pub struct Opt {
#[serde(default = "default_limit_batched_tasks")]
pub experimental_max_number_of_batched_tasks: usize,
/// Experimentally reduces the maximum total size, in bytes, of tasks that will be processed at once,
/// see: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
#[serde(default = "default_limit_batched_tasks_total_size")]
pub experimental_limit_batched_tasks_total_size: Byte,
/// Experimentally controls the maximum total size, in bytes, of tasks that will be processed
/// simultaneously. When unspecified, defaults to half of the maximum indexing memory and
/// clamped to 10 GiB.
///
/// See: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE)]
#[serde(default)]
pub experimental_limit_batched_tasks_total_size: Option<Byte>,
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
/// distinct embedder.
@@ -701,10 +704,12 @@ impl Opt {
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
experimental_max_number_of_batched_tasks.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
experimental_limit_batched_tasks_total_size.to_string(),
);
if let Some(limit) = experimental_limit_batched_tasks_total_size {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
limit.to_string(),
);
}
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
experimental_embedding_cache_entries.to_string(),
@@ -1273,10 +1278,6 @@ fn default_limit_batched_tasks() -> usize {
usize::MAX
}
fn default_limit_batched_tasks_total_size() -> Byte {
Byte::from_u64(u64::MAX)
}
fn default_embedding_cache_entries() -> usize {
0
}

View File

@@ -1,14 +1,14 @@
use crate::search::{Personalize, SearchResult};
use meilisearch_types::{
error::{Code, ErrorCode, ResponseError},
milli::TimeBudget,
};
use std::time::Duration;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::milli::TimeBudget;
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tracing::{debug, info, warn};
use crate::search::{Personalize, SearchResult};
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
const MAX_RETRIES: u32 = 10;

View File

@@ -18,10 +18,9 @@ use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;
use crate::search::SearchMetadata;
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
use crate::milli::vector::Embedding;
use crate::search::SearchMetadata;
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;

View File

@@ -1339,3 +1339,117 @@ async fn get_document_with_vectors() {
}
"###);
}
#[actix_rt::test]
async fn test_fetch_documents_pagination_with_sorting() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
// Set name as sortable attribute
let (task, code) = index.update_settings_sortable_attributes(json!(["name"])).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.succeeded();
let documents = json!((0..50)
.map(|i| json!({"id": i, "name": format!("doc_{:05}", std::cmp::min(i, 5))}))
.collect::<Vec<_>>());
// Add documents as described in the bug report
let (task, code) = index.add_documents(documents, None).await;
assert_eq!(code, 202);
server.wait_task(task.uid()).await.succeeded();
// Request 1 (first page): offset 0, limit 2
let (response, code) = index
.fetch_documents(json!({
"offset": 0,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 0,
"name": "doc_00000"
},
{
"id": 1,
"name": "doc_00001"
}
]
"###);
// Request 2 (second page): offset 2, limit 2
let (response, code) = index
.fetch_documents(json!({
"offset": 2,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 2,
"name": "doc_00002"
},
{
"id": 3,
"name": "doc_00003"
}
]
"###);
// Request 3 (third page): offset 4, limit 2
let (response, code) = index
.fetch_documents(json!({
"offset": 4,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 4,
"name": "doc_00004"
},
{
"id": 5,
"name": "doc_00005"
}
]
"###);
// Request 4 (fourth page): offset 6, limit 2
let (response, code) = index
.fetch_documents(json!({
"offset": 6,
"limit": 2,
"sort": ["name:asc"]
}))
.await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r###"
[
{
"id": 6,
"name": "doc_00005"
},
{
"id": 7,
"name": "doc_00005"
}
]
"###);
}

View File

@@ -137,6 +137,60 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
}])
});
static MANY_DOCS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "4",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "5",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "6",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "7",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "8",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "9",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "10",
}])
});
#[actix_rt::test]
async fn simple_search() {
let server = Server::new_shared();
@@ -449,6 +503,38 @@ async fn simple_search_hf() {
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn issue_5976_missing_docs_hf() {
let server = Server::new_shared();
let index = index_with_documents_hf(server, &MANY_DOCS).await;
let (response, code) = index
.search_post(
json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
let are_empty: Vec<_> = response["hits"]
.as_array()
.unwrap()
.iter()
.map(|hit| hit["_vectors"]["default"]["embeddings"].as_array().unwrap().is_empty())
.collect();
snapshot!(json!(are_empty), @r###"
[
false,
false,
false,
false,
false,
false,
false,
false,
false,
false
]
"###);
}
#[actix_rt::test]
async fn distribution_shift() {
let server = Server::new_shared();

View File

@@ -87,7 +87,7 @@ impl Iterator for SortedDocumentsIterator<'_> {
};
// Otherwise don't directly iterate over children, skip them if we know we will go further
let mut to_skip = n - 1;
let mut to_skip = n;
while to_skip > 0 {
if let Err(e) = SortedDocumentsIterator::update_current(
current_child,
@@ -108,7 +108,7 @@ impl Iterator for SortedDocumentsIterator<'_> {
continue;
} else {
// The current iterator is large enough, so we can forward the call to it.
return inner.nth(to_skip + 1);
return inner.nth(to_skip);
}
}
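
The off-by-one follows from the `Iterator::nth` contract: `nth(n)` skips n elements and yields the one after them, so skipping `n - 1` before forwarding (and forwarding `to_skip + 1`) returned one element too early, duplicating the page boundary. A minimal illustration of the contract:

fn main() {
    let mut it = 0..10;
    // nth(n) consumes n + 1 elements and yields the (n + 1)-th one.
    assert_eq!(it.nth(2), Some(2)); // skipped 0 and 1
    // The iterator resumes right after the returned element.
    assert_eq!(it.next(), Some(3));
}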

View File

@@ -18,6 +18,8 @@ use crate::{
pub struct Metadata {
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
pub searchable: Option<Weight>,
/// The field is part of the exact attributes.
pub exact: bool,
/// The field is part of the sortable attributes.
pub sortable: bool,
/// The field is defined as the distinct attribute.
@@ -209,6 +211,7 @@ impl Metadata {
#[derive(Debug, Clone)]
pub struct MetadataBuilder {
searchable_attributes: Option<Vec<String>>,
exact_searchable_attributes: Vec<String>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -220,15 +223,18 @@ impl MetadataBuilder {
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
.map(|fields| fields.into_iter().map(String::from).collect());
let exact_searchable_attributes =
index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
let sortable_attributes = index.sortable_fields(rtxn)?;
let localized_attributes = index.localized_attributes_rules(rtxn)?;
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
let distinct_attribute = index.distinct_field(rtxn)?.map(String::from);
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
Ok(Self::new(
searchable_attributes,
exact_searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
@@ -242,6 +248,7 @@ impl MetadataBuilder {
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
pub fn new(
searchable_attributes: Option<Vec<String>>,
exact_searchable_attributes: Vec<String>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
@@ -256,6 +263,7 @@ impl MetadataBuilder {
Self {
searchable_attributes,
exact_searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
@@ -269,6 +277,7 @@ impl MetadataBuilder {
// Vectors fields are not searchable, filterable, distinct or asc_desc
return Metadata {
searchable: None,
exact: false,
sortable: false,
distinct: false,
asc_desc: false,
@@ -296,6 +305,7 @@ impl MetadataBuilder {
// Geo fields are not searchable, distinct or asc_desc
return Metadata {
searchable: None,
exact: false,
sortable,
distinct: false,
asc_desc: false,
@@ -309,6 +319,7 @@ impl MetadataBuilder {
debug_assert!(!sortable, "geojson fields should not be sortable");
return Metadata {
searchable: None,
exact: false,
sortable,
distinct: false,
asc_desc: false,
@@ -329,6 +340,8 @@ impl MetadataBuilder {
None => Some(0),
};
let exact = self.exact_searchable_attributes.iter().any(|attr| is_faceted_by(field, attr));
let distinct =
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
let asc_desc = self.asc_desc_attributes.contains(field);
@@ -343,6 +356,7 @@ impl MetadataBuilder {
Metadata {
searchable,
exact,
sortable,
distinct,
asc_desc,
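
The `exact` flag above is derived by matching the field name against the user-declared exact attributes, including nested fields. A sketch of the matching rule, assuming `is_faceted_by` keeps its usual semantics (a field matches an attribute when it is the attribute itself or is nested under it); this is an illustrative restatement, not the milli implementation:

fn is_faceted_by(field: &str, facet: &str) -> bool {
    field == facet
        || (field.starts_with(facet) && field[facet.len()..].starts_with('.'))
}

fn main() {
    let exact_attributes = ["title", "author"];
    // "author.name" is nested under "author", so it is exact too.
    assert!(exact_attributes.iter().any(|attr| is_faceted_by("author.name", attr)));
    // A mere prefix like "authority" does not match.
    assert!(!exact_attributes.iter().any(|attr| is_faceted_by("authority", attr)));
}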

View File

@@ -8,17 +8,26 @@ use bumpalo::Bump;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::attribute_patterns::match_field_legacy;
use crate::fields_ids_map::metadata::Metadata;
use crate::update::new::document::DocumentContext;
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::indexer::document_changes::{
extract, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::indexer::settings_changes::{
settings_change_extract, DocumentsIndentifiers, SettingsChangeExtractor,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::update::new::{DocumentChange, DocumentIdentifiers};
use crate::update::settings::SettingsDelta;
use crate::{
bucketed_position, DocumentId, FieldId, PatternMatch, Result, UserError,
MAX_POSITION_PER_ATTRIBUTE,
};
const MAX_COUNTED_WORDS: usize = 30;
@@ -34,6 +43,15 @@ pub struct WordDocidsBalancedCaches<'extractor> {
unsafe impl MostlySend for WordDocidsBalancedCaches<'_> {}
/// Whether to extract or skip fields during word extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldDbExtraction {
/// Extract the word and put it into the fid-based databases.
Extract,
/// Do not store the word in the fid-based databases.
Skip,
}
impl<'extractor> WordDocidsBalancedCaches<'extractor> {
pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
Self {
@@ -47,12 +65,14 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
}
}
#[allow(clippy::too_many_arguments)]
fn insert_add_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
field_db_extraction: FieldDbExtraction,
docid: u32,
bump: &Bump,
) -> Result<()> {
@@ -66,11 +86,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
if field_db_extraction == FieldDbExtraction::Extract {
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
}
let position = bucketed_position(position);
buffer.clear();
@@ -83,21 +105,26 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.flush_fid_word_count(&mut buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
if field_db_extraction == FieldDbExtraction::Extract {
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
}
self.current_docid = Some(docid);
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn insert_del_u32(
&mut self,
field_id: FieldId,
position: u16,
word: &str,
exact: bool,
field_db_extraction: FieldDbExtraction,
docid: u32,
bump: &Bump,
) -> Result<()> {
@@ -111,11 +138,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
if field_db_extraction == FieldDbExtraction::Extract {
buffer.clear();
buffer.extend_from_slice(word_bytes);
buffer.push(0);
buffer.extend_from_slice(&field_id.to_be_bytes());
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
}
let position = bucketed_position(position);
buffer.clear();
@@ -128,10 +157,12 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.flush_fid_word_count(&mut buffer)?;
}
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
if field_db_extraction == FieldDbExtraction::Extract {
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
}
self.current_docid = Some(docid);
@@ -325,6 +356,24 @@ impl WordDocidsExtractors {
exact_attributes.iter().any(|attr| contained_in(fname, attr))
|| disabled_typos_terms.is_exact(word)
};
let mut should_tokenize = |field_name: &str| {
let Some((field_id, meta)) = new_fields_ids_map.id_with_metadata_or_insert(field_name)
else {
return Err(UserError::AttributeLimitReached.into());
};
let pattern_match = if meta.is_searchable() {
PatternMatch::Match
} else {
// TODO: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
PatternMatch::Parent
};
Ok((field_id, pattern_match))
};
match document_change {
DocumentChange::Deletion(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {
@@ -333,13 +382,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -361,13 +411,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.current(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
@@ -377,13 +428,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.merged(rtxn, index, context.db_fields_ids_map)?,
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -394,13 +446,14 @@ impl WordDocidsExtractors {
pos,
word,
is_exact(fname, word),
FieldDbExtraction::Extract,
inner.docid(),
doc_alloc,
)
};
document_tokenizer.tokenize_document(
inner.inserted(),
new_fields_ids_map,
&mut should_tokenize,
&mut token_fn,
)?;
}
@@ -411,3 +464,300 @@ impl WordDocidsExtractors {
cached_sorter.flush_fid_word_count(&mut buffer)
}
}
pub struct WordDocidsSettingsExtractorsData<'a, SD> {
tokenizer: DocumentTokenizer<'a>,
max_memory_by_thread: Option<usize>,
buckets: usize,
settings_delta: &'a SD,
}
impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
for WordDocidsSettingsExtractorsData<'_, SD>
{
type Data = RefCell<Option<WordDocidsBalancedCaches<'extractor>>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
self.buckets,
self.max_memory_by_thread,
extractor_alloc,
))))
}
fn process<'doc>(
&'doc self,
documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
context: &'doc DocumentContext<Self::Data>,
) -> crate::Result<()> {
for document in documents {
let document = document?;
SettingsChangeWordDocidsExtractors::extract_settings_change(
document,
context,
&self.tokenizer,
self.settings_delta,
)?;
}
Ok(())
}
}
pub struct SettingsChangeWordDocidsExtractors;
impl SettingsChangeWordDocidsExtractors {
pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
settings_delta: &SD,
documents: &'indexer DocumentsIndentifiers<'indexer>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<WordDocidsCaches<'extractor>>
where
SD: SettingsDelta + Sync,
MSP: Fn() -> bool + Sync,
{
// Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
// TODO we need to read the new AND old settings to support changing global parameters
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordDocidsSettingsExtractorsData {
tokenizer: document_tokenizer,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
settings_delta,
};
let datastore = ThreadLocal::new();
{
let span = tracing::debug_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
settings_change_extract(
documents,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
let mut merger = WordDocidsCaches::new();
for cache in datastore.into_iter().flat_map(RefCell::into_inner) {
merger.push(cache)?;
}
Ok(merger)
}
// TODO find a better name (extract_document_change?)
// and document this method.
fn extract_settings_change<SD: SettingsDelta>(
document: DocumentIdentifiers<'_>,
context: &DocumentContext<RefCell<Option<WordDocidsBalancedCaches>>>,
document_tokenizer: &DocumentTokenizer,
settings_delta: &SD,
) -> Result<()> {
let mut cached_sorter_ref = context.data.borrow_mut_or_yield();
let cached_sorter = cached_sorter_ref.as_mut().unwrap();
let doc_alloc = &context.doc_alloc;
let new_fields_ids_map = settings_delta.new_fields_ids_map();
let old_fields_ids_map = context.index.fields_ids_map_with_metadata(&context.rtxn)?;
let old_searchable = settings_delta.old_searchable_attributes().as_ref();
let new_searchable = settings_delta.new_searchable_attributes().as_ref();
let current_document = document.current(
&context.rtxn,
context.index,
old_fields_ids_map.as_fields_ids_map(),
)?;
#[derive(Debug, Clone, Copy, PartialEq)]
enum ActionToOperate {
ReindexAllFields,
// TODO improve by listing field prefixes
IndexAddedFields,
SkipDocument,
}
let mut action = ActionToOperate::SkipDocument;
// Here we do a preliminary check to determine the action to take.
// This check doesn't trigger the tokenizer as we never return
// PatternMatch::Match.
document_tokenizer.tokenize_document(
current_document,
&mut |field_name| {
let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
if action == ActionToOperate::ReindexAllFields {
return Ok((fid, PatternMatch::NoMatch));
}
let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
action = match (old_field_metadata, new_field_metadata) {
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. })
if old_exact != new_exact =>
{
ActionToOperate::ReindexAllFields
}
(Metadata { searchable: Some(_), .. }, Metadata { searchable: None, .. }) => {
ActionToOperate::ReindexAllFields
}
(Metadata { searchable: None, .. }, Metadata { searchable: Some(_), .. }) => {
ActionToOperate::IndexAddedFields
}
_ => action,
};
Ok((fid, PatternMatch::Parent))
},
&mut |_, _, _, _| Ok(()),
)?;
// Early return when we don't need to index the document
if action == ActionToOperate::SkipDocument {
return Ok(());
}
let mut should_tokenize = |field_name: &str| {
let field_id = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
let pattern_match = match action {
ActionToOperate::ReindexAllFields => {
if old_field_metadata.is_searchable() || new_field_metadata.is_searchable() {
PatternMatch::Match
// If any old or new field is searchable then we need to iterate over all fields
// else if any field matches we need to iterate over all fields
} else if old_searchable.zip(new_searchable).is_none_or(|(old, new)| {
old.iter().chain(new).any(|attr| {
match_field_legacy(attr, field_name) == PatternMatch::Parent
})
}) {
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
ActionToOperate::IndexAddedFields => {
let has_searchable_children =
|field_name: &str, searchable: Option<&Vec<String>>| {
searchable.is_none_or(|fields| {
fields.iter().any(|attr| {
match_field_legacy(attr, field_name) != PatternMatch::Parent
})
})
};
// Was not searchable but now is
if !old_field_metadata.is_searchable() && new_field_metadata.is_searchable() {
PatternMatch::Match
// If the field was not a parent of a searchable before and is now
} else if !has_searchable_children(field_name, old_searchable)
&& has_searchable_children(field_name, new_searchable)
{
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
ActionToOperate::SkipDocument => unreachable!(),
};
Ok((field_id, pattern_match))
};
let old_disabled_typos_terms = settings_delta.old_disabled_typos_terms();
let new_disabled_typos_terms = settings_delta.new_disabled_typos_terms();
let mut token_fn = |_field_name: &str, field_id, pos, word: &str| {
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
match (old_field_metadata, new_field_metadata) {
(
Metadata { searchable: Some(_), exact: old_exact, .. },
Metadata { searchable: None, .. },
) => cached_sorter.insert_del_u32(
field_id,
pos,
word,
old_exact || old_disabled_typos_terms.is_exact(word),
// We deleted the field globally
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
),
(
Metadata { searchable: None, .. },
Metadata { searchable: Some(_), exact: new_exact, .. },
) => cached_sorter.insert_add_u32(
field_id,
pos,
word,
new_exact || new_disabled_typos_terms.is_exact(word),
FieldDbExtraction::Extract,
document.docid(),
doc_alloc,
),
(Metadata { searchable: None, .. }, Metadata { searchable: None, .. }) => {
unreachable!()
}
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. }) => {
cached_sorter.insert_del_u32(
field_id,
pos,
word,
old_exact || old_disabled_typos_terms.is_exact(word),
// The field has already been extracted
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
)?;
cached_sorter.insert_add_u32(
field_id,
pos,
word,
new_exact || new_disabled_typos_terms.is_exact(word),
// The field has already been extracted
FieldDbExtraction::Skip,
document.docid(),
doc_alloc,
)
}
}
};
// TODO we must tokenize twice when we change global parameters like stop words,
// the language settings, dictionary, separators, non-separators...
document_tokenizer.tokenize_document(
current_document,
&mut should_tokenize,
&mut token_fn,
)?;
Ok(())
}
}
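
The preliminary pass in extract_settings_change reduces each document to one of three actions before any real tokenization happens: a flipped exact flag or a field losing searchability forces ReindexAllFields, a field gaining searchability only needs IndexAddedFields, and anything else leaves the document skippable. A condensed sketch of that per-field decision, over hypothetical standalone types (not the milli `Metadata`):

#[derive(Clone, Copy, PartialEq)]
enum Action { ReindexAllFields, IndexAddedFields, SkipDocument }

struct Meta { searchable: bool, exact: bool }

// Fold one field's old/new metadata into the running action.
fn fold_action(action: Action, old: Meta, new: Meta) -> Action {
    if action == Action::ReindexAllFields {
        return action; // already the maximal action, nothing can escalate it
    }
    match (old, new) {
        // Flipping the exact flag moves words between databases: full reindex.
        (Meta { exact: o, .. }, Meta { exact: n, .. }) if o != n => Action::ReindexAllFields,
        // A field lost its searchability: its words must be deleted everywhere.
        (Meta { searchable: true, .. }, Meta { searchable: false, .. }) => Action::ReindexAllFields,
        // A field became searchable: only the added fields need indexing.
        (Meta { searchable: false, .. }, Meta { searchable: true, .. }) => Action::IndexAddedFields,
        // No relevant change for this field: keep the current action.
        _ => action,
    }
}

fn main() {
    let lost = fold_action(
        Action::SkipDocument,
        Meta { searchable: true, exact: false },
        Meta { searchable: false, exact: false },
    );
    assert!(lost == Action::ReindexAllFields);
}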

View File

@@ -16,7 +16,9 @@ use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::{
FieldId, GlobalFieldsIdsMap, PatternMatch, Result, UserError, MAX_POSITION_PER_ATTRIBUTE,
};
pub struct WordPairProximityDocidsExtractorData<'a> {
tokenizer: DocumentTokenizer<'a>,
@@ -279,7 +281,24 @@ fn process_document_tokens<'doc>(
word_positions.push_back((Rc::from(word), pos));
Ok(())
};
document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
let mut should_tokenize = |field_name: &str| {
let Some((field_id, meta)) = fields_ids_map.id_with_metadata_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};
let pattern_match = if meta.is_searchable() {
PatternMatch::Match
} else {
// TODO: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
PatternMatch::Parent
};
Ok((field_id, pattern_match))
};
document_tokenizer.tokenize_document(document, &mut should_tokenize, &mut token_fn)?;
drain_word_positions(word_positions, word_pair_proximity);
Ok(())
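
With this refactor, `tokenize_document` delegates field selection to a `should_tokenize` callback returning a `(FieldId, PatternMatch)` pair: `Match` tokenizes the field, `Parent` keeps walking into it so nested searchable fields (and the fields ids map) are still visited, and `NoMatch` prunes the subtree. A hedged sketch of a selective callback under that assumed contract, with a locally defined enum standing in for the crate's `PatternMatch`:

#[derive(PartialEq)]
enum PatternMatch { Match, Parent, NoMatch }

fn should_tokenize(field_name: &str, searchable: &[&str]) -> PatternMatch {
    if searchable.contains(&field_name) {
        PatternMatch::Match // tokenize this exact field
    } else if searchable.iter().any(|s| s.starts_with(field_name)) {
        // A searchable field may be nested underneath: keep walking
        // (dot-boundary checks omitted for brevity).
        PatternMatch::Parent
    } else {
        PatternMatch::NoMatch // prune this subtree entirely
    }
}

fn main() {
    let searchable = ["user.name", "title"];
    assert!(should_tokenize("title", &searchable) == PatternMatch::Match);
    assert!(should_tokenize("user", &searchable) == PatternMatch::Parent);
    assert!(should_tokenize("tags", &searchable) == PatternMatch::NoMatch);
}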

View File

@@ -2,6 +2,7 @@ mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod tokenize_document;
pub use extract_word_docids::SettingsChangeWordDocidsExtractors;
pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;

View File

@@ -8,10 +8,7 @@ use crate::update::new::document::Document;
use crate::update::new::extract::perm_json_p::{
seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
};
use crate::{
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
MAX_WORD_LENGTH,
};
use crate::{FieldId, InternalError, LocalizedAttributesRule, Result, MAX_WORD_LENGTH};
// todo: should be crate::proximity::MAX_DISTANCE but it has been forgotten
const MAX_DISTANCE: u32 = 8;
@@ -26,22 +23,16 @@ impl DocumentTokenizer<'_> {
pub fn tokenize_document<'doc>(
&self,
document: impl Document<'doc>,
field_id_map: &mut GlobalFieldsIdsMap,
should_tokenize: &mut impl FnMut(&str) -> Result<(FieldId, PatternMatch)>,
token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
) -> Result<()> {
let mut field_position = HashMap::new();
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};
if meta.is_searchable() {
let (field_id, pattern_match) = should_tokenize(field_name)?;
if pattern_match == PatternMatch::Match {
self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
}
// todo: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
Ok(PatternMatch::Match)
Ok(pattern_match)
};
for entry in document.iter_top_level_fields() {
@@ -192,7 +183,7 @@ mod test {
use super::*;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::update::new::document::{DocumentFromVersions, Versions};
use crate::FieldsIdsMap;
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, UserError};
#[test]
fn test_tokenize_document() {
@@ -231,6 +222,7 @@ mod test {
Default::default(),
Default::default(),
Default::default(),
Default::default(),
None,
None,
Default::default(),
@@ -251,15 +243,19 @@ mod test {
let document = Versions::single(document);
let document = DocumentFromVersions::new(&document);
let mut should_tokenize = |field_name: &str| {
let Some(field_id) = global_fields_ids_map.id_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};
Ok((field_id, PatternMatch::Match))
};
document_tokenizer
.tokenize_document(
document,
&mut global_fields_ids_map,
&mut |_fname, fid, pos, word| {
words.insert([fid, pos], word.to_string());
Ok(())
},
)
.tokenize_document(document, &mut should_tokenize, &mut |_fname, fid, pos, word| {
words.insert([fid, pos], word.to_string());
Ok(())
})
.unwrap();
snapshot!(format!("{:#?}", words), @r###"

View File

@@ -1,5 +1,6 @@
use std::cell::RefCell;
use std::fmt::Debug;
use std::sync::RwLock;
use bumpalo::collections::Vec as BVec;
use bumpalo::Bump;
@@ -27,7 +28,10 @@ use crate::vector::extractor::{
use crate::vector::session::{EmbedSession, Input, Metadata, OnEmbed};
use crate::vector::settings::ReindexAction;
use crate::vector::{Embedding, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment};
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
use crate::{
DocumentId, FieldDistribution, GlobalFieldsIdsMap, InternalError, Result, ThreadPoolNoAbort,
UserError,
};
pub struct EmbeddingExtractor<'a, 'b> {
embedders: &'a RuntimeEmbedders,
@@ -321,6 +325,15 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
let old_embedders = self.settings_delta.old_embedders();
let unused_vectors_distribution = UnusedVectorsDistributionBump::new_in(&context.doc_alloc);
// We get a reference to the new and old fields ids maps but
// note that those are local versions where updates to them
// will not be reflected in the database. It's not an issue
// because new settings do not generate new fields.
let new_fields_ids_map = RwLock::new(self.settings_delta.new_fields_ids_map().clone());
let new_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&new_fields_ids_map));
let old_fields_ids_map = RwLock::new(self.settings_delta.old_fields_ids_map().clone());
let old_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&old_fields_ids_map));
let mut all_chunks = BVec::with_capacity_in(embedders.len(), &context.doc_alloc);
let embedder_configs = context.index.embedding_configs();
for (embedder_name, action) in self.settings_delta.embedder_actions().iter() {
@@ -396,6 +409,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
if !must_regenerate {
continue;
}
// we need to regenerate the prompts for the document
chunks.settings_change_autogenerated(
document.docid(),
@@ -406,7 +420,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
context.db_fields_ids_map,
)?,
self.settings_delta,
context.new_fields_ids_map,
&old_fields_ids_map,
&new_fields_ids_map,
&unused_vectors_distribution,
old_is_user_provided,
fragments_changed,
@@ -442,7 +457,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
context.db_fields_ids_map,
)?,
self.settings_delta,
context.new_fields_ids_map,
&old_fields_ids_map,
&new_fields_ids_map,
&unused_vectors_distribution,
old_is_user_provided,
true,
@@ -638,7 +654,8 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
external_docid: &'a str,
document: D,
settings_delta: &SD,
fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
old_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
new_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
old_is_user_provided: bool,
full_reindex: bool,
@@ -733,10 +750,17 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
};
let extractor =
DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
let extractor = DocumentTemplateExtractor::new(
document_template,
doc_alloc,
new_fields_ids_map,
);
let old_extractor = old_document_template.map(|old_document_template| {
DocumentTemplateExtractor::new(old_document_template, doc_alloc, fields_ids_map)
DocumentTemplateExtractor::new(
old_document_template,
doc_alloc,
old_fields_ids_map,
)
});
let metadata =
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };

View File

@@ -372,11 +372,10 @@ where
SD: SettingsDelta + Sync,
{
// Create the list of document ids to extract
let rtxn = indexing_context.index.read_txn()?;
let all_document_ids =
indexing_context.index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
let primary_key =
primary_key_from_db(indexing_context.index, &rtxn, &indexing_context.db_fields_ids_map)?;
let index = indexing_context.index;
let rtxn = index.read_txn()?;
let all_document_ids = index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
let primary_key = primary_key_from_db(index, &rtxn, &indexing_context.db_fields_ids_map)?;
let documents = DocumentsIndentifiers::new(&all_document_ids, primary_key);
let span =
@@ -391,6 +390,102 @@ where
extractor_allocs,
)?;
'_word_docids: {
let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
SettingsChangeWordDocidsExtractors::run_extraction(
settings_delta,
&documents,
indexing_context,
extractor_allocs,
IndexingStep::ExtractingWords,
)?
};
indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordDocids);
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span =
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
};
'vectors: {
if settings_delta.embedder_actions().is_empty() {
break 'vectors;

View File

@@ -1,4 +1,4 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Once, RwLock};
use std::thread::{self, Builder};
@@ -8,9 +8,11 @@ use document_changes::{DocumentChanges, IndexingContext};
pub use document_deletion::DocumentDeletion;
pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap;
use heed::types::DecodeIgnore;
use heed::{RoTxn, RwTxn};
pub use partial_dump::PartialDump;
pub use post_processing::recompute_word_fst_from_word_docids_database;
pub use settings_changes::settings_change_extract;
pub use update_by_function::UpdateByFunction;
pub use write::ChannelCongestion;
use write::{build_vectors, update_index, write_to_db};
@@ -21,11 +23,15 @@ use super::thread_local::ThreadLocal;
use crate::documents::PrimaryKey;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::progress::{EmbedderStats, Progress};
use crate::update::new::steps::SettingsIndexerStep;
use crate::update::new::FacetFieldIdsDelta;
use crate::update::settings::SettingsDelta;
use crate::update::GrenadParameters;
use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
use crate::vector::{Embedder, RuntimeEmbedders, VectorStore};
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
use crate::{
Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
};
pub(crate) mod de;
pub mod document_changes;
@@ -235,6 +241,12 @@ where
SD: SettingsDelta + Sync,
{
delete_old_embedders_and_fragments(wtxn, index, settings_delta)?;
delete_old_fid_based_databases(wtxn, index, settings_delta, must_stop_processing, progress)?;
// TODO delete useless searchable databases
// - Clear word_pair_proximity if byWord to byAttribute
// - Clear fid_prefix_* in the post processing
// - clear the prefix + fid_prefix if setting `PrefixSearch` is enabled
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
@@ -293,6 +305,8 @@ where
.unwrap()
})?;
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let new_embedders = settings_delta.new_embedders();
let embedder_actions = settings_delta.embedder_actions();
let index_embedder_category_ids = settings_delta.new_embedder_category_id();
@@ -327,6 +341,18 @@ where
})
.unwrap()?;
pool.install(|| {
// WARN When implementing the facets don't forget this
let facet_field_ids_delta = FacetFieldIdsDelta::new(0, 0);
post_processing::post_process(
indexing_context,
wtxn,
global_fields_ids_map,
facet_field_ids_delta,
)
})
.unwrap()?;
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
index.cellulite.build(
wtxn,
@@ -456,6 +482,98 @@ where
Ok(())
}
/// Deletes entries referring to the provided
/// fids from the fid-based databases.
fn delete_old_fid_based_databases<SD, MSP>(
wtxn: &mut RwTxn<'_>,
index: &Index,
settings_delta: &SD,
must_stop_processing: &MSP,
progress: &Progress,
) -> Result<()>
where
SD: SettingsDelta + Sync,
MSP: Fn() -> bool + Sync,
{
let fids_to_delete: Option<BTreeSet<_>> = {
let rtxn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let old_searchable_attributes = settings_delta.old_searchable_attributes().as_ref();
let new_searchable_attributes = settings_delta.new_searchable_attributes().as_ref();
old_searchable_attributes.zip(new_searchable_attributes).map(|(old, new)| {
old.iter()
// Ignore the field if it is not searchable anymore
// or if it was never referenced in any document
.filter_map(|name| if new.contains(name) { None } else { fields_ids_map.id(name) })
.collect()
})
};
let fids_to_delete = match fids_to_delete {
Some(fids) => fids,
None => return Ok(()),
};
progress.update_progress(SettingsIndexerStep::DeletingOldWordFidDocids);
delete_old_word_fid_docids(wtxn, index, must_stop_processing, &fids_to_delete)?;
progress.update_progress(SettingsIndexerStep::DeletingOldFidWordCountDocids);
delete_old_fid_word_count_docids(wtxn, index, must_stop_processing, fids_to_delete)?;
Ok(())
}
fn delete_old_word_fid_docids<MSP>(
wtxn: &mut RwTxn<'_>,
index: &Index,
must_stop_processing: &MSP,
fids_to_delete: &BTreeSet<u16>,
) -> Result<(), Error>
where
MSP: Fn() -> bool + Sync,
{
let mut iter = index.word_fid_docids.iter_mut(wtxn)?.remap_data_type::<DecodeIgnore>();
while let Some(((_word, fid), ())) = iter.next().transpose()? {
// TODO should I call it that often?
if must_stop_processing() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
if fids_to_delete.contains(&fid) {
// safety: We don't keep any references to the data.
unsafe { iter.del_current()? };
}
}
Ok(())
}
fn delete_old_fid_word_count_docids<MSP>(
wtxn: &mut RwTxn<'_>,
index: &Index,
must_stop_processing: &MSP,
fids_to_delete: BTreeSet<u16>,
) -> Result<(), Error>
where
MSP: Fn() -> bool + Sync,
{
let db = index.field_id_word_count_docids.remap_data_type::<DecodeIgnore>();
for fid_to_delete in fids_to_delete {
if must_stop_processing() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
let mut iter = db.prefix_iter_mut(wtxn, &(fid_to_delete, 0))?;
while let Some(((fid, _word_count), ())) = iter.next().transpose()? {
debug_assert_eq!(fid, fid_to_delete);
// safety: We don't keep any references to the data.
unsafe { iter.del_current()? };
}
}
Ok(())
}
fn indexer_memory_settings(
current_num_threads: usize,
grenad_parameters: GrenadParameters,

View File

@@ -28,6 +28,8 @@ make_enum_progress! {
ChangingVectorStore,
UsingStableIndexer,
UsingExperimentalIndexer,
DeletingOldWordFidDocids,
DeletingOldFidWordCountDocids,
}
}

View File

@@ -1589,33 +1589,33 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// only use the new indexer when only the embedder possibly changed
if let Self {
searchable_fields: Setting::NotSet,
searchable_fields: _,
displayed_fields: Setting::NotSet,
filterable_fields: Setting::NotSet,
sortable_fields: Setting::NotSet,
criteria: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
stop_words: Setting::NotSet, // TODO (require force reindexing of searchables)
non_separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
dictionary: Setting::NotSet, // TODO (require force reindexing of searchables)
distinct_field: Setting::NotSet,
synonyms: Setting::NotSet,
primary_key: Setting::NotSet,
authorize_typos: Setting::NotSet,
min_word_len_two_typos: Setting::NotSet,
min_word_len_one_typo: Setting::NotSet,
exact_words: Setting::NotSet,
exact_attributes: Setting::NotSet,
exact_words: Setting::NotSet, // TODO (require force reindexing of searchables)
exact_attributes: _,
max_values_per_facet: Setting::NotSet,
sort_facet_values_by: Setting::NotSet,
pagination_max_total_hits: Setting::NotSet,
proximity_precision: Setting::NotSet,
embedder_settings: _,
search_cutoff: Setting::NotSet,
localized_attributes_rules: Setting::NotSet,
prefix_search: Setting::NotSet,
localized_attributes_rules: Setting::NotSet, // TODO to start with
prefix_search: Setting::NotSet, // TODO continue with this
facet_search: Setting::NotSet,
disable_on_numbers: Setting::NotSet,
disable_on_numbers: Setting::NotSet, // TODO (require force reindexing of searchables)
chat: Setting::NotSet,
vector_store: Setting::NotSet,
wtxn: _,
@@ -1632,10 +1632,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// Update index settings
let embedding_config_updates = self.update_embedding_configs()?;
self.update_user_defined_searchable_attributes()?;
self.update_exact_attributes()?;
let mut new_inner_settings =
InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
// Note that we don't need to update the searchables here,
// as it will be done after the settings update.
let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
let primary_key_id = self
.index
@@ -2062,9 +2063,12 @@ impl InnerIndexSettings {
let sortable_fields = index.sortable_fields(rtxn)?;
let asc_desc_fields = index.asc_desc_fields(rtxn)?;
let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
let user_defined_searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
let user_defined_searchable_attributes = match index.user_defined_searchable_fields(rtxn)? {
Some(fields) if fields.contains(&"*") => None,
Some(fields) => Some(fields.into_iter().map(|f| f.to_string()).collect()),
None => None,
};
let builder = MetadataBuilder::from_index(index, rtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
@@ -2578,8 +2582,17 @@ fn deserialize_sub_embedder(
/// Implement this trait for the settings delta type.
/// This is used in the new settings update flow and will allow to easily replace the old settings delta type: `InnerIndexSettingsDiff`.
pub trait SettingsDelta {
fn new_embedders(&self) -> &RuntimeEmbedders;
fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
fn old_searchable_attributes(&self) -> &Option<Vec<String>>;
fn new_searchable_attributes(&self) -> &Option<Vec<String>>;
fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms;
fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms;
fn old_embedders(&self) -> &RuntimeEmbedders;
fn new_embedders(&self) -> &RuntimeEmbedders;
fn new_embedder_category_id(&self) -> &HashMap<String, u8>;
fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction>;
fn try_for_each_fragment_diff<F, E>(
@@ -2589,7 +2602,6 @@ pub trait SettingsDelta {
) -> std::result::Result<(), E>
where
F: FnMut(FragmentDiff) -> std::result::Result<(), E>;
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
}
pub struct FragmentDiff<'a> {
@@ -2598,26 +2610,40 @@ pub struct FragmentDiff<'a> {
}
impl SettingsDelta for InnerIndexSettingsDiff {
fn new_embedders(&self) -> &RuntimeEmbedders {
&self.new.runtime_embedders
fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
&self.old.fields_ids_map
}
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
&self.new.fields_ids_map
}
fn old_searchable_attributes(&self) -> &Option<Vec<String>> {
&self.old.user_defined_searchable_attributes
}
fn new_searchable_attributes(&self) -> &Option<Vec<String>> {
&self.new.user_defined_searchable_attributes
}
fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms {
&self.old.disabled_typos_terms
}
fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms {
&self.new.disabled_typos_terms
}
fn old_embedders(&self) -> &RuntimeEmbedders {
&self.old.runtime_embedders
}
fn new_embedders(&self) -> &RuntimeEmbedders {
&self.new.runtime_embedders
}
fn new_embedder_category_id(&self) -> &HashMap<String, u8> {
&self.new.embedder_category_id
}
fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction> {
&self.embedding_config_updates
}
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
&self.new.fields_ids_map
}
fn try_for_each_fragment_diff<F, E>(
&self,
embedder_name: &str,

View File

@@ -14,28 +14,21 @@ fn set_and_reset_searchable_fields() {
let index = TempIndex::new();
// First we send 3 documents with ids from 1 to 3.
let mut wtxn = index.write_txn().unwrap();
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "id": 1, "name": "kevin", "age": 23 },
{ "id": 2, "name": "kevina", "age": 21},
{ "id": 3, "name": "benoit", "age": 34 }
]),
)
.add_documents(documents!([
{ "id": 1, "name": "kevin", "age": 23 },
{ "id": 2, "name": "kevina", "age": 21},
{ "id": 3, "name": "benoit", "age": 34 }
]))
.unwrap();
// We change the searchable fields to be the "name" field only.
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
.update_settings(|settings| {
settings.set_searchable_fields(vec!["name".into()]);
})
.unwrap();
wtxn.commit().unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |

View File

@@ -112,13 +112,12 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
rendered: I,
unused_vectors_distribution: &C::ErrorMetadata,
) -> Result<()> {
if self.inputs.len() < self.inputs.capacity() {
self.inputs.push(rendered);
self.metadata.push(metadata);
return Ok(());
if self.inputs.len() >= self.inputs.capacity() {
self.embed_chunks(unused_vectors_distribution)?;
}
self.embed_chunks(unused_vectors_distribution)
self.inputs.push(rendered);
self.metadata.push(metadata);
Ok(())
}
pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result<C> {
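
The reordering above is what fixes the skipped-embedding bug: the old code pushed only when the buffer had room and otherwise flushed and returned, silently dropping the request that triggered the flush. The new shape is flush-then-push, which never loses the incoming item. A minimal sketch of the pattern with hypothetical names (a `Batcher` standing in for `EmbedSession`):

struct Batcher {
    inputs: Vec<String>,
}

impl Batcher {
    fn new(capacity: usize) -> Self {
        // Capacity is fixed at construction and doubles as the chunk size.
        Self { inputs: Vec::with_capacity(capacity) }
    }

    // Flush first when full, so the item that triggers the flush is
    // queued afterwards instead of being dropped (the old bug).
    fn request(&mut self, rendered: String) {
        if self.inputs.len() >= self.inputs.capacity() {
            self.flush();
        }
        self.inputs.push(rendered);
    }

    fn flush(&mut self) {
        // Here: send self.inputs as one embedding request, then reuse the buffer.
        self.inputs.clear();
    }
}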