improve documentation

time for some tests
threading down on_missing_document param
2025-12-15 08:56:56 +00:00 · 2025-12-11 16:30:04 -05:00 · 2025-12-11 16:11:22 -05:00 · 2025-12-11 15:54:11 -05:00 · 2025-12-11 14:31:16 -05:00 · 2025-12-11 14:13:11 -05:00
29 changed files with 519 additions and 1311 deletions
--- a/.github/workflows/publish-release-assets.yml
+++ b/.github/workflows/publish-release-assets.yml
@@ -104,13 +104,13 @@ jobs:
      - name: Generate OpenAPI file
        run: |
          cd crates/openapi-generator
-          cargo run --release -- --pretty --output ../../meilisearch-openapi.json
+          cargo run --release -- --pretty --output ../../meilisearch.json
      - name: Upload OpenAPI to Release
        # No need to upload for dry run (cron or workflow_dispatch)
        if: github.event_name == 'release'
        uses: svenstaro/upload-release-action@2.11.2
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
-          file: ./meilisearch-openapi.json
+          file: ./meilisearch.json
          asset_name: meilisearch-openapi.json
          tag: ${{ github.ref }}
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,3 @@ crates/meilisearch/db.snapshot

 # Fuzzcheck data for the facet indexing fuzz test
 crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
-
-# OpenAPI generator
-**/meilisearch-openapi.json
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -117,7 +117,7 @@ With swagger:
 With the internal crate:
 ```bash
 cd crates/openapi-generator
-cargo run --release -- --pretty
+cargo run --release -- --pretty --output meilisearch.json
 ```

 ### Logging
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/crates/fuzzers/src/bin/fuzz-indexing.rs
+++ b/crates/fuzzers/src/bin/fuzz-indexing.rs
@@ -113,9 +113,9 @@ fn main() {

                            for op in &operations {
                                match op {
-                                    Either::Left(documents) => {
-                                        indexer.replace_documents(documents).unwrap()
-                                    }
+                                    Either::Left(documents) => indexer
+                                        .replace_documents(documents, Default::default())
+                                        .unwrap(),
                                    Either::Right(ids) => indexer.delete_documents(ids),
                                }
                            }
--- a/crates/index-scheduler/src/dump.rs
+++ b/crates/index-scheduler/src/dump.rs
@@ -164,6 +164,7 @@ impl<'a> Dump<'a> {
                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
                    documents_count,
                    allow_index_creation,
+                    on_missing_document: Default::default(),
                },
                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
                    documents_ids,
--- a/crates/index-scheduler/src/scheduler/autobatcher_test.rs
+++ b/crates/index-scheduler/src/scheduler/autobatcher_test.rs
@@ -40,6 +40,7 @@ fn doc_imp(
        content_file: Uuid::new_v4(),
        documents_count: 0,
        allow_index_creation,
+        on_missing_document: Default::default(),
    }
 }

--- a/crates/index-scheduler/src/scheduler/create_batch.rs
+++ b/crates/index-scheduler/src/scheduler/create_batch.rs
@@ -2,7 +2,7 @@ use std::fmt;
 use std::io::ErrorKind;

 use meilisearch_types::heed::RoTxn;
-use meilisearch_types::milli::update::IndexDocumentsMethod;
+use meilisearch_types::milli::update::{IndexDocumentsMethod, MissingDocumentPolicy};
 use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::tasks::{BatchStopReason, Kind, KindWithContent, Status, Task};
 use roaring::RoaringBitmap;
@@ -63,8 +63,8 @@ pub(crate) enum Batch {

 #[derive(Debug)]
 pub(crate) enum DocumentOperation {
-    Replace(Uuid),
-    Update(Uuid),
+    Replace { content_file: Uuid, on_missing_document: MissingDocumentPolicy },
+    Update { content_file: Uuid, on_missing_document: MissingDocumentPolicy },
    Delete(Vec<String>),
 }

@@ -293,13 +293,22 @@ impl IndexScheduler {
                for task in tasks.iter() {
                    match task.kind {
                        KindWithContent::DocumentAdditionOrUpdate {
-                            content_file, method, ..
+                            content_file,
+                            method,
+                            on_missing_document,
+                            ..
                        } => match method {
                            IndexDocumentsMethod::ReplaceDocuments => {
-                                operations.push(DocumentOperation::Replace(content_file))
+                                operations.push(DocumentOperation::Replace {
+                                    content_file,
+                                    on_missing_document,
+                                })
                            }
                            IndexDocumentsMethod::UpdateDocuments => {
-                                operations.push(DocumentOperation::Update(content_file))
+                                operations.push(DocumentOperation::Update {
+                                    content_file,
+                                    on_missing_document,
+                                })
                            }
                            _ => unreachable!("Unknown document merging method"),
                        },
--- a/crates/index-scheduler/src/scheduler/process_index_operation.rs
+++ b/crates/index-scheduler/src/scheduler/process_index_operation.rs
@@ -77,8 +77,8 @@ impl IndexScheduler {
                let mut content_files = Vec::new();
                for operation in &operations {
                    match operation {
-                        DocumentOperation::Replace(content_uuid)
-                        | DocumentOperation::Update(content_uuid) => {
+                        DocumentOperation::Replace { content_file: content_uuid, .. }
+                        | DocumentOperation::Update { content_file: content_uuid, .. } => {
                            let content_file = self.queue.file_store.get_update(*content_uuid)?;
                            let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
                            content_files.push(mmap);
@@ -100,16 +100,16 @@ impl IndexScheduler {
                let embedders = self.embedders(index_uid.clone(), embedders)?;
                for operation in operations {
                    match operation {
-                        DocumentOperation::Replace(_content_uuid) => {
+                        DocumentOperation::Replace { content_file: _, on_missing_document } => {
                            let mmap = content_files_iter.next().unwrap();
                            indexer
-                                .replace_documents(mmap)
+                                .replace_documents(mmap, on_missing_document)
                                .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
                        }
-                        DocumentOperation::Update(_content_uuid) => {
+                        DocumentOperation::Update { content_file: _, on_missing_document } => {
                            let mmap = content_files_iter.next().unwrap();
                            indexer
-                                .update_documents(mmap)
+                                .update_documents(mmap, on_missing_document)
                                .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
                        }
                        DocumentOperation::Delete(document_ids) => {
--- a/crates/index-scheduler/src/scheduler/test.rs
+++ b/crates/index-scheduler/src/scheduler/test.rs
@@ -294,6 +294,7 @@ fn document_addition_and_index_deletion() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -482,6 +483,7 @@ fn document_addition_and_index_deletion_on_unexisting_index() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
--- a/crates/index-scheduler/src/scheduler/test_document_addition.rs
+++ b/crates/index-scheduler/src/scheduler/test_document_addition.rs
@@ -31,6 +31,7 @@ fn document_addition() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -67,6 +68,7 @@ fn document_addition_and_document_deletion() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -133,6 +135,7 @@ fn document_deletion_and_document_addition() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -185,6 +188,7 @@ fn test_document_replace() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -236,6 +240,7 @@ fn test_document_update() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -289,6 +294,7 @@ fn test_mixed_document_addition() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -340,6 +346,7 @@ fn test_document_replace_without_autobatching() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -395,6 +402,7 @@ fn test_document_update_without_autobatching() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -454,6 +462,7 @@ fn test_document_addition_cant_create_index_without_index() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: false,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -506,6 +515,7 @@ fn test_document_addition_cant_create_index_without_index_without_autobatching()
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: false,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -568,6 +578,7 @@ fn test_document_addition_cant_create_index_with_index() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: false,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -635,6 +646,7 @@ fn test_document_addition_cant_create_index_with_index_without_autobatching() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: false,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -707,6 +719,7 @@ fn test_document_addition_mixed_rights_with_index() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -764,6 +777,7 @@ fn test_document_addition_mixed_right_without_index_starts_with_cant_create() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -820,6 +834,7 @@ fn test_document_addition_with_multiple_primary_key() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -883,6 +898,7 @@ fn test_document_addition_with_multiple_primary_key_batch_wrong_key() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -943,6 +959,7 @@ fn test_document_addition_with_bad_primary_key() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -1029,6 +1046,7 @@ fn test_document_addition_with_set_and_null_primary_key() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
@@ -1104,6 +1122,7 @@ fn test_document_addition_with_set_and_null_primary_key_inference_works() {
                    content_file: uuid,
                    documents_count,
                    allow_index_creation: true,
+                    on_missing_document: Default::default(),
                },
                None,
                false,
--- a/crates/index-scheduler/src/scheduler/test_embedders.rs
+++ b/crates/index-scheduler/src/scheduler/test_embedders.rs
@@ -173,6 +173,7 @@ fn import_vectors() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -263,6 +264,7 @@ fn import_vectors() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -399,6 +401,7 @@ fn import_vectors_first_and_embedder_later() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -539,6 +542,7 @@ fn import_vectors_first_and_embedder_later() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -640,6 +644,7 @@ fn delete_document_containing_vector() {
                content_file: uuid,
                documents_count,
                allow_index_creation: false,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -818,6 +823,7 @@ fn delete_embedder_with_user_provided_vectors() {
                content_file: uuid,
                documents_count,
                allow_index_creation: false,
+                on_missing_document: Default::default(),
            },
            None,
            false,
--- a/crates/index-scheduler/src/scheduler/test_failure.rs
+++ b/crates/index-scheduler/src/scheduler/test_failure.rs
@@ -52,6 +52,7 @@ fn fail_in_process_batch_for_document_addition() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -94,6 +95,7 @@ fn fail_in_update_task_after_process_batch_success_for_document_addition() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
@@ -160,6 +162,7 @@ fn fail_in_process_batch_for_document_deletion() {
                content_file: uuid,
                documents_count,
                allow_index_creation: true,
+                on_missing_document: Default::default(),
            },
            None,
            false,
--- a/crates/index-scheduler/src/test_utils.rs
+++ b/crates/index-scheduler/src/test_utils.rs
@@ -197,6 +197,7 @@ pub(crate) fn replace_document_import_task(
        content_file: Uuid::from_u128(content_file_uuid),
        documents_count,
        allow_index_creation: true,
+        on_missing_document: Default::default(),
    }
 }

--- a/crates/meilisearch-types/src/error.rs
+++ b/crates/meilisearch-types/src/error.rs
@@ -255,6 +255,7 @@ InvalidIndexLimit                              , InvalidRequest       , BAD_REQU
 InvalidIndexOffset                             , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexPrimaryKey                         , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexCustomMetadata                     , InvalidRequest       , BAD_REQUEST ;
+InvalidSkipCreation                            , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexUid                                , InvalidRequest       , BAD_REQUEST ;
 InvalidMultiSearchFacets                       , InvalidRequest       , BAD_REQUEST ;
 InvalidMultiSearchFacetsByIndex                , InvalidRequest       , BAD_REQUEST ;
--- a/crates/meilisearch-types/src/tasks.rs
+++ b/crates/meilisearch-types/src/tasks.rs
@@ -5,7 +5,7 @@ use std::str::FromStr;

 use byte_unit::Byte;
 use enum_iterator::Sequence;
-use milli::update::IndexDocumentsMethod;
+use milli::update::{IndexDocumentsMethod, MissingDocumentPolicy};
 use milli::Object;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize, Serializer};
@@ -114,6 +114,7 @@ pub enum KindWithContent {
        content_file: Uuid,
        documents_count: u64,
        allow_index_creation: bool,
+        on_missing_document: MissingDocumentPolicy,
    },
    DocumentDeletion {
        index_uid: String,
--- a/crates/meilisearch/src/lib.rs
+++ b/crates/meilisearch/src/lib.rs
@@ -629,7 +629,7 @@ fn import_dump(

            let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? };

-            indexer.replace_documents(&mmap)?;
+            indexer.replace_documents(&mmap, Default::default())?;

            let indexer_config = index_scheduler.indexer_config();
            let pool = &indexer_config.thread_pool;
--- a/crates/meilisearch/src/routes/indexes/compact.rs
+++ b/crates/meilisearch/src/routes/indexes/compact.rs
@@ -34,7 +34,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 /// Compact an index
 #[utoipa::path(
    post,
-    path = "/{indexUid}/compact",
+    path = "{indexUid}/compact",
    tag = "Compact an index",
    security(("Bearer" = ["search", "*"])),
    params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@@ -20,7 +20,7 @@ use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::documents::sort::recursive_sort;
 use meilisearch_types::milli::index::EmbeddingsWithMetadata;
-use meilisearch_types::milli::update::IndexDocumentsMethod;
+use meilisearch_types::milli::update::{IndexDocumentsMethod, MissingDocumentPolicy};
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::{AscDesc, DocumentId};
 use meilisearch_types::serde_cs::vec::CS;
@@ -687,6 +687,11 @@ pub struct UpdateDocumentsQuery {
    #[param(example = "custom")]
    #[deserr(default, error = DeserrQueryParamError<InvalidIndexCustomMetadata>)]
    pub custom_metadata: Option<String>,
+
+    #[param(example = "true")]
+    #[deserr(default, try_from(&String) = from_string_skip_creation -> DeserrQueryParamError<InvalidSkipCreation>, error = DeserrQueryParamError<InvalidSkipCreation>)]
+    /// Only update documents if they already exist.
+    pub skip_creation: Option<bool>,
 }

 #[derive(Deserialize, Debug, Deserr, IntoParams)]
@@ -711,6 +716,23 @@ fn from_char_csv_delimiter(
    }
 }

+/// Parses the `skipCreation` query parameter: `true`/`false` (ASCII case-insensitive) map to
+/// `Some(bool)`; any other value is rejected with an `InvalidSkipCreation` error.
+fn from_string_skip_creation(
+    s: &String,
+) -> Result<Option<bool>, DeserrQueryParamError<InvalidSkipCreation>> {
+    if s.eq_ignore_ascii_case("true") {
+        Ok(Some(true))
+    } else if s.eq_ignore_ascii_case("false") {
+        Ok(Some(false))
+    } else {
+        Err(DeserrQueryParamError::new(
+            format!("skipCreation must be either `true` or `false`. Found: `{}`", s),
+            Code::InvalidSkipCreation,
+        ))
+    }
+}
+
 aggregate_methods!(
    Replaced => "Documents Added",
    Updated => "Documents Updated",
@@ -840,6 +862,7 @@ pub async fn replace_documents(
        params.custom_metadata,
        dry_run,
        allow_index_creation,
+        params.skip_creation,
        &req,
    )
    .await?;
@@ -943,6 +966,7 @@ pub async fn update_documents(
        params.custom_metadata,
        dry_run,
        allow_index_creation,
+        params.skip_creation,
        &req,
    )
    .await?;
@@ -963,6 +987,7 @@ async fn document_addition(
    custom_metadata: Option<String>,
    dry_run: bool,
    allow_index_creation: bool,
+    skip_creation: Option<bool>,
    req: &HttpRequest,
 ) -> Result<SummarizedTaskView, MeilisearchHttpError> {
    let mime_type = extract_mime_type(req)?;
@@ -1083,6 +1108,11 @@ async fn document_addition(
        primary_key,
        allow_index_creation,
        index_uid: index_uid.to_string(),
+        on_missing_document: if matches!(skip_creation, Some(true)) {
+            MissingDocumentPolicy::Skip
+        } else {
+            MissingDocumentPolicy::Create
+        },
    };

    let scheduler = index_scheduler.clone();
--- a/crates/milli/src/search/new/tests/integration.rs
+++ b/crates/milli/src/search/new/tests/integration.rs
@@ -64,7 +64,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    let payload = unsafe { memmap2::Mmap::map(&file).unwrap() };

    // index documents
-    indexer.replace_documents(&payload).unwrap();
+    indexer.replace_documents(&payload, Default::default()).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, operation_stats, primary_key) = indexer
--- a/crates/milli/src/test_index.rs
+++ b/crates/milli/src/test_index.rs
@@ -70,9 +70,11 @@ impl TempIndex {
        let mut indexer = indexer::DocumentOperation::new();
        match self.index_documents_config.update_method {
            IndexDocumentsMethod::ReplaceDocuments => {
-                indexer.replace_documents(&documents).unwrap()
+                indexer.replace_documents(&documents, Default::default()).unwrap()
+            }
+            IndexDocumentsMethod::UpdateDocuments => {
+                indexer.update_documents(&documents, Default::default()).unwrap()
            }
-            IndexDocumentsMethod::UpdateDocuments => indexer.update_documents(&documents).unwrap(),
        }

        let indexer_alloc = Bump::new();
@@ -232,7 +234,7 @@ fn aborting_indexation() {
        { "id": 2, "name": "bob", "age": 20 },
        { "id": 2, "name": "bob", "age": 20 },
    ]);
-    indexer.replace_documents(&payload).unwrap();
+    indexer.replace_documents(&payload, Default::default()).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, _operation_stats, primary_key) = indexer
--- a/crates/milli/src/update/index_documents/mod.rs
+++ b/crates/milli/src/update/index_documents/mod.rs
@@ -67,6 +67,21 @@ pub enum IndexDocumentsMethod {
    UpdateDocuments,
 }

+/// Controls whether new documents should be created when they don't already exist.
+///
+/// This policy is checked when processing a document whose ID is not found in the index.
+/// It applies to both update and replace operations.
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub enum MissingDocumentPolicy {
+    /// Create the document if it doesn't exist. This is the default behavior.
+    #[default]
+    Create,
+
+    /// Skip the document silently if it doesn't exist: no error is returned and the document is
+    /// simply not indexed.
+    Skip,
+}
+
 pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
    wtxn: &'t mut heed::RwTxn<'i>,
    index: &'i Index,
@@ -1971,10 +1986,10 @@ mod tests {
        let mut new_fields_ids_map = db_fields_ids_map.clone();

        let mut indexer = indexer::DocumentOperation::new();
-        indexer.replace_documents(&doc1).unwrap();
-        indexer.replace_documents(&doc2).unwrap();
-        indexer.replace_documents(&doc3).unwrap();
-        indexer.replace_documents(&doc4).unwrap();
+        indexer.replace_documents(&doc1, Default::default()).unwrap();
+        indexer.replace_documents(&doc2, Default::default()).unwrap();
+        indexer.replace_documents(&doc3, Default::default()).unwrap();
+        indexer.replace_documents(&doc4, Default::default()).unwrap();

        let indexer_alloc = Bump::new();
        let (_document_changes, operation_stats, _primary_key) = indexer
@@ -2024,10 +2039,10 @@ mod tests {
        let mut new_fields_ids_map = db_fields_ids_map.clone();

        let mut indexer = indexer::DocumentOperation::new();
-        indexer.replace_documents(&doc1).unwrap();
-        indexer.update_documents(&doc2).unwrap();
-        indexer.update_documents(&doc3).unwrap();
-        indexer.update_documents(&doc4).unwrap();
+        indexer.replace_documents(&doc1, Default::default()).unwrap();
+        indexer.update_documents(&doc2, Default::default()).unwrap();
+        indexer.update_documents(&doc3, Default::default()).unwrap();
+        indexer.update_documents(&doc4, Default::default()).unwrap();

        let indexer_alloc = Bump::new();
        let (document_changes, operation_stats, primary_key) = indexer
@@ -2112,11 +2127,11 @@ mod tests {
        let mut new_fields_ids_map = db_fields_ids_map.clone();

        let mut indexer = indexer::DocumentOperation::new();
-        indexer.replace_documents(&doc1).unwrap();
-        indexer.update_documents(&doc2).unwrap();
-        indexer.update_documents(&doc3).unwrap();
-        indexer.replace_documents(&doc4).unwrap();
-        indexer.update_documents(&doc5).unwrap();
+        indexer.replace_documents(&doc1, Default::default()).unwrap();
+        indexer.update_documents(&doc2, Default::default()).unwrap();
+        indexer.update_documents(&doc3, Default::default()).unwrap();
+        indexer.replace_documents(&doc4, Default::default()).unwrap();
+        indexer.update_documents(&doc5, Default::default()).unwrap();

        let indexer_alloc = Bump::new();
        let (document_changes, operation_stats, primary_key) = indexer
@@ -2307,7 +2322,7 @@ mod tests {
        let indexer_alloc = Bump::new();
        let embedders = RuntimeEmbedders::default();
        let mut indexer = indexer::DocumentOperation::new();
-        indexer.replace_documents(&documents).unwrap();
+        indexer.replace_documents(&documents, Default::default()).unwrap();
        indexer.delete_documents(&["2"]);
        let (document_changes, _operation_stats, primary_key) = indexer
            .into_changes(
@@ -2362,13 +2377,13 @@ mod tests {
            { "id": 3, "name": "jean", "age": 25 },
        ]);
        let mut indexer = indexer::DocumentOperation::new();
-        indexer.update_documents(&documents).unwrap();
+        indexer.update_documents(&documents, Default::default()).unwrap();

        let documents = documents!([
            { "id": 2, "catto": "jorts" },
            { "id": 3, "legs": 4 },
        ]);
-        indexer.update_documents(&documents).unwrap();
+        indexer.update_documents(&documents, Default::default()).unwrap();
        indexer.delete_documents(&["1", "2"]);

        let indexer_alloc = Bump::new();
@@ -2426,7 +2441,7 @@ mod tests {
        let indexer_alloc = Bump::new();
        let embedders = RuntimeEmbedders::default();
        let mut indexer = indexer::DocumentOperation::new();
-        indexer.update_documents(&documents).unwrap();
+        indexer.update_documents(&documents, Default::default()).unwrap();

        let (document_changes, _operation_stats, primary_key) = indexer
            .into_changes(
@@ -2479,7 +2494,7 @@ mod tests {
        let indexer_alloc = Bump::new();
        let embedders = RuntimeEmbedders::default();
        let mut indexer = indexer::DocumentOperation::new();
-        indexer.update_documents(&documents).unwrap();
+        indexer.update_documents(&documents, Default::default()).unwrap();
        indexer.delete_documents(&["1", "2"]);

        let (document_changes, _operation_stats, primary_key) = indexer
@@ -2536,7 +2551,7 @@ mod tests {
            { "id": 2, "doggo": { "name": "jean", "age": 20 } },
            { "id": 3, "name": "bob", "age": 25 },
        ]);
-        indexer.update_documents(&documents).unwrap();
+        indexer.update_documents(&documents, Default::default()).unwrap();

        let (document_changes, _operation_stats, primary_key) = indexer
            .into_changes(
@@ -2595,7 +2610,7 @@ mod tests {
            { "id": 2, "doggo": { "name": "jean", "age": 20 } },
            { "id": 3, "name": "bob", "age": 25 },
        ]);
-        indexer.update_documents(&documents).unwrap();
+        indexer.update_documents(&documents, Default::default()).unwrap();

        indexer.delete_documents(&["1", "2", "1", "2"]);

@@ -2651,7 +2666,7 @@ mod tests {
        let documents = documents!([
            { "id": 1, "doggo": "kevin" },
        ]);
-        indexer.update_documents(&documents).unwrap();
+        indexer.update_documents(&documents, Default::default()).unwrap();

        let (document_changes, _operation_stats, primary_key) = indexer
            .into_changes(
@@ -2705,7 +2720,7 @@ mod tests {
            { "id": 1, "catto": "jorts" },
        ]);

-        indexer.replace_documents(&documents).unwrap();
+        indexer.replace_documents(&documents, Default::default()).unwrap();

        let (document_changes, _operation_stats, primary_key) = indexer
            .into_changes(
@@ -2916,7 +2931,7 @@ mod tests {
        let documents = documents!([
            { "id": 1, "doggo": "bernese" },
        ]);
-        indexer.replace_documents(&documents).unwrap();
+        indexer.replace_documents(&documents, Default::default()).unwrap();

        // FINISHING
        let (document_changes, _operation_stats, primary_key) = indexer
@@ -2978,7 +2993,7 @@ mod tests {
        let documents = documents!([
            { "id": 0, "catto": "jorts" },
        ]);
-        indexer.replace_documents(&documents).unwrap();
+        indexer.replace_documents(&documents, Default::default()).unwrap();

        let (document_changes, _operation_stats, primary_key) = indexer
            .into_changes(
@@ -3036,7 +3051,7 @@ mod tests {
        let documents = documents!([
            { "id": 1, "catto": "jorts" },
        ]);
-        indexer.replace_documents(&documents).unwrap();
+        indexer.replace_documents(&documents, Default::default()).unwrap();

        let (document_changes, _operation_stats, primary_key) = indexer
            .into_changes(
--- a/crates/milli/src/update/new/indexer/document_operation.rs
+++ b/crates/milli/src/update/new/indexer/document_operation.rs
@@ -21,7 +21,7 @@ use crate::update::new::indexer::current_edition::sharding::Shards;
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::{DocumentIdentifiers, Insertion, Update};
-use crate::update::{AvailableIds, IndexDocumentsMethod};
+use crate::update::{AvailableIds, IndexDocumentsMethod, MissingDocumentPolicy};
 use crate::{DocumentId, Error, FieldsIdsMap, Index, InternalError, Result, UserError};

 #[derive(Default)]
@@ -37,20 +37,28 @@ impl<'pl> DocumentOperation<'pl> {
    /// Append a replacement of documents.
    ///
    /// The payload is expected to be in the NDJSON format
-    pub fn replace_documents(&mut self, payload: &'pl Mmap) -> Result<()> {
+    pub fn replace_documents(
+        &mut self,
+        payload: &'pl Mmap,
+        on_missing_document: MissingDocumentPolicy,
+    ) -> Result<()> {
        #[cfg(unix)]
        payload.advise(memmap2::Advice::Sequential)?;
-        self.operations.push(Payload::Replace(&payload[..]));
+        self.operations.push(Payload::Replace { payload: &payload[..], on_missing_document });
        Ok(())
    }

    /// Append an update of documents.
    ///
    /// The payload is expected to be in the NDJSON format
-    pub fn update_documents(&mut self, payload: &'pl Mmap) -> Result<()> {
+    pub fn update_documents(
+        &mut self,
+        payload: &'pl Mmap,
+        on_missing_document: MissingDocumentPolicy,
+    ) -> Result<()> {
        #[cfg(unix)]
        payload.advise(memmap2::Advice::Sequential)?;
-        self.operations.push(Payload::Update(&payload[..]));
+        self.operations.push(Payload::Update { payload: &payload[..], on_missing_document });
        Ok(())
    }

@@ -98,34 +106,40 @@ impl<'pl> DocumentOperation<'pl> {

            let mut bytes = 0;
            let result = match operation {
-                Payload::Replace(payload) => extract_addition_payload_changes(
-                    indexer,
-                    index,
-                    rtxn,
-                    primary_key_from_op,
-                    &mut primary_key,
-                    new_fields_ids_map,
-                    &mut available_docids,
-                    &mut bytes,
-                    &docids_version_offsets,
-                    IndexDocumentsMethod::ReplaceDocuments,
-                    shards,
-                    payload,
-                ),
-                Payload::Update(payload) => extract_addition_payload_changes(
-                    indexer,
-                    index,
-                    rtxn,
-                    primary_key_from_op,
-                    &mut primary_key,
-                    new_fields_ids_map,
-                    &mut available_docids,
-                    &mut bytes,
-                    &docids_version_offsets,
-                    IndexDocumentsMethod::UpdateDocuments,
-                    shards,
-                    payload,
-                ),
+                Payload::Replace { payload, on_missing_document } => {
+                    extract_addition_payload_changes(
+                        indexer,
+                        index,
+                        rtxn,
+                        primary_key_from_op,
+                        &mut primary_key,
+                        new_fields_ids_map,
+                        &mut available_docids,
+                        &mut bytes,
+                        &docids_version_offsets,
+                        IndexDocumentsMethod::ReplaceDocuments,
+                        shards,
+                        payload,
+                        on_missing_document,
+                    )
+                }
+                Payload::Update { payload, on_missing_document } => {
+                    extract_addition_payload_changes(
+                        indexer,
+                        index,
+                        rtxn,
+                        primary_key_from_op,
+                        &mut primary_key,
+                        new_fields_ids_map,
+                        &mut available_docids,
+                        &mut bytes,
+                        &docids_version_offsets,
+                        IndexDocumentsMethod::UpdateDocuments,
+                        shards,
+                        payload,
+                        on_missing_document,
+                    )
+                }
                Payload::Deletion(to_delete) => extract_deletion_payload_changes(
                    index,
                    rtxn,
@@ -180,6 +194,7 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
    method: IndexDocumentsMethod,
    shards: Option<&Shards>,
    payload: &'pl [u8],
+    on_missing_document: MissingDocumentPolicy,
 ) -> Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>> {
    use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};

@@ -271,6 +286,10 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(

                            match method {
                                ReplaceDocuments => {
+                                    if matches!(on_missing_document, MissingDocumentPolicy::Skip) {
+                                        continue;
+                                    }
+
                                    entry.insert(PayloadOperations::new_replacement(
                                        docid,
                                        true, // is new
@@ -278,6 +297,10 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
                                    ));
                                }
                                UpdateDocuments => {
+                                    if matches!(on_missing_document, MissingDocumentPolicy::Skip) {
+                                        continue;
+                                    }
+
                                    entry.insert(PayloadOperations::new_update(
                                        docid,
                                        true, // is new
@@ -297,6 +320,12 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
                },
                Entry::Vacant(entry) => match method {
                    ReplaceDocuments => {
+                        if payload_operations.is_new
+                            && matches!(on_missing_document, MissingDocumentPolicy::Skip)
+                        {
+                            continue;
+                        }
+
                        entry.insert(PayloadOperations::new_replacement(
                            payload_operations.docid,
                            payload_operations.is_new,
@@ -304,6 +333,12 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
                        ));
                    }
                    UpdateDocuments => {
+                        if payload_operations.is_new
+                            && matches!(on_missing_document, MissingDocumentPolicy::Skip)
+                        {
+                            continue;
+                        }
+
                        entry.insert(PayloadOperations::new_update(
                            payload_operations.docid,
                            payload_operations.is_new,
@@ -448,8 +483,8 @@ pub struct DocumentOperationChanges<'pl> {
 }

 pub enum Payload<'pl> {
-    Replace(&'pl [u8]),
-    Update(&'pl [u8]),
+    Replace { payload: &'pl [u8], on_missing_document: MissingDocumentPolicy },
+    Update { payload: &'pl [u8], on_missing_document: MissingDocumentPolicy },
    Deletion(&'pl [&'pl str]),
 }

--- a/crates/milli/tests/search/facet_distribution.rs
+++ b/crates/milli/tests/search/facet_distribution.rs
@@ -47,7 +47,7 @@ fn test_facet_distribution_with_no_facet_values() {
    let documents = mmap_from_objects(vec![doc1, doc2]);

    // index documents
-    indexer.replace_documents(&documents).unwrap();
+    indexer.replace_documents(&documents, Default::default()).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, _operation_stats, primary_key) = indexer
--- a/crates/milli/tests/search/mod.rs
+++ b/crates/milli/tests/search/mod.rs
@@ -85,7 +85,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    let payload = unsafe { memmap2::Mmap::map(&file).unwrap() };

    // index documents
-    indexer.replace_documents(&payload).unwrap();
+    indexer.replace_documents(&payload, Default::default()).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, operation_stats, primary_key) = indexer
--- a/crates/milli/tests/search/query_criteria.rs
+++ b/crates/milli/tests/search/query_criteria.rs
@@ -319,7 +319,7 @@ fn criteria_ascdesc() {
    file.sync_all().unwrap();

    let payload = unsafe { memmap2::Mmap::map(&file).unwrap() };
-    indexer.replace_documents(&payload).unwrap();
+    indexer.replace_documents(&payload, Default::default()).unwrap();
    let (document_changes, _operation_stats, primary_key) = indexer
        .into_changes(
            &indexer_alloc,
--- a/crates/milli/tests/search/typo_tolerance.rs
+++ b/crates/milli/tests/search/typo_tolerance.rs
@@ -126,7 +126,7 @@ fn test_typo_disabled_on_word() {
    let embedders = RuntimeEmbedders::default();
    let mut indexer = indexer::DocumentOperation::new();

-    indexer.replace_documents(&documents).unwrap();
+    indexer.replace_documents(&documents, Default::default()).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, _operation_stats, primary_key) = indexer
--- a/crates/openapi-generator/Cargo.toml
+++ b/crates/openapi-generator/Cargo.toml
@@ -10,5 +10,3 @@ serde_json = "1.0"
 clap = { version = "4.5.52", features = ["derive"] }
 anyhow = "1.0.100"
 utoipa = "5.4.0"
-reqwest = { version = "0.12", features = ["blocking"] }
-regex = "1.10"
--- a/crates/openapi-generator/src/main.rs
+++ b/crates/openapi-generator/src/main.rs
@@ -1,57 +1,21 @@
-use std::borrow::Cow;
-use std::collections::HashMap;
 use std::path::PathBuf;
-use std::sync::LazyLock;

-use anyhow::{Context, Result};
+use anyhow::Result;
 use clap::Parser;
 use meilisearch::routes::MeilisearchApi;
-use regex::Regex;
-use serde_json::{json, Value};
 use utoipa::OpenApi;

-const HTTP_METHODS: &[&str] = &["get", "post", "put", "patch", "delete"];
-
-/// Language used in the documentation repository (contains the key mapping)
-const DOCS_LANG: &str = "cURL";
-
-/// Mapping of repository URLs to language names.
-/// The "cURL" entry is special: it contains the key mapping used to resolve sample IDs for all SDKs.
-const CODE_SAMPLES: &[(&str, &str)] = &[
-    ("https://raw.githubusercontent.com/meilisearch/documentation/refs/heads/main/.code-samples.meilisearch.yaml", "cURL"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dotnet/refs/heads/main/.code-samples.meilisearch.yaml", "C#"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dart/refs/heads/main/.code-samples.meilisearch.yaml", "Dart"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-go/refs/heads/main/.code-samples.meilisearch.yaml", "Go"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-java/refs/heads/main/.code-samples.meilisearch.yaml", "Java"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-js/refs/heads/main/.code-samples.meilisearch.yaml", "JS"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-php/refs/heads/main/.code-samples.meilisearch.yaml", "PHP"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-python/refs/heads/main/.code-samples.meilisearch.yaml", "Python"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-ruby/refs/heads/main/.code-samples.meilisearch.yaml", "Ruby"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-rust/refs/heads/main/.code-samples.meilisearch.yaml", "Rust"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-swift/refs/heads/main/.code-samples.meilisearch.yaml", "Swift"),
-];
-
-// Pre-compiled regex patterns
-static COMMENT_RE: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"^#\s*([a-zA-Z0-9_]+)\s*$").unwrap());
-static CODE_START_RE: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"^([a-zA-Z0-9_]+):\s*\|-\s*$").unwrap());
-
 #[derive(Parser)]
 #[command(name = "openapi-generator")]
 #[command(about = "Generate OpenAPI specification for Meilisearch")]
 struct Cli {
-    /// Output file path (default: meilisearch-openapi.json)
+    /// Output file path (default: meilisearch.json)
    #[arg(short, long, value_name = "FILE")]
    output: Option<PathBuf>,

    /// Pretty print the JSON output
    #[arg(short, long)]
    pretty: bool,
-
-    /// Skip fetching code samples (offline mode)
-    #[arg(long)]
-    no_code_samples: bool,
 }

 fn main() -> Result<()> {
@@ -60,26 +24,14 @@ fn main() -> Result<()> {
    // Generate the OpenAPI specification
    let openapi = MeilisearchApi::openapi();

-    // Convert to serde_json::Value for modification
-    let mut openapi_value: Value = serde_json::to_value(&openapi)?;
-
-    // Fetch and add code samples if not disabled
-    if !cli.no_code_samples {
-        let code_samples = fetch_all_code_samples()?;
-        add_code_samples_to_openapi(&mut openapi_value, &code_samples)?;
-    }
-
-    // Clean up null descriptions in tags
-    clean_null_descriptions(&mut openapi_value);
-
    // Determine output path
-    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch-openapi.json"));
+    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch.json"));

    // Serialize to JSON
    let json = if cli.pretty {
-        serde_json::to_string_pretty(&openapi_value)?
+        serde_json::to_string_pretty(&openapi)?
    } else {
-        serde_json::to_string(&openapi_value)?
+        serde_json::to_string(&openapi)?
    };

    // Write to file
@@ -89,487 +41,3 @@ fn main() -> Result<()> {

    Ok(())
 }
-
-/// Code sample for a specific language
-#[derive(Debug, Clone)]
-struct CodeSample {
-    lang: String,
-    source: String,
-}
-
-/// Fetch and parse code samples from all repositories
-/// Returns a map from key (e.g., "get_indexes") to a list of code samples for different languages
-fn fetch_all_code_samples() -> Result<HashMap<String, Vec<CodeSample>>> {
-    // First, fetch the documentation file (cURL) to get the key mapping
-    let (docs_url, _) = CODE_SAMPLES
-        .iter()
-        .find(|(_, lang)| *lang == DOCS_LANG)
-        .context("Documentation source not found in CODE_SAMPLES")?;
-
-    let docs_content = reqwest::blocking::get(*docs_url)
-        .context("Failed to fetch documentation code samples")?
-        .text()
-        .context("Failed to read documentation code samples response")?;
-
-    let key_to_sample_ids = parse_documentation_mapping(&docs_content);
-
-    // Fetch code samples from all sources
-    let mut all_samples: HashMap<String, Vec<CodeSample>> = HashMap::new();
-
-    for (url, lang) in CODE_SAMPLES {
-        // For cURL, reuse already fetched content; for SDKs, fetch from URL
-        let content: Cow<'_, str> = if *lang == DOCS_LANG {
-            Cow::Borrowed(&docs_content)
-        } else {
-            match reqwest::blocking::get(*url).and_then(|r| r.text()) {
-                Ok(text) => Cow::Owned(text),
-                Err(e) => {
-                    eprintln!("Warning: Failed to fetch code samples for {}: {}", lang, e);
-                    continue;
-                }
-            }
-        };
-
-        let sample_id_to_code = parse_code_samples(&content);
-        for (key, sample_ids) in &key_to_sample_ids {
-            for sample_id in sample_ids {
-                if let Some(source) = sample_id_to_code.get(sample_id) {
-                    all_samples.entry(key.clone()).or_default().push(CodeSample {
-                        lang: lang.to_string(),
-                        source: source.clone(),
-                    });
-                }
-            }
-        }
-    }
-
-    Ok(all_samples)
-}
-
-/// Parse the documentation file to create a mapping from keys (comment IDs) to sample IDs
-/// Returns: HashMap<key, Vec<sample_id>>
-fn parse_documentation_mapping(content: &str) -> HashMap<String, Vec<String>> {
-    let mut mapping: HashMap<String, Vec<String>> = HashMap::new();
-    let mut current_key: Option<String> = None;
-
-    for line in content.lines() {
-        // Check if this is a comment line defining a new key
-        if let Some(caps) = COMMENT_RE.captures(line) {
-            current_key = Some(caps[1].to_string());
-            continue;
-        }
-
-        // Check if this starts a new code block and extract the sample_id
-        if let Some(caps) = CODE_START_RE.captures(line) {
-            let sample_id = caps[1].to_string();
-
-            if let Some(ref key) = current_key {
-                // Only associate this sample_id with the current key if it follows the pattern {key}_N
-                // This prevents samples without a preceding comment from being incorrectly associated
-                if sample_id.starts_with(&format!("{}_", key)) {
-                    mapping.entry(key.clone()).or_default().push(sample_id);
-                } else {
-                    // Sample ID doesn't match the current key, reset current_key
-                    current_key = None;
-                }
-            }
-        }
-    }
-
-    mapping
-}
-
-/// State machine for parsing YAML code blocks
-struct YamlCodeBlockParser {
-    current_value: Vec<String>,
-    in_code_block: bool,
-    base_indent: Option<usize>,
-}
-
-impl YamlCodeBlockParser {
-    fn new() -> Self {
-        Self { current_value: Vec::new(), in_code_block: false, base_indent: None }
-    }
-
-    fn start_new_block(&mut self) {
-        self.current_value.clear();
-        self.in_code_block = true;
-        self.base_indent = None;
-    }
-
-    fn take_value(&mut self) -> Option<String> {
-        if self.current_value.is_empty() {
-            return None;
-        }
-        let value = self.current_value.join("\n").trim_end().to_string();
-        self.current_value.clear();
-        self.in_code_block = false;
-        self.base_indent = None;
-        Some(value)
-    }
-
-    fn process_line(&mut self, line: &str) {
-        if !self.in_code_block {
-            return;
-        }
-
-        // Empty line or line with only whitespace
-        if line.trim().is_empty() {
-            // Only add empty lines if we've already started collecting
-            if !self.current_value.is_empty() {
-                self.current_value.push(String::new());
-            }
-            return;
-        }
-
-        // Calculate indentation
-        let indent = line.len() - line.trim_start().len();
-
-        // Set base indent from first non-empty line
-        let base = *self.base_indent.get_or_insert(indent);
-
-        // If line has less indentation than base, we've exited the block
-        if indent < base {
-            self.in_code_block = false;
-            return;
-        }
-
-        // Remove base indentation and add to value
-        let dedented = line.get(base..).unwrap_or_else(|| line.trim_start());
-        self.current_value.push(dedented.to_string());
-    }
-}
-
-/// Parse a code samples YAML file
-/// Returns: HashMap<sample_id, code>
-fn parse_code_samples(content: &str) -> HashMap<String, String> {
-    let mut samples: HashMap<String, String> = HashMap::new();
-    let mut current_sample_id: Option<String> = None;
-    let mut parser = YamlCodeBlockParser::new();
-
-    for line in content.lines() {
-        // Ignore comment lines
-        if line.starts_with('#') {
-            continue;
-        }
-
-        // Check if this starts a new code block
-        if let Some(caps) = CODE_START_RE.captures(line) {
-            // Save previous sample if exists
-            if let Some(sample_id) = current_sample_id.take() {
-                if let Some(value) = parser.take_value() {
-                    samples.insert(sample_id, value);
-                }
-            }
-            current_sample_id = Some(caps[1].to_string());
-            parser.start_new_block();
-            continue;
-        }
-
-        if current_sample_id.is_some() {
-            parser.process_line(line);
-        }
-    }
-
-    // Don't forget the last sample
-    if let Some(sample_id) = current_sample_id {
-        if let Some(value) = parser.take_value() {
-            samples.insert(sample_id, value);
-        }
-    }
-
-    samples
-}
-
-/// Convert an OpenAPI path to a code sample key
-/// Path: /indexes/{index_uid}/documents/{document_id}
-/// Method: GET
-/// Key: get_indexes_indexUid_documents_documentId
-fn path_to_key(path: &str, method: &str) -> String {
-    let method_lower = method.to_lowercase();
-
-    // Remove leading slash and convert path
-    let path_part = path
-        .trim_start_matches('/')
-        .split('/')
-        .map(|segment| {
-            if segment.starts_with('{') && segment.ends_with('}') {
-                // Convert {param_name} to camelCase
-                let param = &segment[1..segment.len() - 1];
-                to_camel_case(param)
-            } else {
-                // Keep path segments as-is, but replace hyphens with underscores
-                segment.replace('-', "_")
-            }
-        })
-        .collect::<Vec<_>>()
-        .join("_");
-
-    if path_part.is_empty() {
-        method_lower
-    } else {
-        format!("{}_{}", method_lower, path_part)
-    }
-}
-
-/// Convert snake_case to camelCase
-fn to_camel_case(s: &str) -> String {
-    let mut result = String::with_capacity(s.len());
-    let mut capitalize_next = false;
-
-    for (i, c) in s.chars().enumerate() {
-        match c {
-            '_' => capitalize_next = true,
-            _ if capitalize_next => {
-                result.push(c.to_ascii_uppercase());
-                capitalize_next = false;
-            }
-            _ if i == 0 => result.push(c.to_ascii_lowercase()),
-            _ => result.push(c),
-        }
-    }
-
-    result
-}
-
-/// Add code samples to the OpenAPI specification
-fn add_code_samples_to_openapi(
-    openapi: &mut Value,
-    code_samples: &HashMap<String, Vec<CodeSample>>,
-) -> Result<()> {
-    let paths = openapi
-        .get_mut("paths")
-        .and_then(|p| p.as_object_mut())
-        .context("OpenAPI spec missing 'paths' object")?;
-
-    for (path, path_item) in paths.iter_mut() {
-        let Some(path_item) = path_item.as_object_mut() else {
-            continue;
-        };
-
-        for method in HTTP_METHODS {
-            let Some(operation) = path_item.get_mut(*method) else {
-                continue;
-            };
-
-            let key = path_to_key(path, method);
-
-            if let Some(samples) = code_samples.get(&key) {
-                // Create x-codeSamples array according to Redocly spec
-                // Sort by language name for consistent output
-                let mut sorted_samples = samples.clone();
-                sorted_samples.sort_by(|a, b| a.lang.cmp(&b.lang));
-
-                let code_sample_array: Vec<Value> = sorted_samples
-                    .iter()
-                    .map(|sample| {
-                        json!({
-                            "lang": sample.lang,
-                            "source": sample.source
-                        })
-                    })
-                    .collect();
-
-                if let Some(op) = operation.as_object_mut() {
-                    op.insert("x-codeSamples".to_string(), json!(code_sample_array));
-                }
-            }
-        }
-    }
-
-    Ok(())
-}
-
-/// Clean up null descriptions in tags to make Mintlify work
-/// Removes any "description" fields with null values (both JSON null and "null" string)
-/// from the tags array and all nested objects
-fn clean_null_descriptions(openapi: &mut Value) {
-    if let Some(tags) = openapi.get_mut("tags").and_then(|t| t.as_array_mut()) {
-        for tag in tags.iter_mut() {
-            remove_null_descriptions_recursive(tag);
-        }
-    }
-}
-
-/// Recursively remove all "description" fields that are null or "null" string
-fn remove_null_descriptions_recursive(value: &mut Value) {
-    if let Some(obj) = value.as_object_mut() {
-        // Check and remove description if it's null or "null" string
-        if let Some(desc) = obj.get("description") {
-            if desc.is_null() || (desc.is_string() && desc.as_str() == Some("null")) {
-                obj.remove("description");
-            }
-        }
-
-        // Recursively process all nested objects
-        for (_, v) in obj.iter_mut() {
-            remove_null_descriptions_recursive(v);
-        }
-    } else if let Some(arr) = value.as_array_mut() {
-        // Recursively process arrays
-        for item in arr.iter_mut() {
-            remove_null_descriptions_recursive(item);
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_path_to_key() {
-        assert_eq!(path_to_key("/indexes", "GET"), "get_indexes");
-        assert_eq!(path_to_key("/indexes/{index_uid}", "GET"), "get_indexes_indexUid");
-        assert_eq!(
-            path_to_key("/indexes/{index_uid}/documents", "POST"),
-            "post_indexes_indexUid_documents"
-        );
-        assert_eq!(
-            path_to_key("/indexes/{index_uid}/documents/{document_id}", "GET"),
-            "get_indexes_indexUid_documents_documentId"
-        );
-        assert_eq!(
-            path_to_key("/indexes/{index_uid}/settings/stop-words", "GET"),
-            "get_indexes_indexUid_settings_stop_words"
-        );
-    }
-
-    #[test]
-    fn test_to_camel_case() {
-        assert_eq!(to_camel_case("index_uid"), "indexUid");
-        assert_eq!(to_camel_case("document_id"), "documentId");
-        assert_eq!(to_camel_case("task_uid"), "taskUid");
-    }
-
-    #[test]
-    fn test_parse_documentation_mapping() {
-        let yaml = r#"
-# get_indexes
-get_indexes_1: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/indexes'
-get_indexes_2: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/indexes?limit=5'
-# post_indexes
-post_indexes_1: |-
-  curl \
-    -X POST 'MEILISEARCH_URL/indexes'
-post_indexes_2: |-
-  curl \
-    -X POST 'MEILISEARCH_URL/indexes'
-# get_version
-get_version_1: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/version'
-# COMMENT WITHOUT KEY - SHOULD BE IGNORED
-## COMMENT WITHOUT KEY - SHOULD BE IGNORED
-unrelated_sample_without_comment: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/something'
-"#;
-        let mapping = parse_documentation_mapping(yaml);
-
-        assert_eq!(mapping.len(), 3);
-        assert!(mapping.contains_key("get_indexes"));
-        assert!(mapping.contains_key("post_indexes"));
-        assert!(mapping.contains_key("get_version"));
-        assert_eq!(mapping["get_indexes"], vec!["get_indexes_1", "get_indexes_2"]);
-        assert_eq!(mapping["post_indexes"], vec!["post_indexes_1", "post_indexes_2"]);
-        assert_eq!(mapping["get_version"], vec!["get_version_1"]);
-        // unrelated_sample_without_comment should not be in the mapping
-        assert!(!mapping.values().any(|v| v.contains(&"unrelated_sample_without_comment".to_string())));
-        // Comments with multiple words or ## should be ignored and not create keys
-        assert!(!mapping.contains_key("COMMENT"));
-        assert!(!mapping.contains_key("##"));
-    }
-
-    #[test]
-    fn test_parse_code_samples() {
-        let yaml = r#"
-# This is a comment that should be ignored
-list_all_indexes_1: |-
-  const client = new MeiliSearch({
-    host: 'http://localhost:7700',
-    apiKey: 'masterKey'
-  });
-
-  const response = await client.getIndexes();
-
-# Another comment
-create_an_index_1: |-
-  const task = await client.createIndex('movies');
-"#;
-        let samples = parse_code_samples(yaml);
-
-        assert_eq!(samples.len(), 2);
-        assert!(samples.contains_key("list_all_indexes_1"));
-        assert!(samples.contains_key("create_an_index_1"));
-        assert!(samples["list_all_indexes_1"].contains("getIndexes"));
-        assert!(samples["create_an_index_1"].contains("createIndex"));
-    }
-
-    #[test]
-    fn test_clean_null_descriptions() {
-        let mut openapi = json!({
-            "tags": [
-                {
-                    "name": "Test1",
-                    "description": "null"
-                },
-                {
-                    "name": "Test2",
-                    "description": null
-                },
-                {
-                    "name": "Test3",
-                    "description": "Valid description"
-                },
-                {
-                    "name": "Test4",
-                    "description": "null",
-                    "externalDocs": {
-                        "url": "https://example.com",
-                        "description": null
-                    }
-                },
-                {
-                    "name": "Test5",
-                    "externalDocs": {
-                        "url": "https://example.com",
-                        "description": "null"
-                    }
-                }
-            ]
-        });
-
-        clean_null_descriptions(&mut openapi);
-
-        let tags = openapi["tags"].as_array().unwrap();
-
-        // Test1: description "null" should be removed
-        assert!(!tags[0].as_object().unwrap().contains_key("description"));
-
-        // Test2: description null should be removed
-        assert!(!tags[1].as_object().unwrap().contains_key("description"));
-
-        // Test3: valid description should remain
-        assert_eq!(tags[2]["description"], "Valid description");
-
-        // Test4: both tag description and externalDocs description should be removed
-        assert!(!tags[3].as_object().unwrap().contains_key("description"));
-        assert!(!tags[3]["externalDocs"]
-            .as_object()
-            .unwrap()
-            .contains_key("description"));
-        assert_eq!(tags[3]["externalDocs"]["url"], "https://example.com");
-
-        // Test5: externalDocs description "null" should be removed
-        assert!(!tags[4]["externalDocs"]
-            .as_object()
-            .unwrap()
-            .contains_key("description"));
-        assert_eq!(tags[4]["externalDocs"]["url"], "https://example.com");
-    }
-}
Author	SHA1	Message	Date
YoEight	328718ee90	improve documentation	2025-12-11 16:30:04 -05:00
YoEight	a2878efafe	time for some tests	2025-12-11 16:11:22 -05:00
YoEight	f392e0a0f8	threading down on_missing_document param	2025-12-11 15:54:11 -05:00
YoEight	e359325dbd	rename to a clearer name	2025-12-11 14:31:16 -05:00
YoEight	d5f66c195d	introduce DocumentCreationPolicy	2025-12-11 14:13:11 -05:00
YoEight	9f64b0de66	Allow strict document update without creating missing documents	2025-12-11 12:49:34 -05:00