Move the default pagination limit into a const

Make the Query limit mandatory
2025-12-11 23:25:41 +00:00 · 2025-12-09 15:24:12 +01:00 · 2025-12-09 15:21:20 +01:00
15 changed files with 368 additions and 1298 deletions
--- a/.github/workflows/publish-release-assets.yml
+++ b/.github/workflows/publish-release-assets.yml
@@ -104,13 +104,13 @@ jobs:
      - name: Generate OpenAPI file
        run: |
          cd crates/openapi-generator
-          cargo run --release -- --pretty --output ../../meilisearch-openapi.json
+          cargo run --release -- --pretty --output ../../meilisearch.json
      - name: Upload OpenAPI to Release
        # No need to upload for dry run (cron or workflow_dispatch)
        if: github.event_name == 'release'
        uses: svenstaro/upload-release-action@2.11.2
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
-          file: ./meilisearch-openapi.json
+          file: ./meilisearch.json
          asset_name: meilisearch-openapi.json
          tag: ${{ github.ref }}
--- a/.github/workflows/sdks-tests.yml
+++ b/.github/workflows/sdks-tests.yml
@@ -25,18 +25,14 @@ jobs:
      - uses: actions/checkout@v5
      - name: Define the Docker image we need to use
        id: define-image
-        env:
-          EVENT_NAME: ${{ github.event_name }}
-          DOCKER_IMAGE_INPUT: ${{ github.event.inputs.docker_image }}
        run: |
+          event=${{ github.event_name }}
          echo "docker-image=nightly" >> $GITHUB_OUTPUT
-          if [[ "$EVENT_NAME" == 'workflow_dispatch' ]]; then
-            echo "docker-image=$DOCKER_IMAGE_INPUT" >> $GITHUB_OUTPUT
+          if [[ $event == 'workflow_dispatch' ]]; then
+            echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
          fi
      - name: Docker image is ${{ steps.define-image.outputs.docker-image }}
-        env:
-          DOCKER_IMAGE: ${{ steps.define-image.outputs.docker-image }}
-        run: echo "Docker image is $DOCKER_IMAGE"
+        run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"

 ##########
 ## SDKs ##
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,3 @@ crates/meilisearch/db.snapshot

 # Fuzzcheck data for the facet indexing fuzz test
 crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
-
-# OpenAPI generator
-**/meilisearch-openapi.json
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -117,7 +117,7 @@ With swagger:
 With the internal crate:
 ```bash
 cd crates/openapi-generator
-cargo run --release -- --pretty
+cargo run --release -- --pretty --output meilisearch.json
 ```

 ### Logging
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/crates/index-scheduler/src/queue/batches.rs
+++ b/crates/index-scheduler/src/queue/batches.rs
@@ -502,13 +502,11 @@ impl Queue {
            *before_finished_at,
        )?;

-        if let Some(limit) = limit {
-            batches = if query.reverse.unwrap_or_default() {
-                batches.into_iter().take(*limit as usize).collect()
-            } else {
-                batches.into_iter().rev().take(*limit as usize).collect()
-            };
-        }
+        batches = if query.reverse.unwrap_or_default() {
+            batches.into_iter().take(*limit).collect()
+        } else {
+            batches.into_iter().rev().take(*limit).collect()
+        };

        Ok(batches)
    }
@@ -602,11 +600,8 @@ impl Queue {
            Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
        };

-        let batches = self.batches.get_existing_batches(
-            rtxn,
-            batches.take(query.limit.unwrap_or(u32::MAX) as usize),
-            processing,
-        )?;
+        let batches =
+            self.batches.get_existing_batches(rtxn, batches.take(query.limit), processing)?;

        Ok((batches, total))
    }
--- a/crates/index-scheduler/src/queue/batches_test.rs
+++ b/crates/index-scheduler/src/queue/batches_test.rs
@@ -28,21 +28,21 @@ fn query_batches_from_and_limit() {

    let proc = index_scheduler.processing_tasks.read().unwrap().clone();
    let rtxn = index_scheduler.env.read_txn().unwrap();
-    let query = Query { limit: Some(0), ..Default::default() };
+    let query = Query { limit: 0, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[]");

-    let query = Query { limit: Some(1), ..Default::default() };
+    let query = Query { limit: 1, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[2,]");

-    let query = Query { limit: Some(2), ..Default::default() };
+    let query = Query { limit: 2, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
@@ -63,14 +63,14 @@ fn query_batches_from_and_limit() {
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");

-    let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
+    let query = Query { from: Some(1), limit: 1, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
        .unwrap();
    snapshot!(snapshot_bitmap(&batches), @"[1,]");

-    let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
+    let query = Query { from: Some(1), limit: 2, ..Default::default() };
    let (batches, _) = index_scheduler
        .queue
        .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
--- a/crates/index-scheduler/src/queue/mod.rs
+++ b/crates/index-scheduler/src/queue/mod.rs
@@ -31,6 +31,9 @@ use crate::{Error, IndexSchedulerOptions, Result, TaskId};

 /// The number of database used by queue itself
 const NUMBER_OF_DATABASES: u32 = 1;
+/// The default limit for pagination
+const DEFAULT_LIMIT: usize = 20;
+
 /// Database const names for the `IndexScheduler`.
 mod db_name {
    pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
@@ -40,11 +43,11 @@ mod db_name {
 ///
 /// An empty/default query (where each field is set to `None`) matches all tasks.
 /// Each non-null field restricts the set of tasks further.
-#[derive(Default, Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Query {
-    /// The maximum number of tasks to be matched
-    pub limit: Option<u32>,
-    /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
+    /// The maximum number of tasks to be matched. Defaults to 20.
+    pub limit: usize,
+    /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched. Defaults to 0.
    pub from: Option<u32>,
    /// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
    pub reverse: Option<bool>,
@@ -83,32 +86,29 @@ pub struct Query {
    pub after_finished_at: Option<OffsetDateTime>,
 }

-impl Query {
-    /// Return `true` if every field of the query is set to `None`, such that the query
-    /// matches all tasks.
-    pub fn is_empty(&self) -> bool {
-        matches!(
-            self,
-            Query {
-                limit: None,
-                from: None,
-                reverse: None,
-                uids: None,
-                batch_uids: None,
-                statuses: None,
-                types: None,
-                index_uids: None,
-                canceled_by: None,
-                before_enqueued_at: None,
-                after_enqueued_at: None,
-                before_started_at: None,
-                after_started_at: None,
-                before_finished_at: None,
-                after_finished_at: None,
-            }
-        )
+impl Default for Query {
+    fn default() -> Self {
+        Self {
+            limit: DEFAULT_LIMIT,
+            from: Default::default(),
+            reverse: Default::default(),
+            uids: Default::default(),
+            batch_uids: Default::default(),
+            statuses: Default::default(),
+            types: Default::default(),
+            index_uids: Default::default(),
+            canceled_by: Default::default(),
+            before_enqueued_at: Default::default(),
+            after_enqueued_at: Default::default(),
+            before_started_at: Default::default(),
+            after_started_at: Default::default(),
+            before_finished_at: Default::default(),
+            after_finished_at: Default::default(),
+        }
    }
+}

+impl Query {
    /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
    pub fn with_index(self, index_uid: String) -> Self {
        let mut index_vec = self.index_uids.unwrap_or_default();
@@ -119,7 +119,7 @@ impl Query {
    // Removes the `from` and `limit` restrictions from the query.
    // Useful to get the total number of tasks matching a filter.
    pub fn without_limits(self) -> Self {
-        Query { limit: None, from: None, ..self }
+        Query { limit: usize::MAX, from: None, ..self }
    }
 }

--- a/crates/index-scheduler/src/queue/tasks.rs
+++ b/crates/index-scheduler/src/queue/tasks.rs
@@ -465,13 +465,11 @@ impl Queue {
            *before_finished_at,
        )?;

-        if let Some(limit) = limit {
-            tasks = if query.reverse.unwrap_or_default() {
-                tasks.into_iter().take(*limit as usize).collect()
-            } else {
-                tasks.into_iter().rev().take(*limit as usize).collect()
-            };
-        }
+        tasks = if query.reverse.unwrap_or_default() {
+            tasks.into_iter().take(*limit).collect()
+        } else {
+            tasks.into_iter().rev().take(*limit).collect()
+        };

        Ok(tasks)
    }
@@ -529,9 +527,7 @@ impl Queue {
        } else {
            Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
        };
-        let tasks = self
-            .tasks
-            .get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
+        let tasks = self.tasks.get_existing_tasks(rtxn, tasks.take(query.limit))?;

        let ProcessingTasks { batch, processing, progress: _ } = processing_tasks;

--- a/crates/index-scheduler/src/queue/tasks_test.rs
+++ b/crates/index-scheduler/src/queue/tasks_test.rs
@@ -28,21 +28,21 @@ fn query_tasks_from_and_limit() {

    let rtxn = index_scheduler.env.read_txn().unwrap();
    let processing = index_scheduler.processing_tasks.read().unwrap();
-    let query = Query { limit: Some(0), ..Default::default() };
+    let query = Query { limit: 0, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[]");

-    let query = Query { limit: Some(1), ..Default::default() };
+    let query = Query { limit: 1, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[2,]");

-    let query = Query { limit: Some(2), ..Default::default() };
+    let query = Query { limit: 2, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
@@ -63,14 +63,14 @@ fn query_tasks_from_and_limit() {
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");

-    let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
+    let query = Query { from: Some(1), limit: 1, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
        .unwrap();
    snapshot!(snapshot_bitmap(&tasks), @"[1,]");

-    let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
+    let query = Query { from: Some(1), limit: 2, ..Default::default() };
    let (tasks, _) = index_scheduler
        .queue
        .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
--- a/crates/meilisearch/src/routes/indexes/compact.rs
+++ b/crates/meilisearch/src/routes/indexes/compact.rs
@@ -34,7 +34,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 /// Compact an index
 #[utoipa::path(
    post,
-    path = "/{indexUid}/compact",
+    path = "{indexUid}/compact",
    tag = "Compact an index",
    security(("Bearer" = ["search", "*"])),
    params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
--- a/crates/meilisearch/src/routes/metrics.rs
+++ b/crates/meilisearch/src/routes/metrics.rs
@@ -185,7 +185,7 @@ pub async fn get_metrics(
        // Fetch the finished batches...
        &Query {
            statuses: Some(vec![Status::Succeeded, Status::Failed]),
-            limit: Some(1),
+            limit: 1,
            ..Query::default()
        },
        auth_filters,
@@ -214,7 +214,7 @@ pub async fn get_metrics(
    let task_queue_latency_seconds = index_scheduler
        .get_tasks_from_authorized_indexes(
            &Query {
-                limit: Some(1),
+                limit: 1,
                reverse: Some(true),
                statuses: Some(vec![Status::Enqueued, Status::Processing]),
                ..Query::default()
--- a/crates/meilisearch/src/routes/tasks.rs
+++ b/crates/meilisearch/src/routes/tasks.rs
@@ -126,7 +126,7 @@ pub struct TasksFilterQuery {
 impl TasksFilterQuery {
    pub(crate) fn into_query(self) -> Query {
        Query {
-            limit: Some(self.limit.0),
+            limit: self.limit.0 as usize,
            from: self.from.as_deref().copied(),
            reverse: self.reverse.as_deref().copied(),
            batch_uids: self.batch_uids.merge_star_and_none(),
@@ -225,7 +225,8 @@ pub struct TaskDeletionOrCancelationQuery {
 impl TaskDeletionOrCancelationQuery {
    fn into_query(self) -> Query {
        Query {
-            limit: None,
+            // No limit: deletion and cancelation must apply to every task that matches the given filters
+            limit: usize::MAX,
            from: None,
            reverse: None,
            batch_uids: self.batch_uids.merge_star_and_none(),
--- a/crates/openapi-generator/Cargo.toml
+++ b/crates/openapi-generator/Cargo.toml
@@ -10,5 +10,3 @@ serde_json = "1.0"
 clap = { version = "4.5.52", features = ["derive"] }
 anyhow = "1.0.100"
 utoipa = "5.4.0"
-reqwest = { version = "0.12", features = ["blocking"] }
-regex = "1.10"
--- a/crates/openapi-generator/src/main.rs
+++ b/crates/openapi-generator/src/main.rs
@@ -1,57 +1,21 @@
-use std::borrow::Cow;
-use std::collections::HashMap;
 use std::path::PathBuf;
-use std::sync::LazyLock;

-use anyhow::{Context, Result};
+use anyhow::Result;
 use clap::Parser;
 use meilisearch::routes::MeilisearchApi;
-use regex::Regex;
-use serde_json::{json, Value};
 use utoipa::OpenApi;

-const HTTP_METHODS: &[&str] = &["get", "post", "put", "patch", "delete"];
-
-/// Language used in the documentation repository (contains the key mapping)
-const DOCS_LANG: &str = "cURL";
-
-/// Mapping of repository URLs to language names.
-/// The "cURL" entry is special: it contains the key mapping used to resolve sample IDs for all SDKs.
-const CODE_SAMPLES: &[(&str, &str)] = &[
-    ("https://raw.githubusercontent.com/meilisearch/documentation/refs/heads/main/.code-samples.meilisearch.yaml", "cURL"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dotnet/refs/heads/main/.code-samples.meilisearch.yaml", "C#"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dart/refs/heads/main/.code-samples.meilisearch.yaml", "Dart"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-go/refs/heads/main/.code-samples.meilisearch.yaml", "Go"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-java/refs/heads/main/.code-samples.meilisearch.yaml", "Java"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-js/refs/heads/main/.code-samples.meilisearch.yaml", "JS"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-php/refs/heads/main/.code-samples.meilisearch.yaml", "PHP"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-python/refs/heads/main/.code-samples.meilisearch.yaml", "Python"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-ruby/refs/heads/main/.code-samples.meilisearch.yaml", "Ruby"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-rust/refs/heads/main/.code-samples.meilisearch.yaml", "Rust"),
-    ("https://raw.githubusercontent.com/meilisearch/meilisearch-swift/refs/heads/main/.code-samples.meilisearch.yaml", "Swift"),
-];
-
-// Pre-compiled regex patterns
-static COMMENT_RE: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"^#\s*([a-zA-Z0-9_]+)\s*$").unwrap());
-static CODE_START_RE: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"^([a-zA-Z0-9_]+):\s*\|-\s*$").unwrap());
-
 #[derive(Parser)]
 #[command(name = "openapi-generator")]
 #[command(about = "Generate OpenAPI specification for Meilisearch")]
 struct Cli {
-    /// Output file path (default: meilisearch-openapi.json)
+    /// Output file path (default: meilisearch.json)
    #[arg(short, long, value_name = "FILE")]
    output: Option<PathBuf>,

    /// Pretty print the JSON output
    #[arg(short, long)]
    pretty: bool,
-
-    /// Skip fetching code samples (offline mode)
-    #[arg(long)]
-    no_code_samples: bool,
 }

 fn main() -> Result<()> {
@@ -60,26 +24,14 @@ fn main() -> Result<()> {
    // Generate the OpenAPI specification
    let openapi = MeilisearchApi::openapi();

-    // Convert to serde_json::Value for modification
-    let mut openapi_value: Value = serde_json::to_value(&openapi)?;
-
-    // Fetch and add code samples if not disabled
-    if !cli.no_code_samples {
-        let code_samples = fetch_all_code_samples()?;
-        add_code_samples_to_openapi(&mut openapi_value, &code_samples)?;
-    }
-
-    // Clean up null descriptions in tags
-    clean_null_descriptions(&mut openapi_value);
-
    // Determine output path
-    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch-openapi.json"));
+    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch.json"));

    // Serialize to JSON
    let json = if cli.pretty {
-        serde_json::to_string_pretty(&openapi_value)?
+        serde_json::to_string_pretty(&openapi)?
    } else {
-        serde_json::to_string(&openapi_value)?
+        serde_json::to_string(&openapi)?
    };

    // Write to file
@@ -89,487 +41,3 @@ fn main() -> Result<()> {

    Ok(())
 }
-
-/// Code sample for a specific language
-#[derive(Debug, Clone)]
-struct CodeSample {
-    lang: String,
-    source: String,
-}
-
-/// Fetch and parse code samples from all repositories
-/// Returns a map from key (e.g., "get_indexes") to a list of code samples for different languages
-fn fetch_all_code_samples() -> Result<HashMap<String, Vec<CodeSample>>> {
-    // First, fetch the documentation file (cURL) to get the key mapping
-    let (docs_url, _) = CODE_SAMPLES
-        .iter()
-        .find(|(_, lang)| *lang == DOCS_LANG)
-        .context("Documentation source not found in CODE_SAMPLES")?;
-
-    let docs_content = reqwest::blocking::get(*docs_url)
-        .context("Failed to fetch documentation code samples")?
-        .text()
-        .context("Failed to read documentation code samples response")?;
-
-    let key_to_sample_ids = parse_documentation_mapping(&docs_content);
-
-    // Fetch code samples from all sources
-    let mut all_samples: HashMap<String, Vec<CodeSample>> = HashMap::new();
-
-    for (url, lang) in CODE_SAMPLES {
-        // For cURL, reuse already fetched content; for SDKs, fetch from URL
-        let content: Cow<'_, str> = if *lang == DOCS_LANG {
-            Cow::Borrowed(&docs_content)
-        } else {
-            match reqwest::blocking::get(*url).and_then(|r| r.text()) {
-                Ok(text) => Cow::Owned(text),
-                Err(e) => {
-                    eprintln!("Warning: Failed to fetch code samples for {}: {}", lang, e);
-                    continue;
-                }
-            }
-        };
-
-        let sample_id_to_code = parse_code_samples(&content);
-        for (key, sample_ids) in &key_to_sample_ids {
-            for sample_id in sample_ids {
-                if let Some(source) = sample_id_to_code.get(sample_id) {
-                    all_samples.entry(key.clone()).or_default().push(CodeSample {
-                        lang: lang.to_string(),
-                        source: source.clone(),
-                    });
-                }
-            }
-        }
-    }
-
-    Ok(all_samples)
-}
-
-/// Parse the documentation file to create a mapping from keys (comment IDs) to sample IDs
-/// Returns: HashMap<key, Vec<sample_id>>
-fn parse_documentation_mapping(content: &str) -> HashMap<String, Vec<String>> {
-    let mut mapping: HashMap<String, Vec<String>> = HashMap::new();
-    let mut current_key: Option<String> = None;
-
-    for line in content.lines() {
-        // Check if this is a comment line defining a new key
-        if let Some(caps) = COMMENT_RE.captures(line) {
-            current_key = Some(caps[1].to_string());
-            continue;
-        }
-
-        // Check if this starts a new code block and extract the sample_id
-        if let Some(caps) = CODE_START_RE.captures(line) {
-            let sample_id = caps[1].to_string();
-
-            if let Some(ref key) = current_key {
-                // Only associate this sample_id with the current key if it follows the pattern {key}_N
-                // This prevents samples without a preceding comment from being incorrectly associated
-                if sample_id.starts_with(&format!("{}_", key)) {
-                    mapping.entry(key.clone()).or_default().push(sample_id);
-                } else {
-                    // Sample ID doesn't match the current key, reset current_key
-                    current_key = None;
-                }
-            }
-        }
-    }
-
-    mapping
-}
-
-/// State machine for parsing YAML code blocks
-struct YamlCodeBlockParser {
-    current_value: Vec<String>,
-    in_code_block: bool,
-    base_indent: Option<usize>,
-}
-
-impl YamlCodeBlockParser {
-    fn new() -> Self {
-        Self { current_value: Vec::new(), in_code_block: false, base_indent: None }
-    }
-
-    fn start_new_block(&mut self) {
-        self.current_value.clear();
-        self.in_code_block = true;
-        self.base_indent = None;
-    }
-
-    fn take_value(&mut self) -> Option<String> {
-        if self.current_value.is_empty() {
-            return None;
-        }
-        let value = self.current_value.join("\n").trim_end().to_string();
-        self.current_value.clear();
-        self.in_code_block = false;
-        self.base_indent = None;
-        Some(value)
-    }
-
-    fn process_line(&mut self, line: &str) {
-        if !self.in_code_block {
-            return;
-        }
-
-        // Empty line or line with only whitespace
-        if line.trim().is_empty() {
-            // Only add empty lines if we've already started collecting
-            if !self.current_value.is_empty() {
-                self.current_value.push(String::new());
-            }
-            return;
-        }
-
-        // Calculate indentation
-        let indent = line.len() - line.trim_start().len();
-
-        // Set base indent from first non-empty line
-        let base = *self.base_indent.get_or_insert(indent);
-
-        // If line has less indentation than base, we've exited the block
-        if indent < base {
-            self.in_code_block = false;
-            return;
-        }
-
-        // Remove base indentation and add to value
-        let dedented = line.get(base..).unwrap_or_else(|| line.trim_start());
-        self.current_value.push(dedented.to_string());
-    }
-}
-
-/// Parse a code samples YAML file
-/// Returns: HashMap<sample_id, code>
-fn parse_code_samples(content: &str) -> HashMap<String, String> {
-    let mut samples: HashMap<String, String> = HashMap::new();
-    let mut current_sample_id: Option<String> = None;
-    let mut parser = YamlCodeBlockParser::new();
-
-    for line in content.lines() {
-        // Ignore comment lines
-        if line.starts_with('#') {
-            continue;
-        }
-
-        // Check if this starts a new code block
-        if let Some(caps) = CODE_START_RE.captures(line) {
-            // Save previous sample if exists
-            if let Some(sample_id) = current_sample_id.take() {
-                if let Some(value) = parser.take_value() {
-                    samples.insert(sample_id, value);
-                }
-            }
-            current_sample_id = Some(caps[1].to_string());
-            parser.start_new_block();
-            continue;
-        }
-
-        if current_sample_id.is_some() {
-            parser.process_line(line);
-        }
-    }
-
-    // Don't forget the last sample
-    if let Some(sample_id) = current_sample_id {
-        if let Some(value) = parser.take_value() {
-            samples.insert(sample_id, value);
-        }
-    }
-
-    samples
-}
-
-/// Convert an OpenAPI path to a code sample key
-/// Path: /indexes/{index_uid}/documents/{document_id}
-/// Method: GET
-/// Key: get_indexes_indexUid_documents_documentId
-fn path_to_key(path: &str, method: &str) -> String {
-    let method_lower = method.to_lowercase();
-
-    // Remove leading slash and convert path
-    let path_part = path
-        .trim_start_matches('/')
-        .split('/')
-        .map(|segment| {
-            if segment.starts_with('{') && segment.ends_with('}') {
-                // Convert {param_name} to camelCase
-                let param = &segment[1..segment.len() - 1];
-                to_camel_case(param)
-            } else {
-                // Keep path segments as-is, but replace hyphens with underscores
-                segment.replace('-', "_")
-            }
-        })
-        .collect::<Vec<_>>()
-        .join("_");
-
-    if path_part.is_empty() {
-        method_lower
-    } else {
-        format!("{}_{}", method_lower, path_part)
-    }
-}
-
-/// Convert snake_case to camelCase
-fn to_camel_case(s: &str) -> String {
-    let mut result = String::with_capacity(s.len());
-    let mut capitalize_next = false;
-
-    for (i, c) in s.chars().enumerate() {
-        match c {
-            '_' => capitalize_next = true,
-            _ if capitalize_next => {
-                result.push(c.to_ascii_uppercase());
-                capitalize_next = false;
-            }
-            _ if i == 0 => result.push(c.to_ascii_lowercase()),
-            _ => result.push(c),
-        }
-    }
-
-    result
-}
-
-/// Add code samples to the OpenAPI specification
-fn add_code_samples_to_openapi(
-    openapi: &mut Value,
-    code_samples: &HashMap<String, Vec<CodeSample>>,
-) -> Result<()> {
-    let paths = openapi
-        .get_mut("paths")
-        .and_then(|p| p.as_object_mut())
-        .context("OpenAPI spec missing 'paths' object")?;
-
-    for (path, path_item) in paths.iter_mut() {
-        let Some(path_item) = path_item.as_object_mut() else {
-            continue;
-        };
-
-        for method in HTTP_METHODS {
-            let Some(operation) = path_item.get_mut(*method) else {
-                continue;
-            };
-
-            let key = path_to_key(path, method);
-
-            if let Some(samples) = code_samples.get(&key) {
-                // Create x-codeSamples array according to Redocly spec
-                // Sort by language name for consistent output
-                let mut sorted_samples = samples.clone();
-                sorted_samples.sort_by(|a, b| a.lang.cmp(&b.lang));
-
-                let code_sample_array: Vec<Value> = sorted_samples
-                    .iter()
-                    .map(|sample| {
-                        json!({
-                            "lang": sample.lang,
-                            "source": sample.source
-                        })
-                    })
-                    .collect();
-
-                if let Some(op) = operation.as_object_mut() {
-                    op.insert("x-codeSamples".to_string(), json!(code_sample_array));
-                }
-            }
-        }
-    }
-
-    Ok(())
-}
-
-/// Clean up null descriptions in tags to make Mintlify work
-/// Removes any "description" fields with null values (both JSON null and "null" string)
-/// from the tags array and all nested objects
-fn clean_null_descriptions(openapi: &mut Value) {
-    if let Some(tags) = openapi.get_mut("tags").and_then(|t| t.as_array_mut()) {
-        for tag in tags.iter_mut() {
-            remove_null_descriptions_recursive(tag);
-        }
-    }
-}
-
-/// Recursively remove all "description" fields that are null or "null" string
-fn remove_null_descriptions_recursive(value: &mut Value) {
-    if let Some(obj) = value.as_object_mut() {
-        // Check and remove description if it's null or "null" string
-        if let Some(desc) = obj.get("description") {
-            if desc.is_null() || (desc.is_string() && desc.as_str() == Some("null")) {
-                obj.remove("description");
-            }
-        }
-
-        // Recursively process all nested objects
-        for (_, v) in obj.iter_mut() {
-            remove_null_descriptions_recursive(v);
-        }
-    } else if let Some(arr) = value.as_array_mut() {
-        // Recursively process arrays
-        for item in arr.iter_mut() {
-            remove_null_descriptions_recursive(item);
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_path_to_key() {
-        assert_eq!(path_to_key("/indexes", "GET"), "get_indexes");
-        assert_eq!(path_to_key("/indexes/{index_uid}", "GET"), "get_indexes_indexUid");
-        assert_eq!(
-            path_to_key("/indexes/{index_uid}/documents", "POST"),
-            "post_indexes_indexUid_documents"
-        );
-        assert_eq!(
-            path_to_key("/indexes/{index_uid}/documents/{document_id}", "GET"),
-            "get_indexes_indexUid_documents_documentId"
-        );
-        assert_eq!(
-            path_to_key("/indexes/{index_uid}/settings/stop-words", "GET"),
-            "get_indexes_indexUid_settings_stop_words"
-        );
-    }
-
-    #[test]
-    fn test_to_camel_case() {
-        assert_eq!(to_camel_case("index_uid"), "indexUid");
-        assert_eq!(to_camel_case("document_id"), "documentId");
-        assert_eq!(to_camel_case("task_uid"), "taskUid");
-    }
-
-    #[test]
-    fn test_parse_documentation_mapping() {
-        let yaml = r#"
-# get_indexes
-get_indexes_1: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/indexes'
-get_indexes_2: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/indexes?limit=5'
-# post_indexes
-post_indexes_1: |-
-  curl \
-    -X POST 'MEILISEARCH_URL/indexes'
-post_indexes_2: |-
-  curl \
-    -X POST 'MEILISEARCH_URL/indexes'
-# get_version
-get_version_1: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/version'
-# COMMENT WITHOUT KEY - SHOULD BE IGNORED
-## COMMENT WITHOUT KEY - SHOULD BE IGNORED
-unrelated_sample_without_comment: |-
-  curl \
-    -X GET 'MEILISEARCH_URL/something'
-"#;
-        let mapping = parse_documentation_mapping(yaml);
-
-        assert_eq!(mapping.len(), 3);
-        assert!(mapping.contains_key("get_indexes"));
-        assert!(mapping.contains_key("post_indexes"));
-        assert!(mapping.contains_key("get_version"));
-        assert_eq!(mapping["get_indexes"], vec!["get_indexes_1", "get_indexes_2"]);
-        assert_eq!(mapping["post_indexes"], vec!["post_indexes_1", "post_indexes_2"]);
-        assert_eq!(mapping["get_version"], vec!["get_version_1"]);
-        // unrelated_sample_without_comment should not be in the mapping
-        assert!(!mapping.values().any(|v| v.contains(&"unrelated_sample_without_comment".to_string())));
-        // Comments with multiple words or ## should be ignored and not create keys
-        assert!(!mapping.contains_key("COMMENT"));
-        assert!(!mapping.contains_key("##"));
-    }
-
-    #[test]
-    fn test_parse_code_samples() {
-        let yaml = r#"
-# This is a comment that should be ignored
-list_all_indexes_1: |-
-  const client = new MeiliSearch({
-    host: 'http://localhost:7700',
-    apiKey: 'masterKey'
-  });
-
-  const response = await client.getIndexes();
-
-# Another comment
-create_an_index_1: |-
-  const task = await client.createIndex('movies');
-"#;
-        let samples = parse_code_samples(yaml);
-
-        assert_eq!(samples.len(), 2);
-        assert!(samples.contains_key("list_all_indexes_1"));
-        assert!(samples.contains_key("create_an_index_1"));
-        assert!(samples["list_all_indexes_1"].contains("getIndexes"));
-        assert!(samples["create_an_index_1"].contains("createIndex"));
-    }
-
-    #[test]
-    fn test_clean_null_descriptions() {
-        let mut openapi = json!({
-            "tags": [
-                {
-                    "name": "Test1",
-                    "description": "null"
-                },
-                {
-                    "name": "Test2",
-                    "description": null
-                },
-                {
-                    "name": "Test3",
-                    "description": "Valid description"
-                },
-                {
-                    "name": "Test4",
-                    "description": "null",
-                    "externalDocs": {
-                        "url": "https://example.com",
-                        "description": null
-                    }
-                },
-                {
-                    "name": "Test5",
-                    "externalDocs": {
-                        "url": "https://example.com",
-                        "description": "null"
-                    }
-                }
-            ]
-        });
-
-        clean_null_descriptions(&mut openapi);
-
-        let tags = openapi["tags"].as_array().unwrap();
-
-        // Test1: description "null" should be removed
-        assert!(!tags[0].as_object().unwrap().contains_key("description"));
-
-        // Test2: description null should be removed
-        assert!(!tags[1].as_object().unwrap().contains_key("description"));
-
-        // Test3: valid description should remain
-        assert_eq!(tags[2]["description"], "Valid description");
-
-        // Test4: both tag description and externalDocs description should be removed
-        assert!(!tags[3].as_object().unwrap().contains_key("description"));
-        assert!(!tags[3]["externalDocs"]
-            .as_object()
-            .unwrap()
-            .contains_key("description"));
-        assert_eq!(tags[3]["externalDocs"]["url"], "https://example.com");
-
-        // Test5: externalDocs description "null" should be removed
-        assert!(!tags[4]["externalDocs"]
-            .as_object()
-            .unwrap()
-            .contains_key("description"));
-        assert_eq!(tags[4]["externalDocs"]["url"], "https://example.com");
-    }
-}
Author	SHA1	Message	Date
Kerollmops	0be7db9b42	Move the default pagination limit into a const	2025-12-09 15:24:12 +01:00
Kerollmops	051c084aba	Make the Query limit mandatory	2025-12-09 15:21:20 +01:00