Compare commits

...

34 Commits

Author SHA1 Message Date
Clément Renault
f7e0a3edaf Use the minWordSizeForTypos index settings 2023-05-04 15:09:17 +02:00
Clément Renault
a669368f2d Format the code 2023-05-04 14:02:22 +02:00
Clément Renault
2e7ba2b092 Increase the number of dbs 2023-05-04 13:53:08 +02:00
Clément Renault
f983bbe532 Fix compilation issues 2023-05-04 12:27:19 +02:00
Clément Renault
3cbcbad71c Fix a snap test 2023-05-04 12:22:09 +02:00
Clément Renault
e83d9680ba Simplify the placeholder search of the facet-search route 2023-05-04 12:22:09 +02:00
Clément Renault
29153417c4 Use the disableOnAttributes parameter on the facet-search route 2023-05-04 12:22:09 +02:00
Clément Renault
57892e3d76 Use the disableOnWords parameter on the facet-search route 2023-05-04 12:22:09 +02:00
Clément Renault
13d4aee912 Support the typoTolerant.enabled parameter 2023-05-04 12:22:09 +02:00
Clément Renault
ba5db80afa Log an error when a facet value is missing from the database 2023-05-04 12:22:09 +02:00
Clément Renault
f8432dff5d Rename the SearchForFacetValues struct 2023-05-04 12:21:55 +02:00
Clément Renault
ae3dad5b63 Return an internal error when a field id is missing 2023-05-04 12:19:25 +02:00
Clément Renault
938ab16799 Make clippy happy 2023-05-04 12:19:25 +02:00
Clément Renault
8252f3331f Improve the returned errors from the facet-search route 2023-05-04 12:19:25 +02:00
Clément Renault
5b745cc8a2 Fix the max number of facets to be returned to 100 2023-05-04 12:19:25 +02:00
Clément Renault
19c3ef64bd Return the correct response JSON object from the facet-search route 2023-05-04 12:19:24 +02:00
Clément Renault
3b346aac42 Send analytics about the facet-search route 2023-05-04 12:18:10 +02:00
Clément Renault
72a2469178 Make the search for facet work 2023-05-04 12:18:09 +02:00
Kerollmops
d7d085ef1e Introduce the facet search route 2023-05-04 12:18:09 +02:00
Kerollmops
1e61870c2b Restrict the number of facet search results to 1000 2023-05-04 12:18:09 +02:00
Kerollmops
4d2860a692 Introduce the SearchForFacetValue struct 2023-05-04 12:17:52 +02:00
Clément Renault
51ca77726d Store the facet string values in multiple FSTs 2023-05-04 11:40:41 +02:00
meili-bors[bot]
78e611f282 Merge #3693
3693: Implement the auto deletion of tasks r=dureuill a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3622

This PR should be the definite fix for #3622.

It adds a limit (1M) to the maximum number of tasks the task queue can hold.
Once the task queue reaches this limit (1M tasks in the queue, whatever their status), Meilisearch will schedule a task deletion that tries to delete the oldest 100k tasks.
If Meilisearch can't delete 100k tasks because some of them are not yet finished, it will delete as many tasks as possible.

Once the limit is reached, you're still able to register new tasks. The engine will only stop you from adding new tasks once [the other hard limit](https://github.com/meilisearch/meilisearch/pull/3659) of 10GiB of tasks is reached (that's between 5M and 15M tasks depending on your workflow).

-------

Technically:
- We only try to schedule our task deletion when calling the tick function, but before creating a new batch. This means we never enqueue a task we're not going to process ~right away.
- If our task deletion wouldn't delete anything, we don't enqueue it and instead log a warning so the user knows the engine is not working properly (see the sketch after this list).
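As a rough illustration, here is a condensed sketch of that scheduling condition. It mirrors the `cleanup_task_queue` implementation shown in the diff below, with simplified names and no error handling; the enqueued deletion then carries a human-readable filter like `?beforeEnqueuedAt=<date + 1ns>&statuses=succeeded,failed,canceled`.

```rust
use roaring::RoaringBitmap;

/// Sketch of the condition implemented by `cleanup_task_queue` below.
fn tasks_to_auto_delete(
    nb_tasks: u64,
    max_number_of_tasks: u64,
    finished: RoaringBitmap, // succeeded | failed | canceled task ids
) -> Option<RoaringBitmap> {
    // Under the limit (1M by default): nothing to schedule.
    if nb_tasks < max_number_of_tasks {
        return None;
    }
    // Select up to 100k finished tasks, as the real code does.
    let to_delete: RoaringBitmap = finished.into_iter().rev().take(100_000).collect();
    // With fewer than 2 deletable tasks we would only ever delete the
    // deletion tasks we enqueued ourselves, so warn instead of enqueueing.
    if to_delete.len() < 2 {
        log::warn!("The task queue is almost full, but no task can be deleted yet.");
        return None;
    }
    Some(to_delete)
}
```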

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-04 08:30:22 +00:00
Louis Dureuil
d8381eb790 Fix originalFilter 2023-05-04 10:07:59 +02:00
Louis Dureuil
b212aef5db add one nanosecond to generated filter so as to generate a filter that would have matched the last task to delete 2023-05-04 09:56:48 +02:00
meili-bors[bot]
6bf66f35be Merge #3721
3721: Use new bors URL of our self hosted bors instance r=curquiza a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-05-04 07:53:39 +00:00
Louis Dureuil
52ab114f6c Fix test on macOS: 50 tasks would result in the test consistently failing on a local macOS 2023-05-04 00:06:49 +02:00
Tamo
dcbfecf42c make the generated filter valid 2023-05-04 00:06:49 +02:00
Tamo
9ca6f59546 Update index-scheduler/src/lib.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-04 00:06:49 +02:00
Tamo
aa7537a11e make the autodeletion work with a fixed number of tasks and update the tests 2023-05-04 00:06:49 +02:00
Tamo
972bb2831c log when meilisearch need to delete tasks 2023-05-04 00:06:49 +02:00
Tamo
f9ddd32545 implement the auto-deletion of tasks 2023-05-04 00:06:49 +02:00
meili-bors[bot]
1afde4fea5 Merge #3542
3542: Refactor of the search algorithms r=dureuill a=loiclec

This PR refactors a large part of the search logic (related to https://github.com/meilisearch/meilisearch/issues/3547)

- The "query tree" is replaced by a "query graph", which describes the different ways in which the search query can be interpreted and precomputes the word derivations for each query term. Example:

<img width="1162" alt="Screenshot 2023-02-27 at 10 26 50" src="https://user-images.githubusercontent.com/6040237/221525270-87917cc0-60d1-473f-847f-2c5a7de9e370.png">

- The control flow between the ~criterions~ ranking rules is managed in a single place instead of being independently implemented by each ranking rule.

- The set of document candidates is determined greedily from the beginning. It is often referred to as the "universe" in the code.

- The ranking rules `proximity`, `attribute`, `typo`, and (maybe) `exactness` are or will be implemented using a K-shortest-path graph algorithm. This minimises the number of database and bitmap operations needed to compute each ranking rule bucket. It also simplifies the code a lot, since many ranking rules share a large part of their implementation.

- Pointers to database values are stored in a cache to avoid searching in the LMDB databases needlessly.

- The results of some roaring bitmap operations are also stored in a cache, although we'll need to measure the memory pressure this puts on the system and maybe deactivate this cache later on (see the sketch after this list).

- Search requests can be visually logged and debugged in tests.
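The two caches mentioned above can be pictured as a small memoization layer in front of LMDB. The following is a conceptual sketch only, with hypothetical names (`BitmapCache`, `fetch_docids`), not the actual cache type from this refactor:

```rust
use std::collections::HashMap;
use roaring::RoaringBitmap;

/// Conceptual sketch: the first lookup for a word computes its docids
/// from the database, later lookups reuse the stored bitmap.
struct BitmapCache {
    word_docids: HashMap<String, RoaringBitmap>,
}

impl BitmapCache {
    fn word_docids(
        &mut self,
        word: &str,
        fetch_docids: impl FnOnce(&str) -> RoaringBitmap, // stands in for an LMDB read
    ) -> &RoaringBitmap {
        self.word_docids
            .entry(word.to_owned())
            .or_insert_with(|| fetch_docids(word))
    }
}
```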

TODO:
- [ ] Reintroduce search benchmarks
- [x] Implement `disableOnWords` and `disableOnAttributes` settings of typo tolerance
- [x] Implement "exhaustive number of hits
- [x] Implement `attribute` ranking rule
   - [x] Indexing changes: split into `word_fid_docids` and `word_position_docids` (with bucketed position)
   - [x] Ranking rule implementations
- [ ] Implement `exactness` ranking rule
  - [x] Initial implementation
  - [ ] Correct implementation when followed by `Words`
- [ ] Implement `geosort` ranking rule
- [ ] Add tests
   - [x] Typo tolerance `disableOnWords`/`disableOnAttributes`
   - [ ] Geosort
   - [x] Exactness
   - [ ] Attribute/Position
   - [ ] Interactions between ranking rules:
     - [x] Typo/Proximity/Attribute not preceded by Words
     - [x] Exactness not preceded by Words
     - [x] Exactness -> Words (+ check universe correctness)
     - [x] Exactness -> Typo, etc.
     - [ ] Sort -> Words (performance tests)
     - [ ] Attribute/Position -> Typo
     - [ ] Attribute/Position -> Proximity
     - [x] Typo -> Exactness 
     - [x] Typo -> Proximity
     - [x] Proximity -> Typo
   - [x] Words 
   - [x] Typo
   - [x] Proximity
   - [x] Sort
   - [x] Ngrams
   - [x] Split words
   - [x] Ngram + Split Words
   - [x] Term matching strategy
   - [x] Distinct attribute
   - [x] Phrase Search
   - [x] Placeholder search
   - [x] Highlighter 
- [x] Limit the number of word derivations in a search query
- [x] Compute the initial universe correctly according to the terms matching strategy
- [x] Implement placeholder search
- [x] Get the list of ranking rules from the settings 
- [x] Implement `distinct`
- [x] Determine what to do when one of `attribute`, `proximity`, `typo`, or `exactness` is placed before `words`
- [x] Make sure the correct number of allowed typos is used for each word, including the prefix one
- [x] Make sure stop words are treated correctly (e.g. correct position in query graph), including in phrases
- [x] Support phrases correctly
- [x] Support synonyms
- [x] Support split words
- [x] Support combination of ngram + split-words (e.g. `whiteh orse` -> `"white horse"`)
- [x] Implement `typo` ranking rule
- [x] Implement `sort` ranking rule
- [x] Use existing `Search` interface to use the new search algorithms
- [x] Remove old code


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-03 13:42:51 +00:00
curquiza
ddcb661c19 Use new bors URL of our self hosted instance 2023-05-02 18:20:12 +02:00
27 changed files with 1320 additions and 184 deletions

View File

@@ -15,7 +15,7 @@
<p align="center">
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
</p>
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

View File

@@ -28,6 +28,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
started_at,
finished_at,
index_mapper,
max_number_of_tasks: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,

View File

@@ -51,6 +51,7 @@ use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmap
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, map_bound};
use uuid::Uuid;
@@ -241,6 +242,9 @@ pub struct IndexSchedulerOptions {
/// Set to `true` iff the index scheduler is allowed to automatically
/// batch tasks together, to process multiple tasks at once.
pub autobatching_enabled: bool,
/// The maximum number of tasks stored in the task queue before starting
/// to auto schedule task deletions.
pub max_number_of_tasks: usize,
}
/// Structure which holds meilisearch's indexes and schedules the tasks
@@ -290,6 +294,10 @@ pub struct IndexScheduler {
/// Whether auto-batching is enabled or not.
pub(crate) autobatching_enabled: bool,
/// The max number of tasks allowed before the scheduler starts to delete
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@@ -339,6 +347,7 @@ impl IndexScheduler {
index_mapper: self.index_mapper.clone(),
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks,
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
@@ -412,6 +421,7 @@ impl IndexScheduler {
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks,
dumps_path: options.dumps_path,
snapshots_path: options.snapshots_path,
auth_path: options.auth_path,
@@ -940,14 +950,15 @@ impl IndexScheduler {
/// Perform one iteration of the run loop.
///
/// 1. Find the next batch of tasks to be processed.
/// 2. Update the information of these tasks following the start of their processing.
/// 3. Update the in-memory list of processed tasks accordingly.
/// 4. Process the batch:
/// 1. See if we need to cleanup the task queue
/// 2. Find the next batch of tasks to be processed.
/// 3. Update the information of these tasks following the start of their processing.
/// 4. Update the in-memory list of processed tasks accordingly.
/// 5. Process the batch:
/// - perform the actions of each batched task
/// - update the information of each batched task following the end
/// of their processing.
/// 5. Reset the in-memory list of processed tasks.
/// 6. Reset the in-memory list of processed tasks.
///
/// Returns the number of processed tasks.
fn tick(&self) -> Result<TickOutcome> {
@@ -957,6 +968,8 @@ impl IndexScheduler {
self.breakpoint(Breakpoint::Start);
}
self.cleanup_task_queue()?;
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
let batch =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
@@ -1093,6 +1106,55 @@ impl IndexScheduler {
Ok(TickOutcome::TickAgain(processed_tasks))
}
/// Register a task to cleanup the task queue if needed
fn cleanup_task_queue(&self) -> Result<()> {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
let nb_tasks = self.all_task_ids(&rtxn)?.len();
// if we have less than 1M tasks everything is fine
if nb_tasks < self.max_number_of_tasks as u64 {
return Ok(());
}
let finished = self.status.get(&rtxn, &Status::Succeeded)?.unwrap_or_default()
| self.status.get(&rtxn, &Status::Failed)?.unwrap_or_default()
| self.status.get(&rtxn, &Status::Canceled)?.unwrap_or_default();
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.
if to_delete.len() < 2 {
log::warn!("The task queue is almost full, but no task can be deleted yet.");
// the only thing we can do is hope that the user tasks are going to finish
return Ok(());
}
log::info!(
"The task queue is almost full. Deleting the oldest {} finished tasks.",
to_delete.len()
);
// it's safe to unwrap here because we checked the len above
let newest_task_id = to_delete.iter().last().unwrap();
let last_task_to_delete =
self.get_task(&rtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
drop(rtxn);
// increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);
self.register(KindWithContent::TaskDeletion {
query: format!(
"?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
),
tasks: to_delete,
})?;
Ok(())
}
pub fn index_stats(&self, index_uid: &str) -> Result<IndexStats> {
let is_indexing = self.is_index_processing(index_uid)?;
let rtxn = self.read_txn()?;
@@ -1350,9 +1412,10 @@ mod tests {
use big_s::S;
use crossbeam::channel::RecvTimeoutError;
use file_store::File;
use meili_snap::snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_auth::AuthFilter;
use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::error::ErrorCode;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::obkv_to_json;
use meilisearch_types::milli::update::IndexDocumentsMethod::{
@@ -1383,13 +1446,22 @@ mod tests {
pub fn test(
autobatching_enabled: bool,
planned_failures: Vec<(usize, FailureLocation)>,
) -> (Self, IndexSchedulerHandle) {
Self::test_with_custom_config(planned_failures, |config| {
config.autobatching_enabled = autobatching_enabled;
})
}
pub fn test_with_custom_config(
planned_failures: Vec<(usize, FailureLocation)>,
configuration: impl Fn(&mut IndexSchedulerOptions),
) -> (Self, IndexSchedulerHandle) {
let tempdir = TempDir::new().unwrap();
let (sender, receiver) = crossbeam::channel::bounded(0);
let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };
let options = IndexSchedulerOptions {
let mut options = IndexSchedulerOptions {
version_file_path: tempdir.path().join(VERSION_FILE_NAME),
auth_path: tempdir.path().join("auth"),
tasks_path: tempdir.path().join("db_path"),
@@ -1402,8 +1474,10 @@ mod tests {
index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5,
indexer_config,
autobatching_enabled,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
};
configuration(&mut options);
let index_scheduler = Self::new(options, sender, planned_failures).unwrap();
@@ -3718,4 +3792,127 @@ mod tests {
// No matter what happens in process_batch, the index_scheduler should be internally consistent
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
}
#[test]
fn test_task_queue_is_full() {
let (index_scheduler, mut handle) =
IndexScheduler::test_with_custom_config(vec![], |config| {
// that's the minimum map size possible
config.task_db_size = 1048576;
});
index_scheduler
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
.unwrap();
handle.advance_one_successful_batch();
// on average this task takes ~600 bytes
loop {
let result = index_scheduler.register(KindWithContent::IndexCreation {
index_uid: S("doggo"),
primary_key: None,
});
if result.is_err() {
break;
}
handle.advance_one_failed_batch();
}
index_scheduler.assert_internally_consistent();
// at this point the task DB should have reached its limit and we should not be able to register new tasks
let result = index_scheduler
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// we won't be able to test this error in an integration test, so as a best-effort test we still ensure the error returns the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// Even a task deletion that doesn't delete anything shouldn't be accepted
let result = index_scheduler
.register(KindWithContent::TaskDeletion {
query: S("test"),
tasks: RoaringBitmap::new(),
})
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// we won't be able to test this error in an integration test, so as a best-effort test we still ensure the error returns the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// But a task deletion that deletes something should work
index_scheduler
.register(KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() })
.unwrap();
handle.advance_one_successful_batch();
// Now we should be able to enqueue a few tasks again
index_scheduler
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
.unwrap();
handle.advance_one_failed_batch();
}
#[test]
fn test_auto_deletion_of_tasks() {
let (index_scheduler, mut handle) =
IndexScheduler::test_with_custom_config(vec![], |config| {
config.max_number_of_tasks = 2;
});
index_scheduler
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
.unwrap();
handle.advance_one_successful_batch();
index_scheduler
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
.unwrap();
handle.advance_one_failed_batch();
// at this point the max number of tasks is reached
// we can still enqueue multiple tasks
index_scheduler
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
.unwrap();
index_scheduler
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
.unwrap();
let rtxn = index_scheduler.env.read_txn().unwrap();
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
drop(rtxn);
// now we're above the max number of tasks
// and if we try to advance in the tick function a new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
drop(rtxn);
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
drop(rtxn);
handle.advance_one_failed_batch();
// a new task deletion has been enqueued
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
drop(rtxn);
handle.advance_one_failed_batch();
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
drop(rtxn);
}
}

View File

@@ -0,0 +1,68 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 5,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"TaskDeletion": {
"matched_tasks": 2,
"deleted_tasks": 2,
"original_filter": "[filter]"
}
},
"status": "succeeded",
"kind": {
"taskDeletion": {
"query": "[query]",
"tasks": [
58,
48,
0,
0,
1,
0,
0,
0,
0,
0,
1,
0,
16,
0,
0,
0,
2,
0,
4,
0
]
}
}
}
]

View File

@@ -0,0 +1,48 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 6,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"TaskDeletion": {
"matched_tasks": 2,
"deleted_tasks": 2,
"original_filter": "[filter]"
}
},
"status": "succeeded",
"kind": {
"taskDeletion": {
"query": "[query]",
"tasks": [
58,
48,
0,
0,
1,
0,
0,
0,
0,
0,
1,
0,
16,
0,
0,
0,
3,
0,
5,
0
]
}
}
}
]

View File

@@ -0,0 +1,133 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 4,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"TaskDeletion": {
"matched_tasks": 2,
"deleted_tasks": null,
"original_filter": "[filter]"
}
},
"status": "enqueued",
"kind": {
"taskDeletion": {
"query": "[query]",
"tasks": [
58,
48,
0,
0,
1,
0,
0,
0,
0,
0,
1,
0,
16,
0,
0,
0,
0,
0,
1,
0
]
}
}
}
]

View File

@@ -0,0 +1,88 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 4,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"TaskDeletion": {
"matched_tasks": 2,
"deleted_tasks": 2,
"original_filter": "[filter]"
}
},
"status": "succeeded",
"kind": {
"taskDeletion": {
"query": "[query]",
"tasks": [
58,
48,
0,
0,
1,
0,
0,
0,
0,
0,
1,
0,
16,
0,
0,
0,
0,
0,
1,
0
]
}
}
}
]

View File

@@ -0,0 +1,90 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
}
]

View File

@@ -237,8 +237,11 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacet , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
@@ -327,6 +330,7 @@ impl ErrorCode for milli::Error {
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchFacet { .. } => Code::InvalidSearchFacet,
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::SortError(_) => Code::InvalidSearchSort,

View File

@@ -38,6 +38,18 @@ impl MultiSearchAggregator {
pub fn succeed(&mut self) {}
}
#[derive(Default)]
pub struct FacetSearchAggregator;
#[allow(dead_code)]
impl FacetSearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,

View File

@@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -81,6 +85,9 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
/// This method should be called to aggregate post facet values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// this method should be called to aggregate a add documents request
fn add_documents(
&self,

View File

@@ -1,5 +1,6 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::mem::take;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -27,11 +28,13 @@ use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::Opt;
@@ -69,6 +72,7 @@ pub enum AnalyticsMsg {
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
@@ -135,6 +139,7 @@ impl SegmentAnalytics {
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
post_facet_search_aggregator: FacetSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@@ -176,6 +181,10 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
@@ -335,6 +344,7 @@ pub struct Segment {
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
post_facet_search_aggregator: FacetSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
@@ -397,6 +407,7 @@ impl Segment {
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@@ -438,45 +449,62 @@ impl Segment {
})
.await;
}
let get_search = std::mem::take(&mut self.get_search_aggregator)
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched by Multi-Search POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
.into_event(&self.user, "Documents Deleted");
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
.into_event(&self.user, "Documents Updated");
let get_tasks =
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
let health =
std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
if let Some(get_search) = get_search {
let Segment {
inbox: _,
opt: _,
batcher: _,
user,
get_search_aggregator,
post_search_aggregator,
post_multi_search_aggregator,
post_facet_search_aggregator,
add_documents_aggregator,
delete_documents_aggregator,
update_documents_aggregator,
get_tasks_aggregator,
health_aggregator,
} = self;
if let Some(get_search) =
take(get_search_aggregator).into_event(&user, "Documents Searched GET")
{
let _ = self.batcher.push(get_search).await;
}
if let Some(post_search) = post_search {
if let Some(post_search) =
take(post_search_aggregator).into_event(&user, "Documents Searched POST")
{
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
if let Some(post_multi_search) = take(post_multi_search_aggregator)
.into_event(&user, "Documents Searched by Multi-Search POST")
{
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
if let Some(post_facet_search) = take(post_facet_search_aggregator)
.into_event(&user, "Documents Searched by Facet-Search POST")
{
let _ = self.batcher.push(post_facet_search).await;
}
if let Some(add_documents) =
take(add_documents_aggregator).into_event(&user, "Documents Added")
{
let _ = self.batcher.push(add_documents).await;
}
if let Some(delete_documents) = delete_documents {
if let Some(delete_documents) =
take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
{
let _ = self.batcher.push(delete_documents).await;
}
if let Some(update_documents) = update_documents {
if let Some(update_documents) =
take(update_documents_aggregator).into_event(&user, "Documents Updated")
{
let _ = self.batcher.push(update_documents).await;
}
if let Some(get_tasks) = get_tasks {
if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
let _ = self.batcher.push(get_tasks).await;
}
if let Some(health) = health {
if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
let _ = self.batcher.push(health).await;
}
let _ = self.batcher.flush().await;
@@ -853,6 +881,144 @@ impl MultiSearchAggregator {
}
}
#[derive(Default)]
pub struct FacetSearchAggregator {
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// Were any parameters other than facetName or facetQuery provided?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name,
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
} = query;
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.total_received = 1;
ret.user_agents = extract_user_agents(request).into_iter().collect();
ret.facet_names = Some(facet_name.clone()).into_iter().collect();
ret.additional_search_parameters_provided = q.is_some()
|| *offset != DEFAULT_SEARCH_OFFSET()
|| *limit != DEFAULT_SEARCH_LIMIT()
|| page.is_some()
|| hits_per_page.is_some()
|| attributes_to_retrieve.is_some()
|| attributes_to_crop.is_some()
|| *crop_length != DEFAULT_CROP_LENGTH()
|| attributes_to_highlight.is_some()
|| *show_matches_position
|| filter.is_some()
|| sort.is_some()
|| facets.is_some()
|| *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG()
|| *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG()
|| *crop_marker != DEFAULT_CROP_MARKER()
|| *matching_strategy != MatchingStrategy::default();
ret
}
pub fn succeed(&mut self, result: &FacetSearchResult) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(result.processing_time_ms as usize);
}
/// Aggregate one [FacetSearchAggregator] into another.
pub fn aggregate(&mut self, mut other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// context
for user_agent in other.user_agents.into_iter() {
self.user_agents.insert(user_agent);
}
// request
self.total_received = self.total_received.saturating_add(other.total_received);
self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
self.time_spent.append(&mut other.time_spent);
// facet_names
for facet_name in other.facet_names.into_iter() {
self.facet_names.insert(facet_name);
}
// additional_search_parameters_provided
self.additional_search_parameters_provided = self.additional_search_parameters_provided
| other.additional_search_parameters_provided;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
// the index of the 99th percentile value
let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
// we get all the values in a sorted manner
let time_spent = self.time_spent.into_sorted_vec();
// We are only interested in the slowest value among the 99% fastest results
let time_spent = time_spent.get(percentile_99th as usize);
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
"total_received": self.total_received,
},
"facets": {
"total_distinct_facet_count": self.facet_names.len(),
},
"additional_search_parameters_provided": self.additional_search_parameters_provided,
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,

View File

@@ -234,6 +234,7 @@ fn open_or_create_database_unchecked(
index_base_map_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
})?)

View File

@@ -0,0 +1,133 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(search)));
}
// TODO improve the error messages
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FacetSearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
pub facet_query: Option<String>,
#[deserr(error = DeserrJsonError<InvalidFacetSearchName>)]
pub facet_name: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
pub async fn search(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!("facet search called with params: {:?}", query);
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
let facet_query = query.facet_query.clone();
let facet_name = query.facet_name.clone();
let mut search_query = SearchQuery::from(query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut search_query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name)
})
.await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.post_facet_search(aggregate);
let search_result = search_result?;
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
impl From<FacetSearchQuery> for SearchQuery {
fn from(value: FacetSearchQuery) -> Self {
SearchQuery {
q: value.q,
offset: value.offset,
limit: value.limit,
page: value.page,
hits_per_page: value.hits_per_page,
attributes_to_retrieve: value.attributes_to_retrieve,
attributes_to_crop: value.attributes_to_crop,
crop_length: value.crop_length,
attributes_to_highlight: value.attributes_to_highlight,
show_matches_position: value.show_matches_position,
filter: value.filter,
sort: value.sort,
facets: value.facets,
highlight_pre_tag: value.highlight_pre_tag,
highlight_post_tag: value.highlight_post_tag,
crop_marker: value.crop_marker,
matching_strategy: value.matching_strategy,
}
}
}
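For reference, a hedged usage sketch of the new route. The body follows the camelCase field names of `FacetSearchQuery`; the `movies` index and `genres` facet are made-up example data, and the hit shape in the comment assumes milli's `FacetValueHit` exposes a facet `value` and a `count`.

```rust
use serde_json::json;

fn main() {
    // Hypothetical request against an index named `movies` with a
    // filterable `genres` attribute.
    let body = json!({
        "facetName": "genres",
        "facetQuery": "c",
        // Any regular search parameter restricts the candidate documents:
        "q": "batman",
    });
    println!("POST /indexes/movies/facet-search");
    println!("{body:#}");
    // Expected response shape (a `FacetSearchResult`, serialized in camelCase):
    // { "hits": [{ "value": "Comedy", "count": 42 }],
    //   "query": "c", "processingTimeMs": 3 }
}
```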

View File

@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

View File

@@ -8,7 +8,9 @@ use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{FacetValueHit, SearchForFacetValues};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -170,7 +172,7 @@ impl SearchQueryWithIndex {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@@ -241,6 +243,14 @@ pub struct FacetStats {
pub max: f64,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FacetSearchResult {
pub hits: Vec<FacetValueHit>,
pub query: Option<String>,
pub processing_time_ms: u128,
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
@@ -261,14 +271,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
}
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let mut search = index.search(&rtxn);
fn prepare_search<'t>(
index: &'t Index,
rtxn: &'t RoTxn,
query: &'t SearchQuery,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
if let Some(ref query) = query.q {
search.query(query);
@@ -278,7 +286,7 @@ pub fn perform_search(
search.terms_matching_strategy(query.matching_strategy.into());
let max_total_hits = index
.pagination_max_total_hits(&rtxn)
.pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
@@ -320,6 +328,19 @@ pub fn perform_search(
search.sort_criteria(sort);
}
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query)?;
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -473,6 +494,30 @@ pub fn perform_search(
Ok(result)
}
pub fn perform_facet_search(
index: &Index,
search_query: SearchQuery,
facet_query: Option<String>,
facet_name: String,
) -> Result<FacetSearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?;
let mut facet_search = SearchForFacetValues::new(facet_name, search);
if let Some(facet_query) = &facet_query {
facet_search.query(facet_query);
}
let hits = facet_search.execute()?;
Ok(FacetSearchResult {
hits,
query: facet_query,
processing_time_ms: before_search.elapsed().as_millis(),
})
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =

View File

@@ -1,14 +1,11 @@
mod errors;
use byte_unit::{Byte, ByteUnit};
use meili_snap::insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use serde_json::json;
use tempfile::TempDir;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::{default_settings, Server};
use crate::common::Server;
#[actix_rt::test]
async fn error_get_unexisting_task_status() {
@@ -1003,117 +1000,3 @@ async fn test_summarized_dump_creation() {
}
"###);
}
#[actix_web::test]
async fn test_task_queue_is_full() {
let dir = TempDir::new().unwrap();
let mut options = default_settings(dir.path());
options.max_task_db_size = Byte::from_unit(500.0, ByteUnit::B).unwrap();
let server = Server::new_with_options(options).await.unwrap();
// the first task should be enqueued without issue
let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
loop {
let (res, code) = server.create_index(json!({ "uid": "doggo" })).await;
if code == 422 {
break;
}
if res["taskUid"] == json!(null) {
panic!(
"Encountered the strange case:\n{}",
serde_json::to_string_pretty(&res).unwrap()
);
}
}
let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
snapshot!(code, @"422 Unprocessable Entity");
snapshot!(json_string!(result), @r###"
{
"message": "Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.",
"code": "no_space_left_on_device",
"type": "system",
"link": "https://docs.meilisearch.com/errors#no_space_left_on_device"
}
"###);
// But we should still be able to register task deletions IF they delete something
let (result, code) = server.delete_tasks("uids=*").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
{
"taskUid": "uid",
"indexUid": null,
"status": "enqueued",
"type": "taskDeletion",
"enqueuedAt": "[date]"
}
"###);
let result = server.wait_task(result["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(result["status"]), @r###""succeeded""###);
// Now we should be able to register tasks again
let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
{
"taskUid": "uid",
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
// we're going to fill up the queue once again
loop {
let (res, code) = server.delete_tasks("uids=0").await;
if code == 422 {
break;
}
if res["taskUid"] == json!(null) {
panic!(
"Encountered the strange case:\n{}",
serde_json::to_string_pretty(&res).unwrap()
);
}
}
// But we should NOT be able to register this task because it doesn't match any tasks
let (result, code) = server.delete_tasks("uids=0").await;
snapshot!(code, @"422 Unprocessable Entity");
snapshot!(json_string!(result), @r###"
{
"message": "Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.",
"code": "no_space_left_on_device",
"type": "system",
"link": "https://docs.meilisearch.com/errors#no_space_left_on_device"
}
"###);
// The deletion still works
let (result, code) = server.delete_tasks("uids=*").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
{
"taskUid": "uid",
"indexUid": null,
"status": "enqueued",
"type": "taskDeletion",
"enqueuedAt": "[date]"
}
"###);
}

View File

@@ -122,6 +122,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("Attribute `{}` is not filterable. {}",
.field,
match .valid_fields.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
),
}
)]
InvalidSearchFacet { field: String, valid_fields: BTreeSet<String> },
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]

View File

@@ -0,0 +1,23 @@
use std::borrow::Cow;
use fst::Set;
use heed::{BytesDecode, BytesEncode};
/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec {
type EItem = Set<Vec<u8>>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_fst().as_bytes()))
}
}
impl<'a> BytesDecode<'a> for FstSetCodec {
type DItem = Set<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Set::new(bytes).ok()
}
}
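A quick round-trip sketch of this codec, assuming `FstSetCodec` from above is in scope (this heed version's traits return `Option`, as in the impls above):

```rust
use fst::Set;
use heed::{BytesDecode, BytesEncode};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `Set::from_iter` requires lexicographically sorted input.
    let set: Set<Vec<u8>> = Set::from_iter(["blue", "green", "red"])?;

    // Encoding exposes the raw bytes of the underlying fst...
    let bytes = FstSetCodec::bytes_encode(&set).ok_or("encode failed")?;

    // ...and decoding borrows them back as a zero-copy `Set<&[u8]>`.
    let decoded = FstSetCodec::bytes_decode(&bytes).ok_or("decode failed")?;
    assert!(decoded.contains("green"));
    Ok(())
}
```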

View File

@@ -2,6 +2,7 @@ mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
@@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{

View File

@@ -19,7 +19,7 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@@ -85,6 +85,7 @@ pub mod db_name {
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const DOCUMENTS: &str = "documents";
@@ -147,6 +148,8 @@ pub struct Index {
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that correspond to them.
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the field id of each string facet to an FST containing all its facet values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
/// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@@ -166,7 +169,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(23);
options.max_dbs(24);
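// Note: LMDB requires `max_dbs` to be at least the number of named databases
// opened below, otherwise `create_database` fails; the bump from 23 to 24
// accounts for the new facet-id-string-fst database.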
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@@ -189,6 +192,7 @@ impl Index {
let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_string_fst = env.create_database(Some(FACET_ID_STRING_FST))?;
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
@@ -219,6 +223,7 @@ impl Index {
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
@@ -1461,11 +1466,11 @@ pub(crate) mod tests {
db_snap!(index, field_distribution);
db_snap!(index, field_distribution,
@r###"
age 1
id 2
name 2
"###
@"
age 1
id 2
name 2
"
);
// snapshot_index!(&index, "1", include: "^field_distribution$");
@@ -1482,10 +1487,10 @@ pub(crate) mod tests {
db_snap!(index, field_distribution,
@r###"
age 1
id 2
name 2
"###
age 1
id 2
name 2
"###
);
// then we update a document by removing one field and another by adding one field
@@ -1498,10 +1503,10 @@ pub(crate) mod tests {
db_snap!(index, field_distribution,
@r###"
has_dog 1
id 2
name 2
"###
has_dog 1
id 2
name 2
"###
);
}

View File

@@ -99,8 +99,9 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
DEFAULT_VALUES_PER_FACET,
};
pub type Result<T> = std::result::Result<T, error::Error>;

View File

@@ -1,14 +1,20 @@
use std::fmt;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldIdMapMissingEntry, Index,
Result, SearchContext, BEU16,
};
// Building these factories is not free.
@@ -16,6 +22,9 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
/// The maximum number of facet values returned by the facet search route.
const MAX_NUMBER_OF_FACETS: usize = 100;
pub mod facet;
mod fst_utils;
pub mod new;
@@ -199,6 +208,174 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
}
}
pub struct SearchForFacetValues<'a> {
query: Option<String>,
facet: String,
search_query: Search<'a>,
}
impl<'a> SearchForFacetValues<'a> {
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
SearchForFacetValues { query: None, facet, search_query }
}
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
self.query = Some(query.into());
self
}
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
return Err(UserError::InvalidSearchFacet {
field: self.facet.clone(),
valid_fields: filterable_fields.into_iter().collect(),
}
.into());
}
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
None => {
return Err(FieldIdMapMissingEntry::FieldName {
field_name: self.facet.clone(),
process: "search for facet values",
}
.into());
}
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
Some(fst) => fst,
None => return Ok(vec![]),
};
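// No FST stored for this field means no string facet value was indexed for it,
// so there is nothing to search in.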
let search_candidates = self.search_query.execute()?.candidates;
match self.query.as_ref() {
Some(query) => {
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
let field_authorizes_typos =
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
if authorize_typos && field_authorizes_typos {
let mut results = vec![];
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
let key =
FacetGroupKey { field_id: fid, level: 0, left_bound: query.as_ref() };
if let Some(FacetGroupValue { bitmap, .. }) =
index.facet_id_string_docids.get(rtxn, &key)?
{
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
results.push(FacetValueHit { value: query.to_string(), count });
}
}
} else {
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
let is_prefix = true;
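// With Meilisearch's default minWordSizeForTypos settings (oneTypo: 5,
// twoTypos: 9), queries under 5 characters tolerate no typo, 5 to 8
// characters one typo, and 9 or more characters two typos.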
let automaton = if query.len() < one_typo as usize {
build_dfa(query, 0, is_prefix)
} else if query.len() < two_typos as usize {
build_dfa(query, 1, is_prefix)
} else {
build_dfa(query, 2, is_prefix)
};
let mut stream = fst.search(automaton).into_stream();
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
results.push(FacetValueHit { value: value.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
}
Ok(results)
} else {
let automaton = Str::new(query).starts_with();
let mut stream = fst.search(automaton).into_stream();
let mut results = vec![];
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
results.push(FacetValueHit { value: value.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
None => {
let mut results = vec![];
let mut length = 0;
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
result?;
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
results.push(FacetValueHit { value: left_bound.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
}
}
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated with this facet value
pub count: u64,
}
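// Through serde, a hit serializes to a JSON object such as
// {"value": "Action", "count": 42} (values illustrative).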
#[cfg(test)]
mod test {
use super::*;
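A hedged usage sketch of the new API (illustrative; assumes an open `Index` and uses only the items exported above):

use milli::{Index, Search, SearchForFacetValues};

fn genres_starting_with_act(index: &Index) -> milli::Result<()> {
    let rtxn = index.read_txn()?;
    // Reuse the regular search to restrict the candidate documents, then
    // look for values of the `genre` facet starting with "act", typo-tolerantly.
    let search = Search::new(&rtxn, index);
    let mut facet_search = SearchForFacetValues::new("genre".to_string(), search);
    facet_search.query("act");
    for hit in facet_search.execute()? {
        println!("{} ({} documents)", hit.value, hit.count);
    }
    Ok(())
}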

View File

@@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
script_language_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst: _,
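// Binding the new database with `: _` keeps this destructuring exhaustive,
// so the compiler flags any future database that the clear (and, below,
// delete) code paths forget to consider.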
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,

View File

@@ -243,6 +243,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
word_prefix_fid_docids,
facet_id_f64_docids: _,
facet_id_string_docids: _,
facet_id_string_fst: _,
field_id_docid_facet_f64s: _,
field_id_docid_facet_strings: _,
script_language_docids,

View File

@@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::fs::File;
use heed::types::DecodeIgnore;
use log::debug;
use time::OffsetDateTime;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::{Index, Result};
use crate::{Index, Result, BEU16};
pub mod bulk;
pub mod delete;
@@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> {
);
incremental_update.execute(wtxn)?;
}
// We compute one FST per string facet field
let mut text_fsts = vec![];
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
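// `facet_id_string_docids` iterates in (field_id, level, left_bound) key order,
// so all the level-0 entries of a field are consecutive and their left bounds
// arrive sorted, which is exactly the insertion order `fst::SetBuilder` requires.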
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
for result in database.iter(wtxn)? {
let (facet_group_key, _) = result?;
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
current_fst = match current_fst.take() {
Some((fid, fst_builder)) if fid != field_id => {
let fst = fst_builder.into_set();
text_fsts.push((field_id, fst));
Some((field_id, fst::SetBuilder::memory()))
}
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
None => Some((field_id, fst::SetBuilder::memory())),
};
if let Some((_, fst_builder)) = current_fst.as_mut() {
fst_builder.insert(left_bound)?;
}
}
}
if let Some((field_id, fst_builder)) = current_fst {
let fst = fst_builder.into_set();
text_fsts.push((field_id, fst));
}
// We remove all of the previous FSTs that were in this database
self.index.facet_id_string_fst.clear(wtxn)?;
// We write those FSTs into LMDB now
for (field_id, fst) in text_fsts {
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
}
Ok(())
}
}
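For illustration, a hedged read-side sketch of the FSTs written above (assuming an open `Index` and a read transaction; `dump_facet_values` is a hypothetical helper):

use fst::Streamer;
use milli::{Index, BEU16};

// Print every facet string value stored for the given field id, if any.
fn dump_facet_values(index: &Index, rtxn: &heed::RoTxn, field_id: u16) -> milli::Result<()> {
    if let Some(fst) = index.facet_id_string_fst.get(rtxn, &BEU16::new(field_id))? {
        let mut stream = fst.stream();
        while let Some(value) = stream.next() {
            println!("{}", String::from_utf8_lossy(value));
        }
    }
    Ok(())
}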

View File

@@ -1 +0,0 @@