Use the minWordSizeForTypos index settings

Format the code
Fix compilation issues
2025-07-21 22:00:59 +00:00 · 2023-06-06 10:48:43 +02:00 · 2023-06-06 10:48:43 +02:00 · 2023-06-06 10:48:42 +02:00 · 2023-06-06 10:48:08 +02:00 · 2023-06-06 10:48:08 +02:00
22 changed files with 718 additions and 66 deletions
--- a/index-scheduler/src/utils.rs
+++ b/index-scheduler/src/utils.rs
@ -466,7 +466,7 @@ impl IndexScheduler {
                        }
                    }
                    Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => {
-                        assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter);
+                        assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
                        let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter {
                            ref index_uid,
                            ref filter_expr,
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@ -239,8 +239,11 @@ InvalidSearchMatchingStrategy         , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchOffset                   , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchPage                     , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchQ                        , InvalidRequest       , BAD_REQUEST ;
+InvalidFacetSearchQuery               , InvalidRequest       , BAD_REQUEST ;
+InvalidFacetSearchName                , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchShowMatchesPosition      , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchSort                     , InvalidRequest       , BAD_REQUEST ;
+InvalidSearchFacet                    , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsDisplayedAttributes    , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsDistinctAttribute      , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsFaceting               , InvalidRequest       , BAD_REQUEST ;
@ -330,6 +333,7 @@ impl ErrorCode for milli::Error {
                    UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
                    UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
                    UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
+                    UserError::InvalidSearchFacet { .. } => Code::InvalidSearchFacet,
                    UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
                    UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
                    UserError::SortError(_) => Code::InvalidSearchSort,
--- a/meilisearch-types/src/tasks.rs
+++ b/meilisearch-types/src/tasks.rs
@ -395,7 +395,6 @@ impl std::error::Error for ParseTaskStatusError {}
 pub enum Kind {
    DocumentAdditionOrUpdate,
    DocumentDeletion,
-    DocumentDeletionByFilter,
    SettingsUpdate,
    IndexCreation,
    IndexDeletion,
@ -412,7 +411,6 @@ impl Kind {
        match self {
            Kind::DocumentAdditionOrUpdate
            | Kind::DocumentDeletion
-            | Kind::DocumentDeletionByFilter
            | Kind::SettingsUpdate
            | Kind::IndexCreation
            | Kind::IndexDeletion
@ -430,7 +428,6 @@ impl Display for Kind {
        match self {
            Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
            Kind::DocumentDeletion => write!(f, "documentDeletion"),
-            Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"),
            Kind::SettingsUpdate => write!(f, "settingsUpdate"),
            Kind::IndexCreation => write!(f, "indexCreation"),
            Kind::IndexDeletion => write!(f, "indexDeletion"),
--- a/meilisearch/src/analytics/mock_analytics.rs
+++ b/meilisearch/src/analytics/mock_analytics.rs
@ -38,6 +38,18 @@ impl MultiSearchAggregator {
    pub fn succeed(&mut self) {}
 }

+/// No-op stand-in for the facet-search aggregator; `analytics/mod.rs` aliases it in debug
+/// builds or when the `analytics` feature is off.
+#[derive(Default)]
+pub struct FacetSearchAggregator;
+
+#[allow(dead_code)]
+impl FacetSearchAggregator {
+    /// Ignores both arguments and returns the unit aggregator.
+    pub fn from_query(_query: &dyn Any, _request: &dyn Any) -> Self {
+        Self
+    }
+
+    /// Does nothing: the mock keeps no statistics.
+    pub fn succeed(&mut self, _result: &dyn Any) {}
+}
+
 impl MockAnalytics {
    #[allow(clippy::new_ret_no_self)]
    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
    fn get_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
+    fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
    fn add_documents(
        &self,
        _documents_query: &UpdateDocumentsQuery,
--- a/meilisearch/src/analytics/mod.rs
+++ b/meilisearch/src/analytics/mod.rs
@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
 pub type SearchAggregator = mock_analytics::SearchAggregator;
 #[cfg(any(debug_assertions, not(feature = "analytics")))]
 pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
+#[cfg(any(debug_assertions, not(feature = "analytics")))]
+pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;

 // if we are in release mode and the feature analytics was enabled
 // we use the real analytics
@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
 pub type SearchAggregator = segment_analytics::SearchAggregator;
 #[cfg(all(not(debug_assertions), feature = "analytics"))]
 pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
+#[cfg(all(not(debug_assertions), feature = "analytics"))]
+pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;

 /// The Meilisearch config dir:
 /// `~/.config/Meilisearch` on *NIX or *BSD.
@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send {
    /// This method should be called to aggregate a post array of searches
    fn post_multi_search(&self, aggregate: MultiSearchAggregator);

+    /// This method should be called to aggregate post facet values searches
+    fn post_facet_search(&self, aggregate: FacetSearchAggregator);
+
    // this method should be called to aggregate a add documents request
    fn add_documents(
        &self,
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@ -1,5 +1,6 @@
 use std::collections::{BinaryHeap, HashMap, HashSet};
 use std::fs;
+use std::mem::take;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use std::time::{Duration, Instant};
@ -29,11 +30,13 @@ use super::{
 use crate::analytics::Analytics;
 use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
+use crate::routes::indexes::facet_search::FacetSearchQuery;
 use crate::routes::tasks::TasksFilterQuery;
 use crate::routes::{create_all_stats, Stats};
 use crate::search::{
-    SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+    FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
+    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };
 use crate::Opt;

@ -71,6 +74,7 @@ pub enum AnalyticsMsg {
    AggregateGetSearch(SearchAggregator),
    AggregatePostSearch(SearchAggregator),
    AggregatePostMultiSearch(MultiSearchAggregator),
+    AggregatePostFacetSearch(FacetSearchAggregator),
    AggregateAddDocuments(DocumentsAggregator),
    AggregateDeleteDocuments(DocumentsDeletionAggregator),
    AggregateUpdateDocuments(DocumentsAggregator),
@ -139,6 +143,7 @@ impl SegmentAnalytics {
            batcher,
            post_search_aggregator: SearchAggregator::default(),
            post_multi_search_aggregator: MultiSearchAggregator::default(),
+            post_facet_search_aggregator: FacetSearchAggregator::default(),
            get_search_aggregator: SearchAggregator::default(),
            add_documents_aggregator: DocumentsAggregator::default(),
            delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
    }

+    fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
+        // The outcome of `try_send` is deliberately discarded.
+        let message = AnalyticsMsg::AggregatePostFacetSearch(aggregate);
+        let _ = self.sender.try_send(message);
+    }
+
    fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
    }
@ -354,6 +363,7 @@ pub struct Segment {
    get_search_aggregator: SearchAggregator,
    post_search_aggregator: SearchAggregator,
    post_multi_search_aggregator: MultiSearchAggregator,
+    post_facet_search_aggregator: FacetSearchAggregator,
    add_documents_aggregator: DocumentsAggregator,
    delete_documents_aggregator: DocumentsDeletionAggregator,
    update_documents_aggregator: DocumentsAggregator,
@ -418,6 +428,7 @@ impl Segment {
                        Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
+                        Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@ -461,55 +472,72 @@ impl Segment {
                })
                .await;
        }
-        let get_search = std::mem::take(&mut self.get_search_aggregator)
-            .into_event(&self.user, "Documents Searched GET");
-        let post_search = std::mem::take(&mut self.post_search_aggregator)
-            .into_event(&self.user, "Documents Searched POST");
-        let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
-            .into_event(&self.user, "Documents Searched by Multi-Search POST");
-        let add_documents = std::mem::take(&mut self.add_documents_aggregator)
-            .into_event(&self.user, "Documents Added");
-        let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
-            .into_event(&self.user, "Documents Deleted");
-        let update_documents = std::mem::take(&mut self.update_documents_aggregator)
-            .into_event(&self.user, "Documents Updated");
-        let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
-            .into_event(&self.user, "Documents Fetched GET");
-        let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
-            .into_event(&self.user, "Documents Fetched POST");
-        let get_tasks =
-            std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
-        let health =
-            std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");

-        if let Some(get_search) = get_search {
+        let Segment {
+            inbox: _,
+            opt: _,
+            batcher: _,
+            user,
+            get_search_aggregator,
+            post_search_aggregator,
+            post_multi_search_aggregator,
+            post_facet_search_aggregator,
+            add_documents_aggregator,
+            delete_documents_aggregator,
+            update_documents_aggregator,
+            get_fetch_documents_aggregator,
+            post_fetch_documents_aggregator,
+            get_tasks_aggregator,
+            health_aggregator,
+        } = self;
+
+        if let Some(get_search) =
+            take(get_search_aggregator).into_event(&user, "Documents Searched GET")
+        {
            let _ = self.batcher.push(get_search).await;
        }
-        if let Some(post_search) = post_search {
+        if let Some(post_search) =
+            take(post_search_aggregator).into_event(&user, "Documents Searched POST")
+        {
            let _ = self.batcher.push(post_search).await;
        }
-        if let Some(post_multi_search) = post_multi_search {
+        if let Some(post_multi_search) = take(post_multi_search_aggregator)
+            .into_event(&user, "Documents Searched by Multi-Search POST")
+        {
            let _ = self.batcher.push(post_multi_search).await;
        }
-        if let Some(add_documents) = add_documents {
+        if let Some(post_facet_search) = take(post_facet_search_aggregator)
+            .into_event(&user, "Documents Searched by Facet-Search POST")
+        {
+            let _ = self.batcher.push(post_facet_search).await;
+        }
+        if let Some(add_documents) =
+            take(add_documents_aggregator).into_event(&user, "Documents Added")
+        {
            let _ = self.batcher.push(add_documents).await;
        }
-        if let Some(delete_documents) = delete_documents {
+        if let Some(delete_documents) =
+            take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
+        {
            let _ = self.batcher.push(delete_documents).await;
        }
-        if let Some(update_documents) = update_documents {
+        if let Some(update_documents) =
+            take(update_documents_aggregator).into_event(&user, "Documents Updated")
+        {
            let _ = self.batcher.push(update_documents).await;
        }
-        if let Some(get_fetch_documents) = get_fetch_documents {
+        if let Some(get_fetch_documents) =
+            take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET") {
            let _ = self.batcher.push(get_fetch_documents).await;
        }
-        if let Some(post_fetch_documents) = post_fetch_documents {
+        if let Some(post_fetch_documents) =
+            take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST") {
            let _ = self.batcher.push(post_fetch_documents).await;
        }
-        if let Some(get_tasks) = get_tasks {
+        if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
            let _ = self.batcher.push(get_tasks).await;
        }
-        if let Some(health) = health {
+        if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
            let _ = self.batcher.push(health).await;
        }
        let _ = self.batcher.flush().await;
@ -886,6 +914,144 @@ impl MultiSearchAggregator {
    }
 }

+#[derive(Default)]
+pub struct FacetSearchAggregator {
+    timestamp: Option<OffsetDateTime>,
+
+    // context: the user agents of the aggregated requests
+    user_agents: HashSet<String>,
+
+    // requests: counters, plus the processing time (in ms) of every successful request
+    total_received: usize,
+    total_succeeded: usize,
+    time_spent: BinaryHeap<usize>,
+
+    // The set of all the facetNames that were used
+    facet_names: HashSet<String>,
+
+    // Has any parameter other than facetName or facetQuery been provided?
+    additional_search_parameters_provided: bool,
+}
+
+impl FacetSearchAggregator {
+    /// Builds the aggregator describing a single facet-search request.
+    ///
+    /// Records the current time, the user agents of `request` and the searched facet name, and
+    /// flags whether any parameter other than `facetName`/`facetQuery` differs from its default.
+    pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
+        // Exhaustive destructuring: adding a field to `FacetSearchQuery` fails to compile here
+        // until the analytics are updated as well.
+        let FacetSearchQuery {
+            facet_query: _,
+            facet_name,
+            q,
+            offset,
+            limit,
+            page,
+            hits_per_page,
+            attributes_to_retrieve,
+            attributes_to_crop,
+            crop_length,
+            attributes_to_highlight,
+            show_matches_position,
+            filter,
+            sort,
+            facets,
+            highlight_pre_tag,
+            highlight_post_tag,
+            crop_marker,
+            matching_strategy,
+        } = query;
+
+        let additional_search_parameters_provided = q.is_some()
+            || *offset != DEFAULT_SEARCH_OFFSET()
+            || *limit != DEFAULT_SEARCH_LIMIT()
+            || page.is_some()
+            || hits_per_page.is_some()
+            || attributes_to_retrieve.is_some()
+            || attributes_to_crop.is_some()
+            || *crop_length != DEFAULT_CROP_LENGTH()
+            || attributes_to_highlight.is_some()
+            || *show_matches_position
+            || filter.is_some()
+            || sort.is_some()
+            || facets.is_some()
+            || *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG()
+            || *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG()
+            || *crop_marker != DEFAULT_CROP_MARKER()
+            || *matching_strategy != MatchingStrategy::default();
+
+        Self {
+            timestamp: Some(OffsetDateTime::now_utc()),
+            user_agents: extract_user_agents(request).into_iter().collect(),
+            total_received: 1,
+            facet_names: HashSet::from([facet_name.clone()]),
+            additional_search_parameters_provided,
+            // nothing has succeeded yet: `total_succeeded` and `time_spent` start empty
+            ..Self::default()
+        }
+    }
+
+    /// Registers a successful request along with its processing time in milliseconds.
+    pub fn succeed(&mut self, result: &FacetSearchResult) {
+        self.total_succeeded = self.total_succeeded.saturating_add(1);
+        self.time_spent.push(result.processing_time_ms as usize);
+    }
+
+    /// Aggregate one [FacetSearchAggregator] into another.
+    pub fn aggregate(&mut self, mut other: Self) {
+        // keep the timestamp that was set first
+        if self.timestamp.is_none() {
+            self.timestamp = other.timestamp;
+        }
+
+        // context
+        self.user_agents.extend(other.user_agents);
+
+        // request
+        self.total_received = self.total_received.saturating_add(other.total_received);
+        self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
+        self.time_spent.append(&mut other.time_spent);
+
+        // facet_names
+        self.facet_names.extend(other.facet_names);
+
+        // additional_search_parameters_provided
+        self.additional_search_parameters_provided |= other.additional_search_parameters_provided;
+    }
+
+    /// Turns the aggregated statistics into a [Track] event named `event_name` for `user`.
+    ///
+    /// Returns `None` when no request was aggregated (`total_received == 0`).
+    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
+        if self.total_received == 0 {
+            None
+        } else {
+            // we get all the values in a sorted manner
+            let time_spent = self.time_spent.into_sorted_vec();
+            // zero-based index of the 99th percentile value; it is always in bounds for a
+            // non-empty vec, so a single successful request is reported instead of `null`
+            let percentile_99th = time_spent.len() * 99 / 100;
+            // We are only interested in the slowest value among the 99% fastest results
+            let time_spent = time_spent.get(percentile_99th);
+
+            let properties = json!({
+                "user-agent": self.user_agents,
+                "requests": {
+                    "99th_response_time":  time_spent.map(|t| format!("{:.2}", t)),
+                    "total_succeeded": self.total_succeeded,
+                    "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panic
+                    "total_received": self.total_received,
+                },
+                "facets": {
+                    "total_distinct_facet_count": self.facet_names.len(),
+                },
+                "additional_search_parameters_provided": self.additional_search_parameters_provided,
+            });
+
+            Some(Track {
+                timestamp: self.timestamp,
+                user: user.clone(),
+                event: event_name.to_string(),
+                properties,
+                ..Default::default()
+            })
+        }
+    }
+}
+
 #[derive(Default)]
 pub struct DocumentsAggregator {
    timestamp: Option<OffsetDateTime>,
--- a/meilisearch/src/routes/indexes/facet_search.rs
+++ b/meilisearch/src/routes/indexes/facet_search.rs
@ -0,0 +1,133 @@
+use std::collections::{BTreeSet, HashSet};
+
+use actix_web::web::Data;
+use actix_web::{web, HttpRequest, HttpResponse};
+use deserr::actix_web::AwebJson;
+use index_scheduler::IndexScheduler;
+use log::debug;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::index_uid::IndexUid;
+use serde_json::Value;
+
+use crate::analytics::{Analytics, FacetSearchAggregator};
+use crate::extractors::authentication::policies::*;
+use crate::extractors::authentication::GuardedData;
+use crate::search::{
+    add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+};
+
+/// Registers the facet-search route: a `POST` on the scope root is handled by [`search`].
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    let post_route = web::post().to(search);
+    cfg.service(web::resource("").route(post_route));
+}
+
+// TODO improve the error messages
+/// JSON body accepted by the facet-search route.
+///
+/// `facet_name` is the only field without a default. Every field other than `facet_query`
+/// and `facet_name` is copied as-is into a `SearchQuery` by the `From` impl of this file.
+#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct FacetSearchQuery {
+    // Optional query forwarded to `perform_facet_search` by the `search` handler
+    #[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
+    pub facet_query: Option<String>,
+    // Name of the facet to search in; mandatory (no `default`)
+    #[deserr(error = DeserrJsonError<InvalidFacetSearchName>)]
+    pub facet_name: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
+    pub q: Option<String>,
+    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
+    pub offset: usize,
+    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
+    pub limit: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
+    pub page: Option<usize>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
+    pub hits_per_page: Option<usize>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
+    pub attributes_to_retrieve: Option<BTreeSet<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
+    pub attributes_to_crop: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
+    pub crop_length: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
+    pub attributes_to_highlight: Option<HashSet<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
+    pub show_matches_position: bool,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
+    pub filter: Option<Value>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
+    pub sort: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
+    pub facets: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
+    pub highlight_pre_tag: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
+    pub highlight_post_tag: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
+    pub crop_marker: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
+    pub matching_strategy: MatchingStrategy,
+}
+
+/// Handler of the facet-search route.
+///
+/// Validates the index uid, converts the body into a `SearchQuery`, applies the search rules
+/// returned by the scheduler filters, then runs `perform_facet_search` on a blocking thread.
+/// The analytics aggregate is sent whether the search succeeded or failed, and the result is
+/// returned as JSON.
+pub async fn search(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
+    index_uid: web::Path<String>,
+    params: AwebJson<FacetSearchQuery, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
+
+    let query = params.into_inner();
+    debug!("facet search called with params: {:?}", query);
+
+    let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
+
+    // The facet-specific fields are cloned first because `SearchQuery::from` consumes `query`.
+    let facet_query = query.facet_query.clone();
+    let facet_name = query.facet_name.clone();
+    let mut search_query = SearchQuery::from(query);
+
+    // Tenant token search_rules.
+    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
+        add_search_rules(&mut search_query, search_rules);
+    }
+
+    let index = index_scheduler.index(&index_uid)?;
+    // The search is run through `spawn_blocking` rather than directly in this async fn.
+    let search_result = tokio::task::spawn_blocking(move || {
+        perform_facet_search(&index, search_query, facet_query, facet_name)
+    })
+    .await?;
+
+    // Only a successful search counts as succeeded; the aggregate is posted in both cases.
+    if let Ok(ref search_result) = search_result {
+        aggregate.succeed(search_result);
+    }
+    analytics.post_facet_search(aggregate);
+
+    let search_result = search_result?;
+
+    debug!("returns: {:?}", search_result);
+    Ok(HttpResponse::Ok().json(search_result))
+}
+
+impl From<FacetSearchQuery> for SearchQuery {
+    /// Keeps every regular search parameter and drops `facet_query` and `facet_name`.
+    fn from(value: FacetSearchQuery) -> Self {
+        let FacetSearchQuery {
+            facet_query: _,
+            facet_name: _,
+            q,
+            offset,
+            limit,
+            page,
+            hits_per_page,
+            attributes_to_retrieve,
+            attributes_to_crop,
+            crop_length,
+            attributes_to_highlight,
+            show_matches_position,
+            filter,
+            sort,
+            facets,
+            highlight_pre_tag,
+            highlight_post_tag,
+            crop_marker,
+            matching_strategy,
+        } = value;
+
+        SearchQuery {
+            q,
+            offset,
+            limit,
+            page,
+            hits_per_page,
+            attributes_to_retrieve,
+            attributes_to_crop,
+            crop_length,
+            attributes_to_highlight,
+            show_matches_position,
+            filter,
+            sort,
+            facets,
+            highlight_pre_tag,
+            highlight_post_tag,
+            crop_marker,
+            matching_strategy,
+        }
+    }
+}
--- a/meilisearch/src/routes/indexes/mod.rs
+++ b/meilisearch/src/routes/indexes/mod.rs
@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;

 pub mod documents;
+pub mod facet_search;
 pub mod search;
 pub mod settings;

@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
            .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
            .service(web::scope("/documents").configure(documents::configure))
            .service(web::scope("/search").configure(search::configure))
+            .service(web::scope("/facet-search").configure(facet_search::configure))
            .service(web::scope("/settings").configure(settings::configure)),
    );
 }
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@ -730,7 +730,7 @@ mod tests {
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
-              "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
+              "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
              "code": "invalid_task_types",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_types"
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@ -8,7 +8,9 @@ use either::Either;
 use meilisearch_auth::IndexSearchRules;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli::{FacetValueHit, SearchForFacetValues};
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
@ -170,7 +172,7 @@ impl SearchQueryWithIndex {
    }
 }

-#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
 #[deserr(rename_all = camelCase)]
 pub enum MatchingStrategy {
    /// Remove query words from last to first
@ -241,6 +243,14 @@ pub struct FacetStats {
    pub max: f64,
 }

+/// Result of a facet search, serialized with camelCase keys.
+#[derive(Serialize, Debug, Clone, PartialEq)]
+#[serde(rename_all = "camelCase")]
+pub struct FacetSearchResult {
+    // The hits returned by the facet values search
+    pub hits: Vec<FacetValueHit>,
+    // The facet query that was received, if any
+    pub query: Option<String>,
+    // Elapsed time of the whole facet search, in milliseconds
+    pub processing_time_ms: u128,
+}
+
 /// Incorporate search rules in search query
 pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
    query.filter = match (query.filter.take(), rules.filter) {
@ -261,14 +271,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
    }
 }

-pub fn perform_search(
-    index: &Index,
-    query: SearchQuery,
-) -> Result<SearchResult, MeilisearchHttpError> {
-    let before_search = Instant::now();
-    let rtxn = index.read_txn()?;
-
-    let mut search = index.search(&rtxn);
+fn prepare_search<'t>(
+    index: &'t Index,
+    rtxn: &'t RoTxn,
+    query: &'t SearchQuery,
+) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
+    let mut search = index.search(rtxn);

    if let Some(ref query) = query.q {
        search.query(query);
@ -278,7 +286,7 @@ pub fn perform_search(
    search.terms_matching_strategy(query.matching_strategy.into());

    let max_total_hits = index
-        .pagination_max_total_hits(&rtxn)
+        .pagination_max_total_hits(rtxn)
        .map_err(milli::Error::from)?
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

@ -320,6 +328,19 @@ pub fn perform_search(
        search.sort_criteria(sort);
    }

+    Ok((search, is_finite_pagination, max_total_hits, offset))
+}
+
+pub fn perform_search(
+    index: &Index,
+    query: SearchQuery,
+) -> Result<SearchResult, MeilisearchHttpError> {
+    let before_search = Instant::now();
+    let rtxn = index.read_txn()?;
+
+    let (search, is_finite_pagination, max_total_hits, offset) =
+        prepare_search(index, &rtxn, &query)?;
+
    let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;

    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@ -473,6 +494,30 @@ pub fn perform_search(
    Ok(result)
 }

+/// Runs a facet values search on `index`.
+///
+/// Builds the regular search from `search_query` with `prepare_search`, wraps it in a
+/// [SearchForFacetValues] for `facet_name`, applies `facet_query` when one is given, and
+/// returns the hits together with the received query and the elapsed time in milliseconds.
+pub fn perform_facet_search(
+    index: &Index,
+    search_query: SearchQuery,
+    facet_query: Option<String>,
+    facet_name: String,
+) -> Result<FacetSearchResult, MeilisearchHttpError> {
+    let started_at = Instant::now();
+    let rtxn = index.read_txn()?;
+
+    // only the prepared search is needed here; the other returned values are ignored
+    let (search, ..) = prepare_search(index, &rtxn, &search_query)?;
+    let mut facet_search = SearchForFacetValues::new(facet_name, search);
+    if let Some(ref facet_query) = facet_query {
+        facet_search.query(facet_query);
+    }
+
+    let hits = facet_search.execute()?;
+    let processing_time_ms = started_at.elapsed().as_millis();
+
+    Ok(FacetSearchResult { hits, query: facet_query, processing_time_ms })
+}
+
 fn insert_geo_distance(sorts: &[String], document: &mut Document) {
    lazy_static::lazy_static! {
        static ref GEO_REGEX: Regex =
--- a/meilisearch/tests/tasks/errors.rs
+++ b/meilisearch/tests/tasks/errors.rs
@ -97,7 +97,7 @@ async fn task_bad_types() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
+      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
      "code": "invalid_task_types",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -108,7 +108,7 @@ async fn task_bad_types() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
+      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
      "code": "invalid_task_types",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -119,7 +119,7 @@ async fn task_bad_types() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
+      "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
      "code": "invalid_task_types",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_task_types"
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@ -124,6 +124,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        }
    )]
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
+    #[error("Attribute `{}` is not filterable. {}",
+        .field,
+        match .valid_fields.is_empty() {
+            true => "This index does not have configured filterable attributes.".to_string(),
+            false => format!("Available filterable attributes are: `{}`.",
+                    valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
+                ),
+        }
+    )]
+    InvalidSearchFacet { field: String, valid_fields: BTreeSet<String> },
    #[error("{}", HeedError::BadOpenOptions)]
    InvalidLmdbOpenOptions,
    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
--- a/milli/src/heed_codec/fst_set_codec.rs
+++ b/milli/src/heed_codec/fst_set_codec.rs
@ -0,0 +1,23 @@
+use std::borrow::Cow;
+
+use fst::Set;
+use heed::{BytesDecode, BytesEncode};
+
+/// A codec for FST sets: encodes a `Set<Vec<u8>>` and decodes a borrowed `Set<&[u8]>`.
+pub struct FstSetCodec;
+
+impl<'a> BytesEncode<'a> for FstSetCodec {
+    type EItem = Set<Vec<u8>>;
+
+    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
+        Some(Cow::Borrowed(item.as_fst().as_bytes()))
+    }
+}
+
+impl<'a> BytesDecode<'a> for FstSetCodec {
+    type DItem = Set<&'a [u8]>;
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        Set::new(bytes).ok()
+    }
+}
--- a/milli/src/heed_codec/mod.rs
+++ b/milli/src/heed_codec/mod.rs
@ -2,6 +2,7 @@ mod beu32_str_codec;
 mod byte_slice_ref;
 pub mod facet;
 mod field_id_word_count_codec;
+mod fst_set_codec;
 mod obkv_codec;
 mod roaring_bitmap;
 mod roaring_bitmap_length;
@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;

 pub use self::beu32_str_codec::BEU32StrCodec;
 pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
+pub use self::fst_set_codec::FstSetCodec;
 pub use self::obkv_codec::ObkvCodec;
 pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
 pub use self::roaring_bitmap_length::{
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -19,7 +19,7 @@ use crate::heed_codec::facet::{
    FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
    FieldIdCodec, OrderedF64Codec,
 };
-use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
+use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
 use crate::{
    default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
    DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@ -85,6 +85,7 @@ pub mod db_name {
    pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
    pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
    pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
+    pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
    pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
    pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
    pub const DOCUMENTS: &str = "documents";
@ -147,6 +148,8 @@ pub struct Index {
    pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
    /// Maps the facet field id and ranges of strings with the docids that corresponds to them.
    pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
+    /// Maps the facet field id of the string facets with an FST containing all the facets values.
+    pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,

    /// Maps the document id, the facet field id and the numbers.
    pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@ -166,7 +169,7 @@ impl Index {
    ) -> Result<Index> {
        use db_name::*;

-        options.max_dbs(23);
+        options.max_dbs(24);
        unsafe { options.flag(Flags::MdbAlwaysFreePages) };

        let env = options.open(path)?;
@ -197,13 +200,13 @@ impl Index {
        let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
        let facet_id_string_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
+        let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
        let facet_id_exists_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
        let facet_id_is_null_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
        let facet_id_is_empty_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
-
        let field_id_docid_facet_f64s =
            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
        let field_id_docid_facet_strings =
@ -232,6 +235,7 @@ impl Index {
            field_id_word_count_docids,
            facet_id_f64_docids,
            facet_id_string_docids,
+            facet_id_string_fst,
            facet_id_exists_docids,
            facet_id_is_null_docids,
            facet_id_is_empty_docids,
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -99,8 +99,9 @@ pub use self::heed_codec::{
 };
 pub use self::index::Index;
 pub use self::search::{
-    FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
-    SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
+    FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
+    MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
+    DEFAULT_VALUES_PER_FACET,
 };

 pub type Result<T> = std::result::Result<T, error::Error>;
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@ -1,14 +1,20 @@
 use std::fmt;

+use fst::automaton::{Automaton, Str};
+use fst::{IntoStreamer, Streamer};
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
+use log::error;
 use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;

 pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
 pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
 use self::new::PartialSearchResult;
+use crate::error::UserError;
+use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
 use crate::{
-    execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
+    execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldIdMapMissingEntry, Index,
+    Result, SearchContext, BEU16,
 };

 // Building these factories is not free.
@ -16,6 +22,9 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
 static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
 static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));

+/// The maximum number of facet values returned by a search for facet values.
+const MAX_NUMBER_OF_FACETS: usize = 100;
+
 pub mod facet;
 mod fst_utils;
 pub mod new;
@ -199,6 +208,174 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
    }
 }

+/// A search over the values of a single string facet.
+///
+/// The facet values can be matched against an optional query and are only
+/// returned when at least one candidate of the wrapped [`Search`] has them.
+pub struct SearchForFacetValues<'a> {
+    query: Option<String>,
+    facet: String,
+    search_query: Search<'a>,
+}
+
+impl<'a> SearchForFacetValues<'a> {
+    /// Creates a search on the values of `facet`, without any facet query, whose
+    /// candidates are the documents returned by `search_query`.
+    pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
+        SearchForFacetValues { query: None, facet, search_query }
+    }
+
+    /// Sets the query that the facet values must match.
+    pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
+        self.query = Some(query.into());
+        self
+    }
+
+    /// Returns at most [`MAX_NUMBER_OF_FACETS`] facet values along with the number
+    /// of search candidates that have each of them.
+    ///
+    /// - Without a facet query, the level 0 values of the facet are listed in
+    ///   database order.
+    /// - With a facet query, the values are matched by prefix. Typos are accepted
+    ///   when `authorize_typos` is set on the index and the facet is not part of
+    ///   its exact attributes; a query that belongs to the exact words of the
+    ///   index is only looked up as is.
+    ///
+    /// An empty list is returned when no FST is stored for the facet.
+    ///
+    /// # Errors
+    ///
+    /// Returns `UserError::InvalidSearchFacet` when the facet is not filterable and
+    /// `FieldIdMapMissingEntry::FieldName` when the facet has no field id.
+    pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
+        let index = self.search_query.index;
+        let rtxn = self.search_query.rtxn;
+
+        let filterable_fields = index.filterable_fields(rtxn)?;
+        if !filterable_fields.contains(&self.facet) {
+            return Err(UserError::InvalidSearchFacet {
+                field: self.facet.clone(),
+                valid_fields: filterable_fields.into_iter().collect(),
+            }
+            .into());
+        }
+
+        let fields_ids_map = index.fields_ids_map(rtxn)?;
+        let fid = match fields_ids_map.id(&self.facet) {
+            Some(fid) => fid,
+            None => {
+                return Err(FieldIdMapMissingEntry::FieldName {
+                    field_name: self.facet.clone(),
+                    process: "search for facet values",
+                }
+                .into());
+            }
+        };
+
+        let fst = match index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
+            Some(fst) => fst,
+            None => return Ok(vec![]),
+        };
+
+        let search_candidates = self.search_query.execute()?.candidates;
+
+        match self.query.as_ref() {
+            Some(query) => {
+                let authorize_typos = index.authorize_typos(rtxn)?;
+                let field_authorizes_typos = !index.exact_attributes_ids(rtxn)?.contains(&fid);
+
+                if authorize_typos && field_authorizes_typos {
+                    let exact_words_fst = index.exact_words(rtxn)?;
+                    if exact_words_fst.map_or(false, |words| words.contains(query)) {
+                        // An exact word is neither a prefix nor typo tolerant:
+                        // only the facet value equal to the query can match.
+                        let mut results = vec![];
+                        let key =
+                            FacetGroupKey { field_id: fid, level: 0, left_bound: query.as_ref() };
+                        if let Some(FacetGroupValue { bitmap, .. }) =
+                            index.facet_id_string_docids.get(rtxn, &key)?
+                        {
+                            let count = search_candidates.intersection_len(&bitmap);
+                            if count != 0 {
+                                results.push(FacetValueHit { value: query.to_string(), count });
+                            }
+                        }
+                        Ok(results)
+                    } else {
+                        let one_typo = index.min_word_len_one_typo(rtxn)?;
+                        let two_typos = index.min_word_len_two_typos(rtxn)?;
+
+                        // The number of accepted typos grows with the byte length of the query.
+                        let is_prefix = true;
+                        let automaton = if query.len() < one_typo as usize {
+                            build_dfa(query, 0, is_prefix)
+                        } else if query.len() < two_typos as usize {
+                            build_dfa(query, 1, is_prefix)
+                        } else {
+                            build_dfa(query, 2, is_prefix)
+                        };
+
+                        self.matching_facet_values(fid, &fst, automaton, &search_candidates)
+                    }
+                } else {
+                    let automaton = Str::new(query).starts_with();
+                    self.matching_facet_values(fid, &fst, automaton, &search_candidates)
+                }
+            }
+            None => {
+                let mut results = vec![];
+                let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
+                for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
+                    let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
+                        result?;
+                    let count = search_candidates.intersection_len(&bitmap);
+                    if count != 0 {
+                        results.push(FacetValueHit { value: left_bound.to_string(), count });
+                    }
+                    if results.len() >= MAX_NUMBER_OF_FACETS {
+                        break;
+                    }
+                }
+                Ok(results)
+            }
+        }
+    }
+
+    /// Collects the values of `facet_fst` accepted by `automaton` that are found in
+    /// at least one of the `search_candidates`, up to [`MAX_NUMBER_OF_FACETS`].
+    ///
+    /// A value of the FST that has no level 0 entry in the facet database is
+    /// logged as an error and skipped.
+    fn matching_facet_values<A: Automaton>(
+        &self,
+        fid: u16,
+        facet_fst: &fst::Set<&[u8]>,
+        automaton: A,
+        search_candidates: &RoaringBitmap,
+    ) -> Result<Vec<FacetValueHit>> {
+        let index = self.search_query.index;
+        let rtxn = self.search_query.rtxn;
+
+        let mut results = vec![];
+        let mut stream = facet_fst.search(automaton).into_stream();
+        while let Some(facet_value) = stream.next() {
+            let value = std::str::from_utf8(facet_value)?;
+            let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
+            let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
+                Some(FacetGroupValue { bitmap, .. }) => bitmap,
+                None => {
+                    error!("the facet value is missing from the facet database: {key:?}");
+                    continue;
+                }
+            };
+            let count = search_candidates.intersection_len(&docids);
+            if count != 0 {
+                results.push(FacetValueHit { value: value.to_string(), count });
+            }
+            if results.len() >= MAX_NUMBER_OF_FACETS {
+                break;
+            }
+        }
+
+        Ok(results)
+    }
+}
+
+#[derive(Debug, Clone, serde::Serialize, PartialEq)]
+pub struct FacetValueHit {
+    /// The matching facet value, as found in the facet database
+    pub value: String,
+    /// The number of search candidates that have this facet value
+    pub count: u64,
+}
+
 #[cfg(test)]
 mod test {
    #[allow(unused_imports)]
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens(
                }
            }
            TokenKind::Separator(separator_kind) => {
-                match separator_kind {
-                    SeparatorKind::Hard => {
-                        position += 1;
-                    }
-                    SeparatorKind::Soft => {
-                        position += 0;
-                    }
+                // add penalty for hard separators
+                if let SeparatorKind::Hard = separator_kind {
+                    position = position.wrapping_add(1);
                }

                phrase = 'phrase: {
@ -288,3 +284,36 @@ impl PhraseBuilder {
        })
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use charabia::TokenizerBuilder;
+
+    use super::*;
+    use crate::index::tests::TempIndex;
+
+    /// Builds a temporary index that contains three small documents.
+    fn temp_index_with_documents() -> TempIndex {
+        let index = TempIndex::new();
+        let documents = documents!([
+            { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
+            { "id": 2, "name": "Westfália" },
+            { "id": 3, "name": "Ŵôřlḑôle" },
+        ]);
+        index.add_documents(documents).unwrap();
+        index
+    }
+
+    /// A query only made of a hard separator must not produce any query term.
+    #[test]
+    fn start_with_hard_separator() -> Result<()> {
+        let tokenizer = TokenizerBuilder::new().build();
+        let tokens = tokenizer.tokenize(".");
+        let index = temp_index_with_documents();
+        let rtxn = index.read_txn()?;
+        let mut ctx = SearchContext::new(&index, &rtxn);
+        // panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
+        let terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
+        assert!(terms.is_empty());
+        Ok(())
+    }
+}
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
            script_language_docids,
            facet_id_f64_docids,
            facet_id_string_docids,
+            facet_id_string_fst: _,
            facet_id_exists_docids,
            facet_id_is_null_docids,
            facet_id_is_empty_docids,
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@ -243,6 +243,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
            word_prefix_fid_docids,
            facet_id_f64_docids: _,
            facet_id_string_docids: _,
+            facet_id_string_fst: _,
            field_id_docid_facet_f64s: _,
            field_id_docid_facet_strings: _,
            script_language_docids,
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;

 use std::fs::File;

+use heed::types::DecodeIgnore;
 use log::debug;
 use time::OffsetDateTime;

 use self::incremental::FacetsUpdateIncremental;
 use super::FacetsUpdateBulk;
 use crate::facet::FacetType;
-use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
+use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
 use crate::heed_codec::ByteSliceRefCodec;
-use crate::{Index, Result};
+use crate::{Index, Result, BEU16};

 pub mod bulk;
 pub mod delete;
@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> {
            );
            incremental_update.execute(wtxn)?;
        }
+
+        // We compute one FST by string facet. This relies on the level 0 keys of a
+        // field being visited contiguously: a change of field id means that the FST
+        // of the previous field is complete.
+        let mut text_fsts = vec![];
+        let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
+        let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
+        for result in database.iter(wtxn)? {
+            let (facet_group_key, _) = result?;
+            if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
+                current_fst = match current_fst.take() {
+                    Some((fid, fst_builder)) if fid != field_id => {
+                        // The builder holds the values of the previous field (`fid`),
+                        // its FST must be stored under `fid` and not the new `field_id`.
+                        let fst = fst_builder.into_set();
+                        text_fsts.push((fid, fst));
+                        Some((field_id, fst::SetBuilder::memory()))
+                    }
+                    Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
+                    None => Some((field_id, fst::SetBuilder::memory())),
+                };
+
+                if let Some((_, fst_builder)) = current_fst.as_mut() {
+                    fst_builder.insert(left_bound)?;
+                }
+            }
+        }
+
+        // The FST of the last visited field has not been stored yet
+        if let Some((field_id, fst_builder)) = current_fst {
+            let fst = fst_builder.into_set();
+            text_fsts.push((field_id, fst));
+        }
+
+        // We remove all of the previous FSTs that were in this database
+        self.index.facet_id_string_fst.clear(wtxn)?;
+
+        // We write those FSTs in LMDB now
+        for (field_id, fst) in text_fsts {
+            self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
+        }
+
        Ok(())
    }
 }
--- a/milli/src/update/facets.rs
+++ b/milli/src/update/facets.rs
@ -1 +0,0 @@
-
Author	SHA1	Message	Date
Clément Renault	25d49f5811	Use the minWordSizeForTypos index settings	2023-06-06 10:48:43 +02:00
Clément Renault	e9af506591	Format the code	2023-06-06 10:48:43 +02:00
Clément Renault	6ee4f4b544	Fix compilation issues	2023-06-06 10:48:42 +02:00
Clément Renault	e92576e0d4	Simplify the placeholder search of the facet-search route	2023-06-06 10:48:08 +02:00
Clément Renault	7e1a49e7fa	Use the disableOnAttributes parameter on the facet-search route	2023-06-06 10:48:08 +02:00
Clément Renault	17e86e9c42	Use the disableOnWords parameter on the facet-search route	2023-06-06 10:48:08 +02:00
Clément Renault	f4f5ae70d6	Support the typoTolerant.enabled parameter	2023-06-06 10:48:08 +02:00
Clément Renault	edf3031dae	Log an error when a facet value is missing from the database	2023-06-06 10:48:08 +02:00
Clément Renault	09d440a427	Rename the SearchForFacetValues struct	2023-06-06 10:48:08 +02:00
Clément Renault	8b66318a6b	Return an internal error when a field id is missing	2023-06-06 10:48:08 +02:00
Clément Renault	196a2b3d58	Make clippy happy	2023-06-06 10:48:07 +02:00
Clément Renault	c153cbc593	Improve the returned errors from the facet-search route	2023-06-06 10:48:07 +02:00
Clément Renault	e731f1c8ba	Fix the max number of facets to be returned to 100	2023-06-06 10:48:07 +02:00
Clément Renault	c39d830ff8	Return the correct response JSON object from the facet-search route	2023-06-06 10:48:07 +02:00
Clément Renault	2dca4d82d8	Send analytics about the facet-search route	2023-06-06 10:48:07 +02:00
Clément Renault	ce87ee8ea0	Make the search for facet work	2023-06-06 10:37:27 +02:00
Kerollmops	f06bb445a6	Introduce the facet search route	2023-06-06 10:37:26 +02:00
Kerollmops	81792eb5f7	Restrict the number of facet search results to 1000	2023-06-06 10:37:26 +02:00
Kerollmops	7a49bbc8df	Introduce the SearchForFacetValue struct	2023-06-06 10:37:26 +02:00
Clément Renault	ca16aaaa30	Store the facet string values in multiple FSTs	2023-06-06 10:37:26 +02:00
meili-bors[bot]	d963b5f85a	Merge #3792 3792: fix the type of the document deletion by filter tasks r=dureuill a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/3791 ## What does this PR do? - Hide the deleteDocumentByFilter internal type from the users. Co-authored-by: Tamo <tamo@meilisearch.com>	2023-05-30 18:20:28 +00:00
Tamo	2acc3ec5ee	fix the type of the document deletion by filter tasks	2023-05-30 15:18:52 +02:00
meili-bors[bot]	0a7817a002	Merge #3786 3786: Consistently use wrapping add to avoid overflow in debug when query s… r=dureuill a=dureuill # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/3785 ## What does this PR do? - Some of the code paths would erroneously use the default addition operator that has the semantics that "overflow is an error, checked at runtime in debug" instead of the intended "overflow is expected" semantics that this code use (this code is using `u16::MAX` as a sentinel). This PR makes it so the wrapping add operator is used everywhere. Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-05-29 12:39:54 +00:00
Louis Dureuil	1dfc4038ab	Add test that fails before PR and passes now	2023-05-29 11:58:26 +02:00
Louis Dureuil	73198179f1	Consistently use wrapping add to avoid overflow in debug when query starts with a separator	2023-05-29 11:54:12 +02:00