From 478f374b9d42465ec5ce1bfc31fa7e692f0b259b Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Mon, 28 Jul 2025 16:23:26 +0200
Subject: [PATCH 01/11] Add benchmark

---
 crates/benchmarks/Cargo.toml                  |  4 ++
 .../benchmarks/benches/filter_starts_with.rs  | 72 +++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 crates/benchmarks/benches/filter_starts_with.rs

diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml
index 9dccc444b..25b44436d 100644
--- a/crates/benchmarks/Cargo.toml
+++ b/crates/benchmarks/Cargo.toml
@@ -51,3 +51,7 @@ harness = false
 [[bench]]
 name = "indexing"
 harness = false
+
+[[bench]]
+name = "filter_starts_with"
+harness = false
diff --git a/crates/benchmarks/benches/filter_starts_with.rs b/crates/benchmarks/benches/filter_starts_with.rs
new file mode 100644
index 000000000..b1d0b502f
--- /dev/null
+++ b/crates/benchmarks/benches/filter_starts_with.rs
@@ -0,0 +1,72 @@
+mod datasets_paths;
+mod utils;
+
+use criterion::{criterion_group, criterion_main};
+use milli::update::Settings;
+use milli::FilterableAttributesRule;
+use utils::Conf;
+
+#[cfg(not(windows))]
+#[global_allocator]
+static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+fn base_conf(builder: &mut Settings) {
+    let displayed_fields =
+        ["geonameid", "name"]
+            .iter()
+            .map(|s| s.to_string())
+            .collect();
+    builder.set_displayed_fields(displayed_fields);
+
+    let filterable_fields = ["name"]
+        .iter()
+        .map(|s| FilterableAttributesRule::Field(s.to_string()))
+        .collect();
+    builder.set_filterable_fields(filterable_fields);
+}
+
+#[rustfmt::skip]
+const BASE_CONF: Conf = Conf {
+    dataset: datasets_paths::SMOL_ALL_COUNTRIES,
+    dataset_format: "jsonl",
+    queries: &[
+        "",
+    ],
+    configure: base_conf,
+    primary_key: Some("geonameid"),
+    ..Conf::BASE
+};
+
+fn filter_starts_with(c: &mut criterion::Criterion) {
+    #[rustfmt::skip]
+    let confs = &[
+        utils::Conf {
+            group_name: "1 letter",
+            filter: Some("name STARTS WITH e"),
+            ..BASE_CONF
+        },
+
+        utils::Conf {
+            group_name: "2 letters",
+            filter: Some("name STARTS WITH es"),
+            ..BASE_CONF
+        },
+
+        utils::Conf {
+            group_name: "3 letters",
+            filter: Some("name STARTS WITH est"),
+            ..BASE_CONF
+        },
+
+        utils::Conf {
+            group_name: "6 letters",
+            filter: Some("name STARTS WITH estoni"),
+            ..BASE_CONF
+        }
+    ];
+
+    utils::run_benches(c, confs);
+}
+
+criterion_group!(benches, filter_starts_with);
+criterion_main!(benches);

From e8a818f53d6387194f645df2dea87acdb6327d28 Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Mon, 28 Jul 2025 16:24:04 +0200
Subject: [PATCH 02/11] Optimize the filter

---
 crates/milli/src/search/facet/filter.rs | 37 +++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index c3eba8031..54c7535dd 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -18,6 +18,7 @@ use crate::heed_codec::facet::{
     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
 use crate::index::db_name::FACET_ID_STRING_DOCIDS;
+use crate::search::facet::facet_range_search::find_docids_of_facet_within_bounds;
 use crate::{
     distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap,
     FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result,
@@ -416,7 +417,43 @@ impl<'a> Filter<'a> {
                 return Ok(docids);
             }
             Condition::StartsWith { keyword: _, word } => {
+                // There are two algorithms:
+                //
+                // - The first one looks directly at level 0 of the facet group database.
+                //   This pessimistic approach is more efficient when the value is unique.
+                //
+                // - The second one is recursive over levels.
+                //   This is more efficient when the prefix is common among many values.
+
                 let value = crate::normalize_facet(word.value());
+
+                if value.len() <= 6 {
+                    // 6 is abitrary, but it works well in practice
+                    let mut value2 = value.as_bytes().to_owned();
+                    if let Some(last) = value2.last_mut() {
+                        if *last != 255 {
+                            *last += 1;
+                            if let Ok(value2) = String::from_utf8(value2) {
+                                // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
+                                // We just increase the last letter to find the upper bound.
+                                // The result could be invalid utf8, so it can fallback.
+                                let mut docids = RoaringBitmap::new();
+                                find_docids_of_facet_within_bounds(
+                                    rtxn,
+                                    strings_db,
+                                    field_id,
+                                    &Included(&value),
+                                    &Excluded(&value2),
+                                    universe,
+                                    &mut docids,
+                                )?;
+
+                                return Ok(docids);
+                            }
+                        }
+                    }
+                }
+
                 let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() };
                 let docids = strings_db
                     .prefix_iter(rtxn, &base)?

From 691a9ae4b18c00459ca2512429a364fc2b8722cf Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Mon, 28 Jul 2025 16:24:11 +0200
Subject: [PATCH 03/11] Format

---
 crates/benchmarks/benches/filter_starts_with.rs | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/crates/benchmarks/benches/filter_starts_with.rs b/crates/benchmarks/benches/filter_starts_with.rs
index b1d0b502f..a7682cbf8 100644
--- a/crates/benchmarks/benches/filter_starts_with.rs
+++ b/crates/benchmarks/benches/filter_starts_with.rs
@@ -11,17 +11,11 @@ use utils::Conf;
 static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
 fn base_conf(builder: &mut Settings) {
-    let displayed_fields =
-        ["geonameid", "name"]
-            .iter()
-            .map(|s| s.to_string())
-            .collect();
+    let displayed_fields = ["geonameid", "name"].iter().map(|s| s.to_string()).collect();
     builder.set_displayed_fields(displayed_fields);
 
-    let filterable_fields = ["name"]
-        .iter()
-        .map(|s| FilterableAttributesRule::Field(s.to_string()))
-        .collect();
+    let filterable_fields =
+        ["name"].iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
     builder.set_filterable_fields(filterable_fields);
 }
 

From 224892e692fb31df55dc25b275d9412d247314a2 Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Mon, 28 Jul 2025 16:28:06 +0200
Subject: [PATCH 04/11] Enable new algorithm every time

---
 crates/milli/src/search/facet/filter.rs | 41 ++++++++++++-------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index 54c7535dd..907b12f1a 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -427,29 +427,26 @@ impl<'a> Filter<'a> {
 
                 let value = crate::normalize_facet(word.value());
 
-                if value.len() <= 6 {
-                    // 6 is abitrary, but it works well in practice
-                    let mut value2 = value.as_bytes().to_owned();
-                    if let Some(last) = value2.last_mut() {
-                        if *last != 255 {
-                            *last += 1;
-                            if let Ok(value2) = String::from_utf8(value2) {
-                                // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
-                                // We just increase the last letter to find the upper bound.
-                                // The result could be invalid utf8, so it can fallback.
-                                let mut docids = RoaringBitmap::new();
-                                find_docids_of_facet_within_bounds(
-                                    rtxn,
-                                    strings_db,
-                                    field_id,
-                                    &Included(&value),
-                                    &Excluded(&value2),
-                                    universe,
-                                    &mut docids,
-                                )?;
+                let mut value2 = value.as_bytes().to_owned();
+                if let Some(last) = value2.last_mut() {
+                    if *last != 255 {
+                        *last += 1;
+                        if let Ok(value2) = String::from_utf8(value2) {
+                            // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
+                            // We just increase the last letter to find the upper bound.
+                            // The result could be invalid utf8, so it can fallback.
+                            let mut docids = RoaringBitmap::new();
+                            find_docids_of_facet_within_bounds(
+                                rtxn,
+                                strings_db,
+                                field_id,
+                                &Included(&value),
+                                &Excluded(&value2),
+                                universe,
+                                &mut docids,
+                            )?;
 
-                                return Ok(docids);
-                            }
+                            return Ok(docids);
                         }
                     }
                 }

From 48a5f4db2d48b2b8fb98abf9e74acf90d349e9c0 Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Mon, 28 Jul 2025 16:42:33 +0200
Subject: [PATCH 05/11] Improve comment

---
 crates/milli/src/search/facet/filter.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index 907b12f1a..955e75753 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -419,11 +419,12 @@ impl<'a> Filter<'a> {
             Condition::StartsWith { keyword: _, word } => {
                 // There are two algorithms:
                 //
-                // - The first one looks directly at level 0 of the facet group database.
-                //   This pessimistic approach is more efficient when the value is unique.
-                //
-                // - The second one is recursive over levels.
+                // - The first one is recursive over levels.
                 //   This is more efficient when the prefix is common among many values.
+                //
+                // - The second one looks directly at level 0 of the facet group database.
+                //   This pessimistic approach is more efficient when the value is unique.
+                //   It's used as a fallback.
 
                 let value = crate::normalize_facet(word.value());
 

From fc814b7537cfb8c1a8abdeb2835f1ebe0f317e0b Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Tue, 5 Aug 2025 10:25:14 +0200
Subject: [PATCH 06/11] Apply review suggestion

---
 crates/milli/src/search/facet/filter.rs | 78 +++++++++++++------------
 1 file changed, 42 insertions(+), 36 deletions(-)

diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index 955e75753..af4a77814 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::collections::BTreeSet;
 use std::fmt::{Debug, Display};
 use std::ops::Bound::{self, Excluded, Included, Unbounded};
@@ -14,9 +15,7 @@ use super::facet_range_search;
 use crate::constants::RESERVED_GEO_FIELD_NAME;
 use crate::error::{Error, UserError};
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
-use crate::heed_codec::facet::{
-    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
-};
+use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
 use crate::index::db_name::FACET_ID_STRING_DOCIDS;
 use crate::search::facet::facet_range_search::find_docids_of_facet_within_bounds;
 use crate::{
@@ -427,44 +426,51 @@ impl<'a> Filter<'a> {
                 //   It's used as a fallback.
 
                 let value = crate::normalize_facet(word.value());
-
                 let mut value2 = value.as_bytes().to_owned();
-                if let Some(last) = value2.last_mut() {
-                    if *last != 255 {
-                        *last += 1;
-                        if let Ok(value2) = String::from_utf8(value2) {
-                            // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
-                            // We just increase the last letter to find the upper bound.
-                            // The result could be invalid utf8, so it can fallback.
-                            let mut docids = RoaringBitmap::new();
-                            find_docids_of_facet_within_bounds(
-                                rtxn,
-                                strings_db,
-                                field_id,
-                                &Included(&value),
-                                &Excluded(&value2),
-                                universe,
-                                &mut docids,
-                            )?;
 
-                            return Ok(docids);
-                        }
+                let last = match value2.last_mut() {
+                    Some(last) => last,
+                    None => {
+                        // The prefix is empty, so all documents that have the field will match.
+                        return index
+                            .exists_faceted_documents_ids(rtxn, field_id)
+                            .map_err(|e| e.into());
+                    }
+                };
+
+                if *last == 255 {
+                    // The prefix is invalid utf8, so no documents will match anyway
+                    return Ok(RoaringBitmap::new());
+                }
+                *last += 1;
+
+                // This is very similar to `heed::Bytes` but its `EItem` is `&[u8]` instead of `[u8]`
+                struct BytesRef;
+                impl<'a> BytesEncode<'a> for BytesRef {
+                    type EItem = &'a [u8];
+
+                    fn bytes_encode(
+                        item: &'a Self::EItem,
+                    ) -> std::result::Result<Cow<'a, [u8]>, heed::BoxedError> {
+                        Ok(Cow::Borrowed(item))
                     }
                 }
 
-                let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() };
-                let docids = strings_db
-                    .prefix_iter(rtxn, &base)?
-                    .map(|result| -> Result<RoaringBitmap> {
-                        match result {
-                            Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap),
-                            Err(_e) => Err(InternalError::from(SerializationError::Decoding {
-                                db_name: Some(FACET_ID_STRING_DOCIDS),
-                            })
-                            .into()),
-                        }
-                    })
-                    .union()?;
+                // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
+                // We just incremented the last letter to find the upper bound.
+                // The upper bound may not be valid utf8, but lmdb doesn't care as it works over bytes.
+                let mut docids = RoaringBitmap::new();
+                let bytes_db =
+                    index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRef>>();
+                find_docids_of_facet_within_bounds::<BytesRef>(
+                    rtxn,
+                    bytes_db,
+                    field_id,
+                    &Included(value.as_bytes()),
+                    &Excluded(value2.as_slice()),
+                    universe,
+                    &mut docids,
+                )?;
 
                 return Ok(docids);
             }

From afb367c7f4a8003c0d1c45b76c3f8a201e498276 Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Tue, 5 Aug 2025 10:29:39 +0200
Subject: [PATCH 07/11] Update old comment

---
 crates/milli/src/search/facet/filter.rs | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index af4a77814..4d1e51767 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -416,14 +416,9 @@ impl<'a> Filter<'a> {
                 return Ok(docids);
             }
             Condition::StartsWith { keyword: _, word } => {
-                // There are two algorithms:
-                //
-                // - The first one is recursive over levels.
-                //   This is more efficient when the prefix is common among many values.
-                //
-                // - The second one looks directly at level 0 of the facet group database.
-                //   This pessimistic approach is more efficient when the value is unique.
-                //   It's used as a fallback.
+                // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
+                // We increment the last byte of the prefix to find the (exclusive) upper bound.
+                // The upper bound may not be valid utf8, but lmdb doesn't care as it works over bytes.
 
                 let value = crate::normalize_facet(word.value());
                 let mut value2 = value.as_bytes().to_owned();
@@ -456,9 +451,6 @@ impl<'a> Filter<'a> {
                     }
                 }
 
-                // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
-                // We just incremented the last letter to find the upper bound.
-                // The upper bound may not be valid utf8, but lmdb doesn't care as it works over bytes.
                 let mut docids = RoaringBitmap::new();
                 let bytes_db =
                     index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRef>>();

From 3d2c204f2d528040f6775ae0102666aa5e6987f6 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 5 Aug 2025 11:26:10 +0200
Subject: [PATCH 08/11] Update crates/milli/src/search/facet/filter.rs

---
 crates/milli/src/search/facet/filter.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index 4d1e51767..803a0635b 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -433,8 +433,9 @@ impl<'a> Filter<'a> {
                     }
                 };
 
-                if *last == 255 {
-                    // The prefix is invalid utf8, so no documents will match anyway
+                if *last == u8::MAX {
+                    // u8::MAX is a forbidden UTF-8 byte, we're guaranteed it cannot be sent through a filter to meilisearch, but just in case, we're going to return something
+                    tracing::warn!("Found non utf-8 character in filter. That shouldn't be possible");
                     return Ok(RoaringBitmap::new());
                 }
                 *last += 1;

From 4c61a227caf32b6e947722d1c4120a75b1e25dbe Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 5 Aug 2025 11:29:54 +0200
Subject: [PATCH 09/11] fmt after my suggestion

---
 crates/milli/src/search/facet/filter.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index 803a0635b..76d935fc6 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -435,7 +435,9 @@ impl<'a> Filter<'a> {
 
                 if *last == u8::MAX {
                     // u8::MAX is a forbidden UTF-8 byte, we're guaranteed it cannot be sent through a filter to meilisearch, but just in case, we're going to return something
-                    tracing::warn!("Found non utf-8 character in filter. That shouldn't be possible");
+                    tracing::warn!(
+                        "Found non utf-8 character in filter. That shouldn't be possible"
+                    );
                     return Ok(RoaringBitmap::new());
                 }
                 *last += 1;

From c4e7bf2e6022d946260e559366767f87af999082 Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Tue, 5 Aug 2025 12:14:16 +0200
Subject: [PATCH 10/11] Stabilize `STARTS WITH` filter

---
 crates/filter-parser/src/lib.rs           |  6 +++---
 crates/index-scheduler/src/features.rs    |  2 +-
 crates/meilisearch/tests/search/errors.rs | 24 +++++++++++------------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/crates/filter-parser/src/lib.rs b/crates/filter-parser/src/lib.rs
index 938702103..e25636812 100644
--- a/crates/filter-parser/src/lib.rs
+++ b/crates/filter-parser/src/lib.rs
@@ -165,9 +165,9 @@ impl<'a> FilterCondition<'a> {
                 | Condition::Exists
                 | Condition::LowerThan(_)
                 | Condition::LowerThanOrEqual(_)
-                | Condition::Between { .. } => None,
-                Condition::Contains { keyword, word: _ }
-                | Condition::StartsWith { keyword, word: _ } => Some(keyword),
+                | Condition::Between { .. }
+                | Condition::StartsWith { .. } => None,
+                Condition::Contains { keyword, word: _ } => Some(keyword),
             },
             FilterCondition::Not(this) => this.use_contains_operator(),
             FilterCondition::Or(seq) | FilterCondition::And(seq) => {
diff --git a/crates/index-scheduler/src/features.rs b/crates/index-scheduler/src/features.rs
index b52a659a6..00e706a74 100644
--- a/crates/index-scheduler/src/features.rs
+++ b/crates/index-scheduler/src/features.rs
@@ -85,7 +85,7 @@ impl RoFeatures {
             Ok(())
         } else {
             Err(FeatureNotEnabledError {
-                disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter",
+                disabled_action: "Using `CONTAINS` in a filter",
                 feature: "contains filter",
                 issue_link: "https://github.com/orgs/meilisearch/discussions/763",
             }
diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs
index 363ece067..9cc7e06dd 100644
--- a/crates/meilisearch/tests/search/errors.rs
+++ b/crates/meilisearch/tests/search/errors.rs
@@ -1270,27 +1270,27 @@ async fn search_with_contains_without_enabling_the_feature() {
     index
         .search(json!({ "filter": "doggo CONTAINS kefir" }), |response, code| {
             snapshot!(code, @"400 Bad Request");
-            snapshot!(json_string!(response), @r###"
+            snapshot!(json_string!(response), @r#"
             {
-              "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
+              "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
               "code": "feature_not_enabled",
               "type": "invalid_request",
               "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
             }
-            "###);
+            "#);
         })
         .await;
     index
         .search(json!({ "filter": "doggo != echo AND doggo CONTAINS kefir" }), |response, code| {
             snapshot!(code, @"400 Bad Request");
-            snapshot!(json_string!(response), @r###"
+            snapshot!(json_string!(response), @r#"
             {
-              "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
+              "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
               "code": "feature_not_enabled",
               "type": "invalid_request",
               "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
             }
-            "###);
+            "#);
         })
         .await;
 
@@ -1299,24 +1299,24 @@ async fn search_with_contains_without_enabling_the_feature() {
         index.search_post(json!({ "filter": ["doggo != echo", "doggo CONTAINS kefir"] })).await;
 
     snapshot!(code, @"400 Bad Request");
-    snapshot!(json_string!(response), @r###"
+    snapshot!(json_string!(response), @r#"
     {
-      "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
+      "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
       "code": "feature_not_enabled",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
     }
-    "###);
+    "#);
     let (response, code) =
         index.search_post(json!({ "filter": ["doggo != echo", ["doggo CONTAINS kefir"]] })).await;
 
     snapshot!(code, @"400 Bad Request");
-    snapshot!(json_string!(response), @r###"
+    snapshot!(json_string!(response), @r#"
     {
-      "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
+      "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
       "code": "feature_not_enabled",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
     }
-    "###);
+    "#);
 }

From 3a9b08960abe17e46f31cf0196c2a8453df68772 Mon Sep 17 00:00:00 2001
From: Mubelotix <simon@meilisearch.com>
Date: Tue, 5 Aug 2025 13:49:28 +0200
Subject: [PATCH 11/11] Add test

---
 .../milli/src/search/new/tests/integration.rs |  2 +-
 crates/milli/tests/search/filters.rs          | 13 ++++++++++--
 crates/milli/tests/search/mod.rs              | 21 +++++++++++++++++--
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs
index 38f39e18b..6b8c25ab8 100644
--- a/crates/milli/src/search/new/tests/integration.rs
+++ b/crates/milli/src/search/new/tests/integration.rs
@@ -17,7 +17,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     let path = tempfile::tempdir().unwrap();
     let options = EnvOpenOptions::new();
     let mut options = options.read_txn_without_tls();
-    options.map_size(10 * 1024 * 1024); // 10 MB
+    options.map_size(10 * 1024 * 1024); // 10 MiB
     let index = Index::new(options, &path, true).unwrap();
 
     let mut wtxn = index.write_txn().unwrap();
diff --git a/crates/milli/tests/search/filters.rs b/crates/milli/tests/search/filters.rs
index bb5943782..c97143d48 100644
--- a/crates/milli/tests/search/filters.rs
+++ b/crates/milli/tests/search/filters.rs
@@ -25,13 +25,16 @@ macro_rules! test_filter {
             let SearchResult { documents_ids, .. } = search.execute().unwrap();
 
             let filtered_ids = search::expected_filtered_ids($filter);
-            let expected_external_ids: Vec<_> =
+            let mut expected_external_ids: Vec<_> =
                 search::expected_order(&criteria, TermsMatchingStrategy::default(), &[])
                     .into_iter()
                     .filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None })
                     .collect();
 
-            let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
+            let mut documents_ids = search::internal_to_external_ids(&index, &documents_ids);
+
+            expected_external_ids.sort_unstable();
+            documents_ids.sort_unstable();
             assert_eq!(documents_ids, expected_external_ids);
         }
     };
@@ -102,3 +105,9 @@ test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]);
 test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
 test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
 test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]);
+
+test_filter!(starts_with_filter_single_letter, vec![Right("tag STARTS WITH e")]);
+test_filter!(starts_with_filter_diacritic, vec![Right("tag STARTS WITH é")]);
+test_filter!(starts_with_filter_empty_prefix, vec![Right("tag STARTS WITH ''")]);
+test_filter!(starts_with_filter_hell, vec![Right("title STARTS WITH hell")]);
+test_filter!(starts_with_filter_hello, vec![Right("title STARTS WITH hello")]);
diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs
index fa03f1cc1..578a22009 100644
--- a/crates/milli/tests/search/mod.rs
+++ b/crates/milli/tests/search/mod.rs
@@ -12,7 +12,8 @@ use milli::update::new::indexer;
 use milli::update::{IndexerConfig, Settings};
 use milli::vector::RuntimeEmbedders;
 use milli::{
-    AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy,
+    normalize_facet, AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member,
+    TermsMatchingStrategy,
 };
 use serde::{Deserialize, Deserializer};
 use slice_group_by::GroupBy;
@@ -36,7 +37,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     let path = tempfile::tempdir().unwrap();
     let options = EnvOpenOptions::new();
     let mut options = options.read_txn_without_tls();
-    options.map_size(10 * 1024 * 1024); // 10 MB
+    options.map_size(10 * 1024 * 1024); // 10 MiB
     let index = Index::new(options, &path, true).unwrap();
 
     let mut wtxn = index.write_txn().unwrap();
@@ -46,6 +47,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
 
     builder.set_criteria(criteria.to_vec());
     builder.set_filterable_fields(vec![
+        FilterableAttributesRule::Field(S("title")),
         FilterableAttributesRule::Field(S("tag")),
         FilterableAttributesRule::Field(S("asc_desc_rank")),
         FilterableAttributesRule::Field(S("_geo")),
@@ -220,6 +222,19 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
         {
             id = Some(document.id.clone())
         }
+    } else if let Some((field, prefix)) = filter.split_once("STARTS WITH") {
+        let field = match field.trim() {
+            "tag" => &document.tag,
+            "title" => &document.title,
+            "description" => &document.description,
+            _ => panic!("Unknown field: {field}"),
+        };
+
+        let field = normalize_facet(field);
+        let prefix = normalize_facet(prefix.trim().trim_matches('\''));
+        if field.starts_with(&prefix) {
+            id = Some(document.id.clone());
+        }
     } else if let Some(("asc_desc_rank", filter)) = filter.split_once('<') {
         if document.asc_desc_rank < filter.parse().unwrap() {
             id = Some(document.id.clone())
@@ -271,6 +286,8 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
     } else if matches!(filter, "tag_in NOT IN[1, 2, 3, four, five]") {
         id = (!matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E"))
             .then(|| document.id.clone());
+    } else {
+        panic!("Unknown filter: {filter}");
     }
     id
 }