From 3a9b08960abe17e46f31cf0196c2a8453df68772 Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Tue, 5 Aug 2025 13:49:28 +0200 Subject: [PATCH] Add test --- .../milli/src/search/new/tests/integration.rs | 2 +- crates/milli/tests/search/filters.rs | 13 ++++++++++-- crates/milli/tests/search/mod.rs | 21 +++++++++++++++++-- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 38f39e18b..6b8c25ab8 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -17,7 +17,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let path = tempfile::tempdir().unwrap(); let options = EnvOpenOptions::new(); let mut options = options.read_txn_without_tls(); - options.map_size(10 * 1024 * 1024); // 10 MB + options.map_size(10 * 1024 * 1024); // 10 MiB let index = Index::new(options, &path, true).unwrap(); let mut wtxn = index.write_txn().unwrap(); diff --git a/crates/milli/tests/search/filters.rs b/crates/milli/tests/search/filters.rs index bb5943782..c97143d48 100644 --- a/crates/milli/tests/search/filters.rs +++ b/crates/milli/tests/search/filters.rs @@ -25,13 +25,16 @@ macro_rules! test_filter { let SearchResult { documents_ids, .. } = search.execute().unwrap(); let filtered_ids = search::expected_filtered_ids($filter); - let expected_external_ids: Vec<_> = + let mut expected_external_ids: Vec<_> = search::expected_order(&criteria, TermsMatchingStrategy::default(), &[]) .into_iter() .filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None }) .collect(); - let documents_ids = search::internal_to_external_ids(&index, &documents_ids); + let mut documents_ids = search::internal_to_external_ids(&index, &documents_ids); + + expected_external_ids.sort_unstable(); + documents_ids.sort_unstable(); assert_eq!(documents_ids, expected_external_ids); } }; @@ -102,3 +105,9 @@ test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]); test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); + +test_filter!(starts_with_filter_single_letter, vec![Right("tag STARTS WITH e")]); +test_filter!(starts_with_filter_diacritic, vec![Right("tag STARTS WITH é")]); +test_filter!(starts_with_filter_empty_prefix, vec![Right("tag STARTS WITH ''")]); +test_filter!(starts_with_filter_hell, vec![Right("title STARTS WITH hell")]); +test_filter!(starts_with_filter_hello, vec![Right("title STARTS WITH hello")]); diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index fa03f1cc1..578a22009 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -12,7 +12,8 @@ use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; use milli::vector::RuntimeEmbedders; use milli::{ - AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy, + normalize_facet, AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, + TermsMatchingStrategy, }; use serde::{Deserialize, Deserializer}; use slice_group_by::GroupBy; @@ -36,7 +37,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let path = tempfile::tempdir().unwrap(); let options = EnvOpenOptions::new(); let mut options = options.read_txn_without_tls(); - options.map_size(10 * 1024 * 1024); // 10 MB + options.map_size(10 * 1024 * 1024); // 10 MiB let index = Index::new(options, &path, true).unwrap(); let mut wtxn = index.write_txn().unwrap(); @@ -46,6 +47,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { builder.set_criteria(criteria.to_vec()); builder.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("title")), FilterableAttributesRule::Field(S("tag")), FilterableAttributesRule::Field(S("asc_desc_rank")), FilterableAttributesRule::Field(S("_geo")), @@ -220,6 +222,19 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option { { id = Some(document.id.clone()) } + } else if let Some((field, prefix)) = filter.split_once("STARTS WITH") { + let field = match field.trim() { + "tag" => &document.tag, + "title" => &document.title, + "description" => &document.description, + _ => panic!("Unknown field: {field}"), + }; + + let field = normalize_facet(field); + let prefix = normalize_facet(prefix.trim().trim_matches('\'')); + if field.starts_with(&prefix) { + id = Some(document.id.clone()); + } } else if let Some(("asc_desc_rank", filter)) = filter.split_once('<') { if document.asc_desc_rank < filter.parse().unwrap() { id = Some(document.id.clone()) @@ -271,6 +286,8 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option { } else if matches!(filter, "tag_in NOT IN[1, 2, 3, four, five]") { id = (!matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E")) .then(|| document.id.clone()); + } else { + panic!("Unknown filter: {filter}"); } id }