This commit is contained in:
Mubelotix
2025-08-05 13:49:28 +02:00
parent c4e7bf2e60
commit 3a9b08960a
3 changed files with 31 additions and 5 deletions

View File

@ -17,7 +17,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls(); let mut options = options.read_txn_without_tls();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MiB
let index = Index::new(options, &path, true).unwrap(); let index = Index::new(options, &path, true).unwrap();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();

View File

@ -25,13 +25,16 @@ macro_rules! test_filter {
let SearchResult { documents_ids, .. } = search.execute().unwrap(); let SearchResult { documents_ids, .. } = search.execute().unwrap();
let filtered_ids = search::expected_filtered_ids($filter); let filtered_ids = search::expected_filtered_ids($filter);
let expected_external_ids: Vec<_> = let mut expected_external_ids: Vec<_> =
search::expected_order(&criteria, TermsMatchingStrategy::default(), &[]) search::expected_order(&criteria, TermsMatchingStrategy::default(), &[])
.into_iter() .into_iter()
.filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None }) .filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None })
.collect(); .collect();
let documents_ids = search::internal_to_external_ids(&index, &documents_ids); let mut documents_ids = search::internal_to_external_ids(&index, &documents_ids);
expected_external_ids.sort_unstable();
documents_ids.sort_unstable();
assert_eq!(documents_ids, expected_external_ids); assert_eq!(documents_ids, expected_external_ids);
} }
}; };
@ -102,3 +105,9 @@ test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]);
test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]);
// `STARTS WITH` filter cases: a single-letter prefix, a prefix containing a
// diacritic, an empty quoted prefix, and two prefixes on `title` where one
// ("hell") is itself a prefix of the other ("hello").
test_filter!(starts_with_filter_single_letter, vec![Right("tag STARTS WITH e")]);
test_filter!(starts_with_filter_diacritic, vec![Right("tag STARTS WITH é")]);
test_filter!(starts_with_filter_empty_prefix, vec![Right("tag STARTS WITH ''")]);
test_filter!(starts_with_filter_hell, vec![Right("title STARTS WITH hell")]);
test_filter!(starts_with_filter_hello, vec![Right("title STARTS WITH hello")]);

View File

@ -12,7 +12,8 @@ use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings}; use milli::update::{IndexerConfig, Settings};
use milli::vector::RuntimeEmbedders; use milli::vector::RuntimeEmbedders;
use milli::{ use milli::{
AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy, normalize_facet, AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member,
TermsMatchingStrategy,
}; };
use serde::{Deserialize, Deserializer}; use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
@ -36,7 +37,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls(); let mut options = options.read_txn_without_tls();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MiB
let index = Index::new(options, &path, true).unwrap(); let index = Index::new(options, &path, true).unwrap();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
@ -46,6 +47,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
builder.set_criteria(criteria.to_vec()); builder.set_criteria(criteria.to_vec());
builder.set_filterable_fields(vec![ builder.set_filterable_fields(vec![
FilterableAttributesRule::Field(S("title")),
FilterableAttributesRule::Field(S("tag")), FilterableAttributesRule::Field(S("tag")),
FilterableAttributesRule::Field(S("asc_desc_rank")), FilterableAttributesRule::Field(S("asc_desc_rank")),
FilterableAttributesRule::Field(S("_geo")), FilterableAttributesRule::Field(S("_geo")),
@ -220,6 +222,19 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
{ {
id = Some(document.id.clone()) id = Some(document.id.clone())
} }
} else if let Some((field, prefix)) = filter.split_once("STARTS WITH") {
let field = match field.trim() {
"tag" => &document.tag,
"title" => &document.title,
"description" => &document.description,
_ => panic!("Unknown field: {field}"),
};
let field = normalize_facet(field);
let prefix = normalize_facet(prefix.trim().trim_matches('\''));
if field.starts_with(&prefix) {
id = Some(document.id.clone());
}
} else if let Some(("asc_desc_rank", filter)) = filter.split_once('<') { } else if let Some(("asc_desc_rank", filter)) = filter.split_once('<') {
if document.asc_desc_rank < filter.parse().unwrap() { if document.asc_desc_rank < filter.parse().unwrap() {
id = Some(document.id.clone()) id = Some(document.id.clone())
@ -271,6 +286,8 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
} else if matches!(filter, "tag_in NOT IN[1, 2, 3, four, five]") { } else if matches!(filter, "tag_in NOT IN[1, 2, 3, four, five]") {
id = (!matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E")) id = (!matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E"))
.then(|| document.id.clone()); .then(|| document.id.clone());
} else {
panic!("Unknown filter: {filter}");
} }
id id
} }