Merge pull request #5548 from lblack00/attributes-to-search-on-nested-fields

Added support for nested wildcards to attributes_to_search_on
This commit is contained in:
Many the fish 2025-05-22 13:58:23 +00:00 committed by GitHub
commit 97aeb6db4d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 403 additions and 4 deletions

View File

@ -416,3 +416,381 @@ async fn phrase_search_on_title() {
) )
.await; .await;
} }
static NESTED_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"details": {
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"weaknesses": ["magic", "requires transformation"],
"outfit": {
"has_cape": true,
"colors": {
"primary": "red",
"secondary": "gold"
}
}
},
"id": "1",
},
{
"details": {
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"blue_skin": true,
"outfit": {
"has_cape": false
}
},
"id": "2",
},
{
"details": {
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"weaknesses": ["magic", "power instability"],
"outfit": {
"has_cape": false
}
},
"id": "3",
}])
});
#[actix_rt::test]
async fn nested_search_on_title_with_prefix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
// Wildcard should match to 'details.' attribute
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["*.title"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "2"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_search_with_suffix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
// Wildcard should match to any attribute inside 'details.'
// It's worth noting the difference between 'details.*' and '*.title'
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "1"
},
{
"id": "2"
}
]"###);
},
)
.await;
// Should return 1 document (ids: 1)
index
.search(
json!({"q": "gold", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "1"
}
]"###);
},
)
.await;
// Should return 2 documents (ids: 1 and 2)
index
.search(
json!({"q": "true", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "1"
},
{
"id": "2"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_search_on_title_restricted_set_with_suffix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
let (task, _status_code) =
index.update_settings_searchable_attributes(json!(["details.title"])).await;
index.wait_task(task.uid()).await.succeeded();
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "2"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_search_no_searchable_attribute_set_with_any_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown.*", "*.unknown"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"].as_array().unwrap().len(), @"0");
},
)
.await;
let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await;
index.wait_task(task.uid()).await.succeeded();
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown.*", "*.unknown"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"].as_array().unwrap().len(), @"0");
},
)
.await;
let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await;
index.wait_task(task.uid()).await.succeeded();
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown.*", "*.unknown", "*.title"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "2"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_prefix_search_on_title_with_prefix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
// Nested prefix search with prefix wildcard should return 2 documents (ids: 2 and 3).
index
.search(
json!({"q": "Captain Mar", "attributesToSearchOn": ["*.title"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "2"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_prefix_search_on_details_with_suffix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
index
.search(
json!({"q": "Captain Mar", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "1"
},
{
"id": "2"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_prefix_search_on_weaknesses_with_suffix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
// Wildcard search on nested weaknesses should return 2 documents (ids: 1 and 3)
index
.search(
json!({"q": "mag", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "1"
},
{
"id": "3"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_search_on_title_matching_strategy_all() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
// Nested search matching strategy all should only return 1 document (ids: 3)
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["*.title"], "matchingStrategy": "all", "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
}
]"###);
},
)
.await;
}
#[actix_rt::test]
async fn nested_attributes_ranking_rule_order_with_prefix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
// Document 3 should appear before documents 1 and 2
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["*.desc", "*.title"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "1"
},
{
"id": "2"
}
]
"###
);
},
)
.await;
}
#[actix_rt::test]
async fn nested_attributes_ranking_rule_order_with_suffix_wildcard() {
let server = Server::new().await;
let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await;
// Document 3 should appear before documents 1 and 2
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]),
@r###"
[
{
"id": "3"
},
{
"id": "1"
},
{
"id": "2"
}
]
"###
);
},
)
.await;
}

View File

@ -50,7 +50,7 @@ impl AttributePatterns {
/// ///
/// * `pattern` - The pattern to match against. /// * `pattern` - The pattern to match against.
/// * `str` - The string to match against the pattern. /// * `str` - The string to match against the pattern.
fn match_pattern(pattern: &str, str: &str) -> PatternMatch { pub fn match_pattern(pattern: &str, str: &str) -> PatternMatch {
// If the pattern is a wildcard, return Match // If the pattern is a wildcard, return Match
if pattern == "*" { if pattern == "*" {
return PatternMatch::Match; return PatternMatch::Match;

View File

@ -52,6 +52,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words; use self::graph_based_ranking_rule::Words;
use self::interner::Interned; use self::interner::Interned;
use self::vector_sort::VectorSort; use self::vector_sort::VectorSort;
use crate::attribute_patterns::{match_pattern, PatternMatch};
use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::index::PrefixSearch; use crate::index::PrefixSearch;
use crate::localized_attributes_rules::LocalizedFieldIds; use crate::localized_attributes_rules::LocalizedFieldIds;
@ -120,17 +121,37 @@ impl<'ctx> SearchContext<'ctx> {
let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?; let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
let mut wildcard = false; let mut universal_wildcard = false;
let mut restricted_fids = RestrictedFids::default(); let mut restricted_fids = RestrictedFids::default();
for field_name in attributes_to_search_on { for field_name in attributes_to_search_on {
if field_name == "*" { if field_name == "*" {
wildcard = true; universal_wildcard = true;
// we cannot early exit as we want to returns error in case of unknown fields // we cannot early exit as we want to returns error in case of unknown fields
continue; continue;
} }
let searchable_weight = let searchable_weight =
searchable_fields_weights.iter().find(|(name, _, _)| name == field_name); searchable_fields_weights.iter().find(|(name, _, _)| name == field_name);
// The field is not searchable but may contain a wildcard pattern
if searchable_weight.is_none() && field_name.contains("*") {
let matching_searchable_weights: Vec<_> = searchable_fields_weights
.iter()
.filter(|(name, _, _)| match_pattern(field_name, name) == PatternMatch::Match)
.collect();
if !matching_searchable_weights.is_empty() {
for (_name, fid, weight) in matching_searchable_weights {
if exact_attributes_ids.contains(fid) {
restricted_fids.exact.push((*fid, *weight));
} else {
restricted_fids.tolerant.push((*fid, *weight));
}
}
continue;
}
}
let (fid, weight) = match searchable_weight { let (fid, weight) = match searchable_weight {
// The Field id exist and the field is searchable // The Field id exist and the field is searchable
Some((_name, fid, weight)) => (*fid, *weight), Some((_name, fid, weight)) => (*fid, *weight),
@ -160,7 +181,7 @@ impl<'ctx> SearchContext<'ctx> {
}; };
} }
if wildcard { if universal_wildcard {
self.restricted_fids = None; self.restricted_fids = None;
} else { } else {
self.restricted_fids = Some(restricted_fids); self.restricted_fids = Some(restricted_fids);