Revert metadata creation when computing facet search and distinct

This commit is contained in:
ManyTheFish
2025-03-10 15:27:17 +01:00
parent b12ffd1356
commit abef655849
6 changed files with 45 additions and 156 deletions

View File

@@ -548,7 +548,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() {
&json!({"facetName": "invalid", "facetQuery": "a"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes patterns are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
)
.await;

View File

@@ -158,28 +158,32 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
.field,
match .valid_fields.is_empty() {
match .valid_patterns.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
false => format!("Available filterable attributes patterns are: `{}{}`.",
valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
),
}
)]
InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
InvalidDistinctAttribute {
field: String,
valid_patterns: BTreeSet<String>,
hidden_fields: bool,
},
#[error("Attribute `{}` is not facet-searchable. {}",
.field,
match .valid_fields.is_empty() {
match .valid_patterns.is_empty() {
true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
false => format!("Available facet-searchable attributes patterns are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
),
}
)]
InvalidFacetSearchFacetName {
field: String,
valid_fields: BTreeSet<String>,
valid_patterns: BTreeSet<String>,
hidden_fields: bool,
},
#[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",

View File

@@ -6,7 +6,6 @@ use utoipa::ToSchema;
use crate::{
attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch},
constants::RESERVED_GEO_FIELD_NAME,
fields_ids_map::metadata::FieldIdMapWithMetadata,
AttributePatterns,
};
@@ -225,30 +224,6 @@ impl Default for FilterFeatures {
}
}
/// Match a field against a set of filterable attributes rules.
///
/// This function will return the set of field names that match the given filter.
///
/// # Arguments
///
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
/// * `fields_ids_map` - The map of field names to field ids.
/// * `filter` - The filter function to apply to the filterable attributes rules.
pub fn filtered_matching_field_names<'fim>(
filterable_attributes: &[FilterableAttributesRule],
fields_ids_map: &'fim FieldIdMapWithMetadata,
filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> BTreeSet<&'fim str> {
let mut result = BTreeSet::new();
for (_, field_name, metadata) in fields_ids_map.iter() {
let features = metadata.filterable_attributes_features(filterable_attributes);
if filter(features) {
result.insert(field_name);
}
}
result
}
/// Match a field against a set of filterable attributes rules.
///
/// This function will return the set of patterns that match the given filter.
@@ -306,34 +281,6 @@ pub fn matching_features(
None
}
/// Check if a field is filterable calling the method `FilterableAttributesFeatures::is_filterable()`.
///
/// # Arguments
///
/// * `field_name` - The field name to check.
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
pub fn is_field_filterable(
field_name: &str,
filterable_attributes: &[FilterableAttributesRule],
) -> bool {
matching_features(field_name, filterable_attributes)
.map_or(false, |(_, features)| features.is_filterable())
}
/// Check if a field is facet searchable calling the method `FilterableAttributesFeatures::is_facet_searchable()`.
///
/// # Arguments
///
/// * `field_name` - The field name to check.
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
pub fn is_field_facet_searchable(
field_name: &str,
filterable_attributes: &[FilterableAttributesRule],
) -> bool {
matching_features(field_name, filterable_attributes)
.map_or(false, |(_, features)| features.is_facet_searchable())
}
/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields.
pub fn match_faceted_field(
field_name: &str,

View File

@@ -10,9 +10,7 @@ use roaring::RoaringBitmap;
use tracing::error;
use crate::error::UserError;
use crate::filterable_attributes_rules::{
filtered_matching_field_names, is_field_facet_searchable,
};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
@@ -77,37 +75,27 @@ impl<'a> SearchForFacetValues<'a> {
let rtxn = self.search_query.rtxn;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?;
let fid = match fields_ids_map.id_with_metadata(&self.facet) {
Some((fid, metadata))
if metadata
.filterable_attributes_features(&filterable_attributes_rules)
.is_facet_searchable() =>
if !matching_features(&self.facet, &filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_facet_searchable())
{
fid
}
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None if is_field_facet_searchable(&self.facet, &filterable_attributes_rules) => {
return Ok(Vec::new());
}
// we return an error when the attribute is not facet searchable
_otherwise => {
let matching_field_names = filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_facet_searchable(),
);
let (valid_fields, hidden_fields) =
let matching_field_names =
filtered_matching_patterns(&filterable_attributes_rules, &|features| {
features.is_facet_searchable()
});
let (valid_patterns, hidden_fields) =
index.remove_hidden_fields(rtxn, matching_field_names)?;
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields,
valid_patterns,
hidden_fields,
}
.into());
}
};
let fields_ids_map = index.fields_ids_map(rtxn)?;
let Some(fid) = fields_ids_map.id(&self.facet) else {
return Ok(Vec::new());
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {

View File

@@ -9,7 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
use crate::{
@@ -190,20 +190,20 @@ impl<'a> Search<'a> {
if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?;
// check if the distinct field is in the filterable fields
if !is_field_filterable(distinct, &filterable_fields) {
if !matching_features(distinct, &filterable_fields)
.map_or(false, |(_, features)| features.is_filterable())
{
// if not, remove the hidden fields from the filterable fields to generate the error message
let fields_ids_map = ctx.index.fields_ids_map_with_metadata(ctx.txn)?;
let matching_field_names = filtered_matching_field_names(
&filterable_fields,
&fields_ids_map,
&|features| features.is_filterable(),
);
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?;
let matching_patterns =
filtered_matching_patterns(&filterable_fields, &|features| {
features.is_filterable()
});
let (valid_patterns, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, matching_patterns)?;
// and return the error
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_fields,
valid_patterns,
hidden_fields,
}));
}

View File

@@ -770,12 +770,11 @@ mod tests {
use bumpalo::Bump;
use fst::IntoStreamer;
use heed::RwTxn;
use maplit::{btreeset, hashset};
use maplit::hashset;
use super::*;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::documents::mmap_from_objects;
use crate::filterable_attributes_rules::filtered_matching_field_names;
use crate::index::tests::TempIndex;
use crate::index::IndexEmbeddingConfig;
use crate::progress::Progress;
@@ -1255,14 +1254,6 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin"));
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
search.query("document");
@@ -1478,15 +1469,6 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain"));
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index);
let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
@@ -1504,17 +1486,6 @@ mod tests {
db_snap!(index, facet_id_string_docids, @"");
db_snap!(index, field_id_docid_facet_strings, @"");
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!());
// update the settings to test the sortable
index
.update_settings(|settings| {
@@ -1744,13 +1715,6 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@@ -1855,13 +1819,6 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@@ -1924,13 +1881,6 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue"));
let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();