add a new _geoPolygon filter to query the cellulite database

This commit is contained in:
Tamo
2025-07-16 00:50:03 +02:00
parent 05a13f662d
commit e510d4a8a3
4 changed files with 78 additions and 3 deletions

1
Cargo.lock generated
View File

@ -4111,6 +4111,7 @@ dependencies = [
"flume", "flume",
"fst", "fst",
"fxhash", "fxhash",
"geo-types",
"geojson", "geojson",
"geoutils", "geoutils",
"grenad", "grenad",

View File

@ -19,6 +19,7 @@
//! word = (alphanumeric | _ | - | .)+ //! word = (alphanumeric | _ | - | .)+
//! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")" //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
//! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]") //! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
//! ``` //! ```
//! //!
//! Other BNF grammar used to handle some specific errors: //! Other BNF grammar used to handle some specific errors:
@ -145,6 +146,7 @@ pub enum FilterCondition<'a> {
And(Vec<Self>), And(Vec<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] }, GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
GeoPolygon { points: Vec<[Token<'a>; 2]> },
} }
pub enum TraversedElement<'a> { pub enum TraversedElement<'a> {
@ -175,6 +177,7 @@ impl<'a> FilterCondition<'a> {
} }
FilterCondition::GeoLowerThan { .. } FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. } | FilterCondition::GeoBoundingBox { .. }
| FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None, | FilterCondition::In { .. } => None,
} }
} }
@ -422,6 +425,38 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
Ok((input, res)) Ok((input, res))
} }
/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoBoundingBox but not after
let parsed = preceded(
tuple((multispace0, word_exact("_geoPolygon"))),
// if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
cut(delimited(
char('('),
separated_list1(
tag(","),
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
),
char(')'),
)),
)(input)
.map_err(|e| e.map(|_| Error::new_from_kind(dbg!(input), ErrorKind::GeoBoundingBox)));
let (input, args) = parsed?;
// TODO: Return a more specific error
if args.len() <= 2 || args.iter().any(|a| a.len() != 2) {
println!("here");
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
}
let res = FilterCondition::GeoPolygon {
points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
};
Ok((input, res))
}
/// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float) /// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_point(input: Span) -> IResult<FilterCondition> { fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoPoint but not after // we want to forbid space BEFORE the _geoPoint but not after
@ -491,8 +526,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())) Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
}), }),
), ),
parse_geo_radius, // Made a random block of functions because we reached the maximum number of elements per alt
parse_geo_bounding_box, alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
parse_in, parse_in,
parse_not_in, parse_not_in,
parse_condition, parse_condition,
@ -573,6 +608,13 @@ impl std::fmt::Display for FilterCondition<'_> {
bottom_right_point[1] bottom_right_point[1]
) )
} }
FilterCondition::GeoPolygon { points } => {
write!(f, "_geoPolygon([")?;
for point in points {
write!(f, "[{}, {}], ", point[0], point[1])?;
}
write!(f, "])")
}
} }
} }
} }

View File

@ -112,6 +112,7 @@ utoipa = { version = "5.4.0", features = [
"openapi_extensions", "openapi_extensions",
] } ] }
lru = "0.14.0" lru = "0.14.0"
geo-types = "0.7.16"
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false } mimalloc = { version = "0.1.47", default-features = false }

View File

@ -11,7 +11,7 @@ use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value; use serde_json::Value;
use super::facet_range_search; use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::error::{Error, UserError}; use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
@ -778,6 +778,37 @@ impl<'a> Filter<'a> {
))? ))?
} }
} }
FilterCondition::GeoPolygon { points } => {
if index.is_geojson_enabled(rtxn)? {
let polygon = geo_types::Polygon::new(
geo_types::LineString(
points
.iter()
.map(|p| {
Ok(geo_types::Coord {
x: p[0].parse_finite_float()?,
y: p[1].parse_finite_float()?,
})
})
.collect::<Result<_, filter_parser::Error>>()?,
),
Vec::new(),
);
let cellulite = cellulite::Writer::new(index.cellulite);
let result = cellulite
.in_shape(rtxn, &polygon.into(), &mut |_| ())
.map_err(InternalError::CelluliteError)?;
Ok(result)
} else {
Err(points[0][0].as_external_error(FilterError::AttributeNotFilterable {
attribute: RESERVED_GEOJSON_FIELD_NAME,
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
}))?
}
}
} }
} }
} }