From e510d4a8a37904e08018ccb5abd57bacb5aeca6d Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Jul 2025 00:50:03 +0200 Subject: [PATCH] add a new _geoPolygon filter to query the cellulite database --- Cargo.lock | 1 + crates/filter-parser/src/lib.rs | 46 +++++++++++++++++++++++-- crates/milli/Cargo.toml | 1 + crates/milli/src/search/facet/filter.rs | 33 +++++++++++++++++- 4 files changed, 78 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 775d4d023..fa73c52a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4111,6 +4111,7 @@ dependencies = [ "flume", "fst", "fxhash", + "geo-types", "geojson", "geoutils", "grenad", diff --git a/crates/filter-parser/src/lib.rs b/crates/filter-parser/src/lib.rs index 938702103..67ac8a3a2 100644 --- a/crates/filter-parser/src/lib.rs +++ b/crates/filter-parser/src/lib.rs @@ -19,6 +19,7 @@ //! word = (alphanumeric | _ | - | .)+ //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")" //! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]") +//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])" //! ``` //! //! Other BNF grammar used to handle some specific errors: @@ -145,6 +146,7 @@ pub enum FilterCondition<'a> { And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] }, + GeoPolygon { points: Vec<[Token<'a>; 2]> }, } pub enum TraversedElement<'a> { @@ -175,6 +177,7 @@ impl<'a> FilterCondition<'a> { } FilterCondition::GeoLowerThan { .. } | FilterCondition::GeoBoundingBox { .. } + | FilterCondition::GeoPolygon { .. } | FilterCondition::In { .. } => None, } } @@ -422,6 +425,38 @@ fn parse_geo_bounding_box(input: Span) -> IResult { Ok((input, res)) } +/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])" +/// If we parse `_geoPolygon` we MUST parse the rest of the expression. 
+fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
+    // we want to allow space BEFORE the _geoPolygon but not after
+    let parsed = preceded(
+        tuple((multispace0, word_exact("_geoPolygon"))),
+        // if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
+        cut(delimited(
+            char('('),
+            separated_list1(
+                tag(","),
+                ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+            ),
+            char(')'),
+        )),
+    )(input)
+    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
+
+    let (input, args) = parsed?;
+
+    // TODO: Return a more specific error
+    if args.len() <= 2 || args.iter().any(|a| a.len() != 2) {
+        // a polygon needs at least three points, each made of exactly two coordinates
+        return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
+    }
+
+    let res = FilterCondition::GeoPolygon {
+        points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
+    };
+    Ok((input, res))
+}
+
 /// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
 fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
     // we want to forbid space BEFORE the _geoPoint but not after
@@ -491,8 +526,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
                 Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
             }),
         ),
-        parse_geo_radius,
-        parse_geo_bounding_box,
+        // The geo parsers are grouped in a nested alt because we reached the maximum number of elements per alt
+        alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
         parse_in,
         parse_not_in,
         parse_condition,
@@ -573,6 +608,13 @@ impl std::fmt::Display for FilterCondition<'_> {
                     bottom_right_point[1]
                 )
             }
+            FilterCondition::GeoPolygon { points } => {
+                write!(f, "_geoPolygon([")?;
+                for point in points {
+                    write!(f, "[{}, {}], ", point[0], point[1])?;
+                }
+                write!(f, "])")
+            }
         }
     }
 }
diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml
index 0e30f8fc7..2a5bb139b 100644
--- a/crates/milli/Cargo.toml
+++ b/crates/milli/Cargo.toml
@@ -112,6
+112,7 @@ utoipa = { version = "5.4.0", features = [ "openapi_extensions", ] } lru = "0.14.0" +geo-types = "0.7.16" [dev-dependencies] mimalloc = { version = "0.1.47", default-features = false } diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index c3eba8031..b0a24d530 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -11,7 +11,7 @@ use roaring::{MultiOps, RoaringBitmap}; use serde_json::Value; use super::facet_range_search; -use crate::constants::RESERVED_GEO_FIELD_NAME; +use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME}; use crate::error::{Error, UserError}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::heed_codec::facet::{ @@ -778,6 +778,37 @@ impl<'a> Filter<'a> { ))? } } + FilterCondition::GeoPolygon { points } => { + if index.is_geojson_enabled(rtxn)? { + let polygon = geo_types::Polygon::new( + geo_types::LineString( + points + .iter() + .map(|p| { + Ok(geo_types::Coord { + x: p[0].parse_finite_float()?, + y: p[1].parse_finite_float()?, + }) + }) + .collect::>()?, + ), + Vec::new(), + ); + let cellulite = cellulite::Writer::new(index.cellulite); + let result = cellulite + .in_shape(rtxn, &polygon.into(), &mut |_| ()) + .map_err(InternalError::CelluliteError)?; + Ok(result) + } else { + Err(points[0][0].as_external_error(FilterError::AttributeNotFilterable { + attribute: RESERVED_GEOJSON_FIELD_NAME, + filterable_patterns: filtered_matching_patterns( + filterable_attribute_rules, + &|features| features.is_filterable(), + ), + }))? + } + } } } }