From 0319a46a7ab611ab78a85af4e22b3b7e7e260359 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Wed, 16 Jul 2025 00:50:03 +0200
Subject: [PATCH] add a new _geoPolygon filter to query the cellulite database

---
Review notes (this section is ignored when the patch is applied):

* parse_geo_polygon: removed the leftover `dbg!(input)` and `println!("here")`
  debugging output.
* The parser accepts `_geoPolygon([a, b], [c, d], [e, f])`: a single `(`
  followed by a comma-separated list of `[float, float]`. The grammar
  documentation and the `Display` implementation used an extra pair of outer
  brackets (and `Display` emitted a trailing ", "), which the parser rejects.
  Both now describe/emit exactly what the parser accepts.
* Fixed the copy-pasted comment that mentioned `_geoBoundingBox` inside
  `parse_geo_polygon`.
* Hunk headers and the diffstat were recounted for the edited content. The
  `index` line of crates/filter-parser/src/lib.rs still carries the blob ids
  of the original submission.
* NOTE(review): the copy of this patch under review had lost its line
  structure and the text between some angle brackets. `Option<Token<'a>>`,
  `IResult<FilterCondition>` and the `collect::<Result<_, filter_parser::Error>>`
  turbofish were reconstructed from context -- verify them (and the leading
  whitespace of context lines) against the tree before applying.
* NOTE(review): polygon parse errors still reuse `ErrorKind::GeoBoundingBox`
  (see the TODO in the code); a dedicated error kind would give users a more
  accurate message.
* NOTE(review): the filter forwards the first coordinate of each point as `x`
  and the second as `y` of `geo_types::Coord` -- confirm this matches the
  coordinate order users are expected to write.

 Cargo.lock                              |  1 +
 crates/filter-parser/src/lib.rs         | 52 +++++++++++++++++++++++--
 crates/milli/Cargo.toml                 |  1 +
 crates/milli/src/search/facet/filter.rs | 35 ++++++++++++++++++-
 4 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d9a95ed66..e236f510a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4164,6 +4164,7 @@ dependencies = [
  "flume",
  "fst",
  "fxhash",
+ "geo-types",
  "geojson",
  "geoutils",
  "grenad",
diff --git a/crates/filter-parser/src/lib.rs b/crates/filter-parser/src/lib.rs
index c46797dd2..c35f1622c 100644
--- a/crates/filter-parser/src/lib.rs
+++ b/crates/filter-parser/src/lib.rs
@@ -19,6 +19,7 @@
 //! word           = (alphanumeric | _ | - | .)+
 //! geoRadius      = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
 //! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
+//! geoPolygon     = "_geoPolygon(" WS* "[" WS* float WS* "," WS* float WS* "]" WS* ("," WS* "[" WS* float WS* "," WS* float WS* "]" WS*)+ ")"
 //! ```
 //!
 //! Other BNF grammar used to handle some specific errors:
@@ -158,6 +159,7 @@ pub enum FilterCondition<'a> {
     VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
     GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
     GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
+    GeoPolygon { points: Vec<[Token<'a>; 2]> },
 }
 
 pub enum TraversedElement<'a> {
@@ -202,6 +204,7 @@ impl<'a> FilterCondition<'a> {
             }
             FilterCondition::GeoLowerThan { .. }
             | FilterCondition::GeoBoundingBox { .. }
+            | FilterCondition::GeoPolygon { .. }
             | FilterCondition::In { .. } => None,
             FilterCondition::VectorExists { fid, .. } => Some(fid),
         }
@@ -447,6 +450,40 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
     Ok((input, res))
 }
 
+/// geoPolygon     = "_geoPolygon(" WS* "[" WS* float WS* "," WS* float WS* "]" WS* ("," WS* "[" WS* float WS* "," WS* float WS* "]" WS*)+ ")"
+///
+/// At least three points are required, each made of exactly two floats; anything else is
+/// reported as a failure.
+/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
+fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
+    // we want to allow space BEFORE the _geoPolygon but not after
+    let parsed = preceded(
+        tuple((multispace0, word_exact("_geoPolygon"))),
+        // if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
+        cut(delimited(
+            char('('),
+            separated_list1(
+                tag(","),
+                ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+            ),
+            char(')'),
+        )),
+    )(input)
+    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
+
+    let (input, args) = parsed?;
+
+    // TODO: Return a more specific error
+    if args.len() <= 2 || args.iter().any(|a| a.len() != 2) {
+        return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
+    }
+
+    let res = FilterCondition::GeoPolygon {
+        points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
+    };
+    Ok((input, res))
+}
+
 /// geoPoint       = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
 fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
     // we want to forbid space BEFORE the _geoPoint but not after
@@ -516,8 +553,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
                 Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
             }),
         ),
-        parse_geo_radius,
-        parse_geo_bounding_box,
+        // Made a random block of functions because we reached the maximum number of elements per alt
+        alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
         parse_in,
         parse_not_in,
         parse_condition,
@@ -613,6 +650,17 @@ impl std::fmt::Display for FilterCondition<'_> {
                     bottom_right_point[1]
                 )
             }
+            FilterCondition::GeoPolygon { points } => {
+                // Emit the same syntax `parse_geo_polygon` accepts so the output can be parsed back.
+                write!(f, "_geoPolygon(")?;
+                for (i, point) in points.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "[{}, {}]", point[0], point[1])?;
+                }
+                write!(f, ")")
+            }
         }
     }
 }
diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml
index 16a425564..68c18582c 100644
--- a/crates/milli/Cargo.toml
+++ b/crates/milli/Cargo.toml
@@ -119,6 +119,7 @@ twox-hash = { version = "2.1.1", default-features = false, features = [
     "xxhash3_64",
     "xxhash64",
 ] }
+geo-types = "0.7.16"
 
 [dev-dependencies]
 mimalloc = { version = "0.1.47", default-features = false }
diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs
index 29ebc187c..afdb32b34 100644
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -12,7 +12,9 @@ use roaring::{MultiOps, RoaringBitmap};
 use serde_json::Value;
 
 use super::facet_range_search;
-use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
+use crate::constants::{
+    RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
+};
 use crate::error::{Error, UserError};
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
@@ -824,6 +826,37 @@ impl<'a> Filter<'a> {
                     ))?
                 }
             }
+            FilterCondition::GeoPolygon { points } => {
+                if index.is_geojson_enabled(rtxn)? {
+                    let polygon = geo_types::Polygon::new(
+                        geo_types::LineString(
+                            points
+                                .iter()
+                                .map(|p| {
+                                    Ok(geo_types::Coord {
+                                        x: p[0].parse_finite_float()?,
+                                        y: p[1].parse_finite_float()?,
+                                    })
+                                })
+                                .collect::<Result<_, filter_parser::Error>>()?,
+                        ),
+                        Vec::new(),
+                    );
+                    let cellulite = cellulite::Writer::new(index.cellulite);
+                    let result = cellulite
+                        .in_shape(rtxn, &polygon.into(), &mut |_| ())
+                        .map_err(InternalError::CelluliteError)?;
+                    Ok(result)
+                } else {
+                    Err(points[0][0].as_external_error(FilterError::AttributeNotFilterable {
+                        attribute: RESERVED_GEOJSON_FIELD_NAME,
+                        filterable_patterns: filtered_matching_patterns(
+                            filterable_attribute_rules,
+                            &|features| features.is_filterable(),
+                        ),
+                    }))?
+                }
+            }
         }
     }
 }