3405: Implement geo bounding box r=irevoire a=curquiza

Following https://github.com/meilisearch/milli/pull/672 (work from `@gmourier)`

Fixes #2761

Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot]
2023-02-07 09:55:20 +00:00
committed by GitHub
10 changed files with 481 additions and 34 deletions

View File

@ -55,6 +55,9 @@ impl From<AscDescError> for CriterionError {
AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => {
CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() }
}
AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => {
CriterionError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
}
AscDescError::ReservedKeyword { name } => CriterionError::ReservedName { name },
}
}
@ -89,7 +92,10 @@ impl FromStr for Member {
Ok(Member::Geo([lat, lng]))
}
None => {
if is_reserved_keyword(text) || text.starts_with("_geoRadius(") {
if is_reserved_keyword(text)
|| text.starts_with("_geoRadius(")
|| text.starts_with("_geoBoundingBox(")
{
return Err(AscDescError::ReservedKeyword { name: text.to_string() })?;
}
Ok(Member::Field(text.to_string()))
@ -190,6 +196,9 @@ impl From<AscDescError> for SortError {
AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => {
SortError::ReservedNameForFilter { name: String::from("_geoRadius") }
}
AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => {
SortError::ReservedNameForFilter { name: String::from("_geoBoundingBox") }
}
AscDescError::ReservedKeyword { name } => SortError::ReservedName { name },
}
}

View File

@ -159,6 +159,11 @@ mod tests {
("_geoPoint(42, 75):asc", ReservedNameForSort { name: S("_geoPoint") }),
("_geoRadius:asc", ReservedNameForFilter { name: S("_geoRadius") }),
("_geoRadius(42, 75, 59):asc", ReservedNameForFilter { name: S("_geoRadius") }),
("_geoBoundingBox:asc", ReservedNameForFilter { name: S("_geoBoundingBox") }),
(
"_geoBoundingBox([42, 75], [75, 59]):asc",
ReservedNameForFilter { name: S("_geoBoundingBox") },
),
];
for (input, expected) in invalid_criteria {

View File

@ -11,7 +11,7 @@ use crate::documents::{self, DocumentsBatchCursorError};
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword)
["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword)
}
#[derive(Error, Debug)]

View File

@ -1206,7 +1206,7 @@ pub(crate) mod tests {
self, DeleteDocuments, DeletionStrategy, IndexDocuments, IndexDocumentsConfig,
IndexDocumentsMethod, IndexerConfig, Settings,
};
use crate::{db_snap, obkv_to_json, Index, Search, SearchResult};
use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult};
pub(crate) struct TempIndex {
pub inner: Index,
@ -1504,6 +1504,108 @@ pub(crate) mod tests {
assert_eq!(user_defined, &["doggo", "name"]);
}
#[test]
fn test_basic_geo_bounding_box() {
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("_geo") });
})
.unwrap();
index
.add_documents(documents!([
{ "id": 0, "_geo": { "lat": 0, "lng": 0 } },
{ "id": 1, "_geo": { "lat": 0, "lng": -175 } },
{ "id": 2, "_geo": { "lat": 0, "lng": 175 } },
{ "id": 3, "_geo": { "lat": 85, "lng": 0 } },
{ "id": 4, "_geo": { "lat": -85, "lng": 0 } },
]))
.unwrap();
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
// exact match a document
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([0, 0], [0, 0])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>");
// match a document in the middle of the rectangle
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([10, -10], [-10, 10])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>");
// select everything
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([90, -180], [-90, 180])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2, 3, 4]>");
// go on the edge of the longitude
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([0, 180], [0, -170])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1]>");
// go on the other edge of the longitude
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([0, 170], [0, -180])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2]>");
// wrap around the longitude
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([0, 170], [0, -170])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1, 2]>");
// go on the edge of the latitude
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([90, 0], [80, 0])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[3]>");
// go on the edge of the latitude
let search_result = search
.filter(Filter::from_str("_geoBoundingBox([-80, 0], [-90, 0])").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[4]>");
// the requests that don't make sense
// try to wrap around the latitude
let error = search
.filter(Filter::from_str("_geoBoundingBox([-80, 0], [80, 0])").unwrap().unwrap())
.execute()
.unwrap_err();
insta::assert_display_snapshot!(error, @r###"
The top latitude `-80` is below the bottom latitude `80`.
32:33 _geoBoundingBox([-80, 0], [80, 0])
"###);
// send a top latitude lower than the bottow latitude
let error = search
.filter(Filter::from_str("_geoBoundingBox([-10, 0], [10, 0])").unwrap().unwrap())
.execute()
.unwrap_err();
insta::assert_display_snapshot!(error, @r###"
The top latitude `-10` is below the bottom latitude `10`.
32:33 _geoBoundingBox([-10, 0], [10, 0])
"###);
}
#[test]
fn replace_documents_external_ids_and_soft_deletion_check() {
use big_s::S;

View File

@ -27,6 +27,7 @@ enum FilterError<'a> {
BadGeo(&'a str),
BadGeoLat(f64),
BadGeoLng(f64),
BadGeoBoundingBoxTopIsBelowBottom(f64, f64),
Reserved(&'a str),
TooDeep,
}
@ -62,7 +63,8 @@ impl<'a> Display for FilterError<'a> {
"`{}` is a reserved keyword and thus can't be used as a filter expression.",
keyword
),
Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.", keyword),
Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword),
Self::BadGeoBoundingBoxTopIsBelowBottom(top, bottom) => write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`."),
Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat),
Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ", lng),
}
@ -294,7 +296,7 @@ impl<'a> Filter<'a> {
Ok(RoaringBitmap::new())
}
} else {
match fid.lexeme() {
match fid.value() {
attribute @ "_geo" => {
Err(fid.as_external_error(FilterError::BadGeo(attribute)))?
}
@ -385,6 +387,131 @@ impl<'a> Filter<'a> {
}))?
}
}
FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => {
if filterable_fields.contains("_geo") {
let top_left: [f64; 2] = [
top_left_point[0].parse_finite_float()?,
top_left_point[1].parse_finite_float()?,
];
let bottom_right: [f64; 2] = [
bottom_right_point[0].parse_finite_float()?,
bottom_right_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_left[0]) {
return Err(top_left_point[0]
.as_external_error(FilterError::BadGeoLat(top_left[0])))?;
}
if !(-180.0..=180.0).contains(&top_left[1]) {
return Err(top_left_point[1]
.as_external_error(FilterError::BadGeoLng(top_left[1])))?;
}
if !(-90.0..=90.0).contains(&bottom_right[0]) {
return Err(bottom_right_point[0]
.as_external_error(FilterError::BadGeoLat(bottom_right[0])))?;
}
if !(-180.0..=180.0).contains(&bottom_right[1]) {
return Err(bottom_right_point[1]
.as_external_error(FilterError::BadGeoLng(bottom_right[1])))?;
}
if top_left[0] < bottom_right[0] {
return Err(bottom_right_point[1].as_external_error(
FilterError::BadGeoBoundingBoxTopIsBelowBottom(
top_left[0],
bottom_right[0],
),
))?;
}
// Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range
// filter to create the following filter;
// `_geo.lat {top_left[0]} TO {bottom_right[0]} AND _geo.lng {top_left[1]} TO {bottom_right[1]}`
// As we can see, we need to use a bunch of tokens that don't exist in the original filter,
// thus we're going to create tokens that point to a random span but contain our text.
let geo_lat_token =
Token::new(top_left_point[0].original_span(), Some("_geo.lat".to_string()));
let condition_lat = FilterCondition::Condition {
fid: geo_lat_token,
op: Condition::Between {
from: bottom_right_point[0].clone(),
to: top_left_point[0].clone(),
},
};
let selected_lat = Filter { condition: condition_lat }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?;
let geo_lng_token =
Token::new(top_left_point[1].original_span(), Some("_geo.lng".to_string()));
let selected_lng = if top_left[1] > bottom_right[1] {
// In this case the bounding box is wrapping around the earth (going from 180 to -180).
// We need to update the lng part of the filter from;
// `_geo.lng {top_left[1]} TO {bottom_right[1]}` to
// `_geo.lng {top_left[1]} TO 180 AND _geo.lng -180 TO {bottom_right[1]}`
let min_lng_token = Token::new(
top_left_point[1].original_span(),
Some("-180.0".to_string()),
);
let max_lng_token = Token::new(
top_left_point[1].original_span(),
Some("180.0".to_string()),
);
let condition_left = FilterCondition::Condition {
fid: geo_lng_token.clone(),
op: Condition::Between {
from: top_left_point[1].clone(),
to: max_lng_token,
},
};
let left = Filter { condition: condition_left }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?;
let condition_right = FilterCondition::Condition {
fid: geo_lng_token,
op: Condition::Between {
from: min_lng_token,
to: bottom_right_point[1].clone(),
},
};
let right = Filter { condition: condition_right }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?;
left | right
} else {
let condition_lng = FilterCondition::Condition {
fid: geo_lng_token,
op: Condition::Between {
from: top_left_point[1].clone(),
to: bottom_right_point[1].clone(),
},
};
Filter { condition: condition_lng }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?
};
Ok(selected_lat & selected_lng)
} else {
Err(top_left_point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: "_geo",
filterable_fields: filterable_fields.clone(),
}))?
}
}
}
}
}
@ -502,6 +629,12 @@ mod tests {
"Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
));
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
));
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
@ -524,6 +657,12 @@ mod tests {
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
));
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
));
let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
@ -675,6 +814,92 @@ mod tests {
));
}
#[test]
fn geo_bounding_box_error() {
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S("_geo"), S("price") });
})
.unwrap();
let rtxn = index.read_txn().unwrap();
// geoboundingbox top left coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
// geoboundingbox top left coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
// geoboundingbox bottom right coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
// geoboundingbox bottom right coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
// geoboundingbox top left coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
// geoboundingbox top left coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
));
// geoboundingbox bottom right coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
));
// geoboundingbox bottom right coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
}
#[test]
fn filter_depth() {
// generates a big (2 MiB) filter with too much of ORs.

View File

@ -319,7 +319,7 @@ impl fmt::Debug for Search<'_> {
}
}
#[derive(Default)]
#[derive(Default, Debug)]
pub struct SearchResult {
pub matching_words: MatchingWords,
pub candidates: RoaringBitmap,