Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-09-26 06:46:27 +00:00

Merge pull request #5758 from meilisearch/cellulite

Cellulite integration

Cargo.lock (generated): 1074 changed lines. File diff suppressed because it is too large.
@@ -7,23 +7,14 @@
 use nom::branch::alt;
 use nom::bytes::complete::tag;
-use nom::character::complete::char;
-use nom::character::complete::multispace0;
-use nom::character::complete::multispace1;
-use nom::combinator::cut;
-use nom::combinator::map;
-use nom::combinator::value;
-use nom::sequence::preceded;
-use nom::sequence::{terminated, tuple};
+use nom::character::complete::{char, multispace0, multispace1};
+use nom::combinator::{cut, map, value};
+use nom::sequence::{preceded, terminated, tuple};
 use Condition::*;

 use crate::error::IResultExt;
-use crate::value::parse_vector_value;
-use crate::value::parse_vector_value_cut;
-use crate::Error;
-use crate::ErrorKind;
-use crate::VectorFilter;
-use crate::{parse_value, FilterCondition, IResult, Span, Token};
+use crate::value::{parse_vector_value, parse_vector_value_cut};
+use crate::{parse_value, Error, ErrorKind, FilterCondition, IResult, Span, Token, VectorFilter};

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Condition<'a> {
@@ -75,7 +75,11 @@ pub enum ExpectedValueKind {
 pub enum ErrorKind<'a> {
     ReservedGeo(&'a str),
     GeoRadius,
+    GeoRadiusArgumentCount(usize),
     GeoBoundingBox,
+    GeoPolygon,
+    GeoPolygonNotEnoughPoints(usize),
+    GeoCoordinatesNotPair(usize),
     MisusedGeoRadius,
     MisusedGeoBoundingBox,
     VectorFilterLeftover,
@@ -189,7 +193,7 @@ impl Display for Error<'_> {
            }
            ErrorKind::InvalidPrimary => {
                let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
+                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` {text}")?
            }
            ErrorKind::InvalidEscapedNumber => {
                writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
@@ -198,11 +202,23 @@ impl Display for Error<'_> {
                writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
            }
            ErrorKind::GeoRadius => {
-                writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
+                writeln!(f, "The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.")?
+            }
+            ErrorKind::GeoRadiusArgumentCount(count) => {
+                writeln!(f, "Was expecting 3 or 4 arguments for `_geoRadius`, but instead found {count}.")?
            }
            ErrorKind::GeoBoundingBox => {
                writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
            }
+            ErrorKind::GeoPolygon => {
+                writeln!(f, "The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.")?
+            }
+            ErrorKind::GeoPolygonNotEnoughPoints(n) => {
+                writeln!(f, "The `_geoPolygon` filter expects at least 3 points but only {n} were specified")?;
+            }
+            ErrorKind::GeoCoordinatesNotPair(number) => {
+                writeln!(f, "Was expecting 2 coordinates but instead found {number}.")?
+            }
            ErrorKind::ReservedGeo(name) => {
                writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
            }
@@ -19,6 +19,7 @@
 //! word = (alphanumeric | _ | - | .)+
 //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
 //! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
+//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
 //! ```
 //!
 //! Other BNF grammar used to handle some specific errors:
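As an aside, here is a minimal sketch of what the extended grammar accepts, assuming the filter-parser crate's public `FilterCondition::parse` entry point and the `Display` impl shown further down; the concrete filter strings are illustrative only.

use filter_parser::FilterCondition;

fn main() {
    // `_geoRadius` now takes an optional fourth resolution argument, and
    // `_geoPolygon` takes three or more [lat, lng] pairs (a trailing comma
    // before the closing parenthesis is tolerated).
    for filter in [
        "_geoRadius(50.63, 3.11, 14, 1000)",
        "_geoPolygon([50.63, 3.11], [50.64, 3.12], [50.65, 3.10],)",
    ] {
        match FilterCondition::parse(filter) {
            Ok(Some(condition)) => println!("parsed: {condition}"),
            Ok(None) => println!("empty filter"),
            Err(error) => println!("error: {error}"),
        }
    }
}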
@@ -156,8 +157,9 @@ pub enum FilterCondition<'a> {
     Or(Vec<Self>),
     And(Vec<Self>),
     VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
-    GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
+    GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a>, resolution: Option<Token<'a>> },
     GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
+    GeoPolygon { points: Vec<[Token<'a>; 2]> },
 }

 pub enum TraversedElement<'a> {
|
|||||||
FilterCondition::VectorExists { .. }
|
FilterCondition::VectorExists { .. }
|
||||||
| FilterCondition::GeoLowerThan { .. }
|
| FilterCondition::GeoLowerThan { .. }
|
||||||
| FilterCondition::GeoBoundingBox { .. }
|
| FilterCondition::GeoBoundingBox { .. }
|
||||||
|
| FilterCondition::GeoPolygon { .. }
|
||||||
| FilterCondition::In { .. } => None,
|
| FilterCondition::In { .. } => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -202,6 +205,7 @@ impl<'a> FilterCondition<'a> {
|
|||||||
}
|
}
|
||||||
FilterCondition::GeoLowerThan { .. }
|
FilterCondition::GeoLowerThan { .. }
|
||||||
| FilterCondition::GeoBoundingBox { .. }
|
| FilterCondition::GeoBoundingBox { .. }
|
||||||
|
| FilterCondition::GeoPolygon { .. }
|
||||||
| FilterCondition::In { .. } => None,
|
| FilterCondition::In { .. } => None,
|
||||||
FilterCondition::VectorExists { fid, .. } => Some(fid),
|
FilterCondition::VectorExists { fid, .. } => Some(fid),
|
||||||
}
|
}
|
||||||
@@ -396,23 +400,27 @@ fn parse_not(input: Span, depth: usize) -> IResult<FilterCondition> {
 /// If we parse `_geoRadius` we MUST parse the rest of the expression.
 fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
     // we want to allow space BEFORE the _geoRadius but not after
-    let parsed = preceded(
-        tuple((multispace0, word_exact("_geoRadius"))),
-        // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
-        cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
-    )(input)
-    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius)));
+    let (input, _) = tuple((multispace0, word_exact("_geoRadius")))(input)?;
+
+    // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
+    let parsed =
+        delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))(input)
+            .map_cut(ErrorKind::GeoRadius);

     let (input, args) = parsed?;

-    if args.len() != 3 {
-        return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius)));
+    if !(3..=4).contains(&args.len()) {
+        return Err(Error::failure_from_kind(input, ErrorKind::GeoRadiusArgumentCount(args.len())));
     }

     let res = FilterCondition::GeoLowerThan {
         point: [args[0].into(), args[1].into()],
         radius: args[2].into(),
+        resolution: args.get(3).cloned().map(Token::from),
     };
     Ok((input, res))
 }
@@ -420,26 +428,33 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
 /// If we parse `_geoBoundingBox` we MUST parse the rest of the expression.
 fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
     // we want to allow space BEFORE the _geoBoundingBox but not after
-    let parsed = preceded(
-        tuple((multispace0, word_exact("_geoBoundingBox"))),
-        // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
-        cut(delimited(
-            char('('),
-            separated_list1(
-                tag(","),
-                ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
-            ),
-            char(')'),
-        )),
-    )(input)
-    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
-
-    let (input, args) = parsed?;
-
-    if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
+    let (input, _) = tuple((multispace0, word_exact("_geoBoundingBox")))(input)?;
+
+    // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
+    let (input, args) = delimited(
+        char('('),
+        separated_list1(
+            tag(","),
+            ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+        ),
+        char(')'),
+    )(input)
+    .map_cut(ErrorKind::GeoBoundingBox)?;
+
+    if args.len() != 2 {
         return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
     }

+    if let Some(offending) = args.iter().find(|a| a.len() != 2) {
+        let context = offending.first().unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoCoordinatesNotPair(offending.len()),
+        ));
+    }
+
     let res = FilterCondition::GeoBoundingBox {
         top_right_point: [args[0][0].into(), args[0][1].into()],
         bottom_left_point: [args[1][0].into(), args[1][1].into()],
@@ -447,6 +462,47 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
     Ok((input, res))
 }

+/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
+/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
+fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
+    // we want to allow space BEFORE the _geoPolygon but not after
+    let (input, _) = tuple((multispace0, word_exact("_geoPolygon")))(input)?;
+
+    // if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
+    let (input, args): (_, Vec<Vec<LocatedSpan<_, _>>>) = delimited(
+        char('('),
+        separated_list1(
+            tag(","),
+            ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+        ),
+        preceded(opt(ws(char(','))), char(')')), // Tolerate trailing comma
+    )(input)
+    .map_cut(ErrorKind::GeoPolygon)?;
+
+    if args.len() < 3 {
+        let context = args.last().and_then(|a| a.last()).unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoPolygonNotEnoughPoints(args.len()),
+        ));
+    }
+
+    if let Some(offending) = args.iter().find(|a| a.len() != 2) {
+        let context = offending.first().unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoCoordinatesNotPair(offending.len()),
+        ));
+    }
+
+    let res = FilterCondition::GeoPolygon {
+        points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
+    };
+    Ok((input, res))
+}
+
 /// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
 fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
     // we want to forbid space BEFORE the _geoPoint but not after
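The `_geoPolygon` argument parser above is built from standard nom combinators. Below is a small, self-contained sketch of the same combinator shape, not the crate's code: `ws` is re-implemented locally and nom's `double` stands in for `recognize_float`, but it shows how the optional trailing comma is tolerated.

use nom::bytes::complete::tag;
use nom::character::complete::{char, multispace0};
use nom::combinator::opt;
use nom::multi::separated_list1;
use nom::number::complete::double;
use nom::sequence::{delimited, preceded};
use nom::IResult;

// Whitespace-tolerant wrapper, standing in for the crate's `ws` helper.
fn ws<'a, O>(
    inner: impl FnMut(&'a str) -> IResult<&'a str, O>,
) -> impl FnMut(&'a str) -> IResult<&'a str, O> {
    delimited(multispace0, inner, multispace0)
}

// Parses the argument-list shape used by `_geoPolygon`:
// "([f, f], [f, f], [f, f],)" with an optional trailing comma before ')'.
fn polygon_args(input: &str) -> IResult<&str, Vec<Vec<f64>>> {
    delimited(
        char('('),
        separated_list1(
            tag(","),
            ws(delimited(char('['), separated_list1(tag(","), ws(double)), char(']'))),
        ),
        preceded(opt(ws(char(','))), char(')')),
    )(input)
}

fn main() {
    let (rest, points) = polygon_args("([50.63, 3.11], [50.64, 3.12], [50.65, 3.10],)").unwrap();
    assert!(rest.is_empty());
    assert_eq!(points.len(), 3);
}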
@@ -516,8 +572,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
                Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
            }),
        ),
-        parse_geo_radius,
-        parse_geo_bounding_box,
+        // Made a random block of functions because we reached the maximum number of elements per alt
+        alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
        parse_in,
        parse_not_in,
        parse_condition,
@@ -597,9 +653,12 @@ impl std::fmt::Display for FilterCondition<'_> {
                }
                write!(f, " EXISTS")
            }
-            FilterCondition::GeoLowerThan { point, radius } => {
+            FilterCondition::GeoLowerThan { point, radius, resolution: None } => {
                write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
            }
+            FilterCondition::GeoLowerThan { point, radius, resolution: Some(resolution) } => {
+                write!(f, "_geoRadius({}, {}, {}, {})", point[0], point[1], radius, resolution)
+            }
            FilterCondition::GeoBoundingBox {
                top_right_point: top_left_point,
                bottom_left_point: bottom_right_point,
@@ -613,6 +672,13 @@ impl std::fmt::Display for FilterCondition<'_> {
                    bottom_right_point[1]
                )
            }
+            FilterCondition::GeoPolygon { points } => {
+                write!(f, "_geoPolygon([")?;
+                for point in points {
+                    write!(f, "[{}, {}], ", point[0], point[1])?;
+                }
+                write!(f, "])")
+            }
        }
    }
}
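Note that the `GeoPolygon` arm deliberately writes a trailing `, ` after every point, which is why the snapshots below expect strings like `_geoPolygon([[{12}, {13}], [{14}, {15}], ])`. A tiny stand-alone sketch of the same formatting loop, using plain `f64` pairs instead of `Token`s:

use std::fmt::Write;

fn format_polygon(points: &[[f64; 2]]) -> String {
    let mut out = String::new();
    write!(out, "_geoPolygon([").unwrap();
    for point in points {
        // Same trailing ", " as the Display impl above.
        write!(out, "[{}, {}], ", point[0], point[1]).unwrap();
    }
    write!(out, "])").unwrap();
    out
}

fn main() {
    let s = format_polygon(&[[12.0, 13.0], [14.0, 15.0], [16.0, 17.0]]);
    assert_eq!(s, "_geoPolygon([[12, 13], [14, 15], [16, 17], ])");
}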
@@ -776,12 +842,17 @@ pub mod tests {
        insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
        insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
        insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
+        insta::assert_snapshot!(p("_geoRadius(12,13,14,1000)"), @"_geoRadius({12}, {13}, {14}, {1000})");

        // Test geo bounding box
        insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
        insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
        insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
+
+        // Test geo polygon
+        insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [16, 17])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{16}, {17}], ])");
+        insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [-1.2,2939.2], [1,1])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{-1.2}, {2939.2}], [{1}, {1}], ])");

        // Test OR + AND
        insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
        insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
@ -838,50 +909,80 @@ pub mod tests {
|
|||||||
11:12 channel = 🐻 AND followers < 100
|
11:12 channel = 🐻 AND followers < 100
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_snapshot!(p("'OR'"), @r###"
|
insta::assert_snapshot!(p("'OR'"), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `\'OR\'`.
|
||||||
1:5 'OR'
|
1:5 'OR'
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("OR"), @r###"
|
insta::assert_snapshot!(p("OR"), @r###"
|
||||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||||
1:3 OR
|
1:3 OR
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_snapshot!(p("channel Ponce"), @r###"
|
insta::assert_snapshot!(p("channel Ponce"), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `channel Ponce`.
|
||||||
1:14 channel Ponce
|
1:14 channel Ponce
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
|
insta::assert_snapshot!(p("channel = Ponce OR"), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` but instead got nothing.
|
||||||
19:19 channel = Ponce OR
|
19:19 channel = Ponce OR
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("_geoRadius"), @r###"
|
insta::assert_snapshot!(p("_geoRadius"), @r"
|
||||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
|
||||||
1:11 _geoRadius
|
11:11 _geoRadius
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
|
insta::assert_snapshot!(p("_geoRadius = 12"), @r"
|
||||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
|
||||||
1:16 _geoRadius = 12
|
11:16 _geoRadius = 12
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
|
insta::assert_snapshot!(p("_geoBoundingBox"), @r"
|
||||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||||
1:16 _geoBoundingBox
|
16:16 _geoBoundingBox
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
|
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r"
|
||||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||||
1:21 _geoBoundingBox = 12
|
16:21 _geoBoundingBox = 12
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
|
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r"
|
||||||
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
|
||||||
1:26 _geoBoundingBox(1.0, 1.0)
|
17:26 _geoBoundingBox(1.0, 1.0)
|
||||||
"###);
|
");
|
||||||
|
|
||||||
|
+        insta::assert_snapshot!(p("_geoPolygon([1,2,3])"), @r"
+        The `_geoPolygon` filter expects at least 3 points but only 1 were specified
+        18:19 _geoPolygon([1,2,3])
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon(1,2,3)"), @r"
+        The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
+        13:19 _geoPolygon(1,2,3)
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2],[1,2,3])"), @r"
+        Was expecting 2 coordinates but instead found 3.
+        26:27 _geoPolygon([1,2],[1,2],[1,2,3])
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2,3])"), @r"
+        The `_geoPolygon` filter expects at least 3 points but only 2 were specified
+        24:25 _geoPolygon([1,2],[1,2,3])
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon(1)"), @r"
+        The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
+        13:15 _geoPolygon(1)
+        ");
+
+        insta::assert_snapshot!(p("_geoPolygon([1,2)"), @r"
+        The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
+        17:18 _geoPolygon([1,2)
+        ");
+
|
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
|
||||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||||
@ -938,15 +1039,15 @@ pub mod tests {
|
|||||||
34:35 channel = mv OR followers >= 1000)
|
34:35 channel = mv OR followers >= 1000)
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
|
insta::assert_snapshot!(p("colour NOT EXIST"), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `colour NOT EXIST`.
|
||||||
1:17 colour NOT EXIST
|
1:17 colour NOT EXIST
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
|
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `subscribers 100 TO1000`.
|
||||||
1:23 subscribers 100 TO1000
|
1:23 subscribers 100 TO1000
|
||||||
"###);
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
|
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
|
||||||
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
|
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
|
||||||
@ -1071,38 +1172,38 @@ pub mod tests {
|
|||||||
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
|
insta::assert_snapshot!(p(r#"value NULL"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NULL`.
|
||||||
1:11 value NULL
|
1:11 value NULL
|
||||||
"###);
|
");
|
||||||
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
|
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT NULL`.
|
||||||
1:15 value NOT NULL
|
1:15 value NOT NULL
|
||||||
"###);
|
");
|
||||||
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
|
insta::assert_snapshot!(p(r#"value EMPTY"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value EMPTY`.
|
||||||
1:12 value EMPTY
|
1:12 value EMPTY
|
||||||
"###);
|
");
|
||||||
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT EMPTY`.
|
||||||
1:16 value NOT EMPTY
|
1:16 value NOT EMPTY
|
||||||
"###);
|
");
|
||||||
insta::assert_snapshot!(p(r#"value IS"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS`.
|
||||||
1:9 value IS
|
1:9 value IS
|
||||||
"###);
|
");
|
||||||
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS NOT"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT`.
|
||||||
1:13 value IS NOT
|
1:13 value IS NOT
|
||||||
"###);
|
");
|
||||||
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS EXISTS`.
|
||||||
1:16 value IS EXISTS
|
1:16 value IS EXISTS
|
||||||
"###);
|
");
|
||||||
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT EXISTS`.
|
||||||
1:20 value IS NOT EXISTS
|
1:20 value IS NOT EXISTS
|
||||||
"###);
|
");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@@ -45,6 +45,7 @@ impl From<DateField> for Code {
    }
}

+#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
    #[error("{1}")]
@@ -271,9 +271,10 @@ macro_rules! json_string {

 #[cfg(test)]
 mod tests {
+    use uuid::Uuid;
+
     use crate as meili_snap;
     use crate::UUID_IN_MESSAGE_RE;
-    use uuid::Uuid;

     #[test]
     fn snap() {
@@ -5,6 +5,7 @@ use actix_web::{self as aweb, HttpResponseBuilder};
 use aweb::http::header;
 use aweb::rt::task::JoinError;
 use convert_case::Casing;
+use milli::cellulite;
 use milli::heed::{Error as HeedError, MdbError};
 use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;
@@ -239,6 +240,7 @@ InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentGeojsonField , InvalidRequest , BAD_REQUEST ;
 InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
 InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
 InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -501,7 +503,9 @@ impl ErrorCode for milli::Error {
                    Code::InvalidFacetSearchFacetName
                }
                UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
-                UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
+                UserError::InvalidGeoField { .. } | UserError::GeoJsonError(_) => {
+                    Code::InvalidDocumentGeoField
+                }
                UserError::InvalidVectorDimensions { .. }
                | UserError::InvalidIndexingVectorDimensions { .. } => {
                    Code::InvalidVectorDimensions
@@ -525,6 +529,17 @@ impl ErrorCode for milli::Error {
                | UserError::DocumentEditionCompilationError(_) => {
                    Code::EditDocumentsByFunctionError
                }
+                UserError::CelluliteError(err) => match err {
+                    cellulite::Error::BuildCanceled
+                    | cellulite::Error::VersionMismatchOnBuild(_)
+                    | cellulite::Error::DatabaseDoesntExists
+                    | cellulite::Error::Heed(_)
+                    | cellulite::Error::InvalidGeometry(_)
+                    | cellulite::Error::InternalDocIdMissing(_, _)
+                    | cellulite::Error::CannotConvertLineToCell(_, _, _) => Code::Internal,
+                    cellulite::Error::InvalidGeoJson(_) => Code::InvalidDocumentGeojsonField,
+                },
+                UserError::MalformedGeojson(_) => Code::InvalidDocumentGeojsonField,
            }
        }
    }
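For context, a `_geojson` document that fails cellulite's GeoJSON validation now maps to the new `invalid_document_geojson_field` code instead of an internal error. A hedged sketch of the error envelope a client could expect: the envelope shape and the snake_case code follow the existing `invalid_document_filter` examples in this diff, while the message text here is purely illustrative.

use serde_json::json;

fn main() {
    // Hypothetical response body; only the code/type/link shape is taken from
    // the existing error codes, the message wording is an assumption.
    let expected = json!({
        "message": "The `_geojson` field is not a valid GeoJSON object.",
        "code": "invalid_document_geojson_field",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_document_geojson_field"
    });
    println!("{}", serde_json::to_string_pretty(&expected).unwrap());
}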
@@ -1,3 +1,5 @@
+#![allow(clippy::result_large_err)]
+
 pub mod batch_view;
 pub mod batches;
 pub mod compression;
@@ -1,3 +1,4 @@
+use core::convert::Infallible;
 use std::collections::BTreeMap;
 use std::str::FromStr;

@@ -7,7 +8,6 @@ use actix_http::header::{
 };
 use actix_web::web::{self, Data, Path};
 use actix_web::{HttpRequest, HttpResponse};
-use core::convert::Infallible;
 use deserr::actix_web::AwebJson;
 use deserr::{DeserializeError, Deserr, ValuePointerRef};
 use index_scheduler::IndexScheduler;
@@ -24,12 +24,12 @@ use tracing::debug;
 use url::Url;
 use utoipa::{OpenApi, ToSchema};
 use uuid::Uuid;
+use WebhooksError::*;

 use crate::analytics::{Aggregate, Analytics};
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
-use WebhooksError::*;

 #[derive(OpenApi)]
 #[openapi(
@@ -522,6 +522,26 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared> {
        .await
}

+pub async fn shared_index_geojson_documents() -> &'static Index<'static, Shared> {
+    static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
+    INDEX
+        .get_or_init(|| async {
+            // Retrieved from https://gitlab-forge.din.developpement-durable.gouv.fr/pub/geomatique/descartes/d-map/-/blob/main/demo/examples/commons/countries.geojson?ref_type=heads
+            let server = Server::new_shared();
+            let index = server._index("SHARED_GEOJSON_DOCUMENTS").to_shared();
+            let countries = include_str!("../documents/geojson/assets/countries.json");
+            let lille = serde_json::from_str::<serde_json::Value>(countries).unwrap();
+            let (response, _code) = index._add_documents(Value(lille), Some("name")).await;
+            server.wait_task(response.uid()).await.succeeded();
+
+            let (response, _code) =
+                index._update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
+            server.wait_task(response.uid()).await.succeeded();
+            index
+        })
+        .await
+}
+
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
    static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
    let (server, uid) = INDEX
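A hedged sketch of how a test could consume this shared fixture; `shared_index_geojson_documents` and `search_get` come from the diff, while the polygon, the test name, and the assertions are illustrative only.

use crate::common::shared_index_geojson_documents;

// Hypothetical test, not part of the PR: filter the shared countries index
// with the new `_geoPolygon` syntax and check the request is accepted.
#[actix_rt::test]
async fn geo_polygon_filter_on_shared_geojson_index() {
    let index = shared_index_geojson_documents().await;
    let (response, code) =
        index.search_get("?filter=_geoPolygon([40,-10],[40,20],[60,20],[60,-10])").await;
    assert_eq!(code, 200);
    assert!(response["hits"].as_array().is_some());
}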
@@ -1,6 +1,3 @@
-use crate::common::encoder::Encoder;
-use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
-use crate::json;
 use actix_web::test;
 use meili_snap::{json_string, snapshot};
 use meilisearch::Opt;
@@ -8,6 +5,10 @@ use time::format_description::well_known::Rfc3339;
 use time::OffsetDateTime;
 use uuid::Uuid;

+use crate::common::encoder::Encoder;
+use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
+use crate::json;
+
 /// This is the basic usage of our API and every other tests uses the content-type application/json
 #[actix_rt::test]
 async fn add_documents_test_json_content_types() {
@ -134,14 +134,14 @@ async fn get_all_documents_bad_filter() {
|
|||||||
|
|
||||||
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
|
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r#"
|
||||||
{
|
{
|
||||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `doggo`.\n1:6 doggo",
|
||||||
"code": "invalid_document_filter",
|
"code": "invalid_document_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||||
}
|
}
|
||||||
"###);
|
"#);
|
||||||
|
|
||||||
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
|
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
@ -523,14 +523,14 @@ async fn delete_document_by_filter() {
|
|||||||
// send bad filter
|
// send bad filter
|
||||||
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
|
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(response, @r###"
|
snapshot!(response, @r#"
|
||||||
{
|
{
|
||||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `hello`.\n1:6 hello",
|
||||||
"code": "invalid_document_filter",
|
"code": "invalid_document_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||||
}
|
}
|
||||||
"###);
|
"#);
|
||||||
|
|
||||||
// send empty filter
|
// send empty filter
|
||||||
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
|
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
|
||||||
@ -724,14 +724,14 @@ async fn fetch_document_by_filter() {
|
|||||||
|
|
||||||
let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
|
let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(response, @r###"
|
snapshot!(response, @r#"
|
||||||
{
|
{
|
||||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `cool doggo`.\n1:11 cool doggo",
|
||||||
"code": "invalid_document_filter",
|
"code": "invalid_document_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||||
}
|
}
|
||||||
"###);
|
"#);
|
||||||
|
|
||||||
let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
|
let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
crates/meilisearch/tests/documents/geojson/assets/countries.json (new file, 180 lines): file diff suppressed because one or more lines are too long.

crates/meilisearch/tests/documents/geojson/assets/lille.geojson (new file, 547 lines):
@@ -0,0 +1,547 @@
{
|
||||||
|
"type": "Polygon",
|
||||||
|
"coordinates": [
|
||||||
|
[
|
||||||
|
[
|
||||||
|
3.11681,
|
||||||
|
50.63646
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.11945,
|
||||||
|
50.63488
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12134,
|
||||||
|
50.63504
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12064,
|
||||||
|
50.63127
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12203,
|
||||||
|
50.62785
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12389,
|
||||||
|
50.6262
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12161,
|
||||||
|
50.62358
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12547,
|
||||||
|
50.62114
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12447,
|
||||||
|
50.61874
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12288,
|
||||||
|
50.61988
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.12054,
|
||||||
|
50.61846
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.11846,
|
||||||
|
50.61754
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.11482,
|
||||||
|
50.6207
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.11232,
|
||||||
|
50.6188
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.10936,
|
||||||
|
50.61727
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.10822,
|
||||||
|
50.61765
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.10603,
|
||||||
|
50.61536
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.1041,
|
||||||
|
50.61596
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.10017,
|
||||||
|
50.6186
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.09688,
|
||||||
|
50.61714
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.09575,
|
||||||
|
50.61795
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.0891,
|
||||||
|
50.61532
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.08625,
|
||||||
|
50.61792
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.07948,
|
||||||
|
50.61428
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.07146,
|
||||||
|
50.6066
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.06819,
|
||||||
|
50.60918
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.06502,
|
||||||
|
50.61046
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.06223,
|
||||||
|
50.61223
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.05925,
|
||||||
|
50.60659
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.05463,
|
||||||
|
50.60077
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.04906,
|
||||||
|
50.6008
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.04726,
|
||||||
|
50.6035
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.04328,
|
||||||
|
50.60667
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.04155,
|
||||||
|
50.60417
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.03767,
|
||||||
|
50.60456
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.03528,
|
||||||
|
50.60538
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.03239,
|
||||||
|
50.60725
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.0254,
|
||||||
|
50.6111
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.02387,
|
||||||
|
50.6125
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.0248,
|
||||||
|
50.61344
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.02779,
|
||||||
|
50.61418
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.02414,
|
||||||
|
50.6169
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.02312,
|
||||||
|
50.61975
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.02172,
|
||||||
|
50.62082
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.01953,
|
||||||
|
50.62484
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.01811,
|
||||||
|
50.62529
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.01313,
|
||||||
|
50.62558
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.01385,
|
||||||
|
50.62695
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.00844,
|
||||||
|
50.62717
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.0056,
|
||||||
|
50.6267
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3.00229,
|
||||||
|
            50.62557
          ],
          [3.00119, 50.62723], [2.99769, 50.62901], [2.99391, 50.62732], [2.98971, 50.63036],
          [2.9862, 50.63328], [2.98178, 50.63404], [2.97917, 50.63499], [2.97284, 50.63429],
          [2.97174, 50.63365], [2.97002, 50.63366], [2.96956, 50.63506], [2.97046, 50.6365],
          [2.96878, 50.63833], [2.97039, 50.6395], [2.97275, 50.64183], [2.97225, 50.64381],
          [2.9745, 50.64442], [2.97474, 50.64648], [2.97091, 50.65108], [2.96975, 50.65361],
          [2.97061, 50.65513], [2.96929, 50.65739], [2.97072, 50.6581], [2.97973, 50.66048],
          [2.98369, 50.66123], [2.9865, 50.65959], [2.9896, 50.65845], [2.9963, 50.65666],
          [2.99903, 50.65552], [3.00274, 50.65235], [3.00714, 50.64887], [3.01088, 50.64845],
          [3.01318, 50.64541], [3.01974, 50.63972], [3.02317, 50.63813], [3.02639, 50.63613],
          [3.029, 50.63521], [3.03414, 50.6382], [3.03676, 50.63888], [3.03686, 50.64147],
          [3.03791, 50.64379], [3.0409, 50.64577], [3.04582, 50.64807], [3.05132, 50.64866],
          [3.05055, 50.64949], [3.05244, 50.65055], [3.05784, 50.64927], [3.0596, 50.65105],
          [3.06414, 50.65041], [3.06705, 50.64936], [3.07023, 50.64706], [3.07203, 50.64355],
          [3.07526, 50.64188], [3.0758, 50.64453], [3.07753, 50.64381], [3.07861, 50.64542],
          [3.08299, 50.64725], [3.08046, 50.64912], [3.08349, 50.65082], [3.08354, 50.65155],
          [3.08477, 50.65312], [3.08542, 50.65654], [3.08753, 50.65687], [3.09032, 50.65602],
          [3.09018, 50.65142], [3.09278, 50.65086], [3.09402, 50.64982], [3.09908, 50.65146],
          [3.10316, 50.65227], [3.09726, 50.64723], [3.09387, 50.64358], [3.09357, 50.64095],
          [3.09561, 50.64133], [3.09675, 50.64018], [3.09454, 50.63891], [3.09627, 50.63693],
          [3.09795, 50.63713], [3.09919, 50.63576], [3.10324, 50.6351], [3.10613, 50.63532],
          [3.10649, 50.63434], [3.1109, 50.63525], [3.11502, 50.63504], [3.11681, 50.63646]
        ]
      ]
    }
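For reference (not part of this diff): the asset above is a plain GeoJSON Polygon, so its exterior ring can be inspected with serde_json alone. A minimal sketch, with a made-up `polygon_bbox` helper, computing the (min_lng, min_lat, max_lng, max_lat) bounding box of such a ring:

use serde_json::Value;

// Bounding box of a GeoJSON Polygon's exterior ring; positions are [lng, lat] per the GeoJSON spec.
fn polygon_bbox(geometry: &Value) -> Option<(f64, f64, f64, f64)> {
    let ring = geometry.get("coordinates")?.get(0)?.as_array()?;
    let mut bbox = (f64::MAX, f64::MAX, f64::MIN, f64::MIN);
    for position in ring {
        let lng = position.get(0)?.as_f64()?;
        let lat = position.get(1)?.as_f64()?;
        bbox.0 = bbox.0.min(lng);
        bbox.1 = bbox.1.min(lat);
        bbox.2 = bbox.2.max(lng);
        bbox.3 = bbox.3.max(lat);
    }
    Some(bbox)
}

fn main() {
    // A tiny polygon in the same area as the Lille asset above.
    let geometry: Value = serde_json::from_str(
        r#"{ "type": "Polygon", "coordinates": [[[3.0, 50.6], [3.1, 50.6], [3.1, 50.7], [3.0, 50.6]]] }"#,
    )
    .unwrap();
    assert_eq!(polygon_bbox(&geometry), Some((3.0, 50.6, 3.1, 50.7)));
}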
420 crates/meilisearch/tests/documents/geojson/mod.rs Normal file
@ -0,0 +1,420 @@
use meili_snap::{json_string, snapshot};

use crate::common::{shared_index_geojson_documents, Server};
use crate::json;

const LILLE: &str = include_str!("assets/lille.geojson");

#[actix_rt::test]
async fn basic_add_settings_and_geojson_documents() {
    let server = Server::new_shared();
    let index = server.unique_index();
    let (task, _status_code) =
        index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
    server.wait_task(task.uid()).await.succeeded();

    let (response, _) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    snapshot!(response,
        @r#"
    {
      "hits": [],
      "query": "",
      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 0
    }
    "#);

    let lille: serde_json::Value = serde_json::from_str(LILLE).unwrap();
    let documents = json!([
      {
        "id": "missing",
      },
      {
        "id": "point",
        "_geojson": { "type": "Point", "coordinates": [1, 1] },
      },
      {
        "id": "lille",
        "_geojson": lille,
      },
    ]);

    let (task, _status_code) = index.add_documents(documents, None).await;
    let response = server.wait_task(task.uid()).await.succeeded();
    snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
        @r#"
    {
      "uid": "[uid]",
      "batchUid": "[batch_uid]",
      "indexUid": "[uuid]",
      "status": "succeeded",
      "type": "documentAdditionOrUpdate",
      "canceledBy": null,
      "details": {
        "receivedDocuments": 3,
        "indexedDocuments": 3
      },
      "error": null,
      "duration": "[duration]",
      "enqueuedAt": "[date]",
      "startedAt": "[date]",
      "finishedAt": "[date]"
    }
    "#);

    let (response, code) = index.get_all_documents_raw("?ids=missing,point").await;

    snapshot!(code, @"200 OK");
    snapshot!(response,
        @r#"
    {
      "results": [
        {
          "id": "missing"
        },
        {
          "id": "point",
          "_geojson": {
            "type": "Point",
            "coordinates": [
              1,
              1
            ]
          }
        }
      ],
      "offset": 0,
      "limit": 20,
      "total": 2
    }
    "#);

    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    snapshot!(response,
        @r#"
    {
      "hits": [
        {
          "id": "point",
          "_geojson": {
            "type": "Point",
            "coordinates": [
              1,
              1
            ]
          }
        }
      ],
      "query": "",
      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 1
    }
    "#);
}

#[actix_rt::test]
async fn basic_add_geojson_documents_and_settings() {
    let server = Server::new_shared();
    let index = server.unique_index();

    let lille: serde_json::Value = serde_json::from_str(LILLE).unwrap();
    let documents = json!([
      {
        "id": "missing",
      },
      {
        "id": "point",
        "_geojson": { "type": "Point", "coordinates": [1, 1] },
      },
      {
        "id": "lille",
        "_geojson": lille,
      },
    ]);

    let (task, _status_code) = index.add_documents(documents, None).await;
    let response = server.wait_task(task.uid()).await.succeeded();
    snapshot!(response,
        @r#"
    {
      "uid": "[uid]",
      "batchUid": "[batch_uid]",
      "indexUid": "[uuid]",
      "status": "succeeded",
      "type": "documentAdditionOrUpdate",
      "canceledBy": null,
      "details": {
        "receivedDocuments": 3,
        "indexedDocuments": 3
      },
      "error": null,
      "duration": "[duration]",
      "enqueuedAt": "[date]",
      "startedAt": "[date]",
      "finishedAt": "[date]"
    }
    "#);

    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    snapshot!(response,
        @r#"
    {
      "message": "Index `[uuid]`: Attribute `_geojson` is not filterable. This index does not have configured filterable attributes.\n14:15 _geoPolygon([0,0],[0,2],[2,2],[2,0])",
      "code": "invalid_search_filter",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
    }
    "#);

    let (task, _status_code) =
        index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
    server.wait_task(task.uid()).await.succeeded();
    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    snapshot!(response,
        @r#"
    {
      "hits": [
        {
          "id": "point",
          "_geojson": {
            "type": "Point",
            "coordinates": [
              1,
              1
            ]
          }
        }
      ],
      "query": "",
      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 1
    }
    "#);
}

#[actix_rt::test]
async fn add_and_remove_geojson() {
    let server = Server::new_shared();
    let index = server.unique_index();
    index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;

    let documents = json!([
      {
        "id": "missing",
      },
      {
        "id": 0,
        "_geojson": { "type": "Point", "coordinates": [1, 1] },
      }
    ]);
    let (task, _status_code) = index.add_documents(documents, None).await;
    server.wait_task(task.uid()).await.succeeded();
    let (response, _code) =
        index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);

    let (task, _) = index.delete_document(0).await;
    server.wait_task(task.uid()).await.succeeded();
    let (response, _code) =
        index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);

    // add it back
    let documents = json!([
      {
        "id": 0,
        "_geojson": { "type": "Point", "coordinates": [1, 1] },
      }
    ]);
    let (task, _status_code) = index.add_documents(documents, None).await;
    server.wait_task(task.uid()).await.succeeded();
    let (response, _code) =
        index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
}

#[actix_rt::test]
async fn partial_update_geojson() {
    let server = Server::new_shared();
    let index = server.unique_index();
    let (task, _) = index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
    server.wait_task(task.uid()).await.succeeded();

    let documents = json!([
      {
        "id": 0,
        "_geojson": { "type": "Point", "coordinates": [1, 1] },
      }
    ]);
    let (task, _status_code) = index.add_documents(documents, None).await;
    server.wait_task(task.uid()).await.succeeded();
    let (response, _code) =
        index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);

    let documents = json!([
      {
        "id": 0,
        "_geojson": { "type": "Point", "coordinates": [0.5, 0.5] },
      }
    ]);
    let (task, _status_code) = index.update_documents(documents, None).await;
    server.wait_task(task.uid()).await.succeeded();
    let (response, _code) =
        index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
    let (response, _code) =
        index.search_get("?filter=_geoPolygon([0.9,0.9],[0.9,2],[2,2],[2,0.9])").await;
    assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
}

#[actix_rt::test]
async fn geo_bounding_box() {
    let index = shared_index_geojson_documents().await;

    // The bounding box is a polygon over middle Europe
    let (response, code) =
        index.search_get("?filter=_geoBoundingBox([50.53987503447863,21.43443989912143],[43.76393151539099,0.54979129195425])&attributesToRetrieve=name").await;
    snapshot!(code, @"200 OK");
    snapshot!(response, @r#"
    {
      "hits": [
        {
          "name": "Austria"
        },
        {
          "name": "Belgium"
        },
        {
          "name": "Bosnia_and_Herzegovina"
        },
        {
          "name": "Switzerland"
        },
        {
          "name": "Czech_Republic"
        },
        {
          "name": "Germany"
        },
        {
          "name": "France"
        },
        {
          "name": "Croatia"
        },
        {
          "name": "Hungary"
        },
        {
          "name": "Italy"
        },
        {
          "name": "Luxembourg"
        },
        {
          "name": "Netherlands"
        },
        {
          "name": "Poland"
        },
        {
          "name": "Romania"
        },
        {
          "name": "Republic_of_Serbia"
        },
        {
          "name": "Slovakia"
        },
        {
          "name": "Slovenia"
        }
      ],
      "query": "",
      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 17
    }
    "#);

    // Between Russia and Alaska
    let (response, code) = index
        .search_get("?filter=_geoBoundingBox([70,-148],[63,152])&attributesToRetrieve=name")
        .await;
    snapshot!(code, @"200 OK");
    snapshot!(response, @r#"
    {
      "hits": [
        {
          "name": "Canada"
        },
        {
          "name": "Russia"
        },
        {
          "name": "United_States_of_America"
        }
      ],
      "query": "",
      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 3
    }
    "#);
}

#[actix_rt::test]
async fn geo_radius() {
    let index = shared_index_geojson_documents().await;

    // 200km around Luxembourg
    let (response, code) = index
        .search_get("?filter=_geoRadius(49.4369862,6.5576591,200000)&attributesToRetrieve=name")
        .await;
    snapshot!(code, @"200 OK");
    snapshot!(response, @r#"
    {
      "hits": [
        {
          "name": "Belgium"
        },
        {
          "name": "Germany"
        },
        {
          "name": "France"
        },
        {
          "name": "Luxembourg"
        },
        {
          "name": "Netherlands"
        }
      ],
      "query": "",
      "processingTimeMs": "[duration]",
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 5
    }
    "#);
}
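The tests above always pass `_geoPolygon` vertices as `[lat, lng]` pairs, in the same order as `_geoRadius` and `_geoBoundingBox`. As a side note, here is a minimal sketch of a helper (our own, not part of this PR) that builds such a filter string:

// Build a `_geoPolygon(...)` filter string from (lat, lng) vertices.
fn geo_polygon_filter(vertices: &[(f64, f64)]) -> String {
    let points: Vec<String> =
        vertices.iter().map(|(lat, lng)| format!("[{lat},{lng}]")).collect();
    format!("_geoPolygon({})", points.join(","))
}

fn main() {
    // The square used by the tests above.
    let filter = geo_polygon_filter(&[(0.0, 0.0), (0.0, 2.0), (2.0, 2.0), (2.0, 0.0)]);
    assert_eq!(filter, "_geoPolygon([0,0],[0,2],[2,2],[2,0])");
}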
@ -1,5 +1,6 @@
mod add_documents;
mod delete_documents;
mod errors;
mod geojson;
mod get_documents;
mod update_documents;
@ -1,5 +1,4 @@
use meili_snap::snapshot;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

@ -642,14 +642,14 @@ async fn filter_invalid_syntax_object() {
        &json!({"filterableAttributes": ["title"]}),
        &json!({"filter": "title & Glass"}),
        |response, code| {
            snapshot!(response, @r###"
            snapshot!(response, @r#"
            {
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
            "#);
            snapshot!(code, @"400 Bad Request");
        },
    )
@ -663,14 +663,14 @@ async fn filter_invalid_syntax_array() {
        &json!({"filterableAttributes": ["title"]}),
        &json!({"filter": ["title & Glass"]}),
        |response, code| {
            snapshot!(response, @r###"
            snapshot!(response, @r#"
            {
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
            "#);
            snapshot!(code, @"400 Bad Request");
        },
    )
@ -2,8 +2,7 @@ use std::sync::Arc;

use actix_http::StatusCode;
use meili_snap::{json_string, snapshot};
use wiremock::matchers::method;
use wiremock::matchers::{method, path, AnyMatcher};
use wiremock::matchers::{path, AnyMatcher};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};

use crate::common::{Server, Value, SCORE_DOCUMENTS};
@ -1,7 +1,8 @@
use meili_snap::{json_string, snapshot};

use super::shared_index_with_documents;
use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};

#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {
@ -1,6 +1,7 @@
use meili_snap::{json_string, snapshot};

use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};

#[actix_rt::test]
async fn set_reset_chat_issue_5772() {
@ -339,14 +339,14 @@ async fn filter_invalid_syntax_object() {

    index
        .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
            snapshot!(response, @r###"
            snapshot!(response, @r#"
            {
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
              "code": "invalid_similar_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
            }
            "###);
            "#);
            snapshot!(code, @"400 Bad Request");
        })
        .await;
@ -377,14 +377,14 @@ async fn filter_invalid_syntax_array() {

    index
        .similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
            snapshot!(response, @r###"
            snapshot!(response, @r#"
            {
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
              "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
              "code": "invalid_similar_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
            }
            "###);
            "#);
            snapshot!(code, @"400 Bad Request");
        })
        .await;
@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]

use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::{BufWriter, Write as _};
use std::path::PathBuf;
@ -19,6 +19,7 @@ bstr = "1.12.0"
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.7", default-features = false }
cellulite = "0.3.0"
concat-arrays = "0.1.2"
convert_case = "0.8.0"
crossbeam-channel = "0.5.15"
@ -27,6 +28,7 @@ either = { version = "1.15.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geojson = "0.24.2"
geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = [
    "rayon",
@ -96,7 +98,7 @@ url = "2.5.4"
hashbrown = "0.15.4"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4.0", default-features = false }
steppe = { version = "0.4", default-features = false }
thread_local = "1.1.9"
allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"
@ -116,6 +118,8 @@ twox-hash = { version = "2.1.1", default-features = false, features = [
    "xxhash3_64",
    "xxhash64",
] }
geo-types = "0.7.16"
zerometry = "0.1.0"

[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }
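The new `geo-types` dependency is what the filter code further down uses to hand shapes to cellulite; note the axis swap, since geo-types takes x = longitude and y = latitude while Meilisearch filters take `[lat, lng]`. A minimal sketch under that assumption (the helper name is ours):

use geo_types::{Coord, LineString, Polygon};

// Turn (lat, lng) vertices into a geo-types polygon (x = lng, y = lat).
fn polygon_from_lat_lng(vertices: &[(f64, f64)]) -> Polygon<f64> {
    let coords: Vec<Coord<f64>> =
        vertices.iter().map(|&(lat, lng)| Coord { x: lng, y: lat }).collect();
    Polygon::new(LineString(coords), Vec::new())
}

fn main() {
    let polygon = polygon_from_lat_lng(&[(0.0, 0.0), (0.0, 2.0), (2.0, 2.0), (2.0, 0.0)]);
    // geo-types closes open rings, so the first and last coordinates now match.
    assert_eq!(polygon.exterior().0.first(), polygon.exterior().0.last());
}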
@ -11,3 +11,4 @@ const fn parse_u32(s: &str) -> u32 {

pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
pub const RESERVED_GEO_FIELD_NAME: &str = "_geo";
pub const RESERVED_GEOJSON_FIELD_NAME: &str = "_geojson";
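Purely illustrative (not from the diff): a document payload using the new reserved field next to the legacy one. GeoJSON positions are `[lng, lat]`, whereas `_geo` uses named `lat`/`lng` keys; whether both fields are used together is up to the index settings.

use serde_json::json;

fn main() {
    // Hypothetical document carrying both geo representations.
    let document = json!({
        "id": 1,
        "_geo": { "lat": 50.6365654, "lng": 3.0635282 },
        "_geojson": { "type": "Point", "coordinates": [3.0635282, 50.6365654] }
    });
    println!("{document}");
}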
@ -48,6 +48,7 @@ pub enum PrimaryKey<'a> {
    Nested { name: &'a str },
}

#[allow(clippy::large_enum_variant)]
pub enum DocumentIdExtractionError {
    InvalidDocumentId(UserError),
    MissingDocumentId,
@ -10,17 +10,26 @@ use rhai::EvalAltResult;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
|
||||||
use crate::documents::{self, DocumentsBatchCursorError};
|
use crate::documents::{self, DocumentsBatchCursorError};
|
||||||
use crate::thread_pool_no_abort::PanicCatched;
|
use crate::thread_pool_no_abort::PanicCatched;
|
||||||
use crate::vector::settings::EmbeddingSettings;
|
use crate::vector::settings::EmbeddingSettings;
|
||||||
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
|
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
|
||||||
|
|
||||||
pub fn is_reserved_keyword(keyword: &str) -> bool {
|
pub fn is_reserved_keyword(keyword: &str) -> bool {
|
||||||
[RESERVED_GEO_FIELD_NAME, "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"]
|
[
|
||||||
.contains(&keyword)
|
RESERVED_GEO_FIELD_NAME,
|
||||||
|
RESERVED_GEOJSON_FIELD_NAME,
|
||||||
|
"_geoDistance",
|
||||||
|
"_geoPoint",
|
||||||
|
"_geoRadius",
|
||||||
|
"_geoBoundingBox",
|
||||||
|
"_geoPolygon",
|
||||||
|
]
|
||||||
|
.contains(&keyword)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::large_enum_variant)]
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("internal: {0}.")]
|
#[error("internal: {0}.")]
|
||||||
@ -80,6 +89,8 @@ pub enum InternalError {
|
|||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
HannoyError(#[from] hannoy::Error),
|
HannoyError(#[from] hannoy::Error),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
|
CelluliteError(#[from] cellulite::Error),
|
||||||
|
#[error(transparent)]
|
||||||
VectorEmbeddingError(#[from] crate::vector::Error),
|
VectorEmbeddingError(#[from] crate::vector::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -99,6 +110,12 @@ pub enum SerializationError {
|
|||||||
InvalidNumberSerialization,
|
InvalidNumberSerialization,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<cellulite::Error> for Error {
|
||||||
|
fn from(error: cellulite::Error) -> Self {
|
||||||
|
Self::UserError(UserError::CelluliteError(error))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum FieldIdMapMissingEntry {
|
pub enum FieldIdMapMissingEntry {
|
||||||
#[error("unknown field id {field_id} coming from the {process} process")]
|
#[error("unknown field id {field_id} coming from the {process} process")]
|
||||||
@ -107,8 +124,13 @@ pub enum FieldIdMapMissingEntry {
|
|||||||
FieldName { field_name: String, process: &'static str },
|
FieldName { field_name: String, process: &'static str },
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::large_enum_variant)]
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum UserError {
|
pub enum UserError {
|
||||||
|
#[error(transparent)]
|
||||||
|
CelluliteError(#[from] cellulite::Error),
|
||||||
|
#[error("Malformed geojson: {0}")]
|
||||||
|
MalformedGeojson(serde_json::Error),
|
||||||
#[error("A document cannot contain more than 65,535 fields.")]
|
#[error("A document cannot contain more than 65,535 fields.")]
|
||||||
AttributeLimitReached,
|
AttributeLimitReached,
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
@ -153,6 +175,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
|
|||||||
},
|
},
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
InvalidGeoField(#[from] Box<GeoError>),
|
InvalidGeoField(#[from] Box<GeoError>),
|
||||||
|
#[error(transparent)]
|
||||||
|
GeoJsonError(#[from] geojson::Error),
|
||||||
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
|
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
|
||||||
InvalidVectorDimensions { expected: usize, found: usize },
|
InvalidVectorDimensions { expected: usize, found: usize },
|
||||||
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
|
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
|
||||||
|
@ -6,7 +6,9 @@ use heed::RoTxn;
|
|||||||
|
|
||||||
use super::FieldsIdsMap;
|
use super::FieldsIdsMap;
|
||||||
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
|
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
|
||||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
use crate::constants::{
|
||||||
|
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
|
||||||
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
|
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
|
||||||
LocalizedAttributesRule, Result, Weight,
|
LocalizedAttributesRule, Result, Weight,
|
||||||
@ -24,6 +26,8 @@ pub struct Metadata {
|
|||||||
pub asc_desc: bool,
|
pub asc_desc: bool,
|
||||||
/// The field is a geo field (`_geo`, `_geo.lat`, `_geo.lng`).
|
/// The field is a geo field (`_geo`, `_geo.lat`, `_geo.lng`).
|
||||||
pub geo: bool,
|
pub geo: bool,
|
||||||
|
/// The field is a geo json field (`_geojson`).
|
||||||
|
pub geo_json: bool,
|
||||||
/// The id of the localized attributes rule if the field is localized.
|
/// The id of the localized attributes rule if the field is localized.
|
||||||
pub localized_attributes_rule_id: Option<NonZeroU16>,
|
pub localized_attributes_rule_id: Option<NonZeroU16>,
|
||||||
/// The id of the filterable attributes rule if the field is filterable.
|
/// The id of the filterable attributes rule if the field is filterable.
|
||||||
@ -269,6 +273,7 @@ impl MetadataBuilder {
|
|||||||
distinct: false,
|
distinct: false,
|
||||||
asc_desc: false,
|
asc_desc: false,
|
||||||
geo: false,
|
geo: false,
|
||||||
|
geo_json: false,
|
||||||
localized_attributes_rule_id: None,
|
localized_attributes_rule_id: None,
|
||||||
filterable_attributes_rule_id: None,
|
filterable_attributes_rule_id: None,
|
||||||
};
|
};
|
||||||
@ -295,6 +300,20 @@ impl MetadataBuilder {
|
|||||||
distinct: false,
|
distinct: false,
|
||||||
asc_desc: false,
|
asc_desc: false,
|
||||||
geo: true,
|
geo: true,
|
||||||
|
geo_json: false,
|
||||||
|
localized_attributes_rule_id: None,
|
||||||
|
filterable_attributes_rule_id,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if match_field_legacy(RESERVED_GEOJSON_FIELD_NAME, field) == PatternMatch::Match {
|
||||||
|
debug_assert!(!sortable, "geojson fields should not be sortable");
|
||||||
|
return Metadata {
|
||||||
|
searchable: None,
|
||||||
|
sortable,
|
||||||
|
distinct: false,
|
||||||
|
asc_desc: false,
|
||||||
|
geo: false,
|
||||||
|
geo_json: true,
|
||||||
localized_attributes_rule_id: None,
|
localized_attributes_rule_id: None,
|
||||||
filterable_attributes_rule_id,
|
filterable_attributes_rule_id,
|
||||||
};
|
};
|
||||||
@ -328,6 +347,7 @@ impl MetadataBuilder {
|
|||||||
distinct,
|
distinct,
|
||||||
asc_desc,
|
asc_desc,
|
||||||
geo: false,
|
geo: false,
|
||||||
|
geo_json: false,
|
||||||
localized_attributes_rule_id,
|
localized_attributes_rule_id,
|
||||||
filterable_attributes_rule_id,
|
filterable_attributes_rule_id,
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
use crate::attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch};
|
use crate::attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch};
|
||||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
|
||||||
use crate::AttributePatterns;
|
use crate::AttributePatterns;
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
|
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
|
||||||
@ -34,6 +34,10 @@ impl FilterableAttributesRule {
|
|||||||
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
|
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn has_geojson(&self) -> bool {
|
||||||
|
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEOJSON_FIELD_NAME)
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the features of the rule.
|
/// Get the features of the rule.
|
||||||
pub fn features(&self) -> FilterableAttributesFeatures {
|
pub fn features(&self) -> FilterableAttributesFeatures {
|
||||||
match self {
|
match self {
|
||||||
|
@ -5,6 +5,7 @@ use std::fmt;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
|
use cellulite::Cellulite;
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use heed::types::*;
|
use heed::types::*;
|
||||||
use heed::{CompactionOption, Database, DatabaseStat, RoTxn, RwTxn, Unspecified, WithoutTls};
|
use heed::{CompactionOption, Database, DatabaseStat, RoTxn, RwTxn, Unspecified, WithoutTls};
|
||||||
@ -115,9 +116,10 @@ pub mod db_name {
|
|||||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||||
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
|
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
|
||||||
pub const VECTOR_STORE: &str = "vector-arroy";
|
pub const VECTOR_STORE: &str = "vector-arroy";
|
||||||
|
pub const CELLULITE: &str = "cellulite";
|
||||||
pub const DOCUMENTS: &str = "documents";
|
pub const DOCUMENTS: &str = "documents";
|
||||||
}
|
}
|
||||||
const NUMBER_OF_DBS: u32 = 25;
|
const NUMBER_OF_DBS: u32 = 25 + Cellulite::nb_dbs();
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
@ -183,6 +185,9 @@ pub struct Index {
|
|||||||
/// Vector store based on hannoy™.
|
/// Vector store based on hannoy™.
|
||||||
pub vector_store: hannoy::Database<Unspecified>,
|
pub vector_store: hannoy::Database<Unspecified>,
|
||||||
|
|
||||||
|
/// Geo store based on cellulite™.
|
||||||
|
pub cellulite: Cellulite,
|
||||||
|
|
||||||
/// Maps the document id to the document as an obkv store.
|
/// Maps the document id to the document as an obkv store.
|
||||||
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
||||||
}
|
}
|
||||||
@ -239,6 +244,7 @@ impl Index {
|
|||||||
let embedder_category_id =
|
let embedder_category_id =
|
||||||
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
|
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
|
||||||
let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;
|
let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;
|
||||||
|
let cellulite = cellulite::Cellulite::create_from_env(&env, &mut wtxn, CELLULITE)?;
|
||||||
|
|
||||||
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
|
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
|
||||||
|
|
||||||
@ -267,6 +273,7 @@ impl Index {
|
|||||||
field_id_docid_facet_strings,
|
field_id_docid_facet_strings,
|
||||||
vector_store,
|
vector_store,
|
||||||
embedder_category_id,
|
embedder_category_id,
|
||||||
|
cellulite,
|
||||||
documents,
|
documents,
|
||||||
};
|
};
|
||||||
if this.get_version(&wtxn)?.is_none() && creation {
|
if this.get_version(&wtxn)?.is_none() && creation {
|
||||||
@ -1052,6 +1059,13 @@ impl Index {
|
|||||||
Ok(geo_filter)
|
Ok(geo_filter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the geojson filtering feature is enabled.
|
||||||
|
pub fn is_geojson_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
|
||||||
|
let geojson_filter =
|
||||||
|
self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geojson());
|
||||||
|
Ok(geojson_filter)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
|
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
|
||||||
let asc_desc_fields = self
|
let asc_desc_fields = self
|
||||||
.criteria(rtxn)?
|
.criteria(rtxn)?
|
||||||
@ -1882,6 +1896,7 @@ impl Index {
|
|||||||
field_id_docid_facet_strings,
|
field_id_docid_facet_strings,
|
||||||
vector_store,
|
vector_store,
|
||||||
embedder_category_id,
|
embedder_category_id,
|
||||||
|
cellulite,
|
||||||
documents,
|
documents,
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
@ -1955,6 +1970,17 @@ impl Index {
|
|||||||
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
|
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
|
||||||
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
|
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
|
||||||
|
|
||||||
|
// Cellulite
|
||||||
|
const _CELLULITE_DB_CHECK: () = {
|
||||||
|
if Cellulite::nb_dbs() != 4 {
|
||||||
|
panic!("Cellulite database count has changed, please update the code accordingly.")
|
||||||
|
}
|
||||||
|
};
|
||||||
|
sizes.insert("cellulite_item", cellulite.item_db_stats(rtxn).map(compute_size)?);
|
||||||
|
sizes.insert("cellulite_cell", cellulite.cell_db_stats(rtxn).map(compute_size)?);
|
||||||
|
sizes.insert("cellulite_update", cellulite.update_db_stats(rtxn).map(compute_size)?);
|
||||||
|
sizes.insert("cellulite_metadata", cellulite.metadata_db_stats(rtxn).map(compute_size)?);
|
||||||
|
|
||||||
Ok(sizes)
|
Ok(sizes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,7 @@ pub use search::new::{
|
|||||||
};
|
};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||||
pub use {arroy, charabia as tokenizer, hannoy, heed, rhai};
|
pub use {arroy, cellulite, charabia as tokenizer, hannoy, heed, rhai};
|
||||||
|
|
||||||
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
||||||
pub use self::attribute_patterns::{AttributePatterns, PatternMatch};
|
pub use self::attribute_patterns::{AttributePatterns, PatternMatch};
|
||||||
@ -87,7 +87,7 @@ pub use self::search::{
|
|||||||
};
|
};
|
||||||
pub use self::update::ChannelCongestion;
|
pub use self::update::ChannelCongestion;
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
pub type Result<T, E = error::Error> = std::result::Result<T, E>;
|
||||||
|
|
||||||
pub type Attribute = u32;
|
pub type Attribute = u32;
|
||||||
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
|
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
|
||||||
|
@ -278,30 +278,6 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Integration with steppe
|
|
||||||
|
|
||||||
impl steppe::Progress for Progress {
|
|
||||||
fn update(&self, sub_progress: impl steppe::Step) {
|
|
||||||
self.update_progress(Compat(sub_progress));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Compat<T: steppe::Step>(T);
|
|
||||||
|
|
||||||
impl<T: steppe::Step> Step for Compat<T> {
|
|
||||||
fn name(&self) -> Cow<'static, str> {
|
|
||||||
self.0.name()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn current(&self) -> u32 {
|
|
||||||
self.0.current().try_into().unwrap_or(u32::MAX)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn total(&self) -> u32 {
|
|
||||||
self.0.total().try_into().unwrap_or(u32::MAX)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Step for arroy::MainStep {
|
impl Step for arroy::MainStep {
|
||||||
fn name(&self) -> Cow<'static, str> {
|
fn name(&self) -> Cow<'static, str> {
|
||||||
match self {
|
match self {
|
||||||
@ -343,3 +319,27 @@ impl Step for arroy::SubStep {
|
|||||||
self.max
|
self.max
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Integration with steppe
|
||||||
|
|
||||||
|
impl steppe::Progress for Progress {
|
||||||
|
fn update(&self, sub_progress: impl steppe::Step) {
|
||||||
|
self.update_progress(Compat(sub_progress));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Compat<T: steppe::Step>(T);
|
||||||
|
|
||||||
|
impl<T: steppe::Step> Step for Compat<T> {
|
||||||
|
fn name(&self) -> Cow<'static, str> {
|
||||||
|
self.0.name()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current(&self) -> u32 {
|
||||||
|
self.0.current().try_into().unwrap_or(u32::MAX)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn total(&self) -> u32 {
|
||||||
|
self.0.total().try_into().unwrap_or(u32::MAX)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
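In the filter changes below, `_geoRadius` and `_geoBoundingBox` now collect candidates from two optional backends, the `_geo` r-tree and the `_geojson` cellulite store, and union them. A standalone sketch of that merge, using the roaring crate; the function name is ours:

use roaring::RoaringBitmap;

// Union the candidates of the two geo backends; `None` means the backend is not enabled.
// When both are `None`, the real code reports an "attribute not filterable" error instead.
fn merge_geo_candidates(
    r1: Option<RoaringBitmap>,
    r2: Option<RoaringBitmap>,
) -> Option<RoaringBitmap> {
    match (r1, r2) {
        (Some(r1), Some(r2)) => Some(r1 | r2),
        (Some(r1), None) => Some(r1),
        (None, Some(r2)) => Some(r2),
        (None, None) => None,
    }
}

fn main() {
    let from_geo = RoaringBitmap::from_iter([1u32, 2, 3]);
    let from_geojson = RoaringBitmap::from_iter([3u32, 4]);
    let merged = merge_geo_candidates(Some(from_geo), Some(from_geojson)).unwrap();
    assert_eq!(merged.len(), 4);
}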
@ -12,7 +12,9 @@ use roaring::{MultiOps, RoaringBitmap};
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::facet_range_search;
|
use super::facet_range_search;
|
||||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
use crate::constants::{
|
||||||
|
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
|
||||||
|
};
|
||||||
use crate::error::{Error, UserError};
|
use crate::error::{Error, UserError};
|
||||||
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
@ -36,6 +38,7 @@ pub struct Filter<'a> {
|
|||||||
pub enum BadGeoError {
|
pub enum BadGeoError {
|
||||||
Lat(f64),
|
Lat(f64),
|
||||||
Lng(f64),
|
Lng(f64),
|
||||||
|
InvalidResolution(usize),
|
||||||
BoundingBoxTopIsBelowBottom(f64, f64),
|
BoundingBoxTopIsBelowBottom(f64, f64),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -47,16 +50,23 @@ impl Display for BadGeoError {
|
|||||||
Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
|
Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
|
||||||
write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
|
write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
|
||||||
}
|
}
|
||||||
|
Self::InvalidResolution(resolution) => write!(
|
||||||
|
f,
|
||||||
|
"Invalid resolution `{resolution}`. Resolution must be between 3 and 1000."
|
||||||
|
),
|
||||||
Self::Lat(lat) => write!(
|
Self::Lat(lat) => write!(
|
||||||
f,
|
f,
|
||||||
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ",
|
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees.",
|
||||||
lat
|
lat
|
||||||
),
|
),
|
||||||
Self::Lng(lng) => write!(
|
Self::Lng(lng) => {
|
||||||
f,
|
let normalized = (lng + 180.0).rem_euclid(360.0) - 180.0;
|
||||||
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ",
|
write!(
|
||||||
lng
|
f,
|
||||||
),
|
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. Hint: try using `{normalized}` instead.",
|
||||||
|
lng
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -612,50 +622,61 @@ impl<'a> Filter<'a> {
|
|||||||
.union(),
|
.union(),
|
||||||
FilterCondition::And(subfilters) => {
|
FilterCondition::And(subfilters) => {
|
||||||
let mut subfilters_iter = subfilters.iter();
|
let mut subfilters_iter = subfilters.iter();
|
||||||
if let Some(first_subfilter) = subfilters_iter.next() {
|
let Some(first_subfilter) = subfilters_iter.next() else {
|
||||||
let mut bitmap = Self::inner_evaluate(
|
return Ok(RoaringBitmap::new());
|
||||||
&(first_subfilter.clone()).into(),
|
};
|
||||||
|
|
||||||
|
let mut bitmap = Self::inner_evaluate(
|
||||||
|
&(first_subfilter.clone()).into(),
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
field_ids_map,
|
||||||
|
filterable_attribute_rules,
|
||||||
|
universe,
|
||||||
|
)?;
|
||||||
|
for f in subfilters_iter {
|
||||||
|
if bitmap.is_empty() {
|
||||||
|
return Ok(bitmap);
|
||||||
|
}
|
||||||
|
// TODO We are doing the intersections two times,
|
||||||
|
// it could be more efficient
|
||||||
|
// Can't I just replace this `&=` by an `=`?
|
||||||
|
bitmap &= Self::inner_evaluate(
|
||||||
|
&(f.clone()).into(),
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
field_ids_map,
|
field_ids_map,
|
||||||
filterable_attribute_rules,
|
filterable_attribute_rules,
|
||||||
universe,
|
Some(&bitmap),
|
||||||
)?;
|
)?;
|
||||||
for f in subfilters_iter {
|
|
||||||
if bitmap.is_empty() {
|
|
||||||
return Ok(bitmap);
|
|
||||||
}
|
|
||||||
// TODO We are doing the intersections two times,
|
|
||||||
// it could be more efficient
|
|
||||||
// Can't I just replace this `&=` by an `=`?
|
|
||||||
bitmap &= Self::inner_evaluate(
|
|
||||||
&(f.clone()).into(),
|
|
||||||
rtxn,
|
|
||||||
index,
|
|
||||||
field_ids_map,
|
|
||||||
filterable_attribute_rules,
|
|
||||||
Some(&bitmap),
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
Ok(bitmap)
|
|
||||||
} else {
|
|
||||||
Ok(RoaringBitmap::new())
|
|
||||||
}
|
}
|
||||||
|
Ok(bitmap)
|
||||||
}
|
}
|
||||||
FilterCondition::VectorExists { fid: _, embedder, filter } => {
|
FilterCondition::VectorExists { fid: _, embedder, filter } => {
|
||||||
super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
|
super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
|
||||||
}
|
}
|
||||||
FilterCondition::GeoLowerThan { point, radius } => {
|
FilterCondition::GeoLowerThan { point, radius, resolution: res_token } => {
|
||||||
|
let base_point: [f64; 2] =
|
||||||
|
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
|
||||||
|
if !(-90.0..=90.0).contains(&base_point[0]) {
|
||||||
|
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
|
||||||
|
}
|
||||||
|
if !(-180.0..=180.0).contains(&base_point[1]) {
|
||||||
|
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
|
||||||
|
}
|
||||||
|
let radius = radius.parse_finite_float()?;
|
||||||
|
let mut resolution = 125;
|
||||||
|
if let Some(res_token) = res_token {
|
||||||
|
resolution = res_token.parse_finite_float()? as usize;
|
||||||
|
if !(3..=1000).contains(&resolution) {
|
||||||
|
return Err(
|
||||||
|
res_token.as_external_error(BadGeoError::InvalidResolution(resolution))
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut r1 = None;
|
||||||
if index.is_geo_filtering_enabled(rtxn)? {
|
if index.is_geo_filtering_enabled(rtxn)? {
|
||||||
let base_point: [f64; 2] =
|
|
||||||
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
|
|
||||||
if !(-90.0..=90.0).contains(&base_point[0]) {
|
|
||||||
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
|
|
||||||
}
|
|
||||||
if !(-180.0..=180.0).contains(&base_point[1]) {
|
|
||||||
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
|
|
||||||
}
|
|
||||||
let radius = radius.parse_finite_float()?;
|
|
||||||
let rtree = match index.geo_rtree(rtxn)? {
|
let rtree = match index.geo_rtree(rtxn)? {
|
||||||
Some(rtree) => rtree,
|
Some(rtree) => rtree,
|
||||||
None => return Ok(RoaringBitmap::new()),
|
None => return Ok(RoaringBitmap::new()),
|
||||||
@ -671,52 +692,72 @@ impl<'a> Filter<'a> {
|
|||||||
})
|
})
|
||||||
.map(|point| point.data.0)
|
.map(|point| point.data.0)
|
||||||
.collect();
|
.collect();
|
||||||
|
r1 = Some(result);
|
||||||
|
}
|
||||||
|
|
||||||
Ok(result)
|
let mut r2 = None;
|
||||||
} else {
|
if index.is_geojson_filtering_enabled(rtxn)? {
|
||||||
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
|
let point = geo_types::Point::new(base_point[1], base_point[0]);
|
||||||
attribute: RESERVED_GEO_FIELD_NAME,
|
|
||||||
filterable_patterns: filtered_matching_patterns(
|
let result = index.cellulite.in_circle(rtxn, point, radius, resolution)?;
|
||||||
filterable_attribute_rules,
|
|
||||||
&|features| features.is_filterable(),
|
r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch
|
||||||
),
|
}
|
||||||
}))?
|
|
||||||
|
match (r1, r2) {
|
||||||
|
(Some(r1), Some(r2)) => Ok(r1 | r2),
|
||||||
|
(Some(r1), None) => Ok(r1),
|
||||||
|
(None, Some(r2)) => Ok(r2),
|
||||||
|
(None, None) => {
|
||||||
|
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
|
||||||
|
attribute: &format!(
|
||||||
|
"{RESERVED_GEO_FIELD_NAME}/{RESERVED_GEOJSON_FIELD_NAME}"
|
||||||
|
),
|
||||||
|
filterable_patterns: filtered_matching_patterns(
|
||||||
|
filterable_attribute_rules,
|
||||||
|
&|features| features.is_filterable(),
|
||||||
|
),
|
||||||
|
}))?
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
|
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
|
||||||
if index.is_geo_filtering_enabled(rtxn)? {
|
let top_right: [f64; 2] = [
|
||||||
let top_right: [f64; 2] = [
|
top_right_point[0].parse_finite_float()?,
|
||||||
top_right_point[0].parse_finite_float()?,
|
top_right_point[1].parse_finite_float()?,
|
||||||
top_right_point[1].parse_finite_float()?,
|
];
|
||||||
];
|
let bottom_left: [f64; 2] = [
|
||||||
let bottom_left: [f64; 2] = [
|
bottom_left_point[0].parse_finite_float()?,
|
||||||
bottom_left_point[0].parse_finite_float()?,
|
bottom_left_point[1].parse_finite_float()?,
|
||||||
bottom_left_point[1].parse_finite_float()?,
|
];
|
||||||
];
|
if !(-90.0..=90.0).contains(&top_right[0]) {
|
||||||
if !(-90.0..=90.0).contains(&top_right[0]) {
|
return Err(
|
||||||
return Err(
|
top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
|
||||||
top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
|
)?;
|
||||||
)?;
|
}
|
||||||
}
|
if !(-180.0..=180.0).contains(&top_right[1]) {
|
||||||
if !(-180.0..=180.0).contains(&top_right[1]) {
|
return Err(
|
||||||
return Err(
|
top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
|
||||||
top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
|
)?;
|
||||||
)?;
|
}
|
||||||
}
|
if !(-90.0..=90.0).contains(&bottom_left[0]) {
|
||||||
if !(-90.0..=90.0).contains(&bottom_left[0]) {
|
return Err(
|
||||||
return Err(bottom_left_point[0]
|
bottom_left_point[0].as_external_error(BadGeoError::Lat(bottom_left[0]))
|
||||||
.as_external_error(BadGeoError::Lat(bottom_left[0])))?;
|
)?;
|
||||||
}
|
}
|
||||||
if !(-180.0..=180.0).contains(&bottom_left[1]) {
|
if !(-180.0..=180.0).contains(&bottom_left[1]) {
|
||||||
return Err(bottom_left_point[1]
|
return Err(
|
||||||
.as_external_error(BadGeoError::Lng(bottom_left[1])))?;
|
bottom_left_point[1].as_external_error(BadGeoError::Lng(bottom_left[1]))
|
||||||
}
|
)?;
|
||||||
if top_right[0] < bottom_left[0] {
|
}
|
||||||
return Err(bottom_left_point[1].as_external_error(
|
if top_right[0] < bottom_left[0] {
|
||||||
BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
|
return Err(bottom_left_point[1].as_external_error(
|
||||||
))?;
|
BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
|
||||||
}
|
))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut r1 = None;
|
||||||
|
if index.is_geo_filtering_enabled(rtxn)? {
|
||||||
// Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range
|
// Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range
|
||||||
// filter to create the following filter;
|
// filter to create the following filter;
|
||||||
// `_geo.lat {top_right[0]} TO {bottom_left[0]} AND _geo.lng {top_right[1]} TO {bottom_left[1]}`
|
// `_geo.lat {top_right[0]} TO {bottom_left[0]} AND _geo.lng {top_right[1]} TO {bottom_left[1]}`
|
||||||
@ -811,19 +852,76 @@ impl<'a> Filter<'a> {
                        )?
                    };

                    r1 = Some(selected_lat & selected_lng);
                }

                let mut r2 = None;
                if index.is_geojson_filtering_enabled(rtxn)? {
                    let polygon = geo_types::Polygon::new(
                        geo_types::LineString(vec![
                            geo_types::Coord { x: top_right[1], y: top_right[0] },
                            geo_types::Coord { x: bottom_left[1], y: top_right[0] },
                            geo_types::Coord { x: bottom_left[1], y: bottom_left[0] },
                            geo_types::Coord { x: top_right[1], y: bottom_left[0] },
                        ]),
                        Vec::new(),
                    );

                    let result = index.cellulite.in_shape(rtxn, &polygon)?;

                    r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch
                }

                match (r1, r2) {
                    (Some(r1), Some(r2)) => Ok(r1 | r2),
                    (Some(r1), None) => Ok(r1),
                    (None, Some(r2)) => Ok(r2),
                    (None, None) => Err(top_right_point[0].as_external_error(
                        FilterError::AttributeNotFilterable {
                            attribute: &format!(
                                "{RESERVED_GEO_FIELD_NAME}/{RESERVED_GEOJSON_FIELD_NAME}"
                            ),
                            filterable_patterns: filtered_matching_patterns(
                                filterable_attribute_rules,
                                &|features| features.is_filterable(),
                            ),
                        },
                    ))?,
                }
            }
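// Sketch (not part of the diff above): how the `match (r1, r2)` merge behaves once both
// branches are optional — the `_geo` result and the cellulite `_geojson` result are
// unioned when both exist. Uses only the `roaring` crate; names are illustrative.
use roaring::RoaringBitmap;

fn merge_geo_results(r1: Option<RoaringBitmap>, r2: Option<RoaringBitmap>) -> Option<RoaringBitmap> {
    match (r1, r2) {
        // Both the `_geo` rtree and the cellulite index answered: take the union.
        (Some(a), Some(b)) => Some(a | b),
        (Some(a), None) => Some(a),
        (None, Some(b)) => Some(b),
        // Neither is filterable; the real code reports `AttributeNotFilterable` here.
        (None, None) => None,
    }
}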
            FilterCondition::GeoPolygon { points } => {
                if !index.is_geojson_filtering_enabled(rtxn)? {
                    return Err(points[0][0].as_external_error(
                        FilterError::AttributeNotFilterable {
                            attribute: RESERVED_GEOJSON_FIELD_NAME,
                            filterable_patterns: filtered_matching_patterns(
                                filterable_attribute_rules,
                                &|features| features.is_filterable(),
                            ),
                        },
                    ))?;
                }

                let mut coords = Vec::new();
                for [lat_token, lng_token] in points {
                    let lat = lat_token.parse_finite_float()?;
                    let lng = lng_token.parse_finite_float()?;
                    if !(-90.0..=90.0).contains(&lat) {
                        return Err(lat_token.as_external_error(BadGeoError::Lat(lat)))?;
                    }
                    if !(-180.0..=180.0).contains(&lng) {
                        return Err(lng_token.as_external_error(BadGeoError::Lng(lng)))?;
                    }
                    coords.push(geo_types::Coord { x: lng, y: lat });
                }

                let polygon = geo_types::Polygon::new(geo_types::LineString(coords), Vec::new());
                let result = index.cellulite.in_shape(rtxn, &polygon)?;

                let result = roaring::RoaringBitmap::from_iter(result); // TODO: Remove once we update roaring in meilisearch

                Ok(result)
            }
        }
    }
}
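// Sketch (illustrative, mirrors the arm above): `_geoPolygon([lat, lng], ...)` points are
// validated and then converted to `geo_types` coordinates with x = longitude, y = latitude.
use geo_types::{Coord, LineString, Polygon};

fn polygon_from_lat_lng(points: &[(f64, f64)]) -> Result<Polygon<f64>, String> {
    let mut coords = Vec::with_capacity(points.len());
    for &(lat, lng) in points {
        if !(-90.0..=90.0).contains(&lat) {
            return Err(format!("bad latitude {lat}"));
        }
        if !(-180.0..=180.0).contains(&lng) {
            return Err(format!("bad longitude {lng}"));
        }
        // GeoJSON and the geo crates are lng/lat (x/y), while the filter syntax is lat/lng.
        coords.push(Coord { x: lng, y: lat });
    }
    // No interior rings; `Polygon::new` closes the exterior ring if needed.
    Ok(Polygon::new(LineString(coords), Vec::new()))
}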
@ -962,17 +1060,17 @@ mod tests {
|
|||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
|
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
snapshot!(error.to_string(), @r###"
|
snapshot!(error.to_string(), @r"
|
||||||
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
|
Attribute `_geo/_geojson` is not filterable. This index does not have configured filterable attributes.
|
||||||
12:14 _geoRadius(42, 150, 10)
|
12:14 _geoRadius(42, 150, 10)
|
||||||
"###);
|
");
|
||||||
|
|
||||||
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
|
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
snapshot!(error.to_string(), @r###"
|
snapshot!(error.to_string(), @r"
|
||||||
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
|
Attribute `_geo/_geojson` is not filterable. This index does not have configured filterable attributes.
|
||||||
18:20 _geoBoundingBox([42, 150], [30, 10])
|
18:20 _geoBoundingBox([42, 150], [30, 10])
|
||||||
"###);
|
");
|
||||||
|
|
||||||
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
|
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
@ -993,19 +1091,19 @@ mod tests {
|
|||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
|
let filter = Filter::from_str("_geoRadius(-90, 150, 10)").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
snapshot!(error.to_string(), @r###"
|
snapshot!(error.to_string(), @r"
|
||||||
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
|
Attribute `_geo/_geojson` is not filterable. Available filterable attribute patterns are: `title`.
|
||||||
12:16 _geoRadius(-100, 150, 10)
|
12:15 _geoRadius(-90, 150, 10)
|
||||||
"###);
|
");
|
||||||
|
|
||||||
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
|
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
snapshot!(error.to_string(), @r###"
|
snapshot!(error.to_string(), @r"
|
||||||
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
|
Attribute `_geo/_geojson` is not filterable. Available filterable attribute patterns are: `title`.
|
||||||
18:20 _geoBoundingBox([42, 150], [30, 10])
|
18:20 _geoBoundingBox([42, 150], [30, 10])
|
||||||
"###);
|
");
|
||||||
|
|
||||||
let filter = Filter::from_str("name = 12").unwrap().unwrap();
|
let filter = Filter::from_str("name = 12").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
@ -1153,38 +1251,34 @@ mod tests {
|
|||||||
// georadius have a bad latitude
|
// georadius have a bad latitude
|
||||||
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
|
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(
|
snapshot!(error.to_string(), @r"
|
||||||
error.to_string().starts_with(
|
Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees.
|
||||||
"Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees."
|
12:16 _geoRadius(-100, 150, 10)
|
||||||
),
|
");
|
||||||
"{}",
|
|
||||||
error.to_string()
|
|
||||||
);
|
|
||||||
|
|
||||||
// georadius have a bad latitude
|
// georadius have a bad latitude
|
||||||
let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
|
let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
|
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
|
||||||
));
|
12:23 _geoRadius(-90.0000001, 150, 10)
|
||||||
|
");
|
||||||
|
|
||||||
// georadius have a bad longitude
|
// georadius have a bad longitude
|
||||||
let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
|
let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(
|
snapshot!(error.to_string(), @r"
|
||||||
error.to_string().contains(
|
Bad longitude `250`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-110` instead.
|
||||||
"Bad longitude `250`. Longitude must be contained between -180 and 180 degrees."
|
17:20 _geoRadius(-10, 250, 10)
|
||||||
),
|
");
|
||||||
"{}",
|
|
||||||
error.to_string(),
|
|
||||||
);
|
|
||||||
|
|
||||||
// georadius have a bad longitude
|
// georadius have a bad longitude
|
||||||
let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
|
let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
|
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
|
||||||
));
|
17:27 _geoRadius(-10, 180.000001, 10)
|
||||||
|
");
|
||||||
}
|
}
|
||||||
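// Sketch of the wrap-around suggested by the longitude hints in the snapshots above
// (`250` -> `-110`, `180.000001` -> `-179.999999`). The formula is an assumption,
// not necessarily the exact one used to build the error message.
fn wrap_longitude(lng: f64) -> f64 {
    ((lng + 180.0).rem_euclid(360.0)) - 180.0
}

#[test]
fn wrap_longitude_matches_hints() {
    assert_eq!(wrap_longitude(250.0), -110.0);
    assert!((wrap_longitude(180.000001) - (-179.999999)).abs() < 1e-9);
}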
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -1207,73 +1301,73 @@ mod tests {
|
|||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(
|
snapshot!(error.to_string(), @r"
|
||||||
error.to_string().starts_with(
|
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
|
||||||
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
|
18:29 _geoBoundingBox([-90.0000001, 150], [30, 10])
|
||||||
),
|
");
|
||||||
"{}",
|
|
||||||
error.to_string()
|
|
||||||
);
|
|
||||||
|
|
||||||
// geoboundingbox top left coord have a bad latitude
|
// geoboundingbox top left coord have a bad latitude
|
||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(
|
snapshot!(error.to_string(), @r"
|
||||||
error.to_string().starts_with(
|
Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees.
|
||||||
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
|
18:28 _geoBoundingBox([90.0000001, 150], [30, 10])
|
||||||
),
|
");
|
||||||
"{}",
|
|
||||||
error.to_string()
|
|
||||||
);
|
|
||||||
|
|
||||||
// geoboundingbox bottom right coord have a bad latitude
|
// geoboundingbox bottom right coord have a bad latitude
|
||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
|
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
|
||||||
));
|
28:39 _geoBoundingBox([30, 10], [-90.0000001, 150])
|
||||||
|
");
|
||||||
|
|
||||||
// geoboundingbox bottom right coord have a bad latitude
|
// geoboundingbox bottom right coord have a bad latitude
|
||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
|
Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees.
|
||||||
));
|
28:38 _geoBoundingBox([30, 10], [90.0000001, 150])
|
||||||
|
");
|
||||||
|
|
||||||
// geoboundingbox top left coord have a bad longitude
|
// geoboundingbox top left coord have a bad longitude
|
||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
|
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
|
||||||
));
|
23:33 _geoBoundingBox([-10, 180.000001], [30, 10])
|
||||||
|
");
|
||||||
|
|
||||||
// geoboundingbox top left coord have a bad longitude
|
// geoboundingbox top left coord have a bad longitude
|
||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
|
Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `179.999999` instead.
|
||||||
));
|
23:34 _geoBoundingBox([-10, -180.000001], [30, 10])
|
||||||
|
");
|
||||||
|
|
||||||
// geoboundingbox bottom right coord have a bad longitude
|
// geoboundingbox bottom right coord have a bad longitude
|
||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
|
Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `179.999999` instead.
|
||||||
));
|
33:44 _geoBoundingBox([30, 10], [-10, -180.000001])
|
||||||
|
");
|
||||||
|
|
||||||
// geoboundingbox bottom right coord have a bad longitude
|
// geoboundingbox bottom right coord have a bad longitude
|
||||||
let filter =
|
let filter =
|
||||||
Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap();
|
Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap();
|
||||||
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
let error = filter.evaluate(&rtxn, &index).unwrap_err();
|
||||||
assert!(error.to_string().contains(
|
snapshot!(error.to_string(), @r"
|
||||||
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
|
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
|
||||||
));
|
33:43 _geoBoundingBox([30, 10], [-10, 180.000001])
|
||||||
|
");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -48,6 +48,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
            field_id_docid_facet_strings,
            vector_store,
            embedder_category_id: _,
            cellulite,
            documents,
        } = self.index;

@ -90,6 +91,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
        field_id_docid_facet_strings.clear(self.wtxn)?;
        // vector
        vector_store.clear(self.wtxn)?;
        cellulite.clear(self.wtxn)?;

        documents.clear(self.wtxn)?;
@ -2,6 +2,7 @@ use std::fs::File;
use std::io::{self, BufReader};

use concat_arrays::concat_arrays;
use geojson::GeoJson;
use serde_json::Value;

use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
@ -9,7 +10,7 @@ use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, InternalError, Result};
use crate::{DocumentId, FieldId, InternalError, Result, UserError};

/// Extracts the geographical coordinates contained in each document under the `_geo` field.
///
@ -107,3 +108,72 @@ fn extract_lat_lng(
        None => Ok(None),
    }
}
/// Extracts the geographical coordinates contained in each document under the `_geojson` field.
///
/// Returns the generated grenad reader containing the docid as key associated to its zerometry
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_geojson<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
    primary_key_id: FieldId,
    settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
    let mut writer = create_writer(
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        tempfile::tempfile()?,
    );

    let mut cursor = obkv_documents.into_cursor()?;
    while let Some((docid_bytes, value)) = cursor.move_on_next()? {
        let obkv = obkv::KvReader::from_slice(value);
        // since we only need the primary key when we throw an error
        // we create this getter to lazily get it when needed
        let document_id = || -> Value {
            let reader = KvReaderDelAdd::from_slice(obkv.get(primary_key_id).unwrap());
            let document_id =
                reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap();
            serde_json::from_slice(document_id).unwrap()
        };

        // extract old version
        let del_geojson =
            extract_geojson_field(obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
        // extract new version
        let add_geojson =
            extract_geojson_field(obkv, &settings_diff.new, DelAdd::Addition, document_id)?;

        if del_geojson != add_geojson {
            let mut obkv = KvWriterDelAdd::memory();
            if del_geojson.is_some() {
                // We don't need to store the geojson, we'll just delete it by id
                obkv.insert(DelAdd::Deletion, [])?;
            }
            if let Some(geojson) = add_geojson {
                obkv.insert(DelAdd::Addition, geojson.to_string().as_bytes())?;
            }
            let bytes = obkv.into_inner()?;
            writer.insert(&docid_bytes[0..std::mem::size_of::<DocumentId>()], bytes)?;
        }
    }

    writer_into_reader(writer)
}

fn extract_geojson_field(
    obkv: &obkv::KvReader<FieldId>,
    settings: &InnerIndexSettings,
    deladd: DelAdd,
    _document_id: impl Fn() -> Value,
) -> Result<Option<GeoJson>> {
    match settings.geojson_fid {
        Some(fid) if settings.filterable_attributes_rules.iter().any(|rule| rule.has_geojson()) => {
            let value = obkv.get(fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
            Ok(value
                .map(|v| GeoJson::from_reader(v).map_err(UserError::MalformedGeojson))
                .transpose()?)
        }
        _ => Ok(None),
    }
}
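// Sketch (assumptions noted): the `_geojson` field is parsed with the `geojson` crate,
// exactly like the `GeoJson::from_reader` call above; the parser itself accepts any of
// the three top-level GeoJSON kinds at this stage.
use std::str::FromStr;
use geojson::GeoJson;

fn classify(raw: &str) -> Result<&'static str, geojson::Error> {
    Ok(match GeoJson::from_str(raw)? {
        GeoJson::Geometry(_) => "geometry",
        GeoJson::Feature(_) => "feature",
        GeoJson::FeatureCollection(_) => "feature collection",
    })
}

// classify(r#"{"type": "Point", "coordinates": [2.35, 48.86]}"#) == Ok("geometry")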
@ -31,6 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
use super::{helpers, TypedChunk};
use crate::progress::EmbedderStats;
use crate::update::index_documents::extract::extract_geo_points::extract_geojson;
use crate::update::index_documents::extract::extract_vector_points::extract_embeddings_from_fragments;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::EmbedderInfo;
@ -62,6 +63,7 @@ pub(crate) fn data_from_obkv_documents(
        original_documents_chunk,
        indexer,
        lmdb_writer_sx.clone(),
        primary_key_id,
        settings_diff.clone(),
        embedder_info.clone(),
        possible_embedding_mistakes.clone(),
@ -228,10 +230,12 @@ pub fn request_threads() -> &'static ThreadPoolNoAbort {

/// Extract chunked data and send it into lmdb_writer_sx sender:
/// - documents
#[allow(clippy::too_many_arguments)]
fn send_original_documents_data(
    original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    primary_key_id: FieldId,
    settings_diff: Arc<InnerIndexSettingsDiff>,
    embedder_info: Arc<Vec<(String, EmbedderInfo)>>,
    possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
@ -240,6 +244,20 @@ fn send_original_documents_data(
    let original_documents_chunk =
        original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;

    if settings_diff.reindex_geojson() {
        let documents_chunk_cloned = original_documents_chunk.clone();
        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
        let settings_diff = settings_diff.clone();
        rayon::spawn(move || {
            let result =
                extract_geojson(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
            let _ = match result {
                Ok(geojson) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoJson(geojson))),
                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
            };
        });
    }

    let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only())
        // no point in indexing vectors without embedders
        && (!settings_diff.new.runtime_embedders.inner_as_ref().is_empty());
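// Sketch of the fire-and-forget pattern used above: spawn the extraction on the rayon
// pool and forward the `Result` over a channel, ignoring send errors because the
// receiving side may already have shut down on failure. Types are simplified.
fn spawn_extraction<T, E>(
    sender: crossbeam_channel::Sender<Result<T, E>>,
    extract: impl FnOnce() -> Result<T, E> + Send + 'static,
) where
    T: Send + 'static,
    E: Send + 'static,
{
    rayon::spawn(move || {
        // `let _ =` mirrors the code above: a disconnected channel is not an error here.
        let _ = sender.send(extract());
    });
}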
@ -523,7 +523,7 @@ where
|
|||||||
.is_some_and(|conf| conf.is_quantized);
|
.is_some_and(|conf| conf.is_quantized);
|
||||||
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
|
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
|
||||||
|
|
||||||
pool.install(|| {
|
pool.install(|| -> Result<_> {
|
||||||
let mut writer =
|
let mut writer =
|
||||||
VectorStore::new(backend, vector_store, embedder_index, was_quantized);
|
VectorStore::new(backend, vector_store, embedder_index, was_quantized);
|
||||||
writer.build_and_quantize(
|
writer.build_and_quantize(
|
||||||
@ -541,6 +541,8 @@ where
|
|||||||
.map_err(InternalError::from)??;
|
.map_err(InternalError::from)??;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.index.cellulite.build(self.wtxn, &self.should_abort, &Progress::default())?;
|
||||||
|
|
||||||
self.execute_prefix_databases(
|
self.execute_prefix_databases(
|
||||||
word_docids.map(MergerBuilder::build),
|
word_docids.map(MergerBuilder::build),
|
||||||
exact_word_docids.map(MergerBuilder::build),
|
exact_word_docids.map(MergerBuilder::build),
|
||||||
|
@ -820,19 +820,20 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
let documents_count = documents_ids.len() as usize;
|
let documents_count = documents_ids.len() as usize;
|
||||||
|
|
||||||
// We initialize the sorter with the user indexing settings.
|
// We initialize the sorter with the user indexing settings.
|
||||||
let mut original_sorter = if settings_diff.reindex_vectors() {
|
let mut original_sorter =
|
||||||
Some(create_sorter(
|
if settings_diff.reindex_vectors() || settings_diff.reindex_geojson() {
|
||||||
grenad::SortAlgorithm::Stable,
|
Some(create_sorter(
|
||||||
KeepFirst,
|
grenad::SortAlgorithm::Stable,
|
||||||
self.indexer_settings.chunk_compression_type,
|
KeepFirst,
|
||||||
self.indexer_settings.chunk_compression_level,
|
self.indexer_settings.chunk_compression_type,
|
||||||
self.indexer_settings.max_nb_chunks,
|
self.indexer_settings.chunk_compression_level,
|
||||||
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
self.indexer_settings.max_nb_chunks,
|
||||||
true,
|
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
||||||
))
|
true,
|
||||||
} else {
|
))
|
||||||
None
|
} else {
|
||||||
};
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let backend = self.index.get_vector_store(wtxn)?.unwrap_or_default();
|
let backend = self.index.get_vector_store(wtxn)?.unwrap_or_default();
|
||||||
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
|
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
|
||||||
|
@ -30,7 +30,7 @@ use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
|
|||||||
use crate::vector::VectorStore;
|
use crate::vector::VectorStore;
|
||||||
use crate::{
|
use crate::{
|
||||||
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
|
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
|
||||||
Result, SerializationError, U8StrStrCodec,
|
Result, SerializationError, U8StrStrCodec, UserError,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// This struct accumulates and group the TypedChunks
|
/// This struct accumulates and group the TypedChunks
|
||||||
@ -85,6 +85,7 @@ pub(crate) enum TypedChunk {
|
|||||||
FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
|
FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
|
||||||
FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
|
FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
|
||||||
GeoPoints(grenad::Reader<BufReader<File>>),
|
GeoPoints(grenad::Reader<BufReader<File>>),
|
||||||
|
GeoJson(grenad::Reader<BufReader<File>>),
|
||||||
VectorPoints {
|
VectorPoints {
|
||||||
remove_vectors: grenad::Reader<BufReader<File>>,
|
remove_vectors: grenad::Reader<BufReader<File>>,
|
||||||
// docid -> vector
|
// docid -> vector
|
||||||
@ -614,6 +615,36 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
index.put_geo_rtree(wtxn, &rtree)?;
|
index.put_geo_rtree(wtxn, &rtree)?;
|
||||||
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
|
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
|
||||||
}
|
}
|
||||||
|
        TypedChunk::GeoJson(_) => {
            let span = tracing::trace_span!(target: "indexing::write_db", "geo_json");
            let _entered = span.enter();

            let mut builder = MergerBuilder::new(KeepFirst);
            for typed_chunk in typed_chunks {
                let TypedChunk::GeoJson(chunk) = typed_chunk else {
                    unreachable!();
                };

                builder.push(chunk.into_cursor()?);
            }
            let merger = builder.build();

            let mut iter = merger.into_stream_merger_iter()?;
            while let Some((key, value)) = iter.next()? {
                // convert the key back to a u32 (4 bytes)
                let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();

                let deladd_obkv = KvReaderDelAdd::from_slice(value);
                if let Some(_value) = deladd_obkv.get(DelAdd::Deletion) {
                    index.cellulite.delete(wtxn, docid)?;
                }
                if let Some(value) = deladd_obkv.get(DelAdd::Addition) {
                    let geojson =
                        geojson::GeoJson::from_reader(value).map_err(UserError::SerdeJson)?;
                    index.cellulite.add(wtxn, docid, &geojson)?;
                }
            }
        }
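// Sketch: the merger key is the document id written as 4 big-endian bytes, so decoding
// is a plain `u32::from_be_bytes` round-trip (the `DocumentId::from_be_bytes` call above,
// `DocumentId` being a `u32` alias in milli).
fn docid_from_key(key: &[u8]) -> Option<u32> {
    let bytes: [u8; 4] = key.try_into().ok()?;
    Some(u32::from_be_bytes(bytes))
}

// docid_from_key(&42u32.to_be_bytes()) == Some(42)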
TypedChunk::VectorPoints { .. } => {
|
TypedChunk::VectorPoints { .. } => {
|
||||||
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
|
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
|
@ -139,6 +139,7 @@ pub enum ReceiverAction {
|
|||||||
LargeEntry(LargeEntry),
|
LargeEntry(LargeEntry),
|
||||||
LargeVectors(LargeVectors),
|
LargeVectors(LargeVectors),
|
||||||
LargeVector(LargeVector),
|
LargeVector(LargeVector),
|
||||||
|
LargeGeoJson(LargeGeoJson),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An entry that cannot fit in the BBQueue buffers has been
|
/// An entry that cannot fit in the BBQueue buffers has been
|
||||||
@ -193,6 +194,14 @@ impl LargeVector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct LargeGeoJson {
|
||||||
|
/// The document id associated to the large geojson.
|
||||||
|
pub docid: DocumentId,
|
||||||
|
/// The large geojson that must be written.
|
||||||
|
pub geojson: Mmap,
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> WriterBbqueueReceiver<'a> {
|
impl<'a> WriterBbqueueReceiver<'a> {
|
||||||
/// Tries to receive an action to do until the timeout occurs
|
/// Tries to receive an action to do until the timeout occurs
|
||||||
/// and if it does, consider it as a spurious wake up.
|
/// and if it does, consider it as a spurious wake up.
|
||||||
@ -258,10 +267,12 @@ pub enum EntryHeader {
|
|||||||
DeleteVector(DeleteVector),
|
DeleteVector(DeleteVector),
|
||||||
SetVectors(SetVectors),
|
SetVectors(SetVectors),
|
||||||
SetVector(SetVector),
|
SetVector(SetVector),
|
||||||
|
CelluliteItem(DocumentId),
|
||||||
|
CelluliteRemove(DocumentId),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EntryHeader {
|
impl EntryHeader {
|
||||||
const fn variant_size() -> usize {
|
pub const fn variant_size() -> usize {
|
||||||
mem::size_of::<u8>()
|
mem::size_of::<u8>()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -271,6 +282,8 @@ impl EntryHeader {
|
|||||||
EntryHeader::DeleteVector(_) => 1,
|
EntryHeader::DeleteVector(_) => 1,
|
||||||
EntryHeader::SetVectors(_) => 2,
|
EntryHeader::SetVectors(_) => 2,
|
||||||
EntryHeader::SetVector(_) => 3,
|
EntryHeader::SetVector(_) => 3,
|
||||||
|
EntryHeader::CelluliteItem(_) => 4,
|
||||||
|
EntryHeader::CelluliteRemove(_) => 5,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -289,6 +302,14 @@ impl EntryHeader {
|
|||||||
Self::variant_size() + mem::size_of::<DeleteVector>()
|
Self::variant_size() + mem::size_of::<DeleteVector>()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fn total_cellulite_item_size(value_length: usize) -> usize {
|
||||||
|
Self::variant_size() + mem::size_of::<DocumentId>() + value_length
|
||||||
|
}
|
||||||
|
|
||||||
|
const fn total_cellulite_remove_size() -> usize {
|
||||||
|
Self::variant_size() + mem::size_of::<DocumentId>()
|
||||||
|
}
|
||||||
|
|
||||||
/// The `dimensions` corresponds to the number of `f32` in the embedding.
|
/// The `dimensions` corresponds to the number of `f32` in the embedding.
|
||||||
fn total_set_vectors_size(count: usize, dimensions: usize) -> usize {
|
fn total_set_vectors_size(count: usize, dimensions: usize) -> usize {
|
||||||
let embedding_size = dimensions * mem::size_of::<f32>();
|
let embedding_size = dimensions * mem::size_of::<f32>();
|
||||||
@ -306,6 +327,8 @@ impl EntryHeader {
|
|||||||
EntryHeader::DeleteVector(adv) => mem::size_of_val(adv),
|
EntryHeader::DeleteVector(adv) => mem::size_of_val(adv),
|
||||||
EntryHeader::SetVectors(asvs) => mem::size_of_val(asvs),
|
EntryHeader::SetVectors(asvs) => mem::size_of_val(asvs),
|
||||||
EntryHeader::SetVector(asv) => mem::size_of_val(asv),
|
EntryHeader::SetVector(asv) => mem::size_of_val(asv),
|
||||||
|
EntryHeader::CelluliteItem(docid) => mem::size_of_val(docid),
|
||||||
|
EntryHeader::CelluliteRemove(docid) => mem::size_of_val(docid),
|
||||||
};
|
};
|
||||||
Self::variant_size() + payload_size
|
Self::variant_size() + payload_size
|
||||||
}
|
}
|
||||||
@ -333,6 +356,16 @@ impl EntryHeader {
|
|||||||
let header = checked::pod_read_unaligned(header_bytes);
|
let header = checked::pod_read_unaligned(header_bytes);
|
||||||
EntryHeader::SetVector(header)
|
EntryHeader::SetVector(header)
|
||||||
}
|
}
|
||||||
|
4 => {
|
||||||
|
let header_bytes = &remaining[..mem::size_of::<DocumentId>()];
|
||||||
|
let header = checked::pod_read_unaligned(header_bytes);
|
||||||
|
EntryHeader::CelluliteItem(header)
|
||||||
|
}
|
||||||
|
5 => {
|
||||||
|
let header_bytes = &remaining[..mem::size_of::<DocumentId>()];
|
||||||
|
let header = checked::pod_read_unaligned(header_bytes);
|
||||||
|
EntryHeader::CelluliteRemove(header)
|
||||||
|
}
|
||||||
id => panic!("invalid variant id: {id}"),
|
id => panic!("invalid variant id: {id}"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -344,6 +377,8 @@ impl EntryHeader {
|
|||||||
EntryHeader::DeleteVector(adv) => bytemuck::bytes_of(adv),
|
EntryHeader::DeleteVector(adv) => bytemuck::bytes_of(adv),
|
||||||
EntryHeader::SetVectors(asvs) => bytemuck::bytes_of(asvs),
|
EntryHeader::SetVectors(asvs) => bytemuck::bytes_of(asvs),
|
||||||
EntryHeader::SetVector(asv) => bytemuck::bytes_of(asv),
|
EntryHeader::SetVector(asv) => bytemuck::bytes_of(asv),
|
||||||
|
EntryHeader::CelluliteItem(docid) => bytemuck::bytes_of(docid),
|
||||||
|
EntryHeader::CelluliteRemove(docid) => bytemuck::bytes_of(docid),
|
||||||
};
|
};
|
||||||
*first = self.variant_id();
|
*first = self.variant_id();
|
||||||
remaining.copy_from_slice(payload_bytes);
|
remaining.copy_from_slice(payload_bytes);
|
||||||
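// Sketch of the framing added for the two cellulite variants above: one variant-id byte
// followed by the `DocumentId` payload, written and read back with `bytemuck` just like
// `serialize_into` / `from_slice` do. Buffer handling is simplified and illustrative.
fn write_cellulite_remove(docid: u32, frame: &mut [u8]) {
    let (first, remaining) = frame.split_first_mut().unwrap();
    *first = 5; // variant id of `CelluliteRemove`
    remaining[..4].copy_from_slice(bytemuck::bytes_of(&docid));
}

fn read_cellulite_remove(frame: &[u8]) -> Option<u32> {
    let (&variant, remaining) = frame.split_first()?;
    (variant == 5).then(|| bytemuck::pod_read_unaligned(&remaining[..4]))
}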
@ -548,6 +583,10 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
|||||||
GeoSender(self)
|
GeoSender(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn geojson<'a>(&'a self) -> GeoJsonSender<'a, 'b> {
|
||||||
|
GeoJsonSender(self)
|
||||||
|
}
|
||||||
|
|
||||||
fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> {
|
fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> {
|
||||||
let max_grant = self.max_grant;
|
let max_grant = self.max_grant;
|
||||||
let refcell = self.producers.get().unwrap();
|
let refcell = self.producers.get().unwrap();
|
||||||
@ -1139,3 +1178,72 @@ impl GeoSender<'_, '_> {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
pub struct GeoJsonSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);

impl GeoJsonSender<'_, '_> {
    pub fn send_geojson(&self, docid: DocumentId, value: Vec<u8>) -> crate::Result<()> {
        let max_grant = self.0.max_grant;
        let refcell = self.0.producers.get().unwrap();
        let mut producer = refcell.0.borrow_mut_or_yield();

        let payload_header = EntryHeader::CelluliteItem(docid);
        let value_length = value.len();
        let total_length = EntryHeader::total_cellulite_item_size(value_length);
        if total_length > max_grant {
            let mut value_file = tempfile::tempfile().map(BufWriter::new)?;

            let mut embedding_bytes = bytemuck::cast_slice(&value);
            io::copy(&mut embedding_bytes, &mut value_file)?;

            let value_file = value_file.into_inner().map_err(|ie| ie.into_error())?;
            let geojson = unsafe { Mmap::map(&value_file)? }; // Safe because the file is never modified

            let large_geojson = LargeGeoJson { docid, geojson };
            self.0.sender.send(ReceiverAction::LargeGeoJson(large_geojson)).unwrap();

            return Ok(());
        }

        // Spin loop to have a frame the size we requested.
        reserve_and_write_grant(
            &mut producer,
            total_length,
            &self.0.sender,
            &self.0.sent_messages_attempts,
            &self.0.blocking_sent_messages_attempts,
            |grant| {
                let header_size = payload_header.header_size();
                let (header_bytes, remaining) = grant.split_at_mut(header_size);
                payload_header.serialize_into(header_bytes);
                remaining.copy_from_slice(&value);
                Ok(())
            },
        )?;

        Ok(())
    }

    pub fn delete_geojson(&self, docid: DocumentId) -> crate::Result<()> {
        let refcell = self.0.producers.get().unwrap();
        let mut producer = refcell.0.borrow_mut_or_yield();

        let payload_header = EntryHeader::CelluliteRemove(docid);
        let total_length = EntryHeader::total_cellulite_remove_size();

        reserve_and_write_grant(
            &mut producer,
            total_length,
            &self.0.sender,
            &self.0.sent_messages_attempts,
            &self.0.blocking_sent_messages_attempts,
            |grant| {
                payload_header.serialize_into(grant);
                Ok(())
            },
        )?;

        Ok(())
    }
}
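// Illustrative call sequence for the sender above (not verbatim from this diff): a parsed
// GeoJSON value is serialized to its JSON text before being pushed through the bbqueue,
// and deletions only need the document id.
fn forward_geojson(
    sender: GeoJsonSender<'_, '_>,
    docid: DocumentId,
    geojson: &geojson::GeoJson,
) -> crate::Result<()> {
    // `to_string` produces the JSON text; the writer side re-parses it with `from_reader`.
    sender.send_geojson(docid, geojson.to_string().into_bytes())?;
    Ok(())
}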
|
@ -10,7 +10,9 @@ use serde_json::value::RawValue;
|
|||||||
|
|
||||||
use super::vector_document::VectorDocument;
|
use super::vector_document::VectorDocument;
|
||||||
use super::{KvReaderFieldId, KvWriterFieldId};
|
use super::{KvReaderFieldId, KvWriterFieldId};
|
||||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
use crate::constants::{
|
||||||
|
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
|
||||||
|
};
|
||||||
use crate::documents::FieldIdMapper;
|
use crate::documents::FieldIdMapper;
|
||||||
use crate::update::del_add::KvReaderDelAdd;
|
use crate::update::del_add::KvReaderDelAdd;
|
||||||
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
|
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
|
||||||
@ -29,10 +31,10 @@ pub trait Document<'doc> {
|
|||||||
/// Iterate over all **top-level** fields of the document, returning their name and raw JSON value.
|
/// Iterate over all **top-level** fields of the document, returning their name and raw JSON value.
|
||||||
///
|
///
|
||||||
/// - The returned values *may* contain nested fields.
|
/// - The returned values *may* contain nested fields.
|
||||||
/// - The `_vectors` and `_geo` fields are **ignored** by this method, meaning they are **not returned** by this method.
|
/// - The `_vectors`, `_geo` and `_geojson` fields are **ignored** by this method, meaning they are **not returned** by this method.
|
||||||
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>>;
|
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>>;
|
||||||
|
|
||||||
/// Number of top level fields, **excluding** `_vectors` and `_geo`
|
/// Number of top level fields, **excluding** `_vectors`, `_geo` and `_geojson`.
|
||||||
fn top_level_fields_count(&self) -> usize;
|
fn top_level_fields_count(&self) -> usize;
|
||||||
|
|
||||||
/// Get the **top-level** with the specified name, if exists.
|
/// Get the **top-level** with the specified name, if exists.
|
||||||
@ -50,11 +52,13 @@ pub trait Document<'doc> {
|
|||||||
|
|
||||||
/// Returns the unparsed value of the `_geo` field from the document data.
|
/// Returns the unparsed value of the `_geo` field from the document data.
|
||||||
///
|
///
|
||||||
/// This field alone is insufficient to retrieve geo data, as they may be stored in a dedicated location in the database.
|
|
||||||
/// Use a [`super::geo_document::GeoDocument`] to access the vector.
|
|
||||||
///
|
|
||||||
/// This method is meant as a convenience for implementors of [`super::geo_document::GeoDocument`].
|
/// This method is meant as a convenience for implementors of [`super::geo_document::GeoDocument`].
|
||||||
fn geo_field(&self) -> Result<Option<&'doc RawValue>>;
|
fn geo_field(&self) -> Result<Option<&'doc RawValue>>;
|
||||||
|
|
||||||
|
/// Returns the unparsed value of the `_geojson` field from the document data.
|
||||||
|
///
|
||||||
|
/// This method is meant as a convenience for implementors of [`super::geo_document::GeoDocument`].
|
||||||
|
fn geojson_field(&self) -> Result<Option<&'doc RawValue>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -90,7 +94,10 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
|
|||||||
Err(error) => return Some(Err(error.into())),
|
Err(error) => return Some(Err(error.into())),
|
||||||
};
|
};
|
||||||
|
|
||||||
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
|
if name == RESERVED_VECTORS_FIELD_NAME
|
||||||
|
|| name == RESERVED_GEO_FIELD_NAME
|
||||||
|
|| name == RESERVED_GEOJSON_FIELD_NAME
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,19 +120,24 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
|
|||||||
self.field(RESERVED_GEO_FIELD_NAME)
|
self.field(RESERVED_GEO_FIELD_NAME)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn geojson_field(&self) -> Result<Option<&'t RawValue>> {
|
||||||
|
self.field(RESERVED_GEOJSON_FIELD_NAME)
|
||||||
|
}
|
||||||
|
|
||||||
fn top_level_fields_count(&self) -> usize {
|
fn top_level_fields_count(&self) -> usize {
|
||||||
let has_vectors_field = self.vectors_field().unwrap_or(None).is_some();
|
let has_vectors_field = self.vectors_field().unwrap_or(None).is_some();
|
||||||
let has_geo_field = self.geo_field().unwrap_or(None).is_some();
|
let has_geo_field = self.geo_field().unwrap_or(None).is_some();
|
||||||
|
let has_geojson_field = self.geojson_field().unwrap_or(None).is_some();
|
||||||
let count = self.content.iter().count();
|
let count = self.content.iter().count();
|
||||||
match (has_vectors_field, has_geo_field) {
|
|
||||||
(true, true) => count - 2,
|
count - has_vectors_field as usize - has_geo_field as usize - has_geojson_field as usize
|
||||||
(true, false) | (false, true) => count - 1,
|
|
||||||
(false, false) => count,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
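// Minimal sketch of the counting trick used in `top_level_fields_count` above:
// `bool as usize` is 1 or 0, so each reserved field that is present removes exactly one
// entry from the raw count, replacing the old four-arm `match`.
fn user_field_count(total: usize, has_vectors: bool, has_geo: bool, has_geojson: bool) -> usize {
    total - has_vectors as usize - has_geo as usize - has_geojson as usize
}

// user_field_count(10, true, false, true) == 8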
fn top_level_field(&self, k: &str) -> Result<Option<&'t RawValue>> {
|
fn top_level_field(&self, k: &str) -> Result<Option<&'t RawValue>> {
|
||||||
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
|
if k == RESERVED_VECTORS_FIELD_NAME
|
||||||
|
|| k == RESERVED_GEO_FIELD_NAME
|
||||||
|
|| k == RESERVED_GEOJSON_FIELD_NAME
|
||||||
|
{
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
self.field(k)
|
self.field(k)
|
||||||
@ -177,15 +189,16 @@ impl<'doc> Document<'doc> for DocumentFromVersions<'_, 'doc> {
|
|||||||
Ok(self.versions.geo_field())
|
Ok(self.versions.geo_field())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn geojson_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||||
|
Ok(self.versions.geojson_field())
|
||||||
|
}
|
||||||
|
|
||||||
fn top_level_fields_count(&self) -> usize {
|
fn top_level_fields_count(&self) -> usize {
|
||||||
let has_vectors_field = self.vectors_field().unwrap_or(None).is_some();
|
let has_vectors_field = self.vectors_field().unwrap_or(None).is_some();
|
||||||
let has_geo_field = self.geo_field().unwrap_or(None).is_some();
|
let has_geo_field = self.geo_field().unwrap_or(None).is_some();
|
||||||
|
let has_geojson_field = self.geojson_field().unwrap_or(None).is_some();
|
||||||
let count = self.versions.len();
|
let count = self.versions.len();
|
||||||
match (has_vectors_field, has_geo_field) {
|
count - has_vectors_field as usize - has_geo_field as usize - has_geojson_field as usize
|
||||||
(true, true) => count - 2,
|
|
||||||
(true, false) | (false, true) => count - 1,
|
|
||||||
(false, false) => count,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn top_level_field(&self, k: &str) -> Result<Option<&'doc RawValue>> {
|
fn top_level_field(&self, k: &str) -> Result<Option<&'doc RawValue>> {
|
||||||
@ -265,6 +278,16 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
|
|||||||
db.geo_field()
|
db.geo_field()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn geojson_field(&self) -> Result<Option<&'d RawValue>> {
|
||||||
|
if let Some(geojson) = self.new_doc.geojson_field()? {
|
||||||
|
return Ok(Some(geojson));
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(db) = self.db else { return Ok(None) };
|
||||||
|
|
||||||
|
db.geojson_field()
|
||||||
|
}
|
||||||
|
|
||||||
fn top_level_fields_count(&self) -> usize {
|
fn top_level_fields_count(&self) -> usize {
|
||||||
self.iter_top_level_fields().count()
|
self.iter_top_level_fields().count()
|
||||||
}
|
}
|
||||||
@ -296,6 +319,10 @@ where
|
|||||||
D::geo_field(self)
|
D::geo_field(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn geojson_field(&self) -> Result<Option<&'doc RawValue>> {
|
||||||
|
D::geojson_field(self)
|
||||||
|
}
|
||||||
|
|
||||||
fn top_level_fields_count(&self) -> usize {
|
fn top_level_fields_count(&self) -> usize {
|
||||||
D::top_level_fields_count(self)
|
D::top_level_fields_count(self)
|
||||||
}
|
}
|
||||||
@ -405,6 +432,13 @@ where
|
|||||||
unordered_field_buffer.push((fid, geo_value));
|
unordered_field_buffer.push((fid, geo_value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(geojson_value) = document.geojson_field()? {
|
||||||
|
let fid = fields_ids_map
|
||||||
|
.id_or_insert(RESERVED_GEOJSON_FIELD_NAME)
|
||||||
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
|
unordered_field_buffer.push((fid, geojson_value));
|
||||||
|
}
|
||||||
|
|
||||||
unordered_field_buffer.sort_by_key(|(fid, _)| *fid);
|
unordered_field_buffer.sort_by_key(|(fid, _)| *fid);
|
||||||
for (fid, value) in unordered_field_buffer.iter() {
|
for (fid, value) in unordered_field_buffer.iter() {
|
||||||
writer.insert(*fid, value.get().as_bytes()).unwrap();
|
writer.insert(*fid, value.get().as_bytes()).unwrap();
|
||||||
@ -441,9 +475,11 @@ impl<'doc> Versions<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn iter_top_level_fields(&self) -> impl Iterator<Item = (&'doc str, &'doc RawValue)> + '_ {
|
pub fn iter_top_level_fields(&self) -> impl Iterator<Item = (&'doc str, &'doc RawValue)> + '_ {
|
||||||
self.data
|
self.data.iter().filter(|(k, _)| {
|
||||||
.iter()
|
*k != RESERVED_VECTORS_FIELD_NAME
|
||||||
.filter(|(k, _)| *k != RESERVED_VECTORS_FIELD_NAME && *k != RESERVED_GEO_FIELD_NAME)
|
&& *k != RESERVED_GEO_FIELD_NAME
|
||||||
|
&& *k != RESERVED_GEOJSON_FIELD_NAME
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn vectors_field(&self) -> Option<&'doc RawValue> {
|
pub fn vectors_field(&self) -> Option<&'doc RawValue> {
|
||||||
@ -454,6 +490,10 @@ impl<'doc> Versions<'doc> {
|
|||||||
self.data.get(RESERVED_GEO_FIELD_NAME)
|
self.data.get(RESERVED_GEO_FIELD_NAME)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn geojson_field(&self) -> Option<&'doc RawValue> {
|
||||||
|
self.data.get(RESERVED_GEOJSON_FIELD_NAME)
|
||||||
|
}
|
||||||
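// Sketch of the same reserved-name filtering against a plain map (the constants resolve
// to "_vectors", "_geo" and "_geojson"; the map type here is only illustrative).
fn iter_user_fields<'a>(
    data: &'a std::collections::BTreeMap<String, serde_json::Value>,
) -> impl Iterator<Item = (&'a str, &'a serde_json::Value)> {
    const RESERVED: [&str; 3] = ["_vectors", "_geo", "_geojson"];
    data.iter().map(|(k, v)| (k.as_str(), v)).filter(|(k, _)| !RESERVED.contains(k))
}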
|
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
self.data.len()
|
self.data.len()
|
||||||
}
|
}
|
||||||
@ -463,7 +503,10 @@ impl<'doc> Versions<'doc> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn top_level_field(&self, k: &str) -> Option<&'doc RawValue> {
|
pub fn top_level_field(&self, k: &str) -> Option<&'doc RawValue> {
|
||||||
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
|
if k == RESERVED_VECTORS_FIELD_NAME
|
||||||
|
|| k == RESERVED_GEO_FIELD_NAME
|
||||||
|
|| k == RESERVED_GEOJSON_FIELD_NAME
|
||||||
|
{
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
self.data.get(k)
|
self.data.get(k)
|
||||||
@ -516,7 +559,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
|
|||||||
Err(error) => return Some(Err(error.into())),
|
Err(error) => return Some(Err(error.into())),
|
||||||
};
|
};
|
||||||
|
|
||||||
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
|
if name == RESERVED_VECTORS_FIELD_NAME
|
||||||
|
|| name == RESERVED_GEO_FIELD_NAME
|
||||||
|
|| name == RESERVED_GEOJSON_FIELD_NAME
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -549,7 +595,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
|
|||||||
Err(_) => return Some(()),
|
Err(_) => return Some(()),
|
||||||
};
|
};
|
||||||
|
|
||||||
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
|
if name == RESERVED_VECTORS_FIELD_NAME
|
||||||
|
|| name == RESERVED_GEO_FIELD_NAME
|
||||||
|
|| name == RESERVED_GEOJSON_FIELD_NAME
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -559,7 +608,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn top_level_field(&self, k: &str) -> Result<Option<&'a RawValue>> {
|
fn top_level_field(&self, k: &str) -> Result<Option<&'a RawValue>> {
|
||||||
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
|
if k == RESERVED_VECTORS_FIELD_NAME
|
||||||
|
|| k == RESERVED_GEO_FIELD_NAME
|
||||||
|
|| k == RESERVED_GEOJSON_FIELD_NAME
|
||||||
|
{
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
self.get(k)
|
self.get(k)
|
||||||
@ -572,6 +624,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
|
|||||||
fn geo_field(&self) -> Result<Option<&'a RawValue>> {
|
fn geo_field(&self) -> Result<Option<&'a RawValue>> {
|
||||||
self.get(RESERVED_GEO_FIELD_NAME)
|
self.get(RESERVED_GEO_FIELD_NAME)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn geojson_field(&self) -> Result<Option<&'a RawValue>> {
|
||||||
|
self.get(RESERVED_GEOJSON_FIELD_NAME)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentIdentifiers<'doc> {
|
pub struct DocumentIdentifiers<'doc> {
|
||||||
|
@ -5,7 +5,7 @@ use bumpalo::Bump;
|
|||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
|
|
||||||
use super::DelAddRoaringBitmap;
|
use super::DelAddRoaringBitmap;
|
||||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
|
||||||
use crate::update::new::channel::{DocumentsSender, ExtractorBbqueueSender};
|
use crate::update::new::channel::{DocumentsSender, ExtractorBbqueueSender};
|
||||||
use crate::update::new::document::{write_to_obkv, Document, DocumentContext, DocumentIdentifiers};
|
use crate::update::new::document::{write_to_obkv, Document, DocumentContext, DocumentIdentifiers};
|
||||||
use crate::update::new::indexer::document_changes::{Extractor, IndexingContext};
|
use crate::update::new::indexer::document_changes::{Extractor, IndexingContext};
|
||||||
@ -75,7 +75,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
|
|||||||
.geo_field()
|
.geo_field()
|
||||||
.transpose()
|
.transpose()
|
||||||
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
||||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
let geojson_iter = content
|
||||||
|
.geojson_field()
|
||||||
|
.transpose()
|
||||||
|
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
|
||||||
|
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
|
||||||
let (f, _) = res?;
|
let (f, _) = res?;
|
||||||
let entry = document_extractor_data
|
let entry = document_extractor_data
|
||||||
.field_distribution_delta
|
.field_distribution_delta
|
||||||
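// Sketch of the iterator chaining used above: `Option<T>` is itself an iterator of zero
// or one item, so the optional `_geo` and `_geojson` entries can be chained onto the
// user-field iterator without any extra allocation.
fn all_field_names<'a>(
    user_fields: impl Iterator<Item = &'a str>,
    geo: Option<&'a str>,
    geojson: Option<&'a str>,
) -> impl Iterator<Item = &'a str> {
    user_fields.chain(geo).chain(geojson)
}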
@ -94,7 +98,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
|
|||||||
.geo_field()
|
.geo_field()
|
||||||
.transpose()
|
.transpose()
|
||||||
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
||||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
let geojson_iter = content
|
||||||
|
.geojson_field()
|
||||||
|
.transpose()
|
||||||
|
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
|
||||||
|
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
|
||||||
let (f, _) = res?;
|
let (f, _) = res?;
|
||||||
let entry = document_extractor_data
|
let entry = document_extractor_data
|
||||||
.field_distribution_delta
|
.field_distribution_delta
|
||||||
@ -108,7 +116,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
|
|||||||
.geo_field()
|
.geo_field()
|
||||||
.transpose()
|
.transpose()
|
||||||
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
||||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
let geojson_iter = content
|
||||||
|
.geojson_field()
|
||||||
|
.transpose()
|
||||||
|
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
|
||||||
|
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
|
||||||
let (f, _) = res?;
|
let (f, _) = res?;
|
||||||
let entry = document_extractor_data
|
let entry = document_extractor_data
|
||||||
.field_distribution_delta
|
.field_distribution_delta
|
||||||
@ -143,7 +155,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
|
|||||||
.geo_field()
|
.geo_field()
|
||||||
.transpose()
|
.transpose()
|
||||||
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
|
||||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
let geojson_iter = content
|
||||||
|
.geojson_field()
|
||||||
|
.transpose()
|
||||||
|
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
|
||||||
|
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
|
||||||
let (f, _) = res?;
|
let (f, _) = res?;
|
||||||
let entry = document_extractor_data
|
let entry = document_extractor_data
|
||||||
.field_distribution_delta
|
.field_distribution_delta
|
||||||
|
261 crates/milli/src/update/new/extract/geo/cellulite.rs (new file)
@ -0,0 +1,261 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
use std::mem;
use std::str::FromStr;

use bumpalo::Bump;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use cellulite::zerometry::ZerometryCodec;
use geo_types::Geometry;
use geojson::GeoJson;
use heed::{BytesEncode, RoTxn};
use zerometry::Zerometry;

use crate::update::new::channel::GeoJsonSender;
use crate::update::new::document::{Document, DocumentContext};
use crate::update::new::indexer::document_changes::Extractor;
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::thread_local::MostlySend;
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, Index, InternalError, Result, UserError};

pub struct GeoJsonExtractor {
    grenad_parameters: GrenadParameters,
}

impl GeoJsonExtractor {
    pub fn new(
        rtxn: &RoTxn,
        index: &Index,
        grenad_parameters: GrenadParameters,
    ) -> Result<Option<Self>> {
        if index.is_geojson_filtering_enabled(rtxn)? {
            Ok(Some(GeoJsonExtractor { grenad_parameters }))
        } else {
            Ok(None)
        }
    }
}

pub struct GeoJsonExtractorData<'extractor> {
    /// The set of documents ids that were removed. If a document sees its geo
    /// point being updated, we first put it in the deleted and then in the inserted.
    removed: bumpalo::collections::Vec<'extractor, DocumentId>,
    inserted: bumpalo::collections::Vec<'extractor, (DocumentId, &'extractor [u8])>,
    /// Contains a packed list of `ExtractedGeoPoint` of the inserted geo points
    /// data structures if we have spilled to disk.
    spilled_removed: Option<BufWriter<File>>,
    /// Contains a packed list of `ExtractedGeoPoint` of the inserted geo points
    /// data structures if we have spilled to disk.
    spilled_inserted: Option<BufWriter<File>>,
}

impl<'extractor> GeoJsonExtractorData<'extractor> {
    pub fn freeze(self) -> Result<FrozenGeoJsonExtractorData<'extractor>> {
        let GeoJsonExtractorData { removed, inserted, spilled_removed, spilled_inserted } = self;

        Ok(FrozenGeoJsonExtractorData {
            removed: removed.into_bump_slice(),
            inserted: inserted.into_bump_slice(),
            spilled_removed: spilled_removed
                .map(|bw| bw.into_inner().map(BufReader::new).map_err(|iie| iie.into_error()))
                .transpose()?,
            spilled_inserted: spilled_inserted
                .map(|bw| bw.into_inner().map(BufReader::new).map_err(|iie| iie.into_error()))
                .transpose()?,
        })
    }
}

unsafe impl MostlySend for GeoJsonExtractorData<'_> {}

pub struct FrozenGeoJsonExtractorData<'extractor> {
    pub removed: &'extractor [DocumentId],
    pub inserted: &'extractor [(DocumentId, &'extractor [u8])],
    pub spilled_removed: Option<BufReader<File>>,
    pub spilled_inserted: Option<BufReader<File>>,
}

impl FrozenGeoJsonExtractorData<'_> {
    pub fn iter_and_clear_removed(&mut self, channel: GeoJsonSender<'_, '_>) -> Result<()> {
        for docid in mem::take(&mut self.removed) {
            channel.delete_geojson(*docid).unwrap();
        }

        if let Some(mut spilled) = self.spilled_removed.take() {
|
||||||
|
spilled.rewind()?;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let docid = match spilled.read_u32::<BigEndian>() {
|
||||||
|
Ok(docid) => docid,
|
||||||
|
Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
|
||||||
|
Err(e) => return Err(InternalError::SerdeJson(serde_json::Error::io(e)).into()),
|
||||||
|
};
|
||||||
|
channel.delete_geojson(docid).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn iter_and_clear_inserted(&mut self, channel: GeoJsonSender<'_, '_>) -> Result<()> {
|
||||||
|
for (docid, _buf) in mem::take(&mut self.inserted) {
|
||||||
|
channel.send_geojson(*docid, _buf.to_vec()).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(mut spilled) = self.spilled_inserted.take() {
|
||||||
|
spilled.rewind()?;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let docid = match spilled.read_u32::<BigEndian>() {
|
||||||
|
Ok(docid) => docid,
|
||||||
|
Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
|
||||||
|
Err(e) => return Err(InternalError::SerdeJson(serde_json::Error::io(e)).into()),
|
||||||
|
};
|
||||||
|
let size = match spilled.read_u32::<BigEndian>() {
|
||||||
|
Ok(size) => size,
|
||||||
|
Err(e) => return Err(InternalError::SerdeJson(serde_json::Error::io(e)).into()),
|
||||||
|
};
|
||||||
|
let mut buf = vec![0; size as usize];
|
||||||
|
spilled
|
||||||
|
.read_exact(&mut buf)
|
||||||
|
.map_err(|e| InternalError::SerdeJson(serde_json::Error::io(e)))?;
|
||||||
|
channel.send_geojson(docid, buf).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'extractor> Extractor<'extractor> for GeoJsonExtractor {
|
||||||
|
type Data = RefCell<GeoJsonExtractorData<'extractor>>;
|
||||||
|
|
||||||
|
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
|
||||||
|
Ok(RefCell::new(GeoJsonExtractorData {
|
||||||
|
removed: bumpalo::collections::Vec::new_in(extractor_alloc),
|
||||||
|
inserted: bumpalo::collections::Vec::new_in(extractor_alloc),
|
||||||
|
spilled_inserted: None,
|
||||||
|
spilled_removed: None,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process<'doc>(
|
||||||
|
&'doc self,
|
||||||
|
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
|
||||||
|
context: &'doc DocumentContext<'doc, 'extractor, '_, '_, Self::Data>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let rtxn = &context.rtxn;
|
||||||
|
let index = context.index;
|
||||||
|
let max_memory = self.grenad_parameters.max_memory_by_thread();
|
||||||
|
let db_fields_ids_map = context.db_fields_ids_map;
|
||||||
|
let mut data_ref = context.data.borrow_mut_or_yield();
|
||||||
|
|
||||||
|
for change in changes {
|
||||||
|
if data_ref.spilled_removed.is_none()
|
||||||
|
&& max_memory.is_some_and(|mm| context.extractor_alloc.allocated_bytes() >= mm)
|
||||||
|
{
|
||||||
|
// We must spill as we allocated too much memory
|
||||||
|
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
|
||||||
|
data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
match change? {
|
||||||
|
DocumentChange::Deletion(deletion) => {
|
||||||
|
let docid = deletion.docid();
|
||||||
|
let current = deletion.current(rtxn, index, db_fields_ids_map)?;
|
||||||
|
|
||||||
|
if let Some(_geojson) = current.geojson_field()? {
|
||||||
|
match &mut data_ref.spilled_removed {
|
||||||
|
Some(file) => {
|
||||||
|
file.write_u32::<BigEndian>(docid)?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
data_ref.removed.push(docid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DocumentChange::Update(update) => {
|
||||||
|
let current = update.current(rtxn, index, db_fields_ids_map)?;
|
||||||
|
let docid = update.docid();
|
||||||
|
|
||||||
|
let current_geo = current.geojson_field()?;
|
||||||
|
|
||||||
|
let updated_geo =
|
||||||
|
update.merged(rtxn, index, db_fields_ids_map)?.geojson_field()?;
|
||||||
|
|
||||||
|
if current_geo.map(|c| c.get()) != updated_geo.map(|u| u.get()) {
|
||||||
|
// If the current and new geo points are different it means that
|
||||||
|
// we need to replace the current by the new point and therefore
|
||||||
|
// delete the current point from cellulite.
|
||||||
|
if let Some(_geojson) = current_geo {
|
||||||
|
match &mut data_ref.spilled_removed {
|
||||||
|
Some(file) => {
|
||||||
|
file.write_u32::<BigEndian>(docid)?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
data_ref.removed.push(docid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(geojson) = updated_geo {
|
||||||
|
let geojson =
|
||||||
|
GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
|
||||||
|
let mut geometry =
|
||||||
|
Geometry::try_from(geojson).map_err(UserError::from)?;
|
||||||
|
cellulite::densify_geom(&mut geometry);
|
||||||
|
|
||||||
|
let buf = ZerometryCodec::bytes_encode(&geometry).unwrap();
|
||||||
|
|
||||||
|
match &mut data_ref.spilled_inserted {
|
||||||
|
Some(file) => {
|
||||||
|
file.write_u32::<BigEndian>(docid)?;
|
||||||
|
file.write_u32::<BigEndian>(buf.len() as u32)?;
|
||||||
|
file.write_all(&buf)?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let mut bvec =
|
||||||
|
bumpalo::collections::Vec::new_in(context.extractor_alloc);
|
||||||
|
bvec.extend_from_slice(&buf);
|
||||||
|
data_ref.inserted.push((docid, bvec.into_bump_slice()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DocumentChange::Insertion(insertion) => {
|
||||||
|
let docid = insertion.docid();
|
||||||
|
let inserted_geo = insertion.inserted().geojson_field()?;
|
||||||
|
|
||||||
|
if let Some(geojson) = inserted_geo {
|
||||||
|
let geojson = GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
|
||||||
|
let mut geometry = Geometry::try_from(geojson).map_err(UserError::from)?;
|
||||||
|
cellulite::densify_geom(&mut geometry);
|
||||||
|
let mut bytes = Vec::new();
|
||||||
|
Zerometry::write_from_geometry(&mut bytes, &geometry)?;
|
||||||
|
|
||||||
|
match &mut data_ref.spilled_inserted {
|
||||||
|
Some(file) => {
|
||||||
|
file.write_u32::<BigEndian>(docid)?;
|
||||||
|
file.write_u32::<BigEndian>(bytes.len() as u32)?;
|
||||||
|
file.write_all(&bytes)?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let mut bvec =
|
||||||
|
bumpalo::collections::Vec::new_in(context.extractor_alloc);
|
||||||
|
bvec.extend_from_slice(&bytes);
|
||||||
|
data_ref.inserted.push((docid, bvec.into_bump_slice()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
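The new extractor keeps encoded geometries in bump-allocated buffers and, once the per-thread memory budget is exceeded, spills them to temporary files. The spilled "inserted" stream is a plain sequence of records: a big-endian u32 document id, a big-endian u32 payload length, then the payload (the Zerometry-encoded geometry); the "removed" stream stores only the document ids. A minimal, self-contained sketch of that record layout using the same `byteorder` calls (the payloads here are placeholders, not real Zerometry data):

    use std::io::{Cursor, ErrorKind, Read, Seek, Write};

    use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};

    // Sketch of the spill-file layout assumed above:
    // [docid: u32 BE][len: u32 BE][len payload bytes], repeated until EOF.
    fn main() -> std::io::Result<()> {
        let mut spill = Cursor::new(Vec::new());

        // Write two fake records (the payload would be Zerometry bytes in practice).
        for (docid, payload) in [(1u32, b"aaaa".as_slice()), (2, b"bbbbbb".as_slice())] {
            spill.write_u32::<BigEndian>(docid)?;
            spill.write_u32::<BigEndian>(payload.len() as u32)?;
            spill.write_all(payload)?;
        }

        // Read them back the same way the frozen extractor data does.
        spill.rewind()?;
        loop {
            let docid = match spill.read_u32::<BigEndian>() {
                Ok(docid) => docid,
                Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
                Err(e) => return Err(e),
            };
            let len = spill.read_u32::<BigEndian>()?;
            let mut payload = vec![0; len as usize];
            spill.read_exact(&mut payload)?;
            println!("docid {docid}: {} bytes", payload.len());
        }
        Ok(())
    }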
@@ -18,6 +18,8 @@ use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
 use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Result};
 
+pub mod cellulite;
+
 pub struct GeoExtractor {
     grenad_parameters: GrenadParameters,
 }
@@ -653,7 +653,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
 
         settings_delta.try_for_each_fragment_diff(
             session.embedder_name(),
-            |fragment_diff| {
+            |fragment_diff| -> Result<()> {
                 let extractor = RequestFragmentExtractor::new(fragment_diff.new, doc_alloc)
                     .ignore_errors();
                 let old = if full_reindex {
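The only change in this hunk is the explicit `-> Result<()>` annotation on the closure, presumably so that type inference can pin down the error type produced by `?` inside the closure body when the surrounding helper is generic over it. A small illustration of that situation, using hypothetical names and `ParseIntError` as the error type:

    // Minimal illustration (not Meilisearch code): the helper is generic over
    // the closure's error type, so without an explicit return-type annotation
    // on the closure, `?` inside its body cannot pick a concrete error type.
    fn try_for_each<F, E>(items: &[&str], mut f: F) -> Result<(), E>
    where
        F: FnMut(&str) -> Result<(), E>,
    {
        for item in items {
            f(*item)?;
        }
        Ok(())
    }

    fn main() {
        let result = try_for_each(&["1", "2", "x"], |s| -> Result<(), std::num::ParseIntError> {
            let n: u32 = s.parse()?;
            println!("parsed {n}");
            Ok(())
        });
        println!("{result:?}");
    }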
@@ -24,7 +24,7 @@ pub trait Extractor<'extractor>: Sync {
     fn process<'doc>(
         &'doc self,
         changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
-        context: &'doc DocumentContext<Self::Data>,
+        context: &'doc DocumentContext<'doc, 'extractor, '_, '_, Self::Data>,
     ) -> Result<()>;
 }
@@ -16,9 +16,10 @@ use super::settings_changes::settings_change_extract;
 use crate::documents::{FieldIdMapper, PrimaryKey};
 use crate::progress::{EmbedderStats, MergingWordCache};
 use crate::proximity::ProximityPrecision;
+use crate::update::new::extract::cellulite::GeoJsonExtractor;
 use crate::update::new::extract::EmbeddingExtractor;
 use crate::update::new::indexer::settings_changes::DocumentsIndentifiers;
-use crate::update::new::merger::merge_and_send_rtree;
+use crate::update::new::merger::{merge_and_send_cellulite, merge_and_send_rtree};
 use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases};
 use crate::update::settings::SettingsDelta;
 use crate::vector::db::{EmbedderInfo, IndexEmbeddingConfig};
@@ -317,6 +318,37 @@ where
                 &indexing_context.must_stop_processing,
             )?;
         }
 
+        'cellulite: {
+            let Some(extractor) =
+                GeoJsonExtractor::new(&rtxn, index, *indexing_context.grenad_parameters)?
+            else {
+                break 'cellulite;
+            };
+            let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
+
+            {
+                let span = tracing::trace_span!(target: "indexing::documents::extract", "cellulite");
+                let _entered = span.enter();
+
+                extract(
+                    document_changes,
+                    &extractor,
+                    indexing_context,
+                    extractor_allocs,
+                    &datastore,
+                    IndexingStep::WritingGeoJson,
+                )?;
+            }
+
+            merge_and_send_cellulite(
+                datastore,
+                &rtxn,
+                index,
+                extractor_sender.geojson(),
+                &indexing_context.must_stop_processing,
+            )?;
+        }
+
         indexing_context.progress.update_progress(IndexingStep::WaitingForDatabaseWrites);
         finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
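The new `'cellulite:` block uses a labeled block expression so the whole GeoJSON extraction step can be skipped with a single `break` when the extractor is disabled, instead of indenting the body under an `if let`. A minimal sketch of the pattern (hypothetical names, not the actual indexer types):

    // Minimal sketch (not Meilisearch code) of the labeled-block early-exit
    // pattern used above: when the optional extractor is absent we break out
    // of the labeled block and indexing simply continues.
    fn make_extractor(enabled: bool) -> Option<&'static str> {
        enabled.then_some("geojson-extractor")
    }

    fn main() {
        let enabled = false;

        'cellulite: {
            let Some(extractor) = make_extractor(enabled) else {
                break 'cellulite;
            };
            println!("running {extractor}");
        }

        println!("indexing continues either way");
    }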
@@ -187,6 +187,13 @@ where
         facet_field_ids_delta,
     )?;
 
+    indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
+    index.cellulite.build(
+        wtxn,
+        &indexing_context.must_stop_processing,
+        indexing_context.progress,
+    )?;
+
     indexing_context.progress.update_progress(IndexingStep::Finalizing);
 
     Ok(congestion) as Result<_>
@@ -317,6 +324,13 @@ where
     })
     .unwrap()?;
 
+    indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
+    index.cellulite.build(
+        wtxn,
+        &indexing_context.must_stop_processing,
+        indexing_context.progress,
+    )?;
+
     indexing_context.progress.update_progress(IndexingStep::Finalizing);
 
     Ok(congestion) as Result<_>
@@ -16,7 +16,7 @@ use crate::update::settings::InnerIndexSettings;
 use crate::vector::db::IndexEmbeddingConfig;
 use crate::vector::settings::EmbedderAction;
 use crate::vector::{Embedder, Embeddings, RuntimeEmbedders, VectorStore};
-use crate::{Error, Index, InternalError, Result, UserError};
+use crate::{DocumentId, Error, Index, InternalError, Result, UserError};
 
 pub fn write_to_db(
     mut writer_receiver: WriterBbqueueReceiver<'_>,
@@ -72,6 +72,14 @@ pub fn write_to_db(
                 let embedding = large_vector.read_embedding(*dimensions);
                 writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
             }
+            ReceiverAction::LargeGeoJson(LargeGeoJson { docid, geojson }) => {
+                // It cannot be a deletion because it's large. Deletions are always small
+                let geojson: &[u8] = &geojson;
+                index
+                    .cellulite
+                    .add_raw_zerometry(wtxn, docid, geojson)
+                    .map_err(InternalError::CelluliteError)?;
+            }
         }
 
         // Every time the is a message in the channel we search
@@ -263,6 +271,19 @@ pub fn write_from_bbqueue(
                 writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
             }
         }
+        EntryHeader::CelluliteItem(docid) => {
+            let frame = frame_with_header.frame();
+            let skip = EntryHeader::variant_size() + std::mem::size_of::<DocumentId>();
+            let geojson = &frame[skip..];
+
+            index
+                .cellulite
+                .add_raw_zerometry(wtxn, docid, geojson)
+                .map_err(InternalError::CelluliteError)?;
+        }
+        EntryHeader::CelluliteRemove(docid) => {
+            index.cellulite.delete(wtxn, docid).map_err(InternalError::CelluliteError)?;
+        }
     }
 }
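For large GeoJSON payloads the writer receives a dedicated action, while in-queue entries are sliced out of the frame after skipping the header variant and the u32 document id. A rough sketch of that kind of offset arithmetic; the 1-byte variant tag and the overall frame layout here are assumptions for illustration, not the real bbqueue format:

    use std::mem::size_of;

    // Hypothetical frame: a 1-byte variant tag, a u32 docid, then the payload.
    // The handler above computes the same kind of `skip` offset before slicing.
    type DocumentId = u32;

    fn main() {
        const VARIANT_SIZE: usize = 1; // stands in for EntryHeader::variant_size()

        let docid: DocumentId = 42;
        let mut frame = vec![0x02u8];
        frame.extend_from_slice(&docid.to_be_bytes());
        frame.extend_from_slice(b"zerometry-bytes");

        let skip = VARIANT_SIZE + size_of::<DocumentId>();
        let payload = &frame[skip..];
        println!("payload is {} bytes", payload.len());
    }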
@@ -13,6 +13,7 @@ use super::extract::{
     FacetKind, GeoExtractorData,
 };
 use crate::update::facet::new_incremental::FacetFieldIdChange;
+use crate::update::new::extract::cellulite::GeoJsonExtractorData;
 use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
 
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
@@ -62,6 +63,30 @@ where
     Ok(())
 }
 
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
+pub fn merge_and_send_cellulite<'extractor, MSP>(
+    datastore: impl IntoIterator<Item = RefCell<GeoJsonExtractorData<'extractor>>>,
+    _rtxn: &RoTxn,
+    _index: &Index,
+    geojson_sender: GeoJsonSender<'_, '_>,
+    must_stop_processing: &MSP,
+) -> Result<()>
+where
+    MSP: Fn() -> bool + Sync,
+{
+    for data in datastore {
+        if must_stop_processing() {
+            return Err(InternalError::AbortedIndexation.into());
+        }
+
+        let mut frozen = data.into_inner().freeze()?;
+        frozen.iter_and_clear_removed(geojson_sender)?;
+        frozen.iter_and_clear_inserted(geojson_sender)?;
+    }
+
+    Ok(())
+}
+
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
 pub fn merge_and_send_docids<MSP, D>(
     mut caches: Vec<BalancedCaches<'_>>,
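`merge_and_send_cellulite` drains the per-thread accumulators once extraction has finished: each worker's data sits behind a `RefCell`, and since no borrows remain at that point, `into_inner()` hands back ownership so the frozen data can be streamed to the writer. A minimal sketch of that drain step, with a plain `Vec` standing in for the thread-local datastore:

    use std::cell::RefCell;

    // Minimal sketch (not Meilisearch code): drain per-worker accumulators
    // once extraction is done. No borrow is alive anymore, so into_inner()
    // takes ownership of each worker's buffer without a runtime borrow error.
    fn main() {
        let datastore = vec![
            RefCell::new(vec![(1u32, "payload-a")]),
            RefCell::new(vec![(2u32, "payload-b"), (3u32, "payload-c")]),
        ];

        for per_worker in datastore {
            let entries = per_worker.into_inner();
            for (docid, payload) in entries {
                println!("send {docid} -> {payload}");
            }
        }
    }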
@@ -1,6 +1,7 @@
 use heed::RwTxn;
 
 use super::document::{Document, DocumentFromDb};
+use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
 use crate::progress::{self, AtomicSubStep, Progress};
 use crate::{FieldDistribution, Index, Result};
 
@@ -22,8 +23,13 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre
         let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map)? else {
             continue;
         };
-        let geo_iter = document.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
-        for res in document.iter_top_level_fields().chain(geo_iter) {
+        let geo_iter =
+            document.geo_field().transpose().map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
+        let geojson_iter = document
+            .geojson_field()
+            .transpose()
+            .map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
+        for res in document.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
             let (field_name, _) = res?;
             if let Some(count) = distribution.get_mut(field_name) {
                 *count += 1;
@@ -12,11 +12,13 @@ make_enum_progress! {
         MergingWordCaches,
         MergingWordProximity,
         WritingGeoPoints,
+        WritingGeoJson,
         WaitingForDatabaseWrites,
         WaitingForExtractors,
         WritingEmbeddingsToDatabase,
         PostProcessingFacets,
         PostProcessingWords,
+        BuildingGeoJson,
         Finalizing,
     }
 }
|
@ -193,7 +193,7 @@ impl WordPrefixIntegerDocids {
|
|||||||
// We access this HashMap in parallel to compute the *union* of all
|
// We access this HashMap in parallel to compute the *union* of all
|
||||||
// of them and *serialize* them into files. There is one file by CPU.
|
// of them and *serialize* them into files. There is one file by CPU.
|
||||||
let local_entries = ThreadLocal::with_capacity(rayon::current_num_threads());
|
let local_entries = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||||
prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| {
|
prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| -> Result<()> {
|
||||||
let refcell = local_entries.get_or(|| {
|
let refcell = local_entries.get_or(|| {
|
||||||
let file = BufWriter::new(spooled_tempfile(
|
let file = BufWriter::new(spooled_tempfile(
|
||||||
self.max_memory_by_thread.unwrap_or(usize::MAX),
|
self.max_memory_by_thread.unwrap_or(usize::MAX),
|
||||||
|
@@ -15,7 +15,7 @@ use super::del_add::{DelAdd, DelAddOperation};
 use super::index_documents::{IndexDocumentsConfig, Transform};
 use super::{ChatSettings, IndexerConfig};
 use crate::attribute_patterns::PatternMatch;
-use crate::constants::RESERVED_GEO_FIELD_NAME;
+use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
 use crate::criterion::Criterion;
 use crate::disabled_typos_terms::DisabledTyposTerms;
 use crate::error::UserError::{self, InvalidChatSettingsDocumentTemplateMaxBytes};
@@ -1862,7 +1862,10 @@ impl InnerIndexSettingsDiff {
     }
 
     pub fn any_reindexing_needed(&self) -> bool {
-        self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
+        self.reindex_searchable()
+            || self.reindex_facets()
+            || self.reindex_vectors()
+            || self.reindex_geojson()
     }
 
     pub fn reindex_searchable(&self) -> bool {
@@ -1971,6 +1974,11 @@ impl InnerIndexSettingsDiff {
         !self.embedding_config_updates.is_empty()
     }
 
+    pub fn reindex_geojson(&self) -> bool {
+        self.old.filterable_attributes_rules.iter().any(|rule| rule.has_geojson())
+            != self.new.filterable_attributes_rules.iter().any(|rule| rule.has_geojson())
+    }
+
     pub fn settings_update_only(&self) -> bool {
         self.settings_update_only
     }
@@ -1979,6 +1987,11 @@ impl InnerIndexSettingsDiff {
         self.old.geo_fields_ids != self.new.geo_fields_ids
             || (!self.settings_update_only && self.new.geo_fields_ids.is_some())
     }
+
+    pub fn run_geojson_indexing(&self) -> bool {
+        self.old.geojson_fid != self.new.geojson_fid
+            || (!self.settings_update_only && self.new.geojson_fid.is_some())
+    }
 }
 
 #[derive(Clone)]
@@ -1999,6 +2012,7 @@ pub(crate) struct InnerIndexSettings {
     pub runtime_embedders: RuntimeEmbedders,
     pub embedder_category_id: HashMap<String, u8>,
     pub geo_fields_ids: Option<(FieldId, FieldId)>,
+    pub geojson_fid: Option<FieldId>,
     pub prefix_search: PrefixSearch,
     pub facet_search: bool,
 }
@@ -2038,6 +2052,7 @@ impl InnerIndexSettings {
             }
             _ => None,
         };
+        let geo_json_fid = fields_ids_map.id(RESERVED_GEOJSON_FIELD_NAME);
         let localized_attributes_rules =
             index.localized_attributes_rules(rtxn)?.unwrap_or_default();
         let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
@@ -2066,6 +2081,7 @@ impl InnerIndexSettings {
             runtime_embedders,
             embedder_category_id,
             geo_fields_ids,
+            geojson_fid: geo_json_fid,
             prefix_search,
             facet_search,
             disabled_typos_terms,
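`reindex_geojson` compares whether the old and the new filterable-attribute rules reference GeoJSON at all; since both sides are booleans, `!=` acts as XOR, so a reindex is only triggered when the capability flips in either direction. A tiny sketch of that decision:

    // Minimal sketch (not Meilisearch code): comparing two booleans with `!=`
    // behaves like XOR, so a change of state in either direction triggers work.
    fn needs_geojson_reindex(old_has_geojson: bool, new_has_geojson: bool) -> bool {
        old_has_geojson != new_has_geojson
    }

    fn main() {
        assert!(needs_geojson_reindex(false, true)); // feature enabled -> reindex
        assert!(needs_geojson_reindex(true, false)); // feature disabled -> reindex
        assert!(!needs_geojson_reindex(true, true)); // unchanged -> no reindex
        println!("ok");
    }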