Add "did you mean" message

This commit is contained in:
Mubelotix
2025-08-13 13:00:38 +02:00
parent f6559258ce
commit 666ae1a3e7
6 changed files with 58 additions and 19 deletions

1
Cargo.lock generated
View File

@ -2031,6 +2031,7 @@ name = "filter-parser"
version = "1.16.0" version = "1.16.0"
dependencies = [ dependencies = [
"insta", "insta",
"levenshtein_automata",
"nom", "nom",
"nom_locate", "nom_locate",
"unescaper", "unescaper",

View File

@ -15,6 +15,7 @@ license.workspace = true
nom = "7.1.3" nom = "7.1.3"
nom_locate = "4.2.0" nom_locate = "4.2.0"
unescaper = "0.1.6" unescaper = "0.1.6"
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
[dev-dependencies] [dev-dependencies]
# fixed version due to format breakages in v1.40 # fixed version due to format breakages in v1.40

View File

@ -19,6 +19,7 @@ use Condition::*;
use crate::error::IResultExt; use crate::error::IResultExt;
use crate::value::parse_vector_value; use crate::value::parse_vector_value;
use crate::Error;
use crate::ErrorKind; use crate::ErrorKind;
use crate::VectorFilter; use crate::VectorFilter;
use crate::{parse_value, FilterCondition, IResult, Span, Token}; use crate::{parse_value, FilterCondition, IResult, Span, Token};
@ -136,10 +137,7 @@ fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>
// We could use nom's `cut` but it's better to be explicit about the errors // We could use nom's `cut` but it's better to be explicit about the errors
if let Ok((_, space)) = tag::<_, _, ()>(" ")(input) { if let Ok((_, space)) = tag::<_, _, ()>(" ")(input) {
return Err(crate::Error::new_failure_from_kind( return Err(crate::Error::failure_from_kind(space, ErrorKind::VectorFilterMissingEmbedder));
space,
ErrorKind::VectorFilterMissingEmbedder,
));
} }
let (input, embedder_name) = let (input, embedder_name) =
@ -159,6 +157,16 @@ fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>
value(VectorFilter::None, nom::combinator::success("")), value(VectorFilter::None, nom::combinator::success("")),
))(input)?; ))(input)?;
if let Ok((input, point)) = tag::<_, _, ()>(".")(input) {
let opt_value = parse_vector_value(input).ok().map(|(_, v)| v);
let value = opt_value
.as_ref()
.map(|v| v.original_span().to_string())
.unwrap_or_else(|| point.to_string());
let context = opt_value.map(|v| v.original_span()).unwrap_or(point);
return Err(Error::failure_from_kind(context, ErrorKind::VectorFilterUnknownSuffix(value)));
}
let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?; let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?;
Ok((input, (Token::from(fid), Some(embedder_name), filter))) Ok((input, (Token::from(fid), Some(embedder_name), filter)))
@ -181,7 +189,7 @@ pub fn parse_vectors_exists(input: Span) -> IResult<FilterCondition> {
)); ));
} }
Err(crate::Error::new_failure_from_kind(input, ErrorKind::VectorFilterOperation)) Err(crate::Error::failure_from_kind(input, ErrorKind::VectorFilterOperation))
} }
/// contains = value "CONTAINS" value /// contains = value "CONTAINS" value

View File

@ -54,7 +54,7 @@ impl<'a, T> IResultExt<'a> for IResult<'a, T> {
nom::Err::Error(e) => *e.context(), nom::Err::Error(e) => *e.context(),
nom::Err::Failure(e) => *e.context(), nom::Err::Failure(e) => *e.context(),
}; };
Error::new_failure_from_kind(input, kind) Error::failure_from_kind(input, kind)
}) })
} }
} }
@ -83,6 +83,7 @@ pub enum ErrorKind<'a> {
VectorFilterInvalidEmbedder, VectorFilterInvalidEmbedder,
VectorFilterMissingFragment, VectorFilterMissingFragment,
VectorFilterInvalidFragment, VectorFilterInvalidFragment,
VectorFilterUnknownSuffix(String),
VectorFilterOperation, VectorFilterOperation,
InvalidPrimary, InvalidPrimary,
InvalidEscapedNumber, InvalidEscapedNumber,
@ -114,7 +115,7 @@ impl<'a> Error<'a> {
Self { context, kind } Self { context, kind }
} }
pub fn new_failure_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> nom::Err<Self> { pub fn failure_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> nom::Err<Self> {
nom::Err::Failure(Self::new_from_kind(context, kind)) nom::Err::Failure(Self::new_from_kind(context, kind))
} }
@ -155,6 +156,20 @@ impl Display for Error<'_> {
// first line being the diagnostic and the second line being the incriminated filter. // first line being the diagnostic and the second line being the incriminated filter.
let escaped_input = input.escape_debug(); let escaped_input = input.escape_debug();
/// Returns the entry of `keys` that most closely resembles `key`, if any.
///
/// A candidate qualifies when the Levenshtein automaton built from `key`
/// (maximum distance 2, second builder argument set to `true` so that a
/// transposition counts as a single edit) reports an exact distance for it.
/// Among qualifying candidates the one with the smallest distance wins; ties
/// go to the candidate listed first, so a single match behaves exactly like a
/// first-match search.
///
/// Returns `None` when no candidate is within the allowed distance.
fn key_suggestion<'a>(key: &str, keys: &[&'a str]) -> Option<&'a str> {
    let typos = levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true).build_dfa(key);
    keys.iter()
        .copied()
        .filter_map(|candidate| match typos.eval(candidate) {
            // Within the budget: keep the distance so the closest candidate can be picked.
            levenshtein_automata::Distance::Exact(distance) => Some((distance, candidate)),
            // Too far from what the user typed to be a helpful suggestion.
            levenshtein_automata::Distance::AtLeast(_) => None,
        })
        // `min_by_key` returns the first minimum, keeping tie-breaking order-stable.
        .min_by_key(|&(distance, _)| distance)
        .map(|(_, candidate)| candidate)
}
match &self.kind { match &self.kind {
ErrorKind::ExpectedValue(_) if input.trim().is_empty() => { ErrorKind::ExpectedValue(_) if input.trim().is_empty() => {
writeln!(f, "Was expecting a value but instead got nothing.")? writeln!(f, "Was expecting a value but instead got nothing.")?
@ -199,6 +214,16 @@ impl Display for Error<'_> {
ErrorKind::VectorFilterLeftover => { ErrorKind::VectorFilterLeftover => {
writeln!(f, "The vector filter has leftover tokens.")? writeln!(f, "The vector filter has leftover tokens.")?
} }
ErrorKind::VectorFilterUnknownSuffix(value) if value.as_str() == "." => {
writeln!(f, "Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.")?;
}
ErrorKind::VectorFilterUnknownSuffix(value) => {
if let Some(suggestion) = key_suggestion(value, &["fragments", "userProvided", "documentTemplate", "regenerate"]) {
writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`. Did you mean `{suggestion}`?")?;
} else {
writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`.")?;
}
}
ErrorKind::VectorFilterInvalidFragment => { ErrorKind::VectorFilterInvalidFragment => {
writeln!(f, "The vector filter's fragment is invalid.")? writeln!(f, "The vector filter's fragment is invalid.")?
} }

View File

@ -437,7 +437,7 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
let (input, args) = parsed?; let (input, args) = parsed?;
if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 { if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
return Err(Error::new_failure_from_kind(input, ErrorKind::GeoBoundingBox)); return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
} }
let res = FilterCondition::GeoBoundingBox { let res = FilterCondition::GeoBoundingBox {
@ -458,7 +458,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
))(input) ))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?; .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
// if we succeeded we still return a `Failure` because geoPoints are not allowed // if we succeeded we still return a `Failure` because geoPoints are not allowed
Err(Error::new_failure_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))) Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))
} }
/// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float) /// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
@ -472,7 +472,7 @@ fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
))(input) ))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?; .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
// if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed // if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
Err(Error::new_failure_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))) Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoDistance")))
} }
/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float) /// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
@ -486,7 +486,7 @@ fn parse_geo(input: Span) -> IResult<FilterCondition> {
))(input) ))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?; .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
// if we succeeded we still return a `Failure` because `_geo` filter is not allowed // if we succeeded we still return a `Failure` because `_geo` filter is not allowed
Err(Error::new_failure_from_kind(input, ErrorKind::ReservedGeo("_geo"))) Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geo")))
} }
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> { fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
@ -1014,8 +1014,8 @@ pub mod tests {
10:30 _vectors .embedderName EXISTS 10:30 _vectors .embedderName EXISTS
"); ");
insta::assert_snapshot!(p(r#"_vectors.embedderName. EXISTS"#), @r" insta::assert_snapshot!(p(r#"_vectors.embedderName. EXISTS"#), @r"
The vector filter has leftover tokens. Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
22:30 _vectors.embedderName. EXISTS 22:23 _vectors.embedderName. EXISTS
"); ");
insta::assert_snapshot!(p(r#"_vectors."embedderName EXISTS"#), @r#" insta::assert_snapshot!(p(r#"_vectors."embedderName EXISTS"#), @r#"
The vector filter's embedder is invalid. The vector filter's embedder is invalid.
@ -1026,8 +1026,8 @@ pub mod tests {
23:31 _vectors."embedderNam"e EXISTS 23:31 _vectors."embedderNam"e EXISTS
"#); "#);
insta::assert_snapshot!(p(r#"_vectors.embedderName.documentTemplate. EXISTS"#), @r" insta::assert_snapshot!(p(r#"_vectors.embedderName.documentTemplate. EXISTS"#), @r"
The vector filter has leftover tokens. Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
39:47 _vectors.embedderName.documentTemplate. EXISTS 39:40 _vectors.embedderName.documentTemplate. EXISTS
"); ");
insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments EXISTS"#), @r" insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments EXISTS"#), @r"
The vector filter is missing a fragment name. The vector filter is missing a fragment name.
@ -1053,6 +1053,10 @@ pub mod tests {
Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter. Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
23:45 _vectors.embedderName .fragments.test EXISTS 23:45 _vectors.embedderName .fragments.test EXISTS
"); ");
insta::assert_snapshot!(p(r#"_vectors.embedderName.fargments.test EXISTS"#), @r"
Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `fargments`. Did you mean `fragments`?
23:32 _vectors.embedderName.fargments.test EXISTS
");
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###" insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.

View File

@ -132,21 +132,21 @@ pub fn parse_value(input: Span) -> IResult<Token> {
} }
match parse_geo_radius(input) { match parse_geo_radius(input) {
Ok(_) => return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoRadius)), Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius)),
// if we encountered a failure it means the user badly wrote a _geoRadius filter. // if we encountered a failure it means the user badly wrote a _geoRadius filter.
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value. // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
Err(e) if e.is_failure() => { Err(e) if e.is_failure() => {
return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoRadius)) return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius))
} }
_ => (), _ => (),
} }
match parse_geo_bounding_box(input) { match parse_geo_bounding_box(input) {
Ok(_) => return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox)), Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox)),
// if we encountered a failure it means the user badly wrote a _geoBoundingBox filter. // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter.
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value. // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
Err(e) if e.is_failure() => { Err(e) if e.is_failure() => {
return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox)) return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox))
} }
_ => (), _ => (),
} }