Add "did you mean" message

This commit is contained in:
Mubelotix
2025-08-13 13:00:38 +02:00
parent f6559258ce
commit 666ae1a3e7
6 changed files with 58 additions and 19 deletions

1
Cargo.lock generated
View File

@ -2031,6 +2031,7 @@ name = "filter-parser"
version = "1.16.0"
dependencies = [
"insta",
"levenshtein_automata",
"nom",
"nom_locate",
"unescaper",

View File

@ -15,6 +15,7 @@ license.workspace = true
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.6"
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
[dev-dependencies]
# fixed version due to format breakages in v1.40

View File

@ -19,6 +19,7 @@ use Condition::*;
use crate::error::IResultExt;
use crate::value::parse_vector_value;
use crate::Error;
use crate::ErrorKind;
use crate::VectorFilter;
use crate::{parse_value, FilterCondition, IResult, Span, Token};
@ -136,10 +137,7 @@ fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>
// We could use nom's `cut` but it's better to be explicit about the errors
if let Ok((_, space)) = tag::<_, _, ()>(" ")(input) {
return Err(crate::Error::new_failure_from_kind(
space,
ErrorKind::VectorFilterMissingEmbedder,
));
return Err(crate::Error::failure_from_kind(space, ErrorKind::VectorFilterMissingEmbedder));
}
let (input, embedder_name) =
@ -159,6 +157,16 @@ fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>
value(VectorFilter::None, nom::combinator::success("")),
))(input)?;
if let Ok((input, point)) = tag::<_, _, ()>(".")(input) {
let opt_value = parse_vector_value(input).ok().map(|(_, v)| v);
let value = opt_value
.as_ref()
.map(|v| v.original_span().to_string())
.unwrap_or_else(|| point.to_string());
let context = opt_value.map(|v| v.original_span()).unwrap_or(point);
return Err(Error::failure_from_kind(context, ErrorKind::VectorFilterUnknownSuffix(value)));
}
let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?;
Ok((input, (Token::from(fid), Some(embedder_name), filter)))
@ -181,7 +189,7 @@ pub fn parse_vectors_exists(input: Span) -> IResult<FilterCondition> {
));
}
Err(crate::Error::new_failure_from_kind(input, ErrorKind::VectorFilterOperation))
Err(crate::Error::failure_from_kind(input, ErrorKind::VectorFilterOperation))
}
/// contains = value "CONTAINS" value

View File

@ -54,7 +54,7 @@ impl<'a, T> IResultExt<'a> for IResult<'a, T> {
nom::Err::Error(e) => *e.context(),
nom::Err::Failure(e) => *e.context(),
};
Error::new_failure_from_kind(input, kind)
Error::failure_from_kind(input, kind)
})
}
}
@ -83,6 +83,7 @@ pub enum ErrorKind<'a> {
VectorFilterInvalidEmbedder,
VectorFilterMissingFragment,
VectorFilterInvalidFragment,
VectorFilterUnknownSuffix(String),
VectorFilterOperation,
InvalidPrimary,
InvalidEscapedNumber,
@ -114,7 +115,7 @@ impl<'a> Error<'a> {
Self { context, kind }
}
pub fn new_failure_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> nom::Err<Self> {
pub fn failure_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> nom::Err<Self> {
nom::Err::Failure(Self::new_from_kind(context, kind))
}
@ -155,6 +156,20 @@ impl Display for Error<'_> {
// first line being the diagnostic and the second line being the incriminated filter.
let escaped_input = input.escape_debug();
fn key_suggestion<'a>(key: &str, keys: &[&'a str]) -> Option<&'a str> {
let typos =
levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true).build_dfa(key);
for key in keys.iter() {
match typos.eval(key) {
levenshtein_automata::Distance::Exact(_) => {
return Some(key);
}
levenshtein_automata::Distance::AtLeast(_) => continue,
}
}
None
}
match &self.kind {
ErrorKind::ExpectedValue(_) if input.trim().is_empty() => {
writeln!(f, "Was expecting a value but instead got nothing.")?
@ -199,6 +214,16 @@ impl Display for Error<'_> {
ErrorKind::VectorFilterLeftover => {
writeln!(f, "The vector filter has leftover tokens.")?
}
ErrorKind::VectorFilterUnknownSuffix(value) if value.as_str() == "." => {
writeln!(f, "Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.")?;
}
ErrorKind::VectorFilterUnknownSuffix(value) => {
if let Some(suggestion) = key_suggestion(value, &["fragments", "userProvided", "documentTemplate", "regenerate"]) {
writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`. Did you mean `{suggestion}`?")?;
} else {
writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`.")?;
}
}
ErrorKind::VectorFilterInvalidFragment => {
writeln!(f, "The vector filter's fragment is invalid.")?
}

View File

@ -437,7 +437,7 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
let (input, args) = parsed?;
if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
return Err(Error::new_failure_from_kind(input, ErrorKind::GeoBoundingBox));
return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
}
let res = FilterCondition::GeoBoundingBox {
@ -458,7 +458,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
// if we succeeded we still return a `Failure` because geoPoints are not allowed
Err(Error::new_failure_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))
Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))
}
/// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
@ -472,7 +472,7 @@ fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
// if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
Err(Error::new_failure_from_kind(input, ErrorKind::ReservedGeo("_geoDistance")))
Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoDistance")))
}
/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
@ -486,7 +486,7 @@ fn parse_geo(input: Span) -> IResult<FilterCondition> {
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
// if we succeeded we still return a `Failure` because `_geo` filter is not allowed
Err(Error::new_failure_from_kind(input, ErrorKind::ReservedGeo("_geo")))
Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geo")))
}
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
@ -1014,8 +1014,8 @@ pub mod tests {
10:30 _vectors .embedderName EXISTS
");
insta::assert_snapshot!(p(r#"_vectors.embedderName. EXISTS"#), @r"
The vector filter has leftover tokens.
22:30 _vectors.embedderName. EXISTS
Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
22:23 _vectors.embedderName. EXISTS
");
insta::assert_snapshot!(p(r#"_vectors."embedderName EXISTS"#), @r#"
The vector filter's embedder is invalid.
@ -1026,8 +1026,8 @@ pub mod tests {
23:31 _vectors."embedderNam"e EXISTS
"#);
insta::assert_snapshot!(p(r#"_vectors.embedderName.documentTemplate. EXISTS"#), @r"
The vector filter has leftover tokens.
39:47 _vectors.embedderName.documentTemplate. EXISTS
Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
39:40 _vectors.embedderName.documentTemplate. EXISTS
");
insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments EXISTS"#), @r"
The vector filter is missing a fragment name.
@ -1053,6 +1053,10 @@ pub mod tests {
Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
23:45 _vectors.embedderName .fragments.test EXISTS
");
insta::assert_snapshot!(p(r#"_vectors.embedderName.fargments.test EXISTS"#), @r"
Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `fargments`. Did you mean `fragments`?
23:32 _vectors.embedderName.fargments.test EXISTS
");
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.

View File

@ -132,21 +132,21 @@ pub fn parse_value(input: Span) -> IResult<Token> {
}
match parse_geo_radius(input) {
Ok(_) => return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoRadius)),
Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius)),
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
Err(e) if e.is_failure() => {
return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoRadius))
return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius))
}
_ => (),
}
match parse_geo_bounding_box(input) {
Ok(_) => return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox)),
Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox)),
// if we encountered a failure it means the user badly wrote a _geoBoundingBox filter.
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
Err(e) if e.is_failure() => {
return Err(Error::new_failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox))
return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox))
}
_ => (),
}