improve the error diagnostic when parsing values

This commit is contained in:
Irevoire
2021-11-08 15:30:26 +01:00
parent 7483c7513a
commit 959ca66125
3 changed files with 56 additions and 8 deletions

View File

@@ -67,6 +67,10 @@ impl<'a> Error<'a> {
&self.kind &self.kind
} }
pub fn context(&self) -> &Span<'a> {
&self.context
}
pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self { pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self {
Self { context, kind } Self { context, kind }
} }

View File

@@ -551,6 +551,7 @@ pub mod tests {
("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."), ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."),
("channel = ", "Was expecting a value but instead got nothing."), ("channel = ", "Was expecting a value but instead got nothing."),
("channel = 🐻", "Was expecting a value but instead got `🐻`."), ("channel = 🐻", "Was expecting a value but instead got `🐻`."),
("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."),
("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."), ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."),
("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."), ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."),
("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."),

View File

@@ -9,7 +9,10 @@ use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span,
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
pub fn parse_value(input: Span) -> IResult<Token> { pub fn parse_value(input: Span) -> IResult<Token> {
// before anything we want to check if the user is misusing a geo expression // to get better diagnostic message we are going to strip the left whitespaces from the input right now
let (input, _) = take_while(char::is_whitespace)(input)?;
// then, we want to check if the user is misusing a geo expression
let err = parse_geo_point(input).unwrap_err(); let err = parse_geo_point(input).unwrap_err();
if err.is_failure() { if err.is_failure() {
return Err(err); return Err(err);
@@ -29,23 +32,30 @@ pub fn parse_value(input: Span) -> IResult<Token> {
// doubleQuoted = "\"" (word | spaces)* "\"" // doubleQuoted = "\"" (word | spaces)* "\""
let double_quoted = |input| take_till(|c: char| c == '"')(input); let double_quoted = |input| take_till(|c: char| c == '"')(input);
// word = (alphanumeric | _ | - | .)+ // word = (alphanumeric | _ | - | .)+
let word = |input| take_while1(is_key_component)(input); let word = take_while1(is_key_component);
// this parser is only used when an error is encountered and it parse the
// largest string possible that do not contain any β€œlanguage” syntax.
// If we try to parse `name = πŸ¦€ AND language = rust` we want to return an
// error saying we could not parse `πŸ¦€`. Not that no value were found or that
// we could note parse `πŸ¦€ AND language = rust`.
// we want to remove the space before entering the alt because if we don't, // we want to remove the space before entering the alt because if we don't,
// when we create the errors from the output of the alt we have spaces everywhere // when we create the errors from the output of the alt we have spaces everywhere
let (input, _) = take_while(char::is_whitespace)(input)?; let error_word = take_till::<_, _, Error>(is_syntax_component);
terminated( terminated(
alt(( alt((
delimited(char('\''), simple_quoted, cut(char('\''))), delimited(char('\''), cut(simple_quoted), cut(char('\''))),
delimited(char('"'), double_quoted, cut(char('"'))), delimited(char('"'), cut(double_quoted), cut(char('"'))),
word, word,
)), )),
multispace0, multispace0,
)(input) )(input)
.map(|(s, t)| (s, t.into())) .map(|(s, t)| (s, t.into()))
// if we found nothing in the alt it means the user did not input any value // if we found nothing in the alt it means the user specified something that was not recognized as a value
.map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::ExpectedValue))) .map_err(|e: nom::Err<Error>| {
e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
})
// if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
.map_err(|e| { .map_err(|e| {
e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))) e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())))
@@ -56,8 +66,14 @@ fn is_key_component(c: char) -> bool {
c.is_alphanumeric() || ['_', '-', '.'].contains(&c) c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
} }
fn is_syntax_component(c: char) -> bool {
c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c)
}
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod test {
use nom::Finish;
use super::*; use super::*;
use crate::tests::rtok; use crate::tests::rtok;
@@ -82,6 +98,7 @@ pub mod tests {
("\" some spaces \"", rtok("\"", " some spaces ")), ("\" some spaces \"", rtok("\"", " some spaces ")),
("\"cha'nnel\"", rtok("'", "cha'nnel")), ("\"cha'nnel\"", rtok("'", "cha'nnel")),
("\"cha'nnel\"", rtok("'", "cha'nnel")), ("\"cha'nnel\"", rtok("'", "cha'nnel")),
("I'm tamo", rtok("'m tamo", "I")),
]; ];
for (input, expected) in test_case { for (input, expected) in test_case {
@@ -98,4 +115,30 @@ pub mod tests {
assert_eq!(value, expected, "Filter `{}` failed.", input); assert_eq!(value, expected, "Filter `{}` failed.", input);
} }
} }
#[test]
fn diagnostic() {
let test_case = [
("πŸ¦€", "πŸ¦€"),
(" πŸ¦€", "πŸ¦€"),
("πŸ¦€ AND crab = truc", "πŸ¦€"),
("πŸ¦€_in_name", "πŸ¦€_in_name"),
(" (name = ...", ""),
];
for (input, expected) in test_case {
let input = Span::new_extra(input, input);
let result = parse_value(input);
assert!(
result.is_err(),
"Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
expected,
result.unwrap()
);
// get the inner string referenced in the error
let value = *result.finish().unwrap_err().context().fragment();
assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
}
}
} }