mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Handle the escapes of quote in the filters
This commit is contained in:
		| @@ -56,6 +56,7 @@ pub enum ErrorKind<'a> { | ||||
|     InvalidPrimary, | ||||
|     ExpectedEof, | ||||
|     ExpectedValue, | ||||
|     MalformedValue, | ||||
|     MissingClosingDelimiter(char), | ||||
|     Char(char), | ||||
|     InternalError(error::ErrorKind), | ||||
| @@ -82,7 +83,7 @@ impl<'a> Error<'a> { | ||||
|     pub fn char(self) -> char { | ||||
|         match self.kind { | ||||
|             ErrorKind::Char(c) => c, | ||||
|             _ => panic!("Internal filter parser error"), | ||||
|             error => panic!("Internal filter parser error: {:?}", error), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -117,6 +118,9 @@ impl<'a> Display for Error<'a> { | ||||
|             ErrorKind::ExpectedValue if input.trim().is_empty() => { | ||||
|                 writeln!(f, "Was expecting a value but instead got nothing.")? | ||||
|             } | ||||
|             ErrorKind::MalformedValue => { | ||||
|                 writeln!(f, "Malformed value: `{}`.", escaped_input)? | ||||
|             } | ||||
|             ErrorKind::MissingClosingDelimiter(c) => { | ||||
|                 writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? | ||||
|             } | ||||
|   | ||||
| @@ -62,29 +62,39 @@ pub type Span<'a> = LocatedSpan<&'a str, &'a str>; | ||||
| type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>; | ||||
|  | ||||
| #[derive(Debug, Clone, Eq)] | ||||
| pub struct Token<'a>(Span<'a>); | ||||
| pub struct Token<'a> { | ||||
|     /// The token as it appears in the original input; it should be used whenever possible. | ||||
|     span: Span<'a>, | ||||
|     /// If you need to modify the original input you can use the `value` field | ||||
|     /// to store your modified input. | ||||
|     value: Option<String>, | ||||
| } | ||||
|  | ||||
| impl<'a> Deref for Token<'a> { | ||||
|     type Target = &'a str; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|         &self.span | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> PartialEq for Token<'a> { | ||||
|     fn eq(&self, other: &Self) -> bool { | ||||
|         self.0.fragment() == other.0.fragment() | ||||
|         self.span.fragment() == other.span.fragment() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> Token<'a> { | ||||
|     pub fn new(position: Span<'a>) -> Self { | ||||
|         Self(position) | ||||
|     pub fn new(span: Span<'a>, value: Option<String>) -> Self { | ||||
|         Self { span, value } | ||||
|     } | ||||
|  | ||||
|     pub fn value(&self) -> &str { | ||||
|         self.value.as_ref().map_or(&self.span, |value| value) | ||||
|     } | ||||
|  | ||||
|     pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> { | ||||
|         Error::new_from_external(self.0, error) | ||||
|         Error::new_from_external(self.span, error) | ||||
|     } | ||||
|  | ||||
|     pub fn parse<T>(&self) -> Result<T, Error> | ||||
| @@ -92,13 +102,13 @@ impl<'a> Token<'a> { | ||||
|         T: FromStr, | ||||
|         T::Err: std::error::Error, | ||||
|     { | ||||
|         self.0.parse().map_err(|e| self.as_external_error(e)) | ||||
|         self.span.parse().map_err(|e| self.as_external_error(e)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> From<Span<'a>> for Token<'a> { | ||||
|     fn from(span: Span<'a>) -> Self { | ||||
|         Self(span) | ||||
|         Self { span, value: None } | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -7,8 +7,54 @@ use nom::sequence::{delimited, terminated}; | ||||
| use crate::error::NomErrorExt; | ||||
| use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; | ||||
|  | ||||
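| /// This function goes through all the characters in the [Span] and looks for escaped occurrences of `char_to_escape` (a `\` immediately followed by it). | ||||
| /// It generates a new string in which each of these escaped occurrences is replaced by the bare character; any other `\` is left untouched. | ||||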
| fn unescape(buf: Span, char_to_escape: char) -> String { | ||||
|     let to_escape = format!("\\{}", char_to_escape); | ||||
|     buf.replace(&to_escape, &char_to_escape.to_string()) | ||||
| } | ||||
|  | ||||
| use nom::{InputIter, InputLength, InputTake, Slice}; | ||||
|  | ||||
| /// Parse a value in quotes. If it encounters an escaped quote, it will unescape it. | ||||
| fn quoted_by(quote: char, input: Span) -> IResult<Token> { | ||||
|     // empty fields / values are valid in json | ||||
|     if input.is_empty() { | ||||
|         return Ok((input.slice(input.input_len()..), input.into())); | ||||
|     } | ||||
|  | ||||
|     let mut escaped = false; | ||||
|     let mut i = input.iter_indices(); | ||||
|  | ||||
|     while let Some((idx, c)) = i.next() { | ||||
|         match c { | ||||
|             c if c == quote => { | ||||
|                 let (rem, output) = input.take_split(idx); | ||||
|                 return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote))))); | ||||
|             } | ||||
|             '\\' => { | ||||
|                 if let Some((_, c)) = i.next() { | ||||
|                     escaped |= c == quote; | ||||
|                 } else { | ||||
|                     return Err(nom::Err::Error(Error::new_from_kind( | ||||
|                         input, | ||||
|                         ErrorKind::MalformedValue, | ||||
|                     ))); | ||||
|                 } | ||||
|             } | ||||
|             // if it was preceded by a `\` or if it was anything else we can continue to advance | ||||
|             _ => (), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     Ok(( | ||||
|         input.slice(input.input_len()..), | ||||
|         Token::new(input, escaped.then(|| unescape(input, quote))), | ||||
|     )) | ||||
| } | ||||
|  | ||||
| /// value          = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* | ||||
| pub fn parse_value(input: Span) -> IResult<Token> { | ||||
| pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> { | ||||
|     // to get better diagnostic messages we are going to strip the leading whitespace from the input right now | ||||
|     let (input, _) = take_while(char::is_whitespace)(input)?; | ||||
|  | ||||
| @@ -30,12 +76,10 @@ pub fn parse_value(input: Span) -> IResult<Token> { | ||||
|         _ => (), | ||||
|     } | ||||
|  | ||||
|     // singleQuoted   = "'" .* all but quotes "'" | ||||
|     let simple_quoted = take_till(|c: char| c == '\''); | ||||
|     // doubleQuoted   = "\"" (word | spaces)* "\"" | ||||
|     let double_quoted = take_till(|c: char| c == '"'); | ||||
|     // word           = (alphanumeric | _ | - | .)+ | ||||
|     let word = take_while1(is_value_component); | ||||
|     let word = |input: Span<'a>| -> IResult<Token<'a>> { | ||||
|         take_while1(is_value_component)(input).map(|(s, t)| (s, t.into())) | ||||
|     }; | ||||
|  | ||||
|     // this parser is only used when an error is encountered and it parses the | ||||
|     // largest string possible that does not contain any “language” syntax. | ||||
| @@ -48,20 +92,27 @@ pub fn parse_value(input: Span) -> IResult<Token> { | ||||
|  | ||||
|     terminated( | ||||
|         alt(( | ||||
|             delimited(char('\''), cut(simple_quoted), cut(char('\''))), | ||||
|             delimited(char('"'), cut(double_quoted), cut(char('"'))), | ||||
|             delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), | ||||
|             delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), | ||||
|             word, | ||||
|         )), | ||||
|         multispace0, | ||||
|     )(input) | ||||
|     .map(|(s, t)| (s, t.into())) | ||||
|     // .map(|(s, t)| (s, t.into())) | ||||
|     // if we found nothing in the alt it means the user specified something that was not recognized as a value | ||||
|     .map_err(|e: nom::Err<Error>| { | ||||
|         e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue)) | ||||
|     }) | ||||
|     // if we encountered a failure it means the user really tried to input a value, but had an unmatched quote | ||||
|     .map_err(|e| { | ||||
|         e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))) | ||||
|         e.map_fail(|failure| { | ||||
|             // if we encountered a char failure it means the user had an unmatched quote | ||||
|             if matches!(failure.kind(), ErrorKind::Char(_)) { | ||||
|                 Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(failure.char())) | ||||
|             } else { | ||||
|                 // else we leave the failure untouched | ||||
|                 failure | ||||
|             } | ||||
|         }) | ||||
|     }) | ||||
| } | ||||
|  | ||||
| @@ -81,7 +132,7 @@ pub mod test { | ||||
|     use crate::tests::rtok; | ||||
|  | ||||
|     #[test] | ||||
|     fn name() { | ||||
|     fn test_span() { | ||||
|         let test_case = [ | ||||
|             ("channel", rtok("", "channel")), | ||||
|             (".private", rtok("", ".private")), | ||||
| @@ -102,6 +153,7 @@ pub mod test { | ||||
|             ("\"cha'nnel\"", rtok("'", "cha'nnel")), | ||||
|             ("\"cha'nnel\"", rtok("'", "cha'nnel")), | ||||
|             ("I'm tamo", rtok("'m tamo", "I")), | ||||
|             ("\"I'm \\\"super\\\" tamo\"", rtok("\"", "I'm \\\"super\\\" tamo")), | ||||
|         ]; | ||||
|  | ||||
|         for (input, expected) in test_case { | ||||
| @@ -114,8 +166,116 @@ pub mod test { | ||||
|                 expected, | ||||
|                 result.unwrap_err() | ||||
|             ); | ||||
|             let value = result.unwrap().1; | ||||
|             assert_eq!(value, expected, "Filter `{}` failed.", input); | ||||
|             let token = result.unwrap().1; | ||||
|             assert_eq!(token, expected, "Filter `{}` failed.", input); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_escape_inside_double_quote() { | ||||
|         // (input, remaining, expected output token, output value) | ||||
|         let test_case = [ | ||||
|             ("aaaa", "", rtok("", "aaaa"), "aaaa"), | ||||
|             (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"), | ||||
|             (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#), | ||||
|             (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#), | ||||
|             (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#), | ||||
|             (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#), | ||||
|             (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#), | ||||
|         ]; | ||||
|  | ||||
|         for (input, remaining, expected_tok, expected_val) in test_case { | ||||
|             let span = Span::new_extra(input, ""); | ||||
|             let result = quoted_by('"', span); | ||||
|             assert!(result.is_ok()); | ||||
|  | ||||
|             let (rem, output) = result.unwrap(); | ||||
|             assert_eq!(rem.to_string(), remaining); | ||||
|             assert_eq!(output, expected_tok); | ||||
|             assert_eq!(output.value(), expected_val.to_string()); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_unescape() { | ||||
|         // double quote | ||||
|         assert_eq!( | ||||
|             unescape(Span::new_extra(r#"Hello \"World\""#, ""), '"'), | ||||
|             r#"Hello "World""#.to_string() | ||||
|         ); | ||||
|         assert_eq!( | ||||
|             unescape(Span::new_extra(r#"Hello \\\"World\\\""#, ""), '"'), | ||||
|             r#"Hello \\"World\\""#.to_string() | ||||
|         ); | ||||
|         // simple quote | ||||
|         assert_eq!( | ||||
|             unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''), | ||||
|             r#"Hello 'World'"#.to_string() | ||||
|         ); | ||||
|         assert_eq!( | ||||
|             unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''), | ||||
|             r#"Hello \\'World\\'"#.to_string() | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_value() { | ||||
|         let test_case = [ | ||||
|             // (input, expected value, if a string was generated to hold the new value) | ||||
|             ("channel", "channel", false), | ||||
|             // All the base test, no escaped string should be generated | ||||
|             (".private", ".private", false), | ||||
|             ("I-love-kebab", "I-love-kebab", false), | ||||
|             ("but_snakes_is_also_good", "but_snakes_is_also_good", false), | ||||
|             ("parens(", "parens", false), | ||||
|             ("parens)", "parens", false), | ||||
|             ("not!", "not", false), | ||||
|             ("    channel", "channel", false), | ||||
|             ("channel     ", "channel", false), | ||||
|             ("    channel     ", "channel", false), | ||||
|             ("'channel'", "channel", false), | ||||
|             ("\"channel\"", "channel", false), | ||||
|             ("'cha)nnel'", "cha)nnel", false), | ||||
|             ("'cha\"nnel'", "cha\"nnel", false), | ||||
|             ("\"cha'nnel\"", "cha'nnel", false), | ||||
|             ("\" some spaces \"", " some spaces ", false), | ||||
|             ("\"cha'nnel\"", "cha'nnel", false), | ||||
|             ("\"cha'nnel\"", "cha'nnel", false), | ||||
|             ("I'm tamo", "I", false), | ||||
|             // escaped thing but not quote | ||||
|             (r#""\\""#, r#"\\"#, false), | ||||
|             (r#""\\\\\\""#, r#"\\\\\\"#, false), | ||||
|             (r#""aa\\aa""#, r#"aa\\aa"#, false), | ||||
|             // with double quote | ||||
|             (r#""Hello \"world\"""#, r#"Hello "world""#, true), | ||||
|             (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true), | ||||
|             (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true), | ||||
|             (r#""\"\"""#, r#""""#, true), | ||||
|             // with simple quote | ||||
|             (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true), | ||||
|             (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true), | ||||
|             (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true), | ||||
|             (r#"'\'\''"#, r#"''"#, true), | ||||
|         ]; | ||||
|  | ||||
|         for (input, expected, escaped) in test_case { | ||||
|             let input = Span::new_extra(input, input); | ||||
|             let result = parse_value(input); | ||||
|  | ||||
|             assert!( | ||||
|                 result.is_ok(), | ||||
|                 "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", | ||||
|                 expected, | ||||
|                 result.unwrap_err() | ||||
|             ); | ||||
|             let token = result.unwrap().1; | ||||
|             assert_eq!( | ||||
|                 token.value.is_some(), | ||||
|                 escaped, | ||||
|                 "Filter `{}` was not supposed to be escaped", | ||||
|                 input | ||||
|             ); | ||||
|             assert_eq!(token.value(), expected, "Filter `{}` failed.", input); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user