mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-05 20:26:31 +00:00
Switch to a nom parser
This commit is contained in:
@ -7,11 +7,20 @@
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::char;
|
||||
use nom::character::complete::multispace0;
|
||||
use nom::character::complete::multispace1;
|
||||
use nom::combinator::cut;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::value;
|
||||
use nom::sequence::preceded;
|
||||
use nom::sequence::{terminated, tuple};
|
||||
use Condition::*;
|
||||
|
||||
use crate::error::IResultExt;
|
||||
use crate::value::parse_vector_value;
|
||||
use crate::ErrorKind;
|
||||
use crate::VectorFilter;
|
||||
use crate::{parse_value, FilterCondition, IResult, Span, Token};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@ -113,6 +122,57 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
|
||||
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
|
||||
}
|
||||
|
||||
fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
|
||||
let (input, _) = multispace0(input)?;
|
||||
let (input, fid) = tag("_vectors")(input)?;
|
||||
|
||||
if let Ok((input, _)) = multispace1::<_, crate::Error>(input) {
|
||||
return Ok((input, (Token::from(fid), None, VectorFilter::None)));
|
||||
}
|
||||
|
||||
let (input, _) = char('.')(input)?;
|
||||
|
||||
// From this point, we are certain this is a vector filter, so our errors must be final.
|
||||
// We could use nom's `cut`` but it's better to be explicit about the errors
|
||||
|
||||
let (input, embedder_name) = parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidEmbedder)?;
|
||||
|
||||
let (input, filter) = alt((
|
||||
map(
|
||||
preceded(tag(".fragments"), |input| {
|
||||
let (input, _) = tag(".")(input).map_cut(ErrorKind::VectorFilterMissingFragment)?;
|
||||
parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidFragment)
|
||||
}),
|
||||
VectorFilter::Fragment,
|
||||
),
|
||||
value(VectorFilter::UserProvided, tag(".userProvided")),
|
||||
value(VectorFilter::DocumentTemplate, tag(".documentTemplate")),
|
||||
value(VectorFilter::Regenerate, tag(".regenerate")),
|
||||
value(VectorFilter::None, nom::combinator::success("")),
|
||||
))(input)?;
|
||||
|
||||
let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?;
|
||||
|
||||
Ok((input, (Token::from(fid), Some(embedder_name), filter)))
|
||||
}
|
||||
|
||||
/// vectors_exists = vectors "EXISTS"
|
||||
pub fn parse_vectors_exists(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, (fid, embedder, filter)) = terminated(parse_vectors, tag("EXISTS"))(input)?;
|
||||
|
||||
Ok((input, FilterCondition::VectorExists { fid, embedder, filter }))
|
||||
}
|
||||
/// vectors_not_exists = vectors "NOT" WS+ "EXISTS"
|
||||
pub fn parse_vectors_not_exists(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, (fid, embedder, filter)) = parse_vectors(input)?;
|
||||
|
||||
let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?;
|
||||
Ok((
|
||||
input,
|
||||
FilterCondition::Not(Box::new(FilterCondition::VectorExists { fid, embedder, filter })),
|
||||
))
|
||||
}
|
||||
|
||||
/// contains = value "CONTAINS" value
|
||||
pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, (fid, contains, value)) =
|
||||
|
@ -42,6 +42,23 @@ pub fn cut_with_err<'a, O>(
|
||||
}
|
||||
}
|
||||
|
||||
pub trait IResultExt<'a> {
|
||||
fn map_cut(self, kind: ErrorKind<'a>) -> Self;
|
||||
}
|
||||
|
||||
impl<'a, T> IResultExt<'a> for IResult<'a, T> {
|
||||
fn map_cut(self, kind: ErrorKind<'a>) -> Self {
|
||||
self.map_err(move |e: nom::Err<Error<'a>>| {
|
||||
let input = match e {
|
||||
nom::Err::Incomplete(_) => return e,
|
||||
nom::Err::Error(e) => *e.context(),
|
||||
nom::Err::Failure(e) => *e.context(),
|
||||
};
|
||||
nom::Err::Failure(Error::new_from_kind(input, kind))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error<'a> {
|
||||
context: Span<'a>,
|
||||
@ -76,6 +93,11 @@ pub enum ErrorKind<'a> {
|
||||
InternalError(error::ErrorKind),
|
||||
DepthLimitReached,
|
||||
External(String),
|
||||
|
||||
VectorFilterLeftover,
|
||||
VectorFilterInvalidEmbedder,
|
||||
VectorFilterMissingFragment,
|
||||
VectorFilterInvalidFragment,
|
||||
}
|
||||
|
||||
impl<'a> Error<'a> {
|
||||
@ -169,6 +191,18 @@ impl Display for Error<'_> {
|
||||
ErrorKind::MisusedGeoBoundingBox => {
|
||||
writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")?
|
||||
}
|
||||
ErrorKind::VectorFilterLeftover => {
|
||||
writeln!(f, "The vector filter has leftover tokens.")?
|
||||
}
|
||||
ErrorKind::VectorFilterInvalidFragment => {
|
||||
writeln!(f, "The vector filter's fragment is invalid.")?
|
||||
}
|
||||
ErrorKind::VectorFilterMissingFragment => {
|
||||
writeln!(f, "The vector filter is missing a fragment name.")?
|
||||
}
|
||||
ErrorKind::VectorFilterInvalidEmbedder => {
|
||||
writeln!(f, "The vector filter's embedder is invalid.")?
|
||||
}
|
||||
ErrorKind::ReservedKeyword(word) => {
|
||||
writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")?
|
||||
}
|
||||
|
@ -65,6 +65,9 @@ use nom_locate::LocatedSpan;
|
||||
pub(crate) use value::parse_value;
|
||||
use value::word_exact;
|
||||
|
||||
use crate::condition::{parse_vectors_exists, parse_vectors_not_exists};
|
||||
use crate::error::IResultExt;
|
||||
|
||||
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
||||
|
||||
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
||||
@ -146,6 +149,15 @@ impl<'a> From<&'a str> for Token<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum VectorFilter<'a> {
|
||||
Fragment(Token<'a>),
|
||||
DocumentTemplate,
|
||||
UserProvided,
|
||||
Regenerate,
|
||||
None,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FilterCondition<'a> {
|
||||
Not(Box<Self>),
|
||||
@ -153,6 +165,7 @@ pub enum FilterCondition<'a> {
|
||||
In { fid: Token<'a>, els: Vec<Token<'a>> },
|
||||
Or(Vec<Self>),
|
||||
And(Vec<Self>),
|
||||
VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
|
||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
|
||||
}
|
||||
@ -183,7 +196,8 @@ impl<'a> FilterCondition<'a> {
|
||||
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
|
||||
seq.iter().find_map(|filter| filter.use_contains_operator())
|
||||
}
|
||||
FilterCondition::GeoLowerThan { .. }
|
||||
FilterCondition::VectorExists { .. }
|
||||
| FilterCondition::GeoLowerThan { .. }
|
||||
| FilterCondition::GeoBoundingBox { .. }
|
||||
| FilterCondition::In { .. } => None,
|
||||
}
|
||||
@ -191,13 +205,7 @@ impl<'a> FilterCondition<'a> {
|
||||
|
||||
pub fn use_vector_filter(&self) -> Option<&Token> {
|
||||
match self {
|
||||
FilterCondition::Condition { fid, op: _ } => {
|
||||
if fid.value().starts_with("_vectors.") || fid.value() == "_vectors" {
|
||||
Some(fid)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
FilterCondition::Condition { .. } => None,
|
||||
FilterCondition::Not(this) => this.use_vector_filter(),
|
||||
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
|
||||
seq.iter().find_map(|filter| filter.use_vector_filter())
|
||||
@ -205,6 +213,7 @@ impl<'a> FilterCondition<'a> {
|
||||
FilterCondition::GeoLowerThan { .. }
|
||||
| FilterCondition::GeoBoundingBox { .. }
|
||||
| FilterCondition::In { .. } => None,
|
||||
FilterCondition::VectorExists { fid, .. } => Some(fid),
|
||||
}
|
||||
}
|
||||
|
||||
@ -292,10 +301,7 @@ fn parse_in_body(input: Span) -> IResult<Vec<Token>> {
|
||||
let (input, _) = ws(word_exact("IN"))(input)?;
|
||||
|
||||
// everything after `IN` can be a failure
|
||||
let (input, _) =
|
||||
cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))(
|
||||
input,
|
||||
)?;
|
||||
let (input, _) = tag("[")(input).map_cut(ErrorKind::InOpeningBracket)?;
|
||||
|
||||
let (input, content) = cut(parse_value_list)(input)?;
|
||||
|
||||
@ -529,8 +535,7 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
parse_is_not_null,
|
||||
parse_is_empty,
|
||||
parse_is_not_empty,
|
||||
parse_exists,
|
||||
parse_not_exists,
|
||||
alt((parse_vectors_exists, parse_vectors_not_exists, parse_exists, parse_not_exists)),
|
||||
parse_to,
|
||||
parse_contains,
|
||||
parse_not_contains,
|
||||
@ -586,6 +591,19 @@ impl std::fmt::Display for FilterCondition<'_> {
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::VectorExists { fid: _, embedder, filter: inner } => {
|
||||
write!(f, "_vectors")?;
|
||||
if let Some(embedder) = embedder {
|
||||
write!(f, ".{embedder:?}")?;
|
||||
}
|
||||
match inner {
|
||||
VectorFilter::Fragment(fragment) => write!(f, ".fragments.{fragment:?}"),
|
||||
VectorFilter::DocumentTemplate => write!(f, ".documentTemplate"),
|
||||
VectorFilter::UserProvided => write!(f, ".userProvided"),
|
||||
VectorFilter::Regenerate => write!(f, ".regenerate"),
|
||||
VectorFilter::None => Ok(()),
|
||||
}
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
|
||||
}
|
||||
|
@ -80,6 +80,39 @@ pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a,
|
||||
}
|
||||
}
|
||||
|
||||
/// vector_value = ( non_dot_word | singleQuoted | doubleQuoted)
|
||||
pub fn parse_vector_value(input: Span) -> IResult<Token> {
|
||||
pub fn non_dot_word(input: Span) -> IResult<Token> {
|
||||
let (input, word) = take_while1(|c| is_value_component(c) && c != '.')(input)?;
|
||||
Ok((input, word.into()))
|
||||
}
|
||||
|
||||
let (input, value) = alt((
|
||||
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
|
||||
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
|
||||
non_dot_word,
|
||||
))(input)?;
|
||||
|
||||
match unescaper::unescape(value.value()) {
|
||||
Ok(content) => {
|
||||
if content.len() != value.value().len() {
|
||||
Ok((input, Token::new(value.original_span(), Some(content))))
|
||||
} else {
|
||||
Ok((input, value))
|
||||
}
|
||||
}
|
||||
Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
|
||||
Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
|
||||
value.original_span(),
|
||||
ErrorKind::InvalidEscapedNumber,
|
||||
))),
|
||||
Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
|
||||
value.original_span(),
|
||||
ErrorKind::MalformedValue,
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
||||
pub fn parse_value(input: Span) -> IResult<Token> {
|
||||
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
||||
|
Reference in New Issue
Block a user