From 7f394d59cd54df6df2afc31ff4f6ec03e4e21654 Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Fri, 1 Aug 2025 10:22:24 +0200 Subject: [PATCH] Add support for ugly names --- crates/filter-parser/src/condition.rs | 6 +- crates/filter-parser/src/lib.rs | 2 + crates/filter-parser/src/value.rs | 4 +- .../meilisearch/src/routes/indexes/render.rs | 148 ++++++++++-------- .../tests/documents/render_documents.rs | 28 ++-- crates/milli/src/lib.rs | 1 + 6 files changed, 108 insertions(+), 81 deletions(-) diff --git a/crates/filter-parser/src/condition.rs b/crates/filter-parser/src/condition.rs index 4d156c269..6122de8b8 100644 --- a/crates/filter-parser/src/condition.rs +++ b/crates/filter-parser/src/condition.rs @@ -18,7 +18,7 @@ use nom::sequence::{terminated, tuple}; use Condition::*; use crate::error::IResultExt; -use crate::value::parse_vector_value; +use crate::value::parse_dotted_value_part; use crate::ErrorKind; use crate::VectorFilter; use crate::{parse_value, FilterCondition, IResult, Span, Token}; @@ -136,13 +136,13 @@ fn parse_vectors(input: Span) -> IResult<(Token, Option, VectorFilter<'_> // We could use nom's `cut` but it's better to be explicit about the errors let (input, embedder_name) = - parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidEmbedder)?; + parse_dotted_value_part(input).map_cut(ErrorKind::VectorFilterInvalidEmbedder)?; let (input, filter) = alt(( map( preceded(tag(".fragments"), |input| { let (input, _) = tag(".")(input).map_cut(ErrorKind::VectorFilterMissingFragment)?; - parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidFragment) + parse_dotted_value_part(input).map_cut(ErrorKind::VectorFilterInvalidFragment) }), VectorFilter::Fragment, ), diff --git a/crates/filter-parser/src/lib.rs b/crates/filter-parser/src/lib.rs index ae11ccf55..2d9b74b0b 100644 --- a/crates/filter-parser/src/lib.rs +++ b/crates/filter-parser/src/lib.rs @@ -61,7 +61,9 @@ use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::Finish; +pub use nom::Slice; use nom_locate::LocatedSpan; +pub use value::parse_dotted_value_part; pub(crate) use value::parse_value; use value::word_exact; diff --git a/crates/filter-parser/src/value.rs b/crates/filter-parser/src/value.rs index 345f0b0a2..54c6a0670 100644 --- a/crates/filter-parser/src/value.rs +++ b/crates/filter-parser/src/value.rs @@ -80,8 +80,8 @@ pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, } } -/// vector_value = ( non_dot_word | singleQuoted | doubleQuoted) -pub fn parse_vector_value(input: Span) -> IResult { +/// dotted_value_part = ( non_dot_word | singleQuoted | doubleQuoted) +pub fn parse_dotted_value_part(input: Span) -> IResult { pub fn non_dot_word(input: Span) -> IResult { let (input, word) = take_while1(|c| is_value_component(c) && c != '.')(input)?; Ok((input, word.into())) diff --git a/crates/meilisearch/src/routes/indexes/render.rs b/crates/meilisearch/src/routes/indexes/render.rs index 7a50a7425..c1e8e4ee5 100644 --- a/crates/meilisearch/src/routes/indexes/render.rs +++ b/crates/meilisearch/src/routes/indexes/render.rs @@ -20,6 +20,7 @@ use meilisearch_types::milli::prompt::{get_document, get_inline_document_fields} use meilisearch_types::milli::vector::db::IndexEmbeddingConfig; use meilisearch_types::milli::vector::json_template::{self, JsonTemplate}; use meilisearch_types::milli::vector::EmbedderOptions; +use meilisearch_types::milli::{Span, Token}; use meilisearch_types::{heed, milli, Index}; use serde::Serialize; use serde_json::Value; @@ -133,47 +134,49 @@ impl FragmentKind { } } -enum RenderError { +enum RenderError<'a> { MultipleTemplates, MissingTemplate, EmptyTemplateId, - UnknownTemplateRoot(String), + UnknownTemplateRoot(Token<'a>), MissingEmbedderName { available: Vec, }, EmbedderDoesNotExist { - embedder: String, + embedder: Token<'a>, available: Vec, }, EmbedderUsesFragments { - embedder: String, + embedder: Token<'a>, }, MissingTemplateAfterEmbedder { - embedder: String, + embedder: Token<'a>, indexing: Vec, search: Vec, }, UnknownTemplatePrefix { - embedder: String, - found: String, + embedder: Token<'a>, + found: Token<'a>, indexing: Vec, search: Vec, }, ReponseError(ResponseError), MissingFragment { - embedder: String, + embedder: Token<'a>, kind: FragmentKind, available: Vec, }, FragmentDoesNotExist { - embedder: String, - fragment: String, + embedder: Token<'a>, + fragment: Token<'a>, kind: FragmentKind, available: Vec, }, - LeftOverToken(String), + LeftOverToken(Token<'a>), MissingChatCompletionTemplate, - UnknownChatCompletionTemplate(String), + UnknownChatCompletionTemplate(Token<'a>), + ExpectedDotAfterValue(milli::Span<'a>), + ExpectedValue(milli::Span<'a>), DocumentNotFound(String), BothInlineDocAndDocId, @@ -182,13 +185,13 @@ enum RenderError { CouldNotHandleInput, } -impl From for RenderError { +impl From for RenderError<'_> { fn from(error: heed::Error) -> Self { RenderError::ReponseError(error.into()) } } -impl From for RenderError { +impl From for RenderError<'_> { fn from(error: milli::Error) -> Self { RenderError::ReponseError(error.into()) } @@ -196,7 +199,7 @@ impl From for RenderError { use RenderError::*; -impl From for ResponseError { +impl From> for ResponseError { fn from(error: RenderError) -> Self { match error { MultipleTemplates => ResponseError::from_msg( @@ -322,31 +325,39 @@ impl From for ResponseError { String::from("Could not handle the input provided."), Code::InvalidRenderInput, ), + ExpectedDotAfterValue(span) => ResponseError::from_msg( + format!("Expected a dot after value, but found `{span}`."), + Code::InvalidRenderTemplateId, + ), + ExpectedValue(span) => ResponseError::from_msg( + format!("Expected a value, but found `{span}`."), + Code::InvalidRenderTemplateId, + ), } } } -fn parse_template_id_fragment( - name: Option<&str>, +fn parse_template_id_fragment<'a>( + name: Option>, kind: FragmentKind, embedding_config: &IndexEmbeddingConfig, - embedder_name: &str, -) -> Result { + embedder: Token<'a>, +) -> Result> { let get_available = [EmbedderOptions::indexing_fragments, EmbedderOptions::search_fragments][kind as usize]; let get_specific = [EmbedderOptions::indexing_fragment, EmbedderOptions::search_fragment][kind as usize]; - let fragment_name = name.ok_or_else(|| MissingFragment { - embedder: embedder_name.to_string(), + let fragment = name.ok_or_else(|| MissingFragment { + embedder: embedder.clone(), kind, available: get_available(&embedding_config.config.embedder_options), })?; - let fragment = get_specific(&embedding_config.config.embedder_options, fragment_name) + let fragment = get_specific(&embedding_config.config.embedder_options, fragment.value()) .ok_or_else(|| FragmentDoesNotExist { - embedder: embedder_name.to_string(), - fragment: fragment_name.to_string(), + embedder, + fragment, kind, available: get_available(&embedding_config.config.embedder_options), })?; @@ -354,83 +365,96 @@ fn parse_template_id_fragment( Ok(fragment.clone()) } -fn parse_template_id( +fn parse_template_id<'a>( index: &Index, - rtxn: &RoTxn<'_>, - id: &str, -) -> Result<(serde_json::Value, bool), RenderError> { - let mut parts = id.split('.'); + rtxn: &RoTxn, + id: &'a str, +) -> Result<(serde_json::Value, bool), RenderError<'a>> { + let mut input: Span = id.into(); + let mut next_part = |first: bool| -> Result>, RenderError<'a>> { + if input.is_empty() { + return Ok(None); + } + if !first { + if !input.starts_with('.') { + return Err(ExpectedDotAfterValue(input)); + } + input = milli::filter_parser::Slice::slice(&input, 1..); + } + let (remaining, value) = milli::filter_parser::parse_dotted_value_part(input) + .map_err(|_| ExpectedValue(input))?; + input = remaining; - let root = parts.next().ok_or(EmptyTemplateId)?; + Ok(Some(value)) + }; - let template = match root { + let root = next_part(true)?.ok_or(EmptyTemplateId)?; + let template = match root.value() { "embedders" => { let index_embedding_configs = index.embedding_configs(); let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?; let get_embedders = || embedding_configs.iter().map(|c| c.name.clone()).collect(); - let embedder = - parts.next().ok_or_else(|| MissingEmbedderName { available: get_embedders() })?; + let embedder = next_part(false)? + .ok_or_else(|| MissingEmbedderName { available: get_embedders() })?; let embedding_config = embedding_configs .iter() - .find(|config| config.name == embedder) + .find(|config| config.name == embedder.value()) .ok_or_else(|| EmbedderDoesNotExist { - embedder: embedder.to_string(), + embedder: embedder.clone(), available: get_embedders(), })?; let get_indexing = || embedding_config.config.embedder_options.indexing_fragments(); let get_search = || embedding_config.config.embedder_options.search_fragments(); - let template_kind = parts.next().ok_or_else(|| MissingTemplateAfterEmbedder { - embedder: embedder.to_string(), + let template_kind = next_part(false)?.ok_or_else(|| MissingTemplateAfterEmbedder { + embedder: embedder.clone(), indexing: get_indexing(), search: get_search(), })?; - match template_kind { - "documentTemplate" | "documenttemplate" - if !embedding_config.fragments.as_slice().is_empty() => - { - return Err(EmbedderUsesFragments { embedder: embedder.to_string() }); + match template_kind.value() { + "documentTemplate" if !embedding_config.fragments.as_slice().is_empty() => { + return Err(EmbedderUsesFragments { embedder }); } - "documentTemplate" | "documenttemplate" => ( + "documentTemplate" => ( serde_json::Value::String(embedding_config.config.prompt.template.clone()), true, ), - "indexingFragments" | "indexingfragments" => ( + "indexingFragments" => ( parse_template_id_fragment( - parts.next(), + next_part(false)?, FragmentKind::Indexing, embedding_config, embedder, )?, false, ), - "searchFragments" | "searchfragments" => ( + "searchFragments" => ( parse_template_id_fragment( - parts.next(), + next_part(false)?, FragmentKind::Search, embedding_config, embedder, )?, false, ), - found => { + _ => { return Err(UnknownTemplatePrefix { - embedder: embedder.to_string(), - found: found.to_string(), + embedder, + found: template_kind, indexing: get_indexing(), search: get_search(), }) } } } - "chatCompletions" | "chatcompletions" => { - let template_name = parts.next().ok_or(MissingChatCompletionTemplate)?; + "chatCompletions" => { + let template_name = next_part(false)?.ok_or(MissingChatCompletionTemplate)?; - if template_name != "documentTemplate" { - return Err(UnknownChatCompletionTemplate(template_name.to_string())); + if template_name.value() != "documentTemplate" { + return Err(UnknownChatCompletionTemplate(template_name)); } let chat_config = index.chat_config(rtxn)?; @@ -438,26 +462,26 @@ fn parse_template_id( (serde_json::Value::String(chat_config.prompt.template.clone()), true) } "" => return Err(EmptyTemplateId), - unknown => { - return Err(UnknownTemplateRoot(unknown.to_string())); + _ => { + return Err(UnknownTemplateRoot(root)); } }; - if let Some(next) = parts.next() { - return Err(LeftOverToken(next.to_string())); + if let Some(next) = next_part(false)? { + return Err(LeftOverToken(next)); } Ok(template) } -async fn render(index: Index, query: RenderQuery) -> Result { +async fn render(index: Index, query: RenderQuery) -> Result { let rtxn = index.read_txn()?; let (template, fields_available) = match (query.template.inline, query.template.id) { (Some(inline), None) => (inline, true), (None, Some(id)) => parse_template_id(&index, &rtxn, &id)?, - (Some(_), Some(_)) => return Err(MultipleTemplates), - (None, None) => return Err(MissingTemplate), + (Some(_), Some(_)) => return Err(MultipleTemplates.into()), + (None, None) => return Err(MissingTemplate.into()), }; let fields_already_present = query @@ -474,7 +498,7 @@ async fn render(index: Index, query: RenderQuery) -> Result