diff --git a/Cargo.lock b/Cargo.lock index ceec0a05e..297cf408c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3770,6 +3770,7 @@ dependencies = [ "itertools 0.14.0", "jsonwebtoken", "lazy_static", + "liquid", "manifest-dir-macros", "maplit", "meili-snap", diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index c5e082517..8b3be1ede 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -423,6 +423,7 @@ InvalidRenderTemplateId , InvalidRequest , BAD_REQU InvalidRenderTemplateInline , InvalidRequest , BAD_REQUEST ; InvalidRenderInput , InvalidRequest , BAD_REQUEST ; InvalidRenderInputDocumentId , InvalidRequest , BAD_REQUEST ; +InvalidRenderInputFields , InvalidRequest , BAD_REQUEST ; InvalidRenderInputInline , InvalidRequest , BAD_REQUEST ; RenderDocumentNotFound , InvalidRequest , NOT_FOUND ; TemplateParsingError , InvalidRequest , BAD_REQUEST ; diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index 83eb439d9..54f1225ab 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -48,6 +48,7 @@ is-terminal = "0.4.16" itertools = "0.14.0" jsonwebtoken = "9.3.1" lazy_static = "1.5.0" +liquid = "0.26.11" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } mimalloc = { version = "0.1.47", default-features = false } diff --git a/crates/meilisearch/src/routes/indexes/render.rs b/crates/meilisearch/src/routes/indexes/render.rs index aec816e2f..85a6fce97 100644 --- a/crates/meilisearch/src/routes/indexes/render.rs +++ b/crates/meilisearch/src/routes/indexes/render.rs @@ -2,39 +2,32 @@ use std::collections::BTreeMap; use actix_web::web::{self, Data}; use actix_web::{HttpRequest, HttpResponse}; -use deserr::actix_web::{AwebJson, AwebQueryParameter}; +use deserr::actix_web::AwebJson; use deserr::Deserr; use index_scheduler::IndexScheduler; -use itertools::structs; -use meilisearch_types::deserr::query_params::Param; -use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; +use liquid::ValueView; +use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::{ - InvalidRenderInput, InvalidRenderInputDocumentId, InvalidRenderInputInline, - InvalidRenderTemplate, InvalidRenderTemplateId, InvalidRenderTemplateInline, + InvalidRenderInput, InvalidRenderInputDocumentId, InvalidRenderInputFields, + InvalidRenderInputInline, InvalidRenderTemplate, InvalidRenderTemplateId, + InvalidRenderTemplateInline, }; use meilisearch_types::error::Code; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::keys::actions; +use meilisearch_types::milli::prompt::{get_document, get_inline_document_fields}; use meilisearch_types::milli::vector::json_template::{self, JsonTemplate}; -use meilisearch_types::serde_cs::vec::CS; use meilisearch_types::{heed, milli, Index}; use serde::Serialize; use serde_json::Value; use tracing::debug; -use utoipa::{IntoParams, OpenApi, ToSchema}; +use utoipa::{OpenApi, ToSchema}; -use super::ActionPolicy; use crate::analytics::Analytics; -use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::DoubleActionPolicy; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST}; -use crate::search::{ - add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, Route, - SearchKind, SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, -}; #[derive(OpenApi)] #[openapi( @@ -179,6 +172,10 @@ enum RenderError { BothInlineDocAndDocId, TemplateParsing(json_template::Error), TemplateRendering(json_template::Error), + + FieldsUnavailable, + FieldsAlreadyPresent, + FieldsWithoutDocument, } impl From for RenderError { @@ -317,6 +314,18 @@ impl From for ResponseError { format!("Error rendering template: {}", err.rendering_error("input")), Code::TemplateRenderingError, ), + FieldsUnavailable => ResponseError::from_msg( + String::from("Fields are not available on fragments.\n Hint: Remove the `insertFields` parameter or set it to `false`."), + Code::InvalidRenderInputFields, + ), + FieldsAlreadyPresent => ResponseError::from_msg( + String::from("Fields were provided in the inline input but `insertFields` is set to `true`.\n Hint: Remove the `insertFields` parameter or set it to `false`."), + Code::InvalidRenderInputFields, + ), + FieldsWithoutDocument => ResponseError::from_msg( + String::from("Fields were requested but no document was provided.\n Hint: Provide a document ID or inline document."), + Code::InvalidRenderInputFields, + ), } } } @@ -324,8 +333,8 @@ impl From for ResponseError { async fn render(index: Index, query: RenderQuery) -> Result { let rtxn = index.read_txn()?; - let template = match (query.template.inline, query.template.id) { - (Some(inline), None) => inline, + let (template, fields_available) = match (query.template.inline, query.template.id) { + (Some(inline), None) => (inline, true), (None, Some(id)) => { let mut parts = id.split('.'); @@ -368,8 +377,11 @@ async fn render(index: Index, query: RenderQuery) -> Result { @@ -396,7 +408,7 @@ async fn render(index: Index, query: RenderQuery) -> Result { let fragment_name = parts.next().ok_or_else(|| MissingFragment { @@ -422,7 +434,7 @@ async fn render(index: Index, query: RenderQuery) -> Result { return Err(UnknownTemplatePrefix { @@ -449,7 +461,7 @@ async fn render(index: Index, query: RenderQuery) -> Result return Err(EmptyTemplateId), unknown => { @@ -467,30 +479,60 @@ async fn render(index: Index, query: RenderQuery) -> Result return Err(MissingTemplate), }; + let fields_required = query.input.as_ref().and_then(|i| i.insert_fields); + let fields_already_present = query + .input + .as_ref() + .is_some_and(|i| i.inline.as_ref().is_some_and(|i| i.get("fields").is_some())); + let fields_probably_used = template.as_str().is_none_or(|s| s.contains("fields")); + let has_inline_doc = query + .input + .as_ref() + .is_some_and(|i| i.inline.as_ref().is_some_and(|i| i.get("doc").is_some())); + let has_document_id = query.input.as_ref().is_some_and(|i| i.document_id.is_some()); + let has_doc = has_inline_doc || has_document_id; + let insert_fields = match fields_required { + Some(insert_fields) => insert_fields, + None => fields_available && has_doc && fields_probably_used && !fields_already_present, + }; + if insert_fields && !fields_available { + return Err(FieldsUnavailable); + } + if insert_fields && fields_already_present { + return Err(FieldsAlreadyPresent); + } + if insert_fields && !has_doc { + return Err(FieldsWithoutDocument); + } + if has_inline_doc && has_document_id { + return Err(BothInlineDocAndDocId); + } + let mut rendered = Value::Null; if let Some(input) = query.input { - let mut media = input.inline.unwrap_or_default(); + let media = input.inline.unwrap_or_default(); + let mut object = liquid::to_object(&media).unwrap(); + + if let Some(doc) = media.get("doc") { + if insert_fields { + let fields = get_inline_document_fields(&index, &rtxn, doc)?; + object.insert("fields".into(), fields.to_value()); + } + } + if let Some(document_id) = input.document_id { - let internal_id = index - .external_documents_ids() - .get(&rtxn, &document_id)? - .ok_or_else(|| DocumentNotFound(document_id.to_string()))?; + let (document, fields) = get_document(&index, &rtxn, &document_id, insert_fields)? + .ok_or_else(|| DocumentNotFound(document_id))?; - let document = index.document(&rtxn, internal_id)?; - - let fields_ids_map = index.fields_ids_map(&rtxn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?; - let document = Value::Object(document); - - if media.insert(String::from("doc"), document).is_some() { - return Err(BothInlineDocAndDocId); + object.insert("doc".into(), document); + if let Some(fields) = fields { + object.insert("fields".into(), fields); } } let json_template = JsonTemplate::new(template.clone()).map_err(TemplateParsing)?; - rendered = json_template.render_serializable(&media).map_err(TemplateRendering)?; + rendered = json_template.render(&object).map_err(TemplateRendering)?; } Ok(RenderResult { template, rendered }) @@ -519,6 +561,8 @@ pub struct RenderQueryTemplate { pub struct RenderQueryInput { #[deserr(default, error = DeserrJsonError)] document_id: Option, + #[deserr(default, error = DeserrJsonError)] + insert_fields: Option, #[deserr(default, error = DeserrJsonError)] inline: Option>, } diff --git a/crates/milli/src/prompt/document.rs b/crates/milli/src/prompt/document.rs index 1125c8fba..64352f7d2 100644 --- a/crates/milli/src/prompt/document.rs +++ b/crates/milli/src/prompt/document.rs @@ -12,6 +12,7 @@ use liquid::{ObjectView, ValueView}; use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; +use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::FieldsIdsMap; @@ -143,6 +144,112 @@ impl ValueView for Document<'_> { /// Implementation for any type that implements the Document trait use crate::update::new::document::Document as DocumentTrait; +pub struct JsonDocument { + object: liquid::Object, + cached: BTreeMap>, +} + +impl JsonDocument { + pub fn new(value: &serde_json::Value) -> Self { + let to_string = serde_json::to_string(&value) + .expect("JsonDocument should only be created with valid JSON"); // TODO: Remove panic + let back_to_value: BTreeMap> = serde_json::from_str(&to_string) + .expect("JsonDocument should only be created with valid JSON"); + let object = + liquid::to_object(&value).expect("JsonDocument should only be created with valid JSON"); + Self { object, cached: back_to_value } + } +} + +impl std::fmt::Debug for JsonDocument { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.object.fmt(f) + } +} + +impl<'a> DocumentTrait<'a> for &'a JsonDocument { + fn iter_top_level_fields( + &self, + ) -> impl Iterator> { + self.cached.iter().filter_map(|(k, v)| { + if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME { + None + } else { + Some(Ok((k.as_str(), v.as_ref()))) + } + }) + } + + fn top_level_fields_count(&self) -> usize { + self.cached.len() + - self.cached.contains_key(RESERVED_VECTORS_FIELD_NAME) as usize + - self.cached.contains_key(RESERVED_GEO_FIELD_NAME) as usize + } + + fn top_level_field(&self, k: &str) -> crate::Result> { + if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME { + return Ok(None); + } + Ok(self.cached.get(k).map(|r| r.as_ref())) + } + + fn vectors_field(&self) -> crate::Result> { + Ok(self.cached.get(RESERVED_VECTORS_FIELD_NAME).map(|r| r.as_ref())) + } + + fn geo_field(&self) -> crate::Result> { + Ok(self.cached.get(RESERVED_GEO_FIELD_NAME).map(|r| r.as_ref())) + } +} + +impl ObjectView for JsonDocument { + fn as_value(&self) -> &dyn ValueView { + self.object.as_value() + } + fn size(&self) -> i64 { + self.object.size() + } + fn keys<'k>(&'k self) -> Box> + 'k> { + Box::new(self.object.keys().map(|s| s.into())) + } + fn values<'k>(&'k self) -> Box + 'k> { + Box::new(self.object.values().map(|v| v.as_view())) + } + fn iter<'k>(&'k self) -> Box, &'k dyn ValueView)> + 'k> { + Box::new(self.object.iter().map(|(k, v)| (k.into(), v.as_view()))) + } + fn contains_key(&self, index: &str) -> bool { + self.object.contains_key(index) + } + fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> { + self.object.get(index).map(|v| v.as_view()) + } +} + +impl ValueView for JsonDocument { + fn as_debug(&self) -> &dyn fmt::Debug { + self.object.as_debug() + } + fn render(&self) -> DisplayCow<'_> { + self.object.render() + } + fn source(&self) -> DisplayCow<'_> { + self.object.source() + } + fn type_name(&self) -> &'static str { + self.object.type_name() + } + fn query_state(&self, state: State) -> bool { + self.object.query_state(state) + } + fn to_kstr(&self) -> KStringCow<'_> { + self.object.to_kstr() + } + fn to_value(&self) -> LiquidValue { + self.object.to_value() + } +} + #[derive(Debug)] pub struct ParseableDocument<'a, 'doc, D: DocumentTrait<'a> + Debug> { document: D, diff --git a/crates/milli/src/prompt/mod.rs b/crates/milli/src/prompt/mod.rs index 03b20a090..aa73877bf 100644 --- a/crates/milli/src/prompt/mod.rs +++ b/crates/milli/src/prompt/mod.rs @@ -12,11 +12,16 @@ use bumpalo::Bump; pub(crate) use document::{Document, ParseableDocument}; use error::{NewPromptError, RenderPromptError}; pub use fields::{BorrowedFields, OwnedFields}; +use heed::RoTxn; +use liquid::model::Value as LiquidValue; +use liquid::ValueView; pub use self::context::Context; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; +use crate::prompt::document::JsonDocument; use crate::update::del_add::DelAdd; -use crate::GlobalFieldsIdsMap; +use crate::update::new::document::DocumentFromDb; +use crate::{GlobalFieldsIdsMap, Index, MetadataBuilder}; pub struct Prompt { template: liquid::Template, @@ -164,6 +169,47 @@ fn truncate(s: &mut String, max_bytes: usize) { } } +pub fn get_inline_document_fields( + index: &Index, + rtxn: &RoTxn<'_>, + inline_doc: &serde_json::Value, +) -> Result { + let fid_map_with_meta = index.fields_ids_map_with_metadata(rtxn)?; + let inline_doc = JsonDocument::new(inline_doc); + let fields = OwnedFields::new(&inline_doc, &fid_map_with_meta); + + Ok(fields.to_value()) +} + +pub fn get_document( + index: &Index, + rtxn: &RoTxn<'_>, + external_id: &str, + with_fields: bool, +) -> Result)>, crate::Error> { + let Some(internal_id) = index.external_documents_ids().get(rtxn, external_id)? else { + return Ok(None); + }; + + let fid_map = index.fields_ids_map(rtxn)?; + let Some(document_from_db) = DocumentFromDb::new(internal_id, rtxn, index, &fid_map)? else { + return Ok(None); + }; + + let doc_alloc = Bump::new(); + let parseable_document = ParseableDocument::new(document_from_db, &doc_alloc); + + if with_fields { + let metadata_builder = MetadataBuilder::from_index(index, rtxn)?; + let fid_map_with_meta = FieldIdMapWithMetadata::new(fid_map.clone(), metadata_builder); + let fields = OwnedFields::new(&parseable_document, &fid_map_with_meta); + + Ok(Some((parseable_document.to_value(), Some(fields.to_value())))) + } else { + Ok(Some((parseable_document.to_value(), None))) + } +} + #[cfg(test)] mod test { use super::Prompt; diff --git a/crates/milli/src/vector/json_template/mod.rs b/crates/milli/src/vector/json_template/mod.rs index 693860575..606ad9989 100644 --- a/crates/milli/src/vector/json_template/mod.rs +++ b/crates/milli/src/vector/json_template/mod.rs @@ -143,14 +143,6 @@ impl JsonTemplate { self.render(&search_data) } - /// Renders any serializable value by converting it to a liquid object and rendering it with the template. - /// If its a map, values inside can be accessed directly by their keys. - pub fn render_serializable(&self, object: &T) -> Result { - let object = liquid::to_object(object) - .map_err(|err| Error { template_error: err, path: ValuePath::new() })?; - self.render(&object) - } - /// The JSON value representing the underlying template pub fn template(&self) -> &Value { &self.value