Add fields support

This commit is contained in:
Mubelotix
2025-07-18 10:35:02 +02:00
parent 00d9f576ed
commit 289a7f391b
7 changed files with 238 additions and 46 deletions

1
Cargo.lock generated
View File

@ -3770,6 +3770,7 @@ dependencies = [
"itertools 0.14.0", "itertools 0.14.0",
"jsonwebtoken", "jsonwebtoken",
"lazy_static", "lazy_static",
"liquid",
"manifest-dir-macros", "manifest-dir-macros",
"maplit", "maplit",
"meili-snap", "meili-snap",

View File

@ -423,6 +423,7 @@ InvalidRenderTemplateId , InvalidRequest , BAD_REQU
InvalidRenderTemplateInline , InvalidRequest , BAD_REQUEST ; InvalidRenderTemplateInline , InvalidRequest , BAD_REQUEST ;
InvalidRenderInput , InvalidRequest , BAD_REQUEST ; InvalidRenderInput , InvalidRequest , BAD_REQUEST ;
InvalidRenderInputDocumentId , InvalidRequest , BAD_REQUEST ; InvalidRenderInputDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidRenderInputFields , InvalidRequest , BAD_REQUEST ;
InvalidRenderInputInline , InvalidRequest , BAD_REQUEST ; InvalidRenderInputInline , InvalidRequest , BAD_REQUEST ;
RenderDocumentNotFound , InvalidRequest , NOT_FOUND ; RenderDocumentNotFound , InvalidRequest , NOT_FOUND ;
TemplateParsingError , InvalidRequest , BAD_REQUEST ; TemplateParsingError , InvalidRequest , BAD_REQUEST ;

View File

@ -48,6 +48,7 @@ is-terminal = "0.4.16"
itertools = "0.14.0" itertools = "0.14.0"
jsonwebtoken = "9.3.1" jsonwebtoken = "9.3.1"
lazy_static = "1.5.0" lazy_static = "1.5.0"
liquid = "0.26.11"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.47", default-features = false } mimalloc = { version = "0.1.47", default-features = false }

View File

@ -2,39 +2,32 @@ use std::collections::BTreeMap;
use actix_web::web::{self, Data}; use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse}; use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::AwebJson;
use deserr::Deserr; use deserr::Deserr;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use itertools::structs; use liquid::ValueView;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::{ use meilisearch_types::error::deserr_codes::{
InvalidRenderInput, InvalidRenderInputDocumentId, InvalidRenderInputInline, InvalidRenderInput, InvalidRenderInputDocumentId, InvalidRenderInputFields,
InvalidRenderTemplate, InvalidRenderTemplateId, InvalidRenderTemplateInline, InvalidRenderInputInline, InvalidRenderTemplate, InvalidRenderTemplateId,
InvalidRenderTemplateInline,
}; };
use meilisearch_types::error::Code; use meilisearch_types::error::Code;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use meilisearch_types::milli::prompt::{get_document, get_inline_document_fields};
use meilisearch_types::milli::vector::json_template::{self, JsonTemplate}; use meilisearch_types::milli::vector::json_template::{self, JsonTemplate};
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::{heed, milli, Index}; use meilisearch_types::{heed, milli, Index};
use serde::Serialize; use serde::Serialize;
use serde_json::Value; use serde_json::Value;
use tracing::debug; use tracing::debug;
use utoipa::{IntoParams, OpenApi, ToSchema}; use utoipa::{OpenApi, ToSchema};
use super::ActionPolicy;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::DoubleActionPolicy; use crate::extractors::authentication::policies::DoubleActionPolicy;
use crate::extractors::authentication::GuardedData; use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler; use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
use crate::search::{
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, Route,
SearchKind, SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
#[derive(OpenApi)] #[derive(OpenApi)]
#[openapi( #[openapi(
@ -179,6 +172,10 @@ enum RenderError {
BothInlineDocAndDocId, BothInlineDocAndDocId,
TemplateParsing(json_template::Error), TemplateParsing(json_template::Error),
TemplateRendering(json_template::Error), TemplateRendering(json_template::Error),
FieldsUnavailable,
FieldsAlreadyPresent,
FieldsWithoutDocument,
} }
impl From<heed::Error> for RenderError { impl From<heed::Error> for RenderError {
@ -317,6 +314,18 @@ impl From<RenderError> for ResponseError {
format!("Error rendering template: {}", err.rendering_error("input")), format!("Error rendering template: {}", err.rendering_error("input")),
Code::TemplateRenderingError, Code::TemplateRenderingError,
), ),
FieldsUnavailable => ResponseError::from_msg(
String::from("Fields are not available on fragments.\n Hint: Remove the `insertFields` parameter or set it to `false`."),
Code::InvalidRenderInputFields,
),
FieldsAlreadyPresent => ResponseError::from_msg(
String::from("Fields were provided in the inline input but `insertFields` is set to `true`.\n Hint: Remove the `insertFields` parameter or set it to `false`."),
Code::InvalidRenderInputFields,
),
FieldsWithoutDocument => ResponseError::from_msg(
String::from("Fields were requested but no document was provided.\n Hint: Provide a document ID or inline document."),
Code::InvalidRenderInputFields,
),
} }
} }
} }
@ -324,8 +333,8 @@ impl From<RenderError> for ResponseError {
async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, RenderError> { async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, RenderError> {
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let template = match (query.template.inline, query.template.id) { let (template, fields_available) = match (query.template.inline, query.template.id) {
(Some(inline), None) => inline, (Some(inline), None) => (inline, true),
(None, Some(id)) => { (None, Some(id)) => {
let mut parts = id.split('.'); let mut parts = id.split('.');
@ -368,8 +377,11 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
}); });
} }
serde_json::Value::String( (
embedding_config.config.prompt.template.clone(), serde_json::Value::String(
embedding_config.config.prompt.template.clone(),
),
true,
) )
} }
"indexingFragments" | "indexingfragments" => { "indexingFragments" | "indexingfragments" => {
@ -396,7 +408,7 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
.indexing_fragments(), .indexing_fragments(),
})?; })?;
fragment.clone() (fragment.clone(), false)
} }
"searchFragments" | "searchfragments" => { "searchFragments" | "searchfragments" => {
let fragment_name = parts.next().ok_or_else(|| MissingFragment { let fragment_name = parts.next().ok_or_else(|| MissingFragment {
@ -422,7 +434,7 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
.search_fragments(), .search_fragments(),
})?; })?;
fragment.clone() (fragment.clone(), false)
} }
found => { found => {
return Err(UnknownTemplatePrefix { return Err(UnknownTemplatePrefix {
@ -449,7 +461,7 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
let chat_config = index.chat_config(&rtxn)?; let chat_config = index.chat_config(&rtxn)?;
serde_json::Value::String(chat_config.prompt.template.clone()) (serde_json::Value::String(chat_config.prompt.template.clone()), true)
} }
"" => return Err(EmptyTemplateId), "" => return Err(EmptyTemplateId),
unknown => { unknown => {
@ -467,30 +479,60 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
(None, None) => return Err(MissingTemplate), (None, None) => return Err(MissingTemplate),
}; };
let fields_required = query.input.as_ref().and_then(|i| i.insert_fields);
let fields_already_present = query
.input
.as_ref()
.is_some_and(|i| i.inline.as_ref().is_some_and(|i| i.get("fields").is_some()));
let fields_probably_used = template.as_str().is_none_or(|s| s.contains("fields"));
let has_inline_doc = query
.input
.as_ref()
.is_some_and(|i| i.inline.as_ref().is_some_and(|i| i.get("doc").is_some()));
let has_document_id = query.input.as_ref().is_some_and(|i| i.document_id.is_some());
let has_doc = has_inline_doc || has_document_id;
let insert_fields = match fields_required {
Some(insert_fields) => insert_fields,
None => fields_available && has_doc && fields_probably_used && !fields_already_present,
};
if insert_fields && !fields_available {
return Err(FieldsUnavailable);
}
if insert_fields && fields_already_present {
return Err(FieldsAlreadyPresent);
}
if insert_fields && !has_doc {
return Err(FieldsWithoutDocument);
}
if has_inline_doc && has_document_id {
return Err(BothInlineDocAndDocId);
}
let mut rendered = Value::Null; let mut rendered = Value::Null;
if let Some(input) = query.input { if let Some(input) = query.input {
let mut media = input.inline.unwrap_or_default(); let media = input.inline.unwrap_or_default();
let mut object = liquid::to_object(&media).unwrap();
if let Some(doc) = media.get("doc") {
if insert_fields {
let fields = get_inline_document_fields(&index, &rtxn, doc)?;
object.insert("fields".into(), fields.to_value());
}
}
if let Some(document_id) = input.document_id { if let Some(document_id) = input.document_id {
let internal_id = index let (document, fields) = get_document(&index, &rtxn, &document_id, insert_fields)?
.external_documents_ids() .ok_or_else(|| DocumentNotFound(document_id))?;
.get(&rtxn, &document_id)?
.ok_or_else(|| DocumentNotFound(document_id.to_string()))?;
let document = index.document(&rtxn, internal_id)?; object.insert("doc".into(), document);
if let Some(fields) = fields {
let fields_ids_map = index.fields_ids_map(&rtxn)?; object.insert("fields".into(), fields);
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
let document = Value::Object(document);
if media.insert(String::from("doc"), document).is_some() {
return Err(BothInlineDocAndDocId);
} }
} }
let json_template = JsonTemplate::new(template.clone()).map_err(TemplateParsing)?; let json_template = JsonTemplate::new(template.clone()).map_err(TemplateParsing)?;
rendered = json_template.render_serializable(&media).map_err(TemplateRendering)?; rendered = json_template.render(&object).map_err(TemplateRendering)?;
} }
Ok(RenderResult { template, rendered }) Ok(RenderResult { template, rendered })
@ -519,6 +561,8 @@ pub struct RenderQueryTemplate {
pub struct RenderQueryInput { pub struct RenderQueryInput {
#[deserr(default, error = DeserrJsonError<InvalidRenderInputDocumentId>)] #[deserr(default, error = DeserrJsonError<InvalidRenderInputDocumentId>)]
document_id: Option<String>, document_id: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidRenderInputFields>)]
insert_fields: Option<bool>,
#[deserr(default, error = DeserrJsonError<InvalidRenderInputInline>)] #[deserr(default, error = DeserrJsonError<InvalidRenderInputInline>)]
inline: Option<BTreeMap<String, serde_json::Value>>, inline: Option<BTreeMap<String, serde_json::Value>>,
} }

View File

@ -12,6 +12,7 @@ use liquid::{ObjectView, ValueView};
use rustc_hash::FxBuildHasher; use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue; use serde_json::value::RawValue;
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::FieldsIdsMap; use crate::FieldsIdsMap;
@ -143,6 +144,112 @@ impl ValueView for Document<'_> {
/// Implementation for any type that implements the Document trait /// Implementation for any type that implements the Document trait
use crate::update::new::document::Document as DocumentTrait; use crate::update::new::document::Document as DocumentTrait;
/// A JSON document held in two forms at once: a liquid object and the raw JSON
/// of each top-level field.
pub struct JsonDocument {
// Liquid representation of the whole document; the `ObjectView`, `ValueView` and
// `Debug` impls all forward to it.
object: liquid::Object,
// Top-level field name -> raw JSON of that field's value; the `DocumentTrait`
// impl for `&JsonDocument` reads from it.
cached: BTreeMap<String, Box<RawValue>>,
}
impl JsonDocument {
    /// Builds a `JsonDocument` from an already-parsed JSON value.
    ///
    /// For a JSON object, every top-level field is stored as raw JSON next to a
    /// liquid representation of the whole object. Any other JSON value (string,
    /// number, array, `null`, ...) has no top-level fields and produces an empty
    /// document: `value` can be a user-supplied inline document, so a wrong shape
    /// must not bring the process down.
    ///
    /// # Panics
    ///
    /// Panics if `liquid::to_object` rejects the object.
    // NOTE(review): which JSON objects liquid rejects is not visible from here
    // (possibly integers outside its numeric range) — confirm.
    // TODO: Remove panic (requires a fallible constructor).
    pub fn new(value: &serde_json::Value) -> Self {
        let Some(map) = value.as_object() else {
            return Self { object: liquid::Object::new(), cached: BTreeMap::new() };
        };

        // Serialize each field on its own instead of dumping the whole document to a
        // `String` and parsing it back into a map of raw values.
        let cached = map
            .iter()
            .map(|(name, field)| {
                let raw = serde_json::value::to_raw_value(field)
                    .expect("JsonDocument should only be created with valid JSON");
                (name.clone(), raw)
            })
            .collect();
        let object =
            liquid::to_object(value).expect("JsonDocument should only be created with valid JSON");

        Self { object, cached }
    }
}
impl std::fmt::Debug for JsonDocument {
    /// Formats the document exactly as its wrapped liquid object would be formatted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(&self.object, f)
    }
}
impl<'a> DocumentTrait<'a> for &'a JsonDocument {
    /// Yields every cached top-level field except the reserved vectors and geo fields.
    fn iter_top_level_fields(
        &self,
    ) -> impl Iterator<Item = crate::Result<(&'a str, &'a RawValue)>> {
        self.cached
            .iter()
            .filter(|(name, _)| {
                name.as_str() != RESERVED_VECTORS_FIELD_NAME
                    && name.as_str() != RESERVED_GEO_FIELD_NAME
            })
            .map(|(name, raw)| Ok((name.as_str(), &**raw)))
    }

    /// Number of cached top-level fields, not counting the reserved vectors and geo fields.
    fn top_level_fields_count(&self) -> usize {
        let vectors = usize::from(self.cached.contains_key(RESERVED_VECTORS_FIELD_NAME));
        let geo = usize::from(self.cached.contains_key(RESERVED_GEO_FIELD_NAME));
        self.cached.len() - vectors - geo
    }

    /// Looks up a top-level field by name; the reserved vectors and geo names always give `None`.
    fn top_level_field(&self, k: &str) -> crate::Result<Option<&'a RawValue>> {
        let reserved = k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME;
        let field = if reserved { None } else { self.cached.get(k).map(|raw| &**raw) };
        Ok(field)
    }

    /// Raw JSON stored under the reserved vectors field name, if present.
    fn vectors_field(&self) -> crate::Result<Option<&'a RawValue>> {
        let vectors = self.cached.get(RESERVED_VECTORS_FIELD_NAME);
        Ok(vectors.map(|raw| &**raw))
    }

    /// Raw JSON stored under the reserved geo field name, if present.
    fn geo_field(&self) -> crate::Result<Option<&'a RawValue>> {
        let geo = self.cached.get(RESERVED_GEO_FIELD_NAME);
        Ok(geo.map(|raw| &**raw))
    }
}
// Liquid object view of the document: every method forwards to the wrapped
// `liquid::Object` stored in `self.object`.
impl ObjectView for JsonDocument {
/// Returns the wrapped object viewed as a liquid value.
fn as_value(&self) -> &dyn ValueView {
self.object.as_value()
}
/// Size reported by the wrapped object.
fn size(&self) -> i64 {
self.object.size()
}
/// Iterates over the keys of the wrapped object, converting each into a `KStringCow`.
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
Box::new(self.object.keys().map(|s| s.into()))
}
/// Iterates over the values of the wrapped object as liquid value views.
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
Box::new(self.object.values().map(|v| v.as_view()))
}
/// Iterates over the `(key, value)` pairs of the wrapped object.
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
Box::new(self.object.iter().map(|(k, v)| (k.into(), v.as_view())))
}
/// Whether the wrapped object has an entry for `index`.
fn contains_key(&self, index: &str) -> bool {
self.object.contains_key(index)
}
/// Returns the value stored under `index` in the wrapped object, if any.
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
self.object.get(index).map(|v| v.as_view())
}
}
// Liquid value view of the document: every method forwards unchanged to the
// wrapped `liquid::Object` stored in `self.object`.
impl ValueView for JsonDocument {
/// Debug view of the wrapped object.
fn as_debug(&self) -> &dyn fmt::Debug {
self.object.as_debug()
}
/// Rendered form of the wrapped object.
fn render(&self) -> DisplayCow<'_> {
self.object.render()
}
/// Source form of the wrapped object.
fn source(&self) -> DisplayCow<'_> {
self.object.source()
}
/// Type name reported by the wrapped object.
fn type_name(&self) -> &'static str {
self.object.type_name()
}
/// Answers the state query using the wrapped object.
fn query_state(&self, state: State) -> bool {
self.object.query_state(state)
}
/// String form of the wrapped object.
fn to_kstr(&self) -> KStringCow<'_> {
self.object.to_kstr()
}
/// Owned liquid value built from the wrapped object.
fn to_value(&self) -> LiquidValue {
self.object.to_value()
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct ParseableDocument<'a, 'doc, D: DocumentTrait<'a> + Debug> { pub struct ParseableDocument<'a, 'doc, D: DocumentTrait<'a> + Debug> {
document: D, document: D,

View File

@ -12,11 +12,16 @@ use bumpalo::Bump;
pub(crate) use document::{Document, ParseableDocument}; pub(crate) use document::{Document, ParseableDocument};
use error::{NewPromptError, RenderPromptError}; use error::{NewPromptError, RenderPromptError};
pub use fields::{BorrowedFields, OwnedFields}; pub use fields::{BorrowedFields, OwnedFields};
use heed::RoTxn;
use liquid::model::Value as LiquidValue;
use liquid::ValueView;
pub use self::context::Context; pub use self::context::Context;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::prompt::document::JsonDocument;
use crate::update::del_add::DelAdd; use crate::update::del_add::DelAdd;
use crate::GlobalFieldsIdsMap; use crate::update::new::document::DocumentFromDb;
use crate::{GlobalFieldsIdsMap, Index, MetadataBuilder};
pub struct Prompt { pub struct Prompt {
template: liquid::Template, template: liquid::Template,
@ -164,6 +169,47 @@ fn truncate(s: &mut String, max_bytes: usize) {
} }
} }
/// Computes the liquid `fields` value for a document supplied inline as JSON.
///
/// The index's field-id map with metadata is read through `rtxn`, the inline JSON
/// is wrapped in a [`JsonDocument`], and the resulting `OwnedFields` is converted
/// to an owned liquid value.
///
/// # Errors
///
/// Returns an error if the field-id map with metadata cannot be read from the index.
pub fn get_inline_document_fields(
    index: &Index,
    rtxn: &RoTxn<'_>,
    inline_doc: &serde_json::Value,
) -> Result<LiquidValue, crate::Error> {
    let map_with_metadata = index.fields_ids_map_with_metadata(rtxn)?;
    let document = JsonDocument::new(inline_doc);

    let owned_fields = OwnedFields::new(&document, &map_with_metadata);
    Ok(owned_fields.to_value())
}
/// Loads a stored document by its external id and converts it to liquid values.
///
/// Returns `Ok(None)` when the external id has no internal id in the index, or
/// when `DocumentFromDb::new` yields no document for that internal id. Otherwise
/// returns the document value, paired with its `fields` value only when
/// `with_fields` is `true`.
///
/// # Errors
///
/// Propagates any error raised while reading from the index.
pub fn get_document(
    index: &Index,
    rtxn: &RoTxn<'_>,
    external_id: &str,
    with_fields: bool,
) -> Result<Option<(LiquidValue, Option<LiquidValue>)>, crate::Error> {
    let internal_id = match index.external_documents_ids().get(rtxn, external_id)? {
        Some(docid) => docid,
        None => return Ok(None),
    };

    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let stored = match DocumentFromDb::new(internal_id, rtxn, index, &fields_ids_map)? {
        Some(stored) => stored,
        None => return Ok(None),
    };

    let bump = Bump::new();
    let document = ParseableDocument::new(stored, &bump);

    if !with_fields {
        return Ok(Some((document.to_value(), None)));
    }

    // `fields_ids_map` is still borrowed by the document, so the metadata map gets its own copy.
    let metadata_builder = MetadataBuilder::from_index(index, rtxn)?;
    let map_with_metadata = FieldIdMapWithMetadata::new(fields_ids_map.clone(), metadata_builder);
    let owned_fields = OwnedFields::new(&document, &map_with_metadata);
    Ok(Some((document.to_value(), Some(owned_fields.to_value()))))
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::Prompt; use super::Prompt;

View File

@ -143,14 +143,6 @@ impl JsonTemplate {
self.render(&search_data) self.render(&search_data)
} }
/// Renders any serializable value by converting it to a liquid object and rendering it with the template.
/// If its a map, values inside can be accessed directly by their keys.
pub fn render_serializable<T: Serialize>(&self, object: &T) -> Result<Value, Error> {
let object = liquid::to_object(object)
.map_err(|err| Error { template_error: err, path: ValuePath::new() })?;
self.render(&object)
}
/// The JSON value representing the underlying template /// The JSON value representing the underlying template
pub fn template(&self) -> &Value { pub fn template(&self) -> &Value {
&self.value &self.value