mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 00:31:02 +00:00
Add fields support
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3770,6 +3770,7 @@ dependencies = [
|
||||
"itertools 0.14.0",
|
||||
"jsonwebtoken",
|
||||
"lazy_static",
|
||||
"liquid",
|
||||
"manifest-dir-macros",
|
||||
"maplit",
|
||||
"meili-snap",
|
||||
|
@ -423,6 +423,7 @@ InvalidRenderTemplateId , InvalidRequest , BAD_REQU
|
||||
InvalidRenderTemplateInline , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidRenderInput , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidRenderInputDocumentId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidRenderInputFields , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidRenderInputInline , InvalidRequest , BAD_REQUEST ;
|
||||
RenderDocumentNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TemplateParsingError , InvalidRequest , BAD_REQUEST ;
|
||||
|
@ -48,6 +48,7 @@ is-terminal = "0.4.16"
|
||||
itertools = "0.14.0"
|
||||
jsonwebtoken = "9.3.1"
|
||||
lazy_static = "1.5.0"
|
||||
liquid = "0.26.11"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
|
@ -2,39 +2,32 @@ use std::collections::BTreeMap;
|
||||
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use itertools::structs;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use liquid::ValueView;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidRenderInput, InvalidRenderInputDocumentId, InvalidRenderInputInline,
|
||||
InvalidRenderTemplate, InvalidRenderTemplateId, InvalidRenderTemplateInline,
|
||||
InvalidRenderInput, InvalidRenderInputDocumentId, InvalidRenderInputFields,
|
||||
InvalidRenderInputInline, InvalidRenderTemplate, InvalidRenderTemplateId,
|
||||
InvalidRenderTemplateInline,
|
||||
};
|
||||
use meilisearch_types::error::Code;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::prompt::{get_document, get_inline_document_fields};
|
||||
use meilisearch_types::milli::vector::json_template::{self, JsonTemplate};
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use meilisearch_types::{heed, milli, Index};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
use utoipa::{IntoParams, OpenApi, ToSchema};
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
|
||||
use super::ActionPolicy;
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::DoubleActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
|
||||
use crate::search::{
|
||||
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, Route,
|
||||
SearchKind, SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
@ -179,6 +172,10 @@ enum RenderError {
|
||||
BothInlineDocAndDocId,
|
||||
TemplateParsing(json_template::Error),
|
||||
TemplateRendering(json_template::Error),
|
||||
|
||||
FieldsUnavailable,
|
||||
FieldsAlreadyPresent,
|
||||
FieldsWithoutDocument,
|
||||
}
|
||||
|
||||
impl From<heed::Error> for RenderError {
|
||||
@ -317,6 +314,18 @@ impl From<RenderError> for ResponseError {
|
||||
format!("Error rendering template: {}", err.rendering_error("input")),
|
||||
Code::TemplateRenderingError,
|
||||
),
|
||||
FieldsUnavailable => ResponseError::from_msg(
|
||||
String::from("Fields are not available on fragments.\n Hint: Remove the `insertFields` parameter or set it to `false`."),
|
||||
Code::InvalidRenderInputFields,
|
||||
),
|
||||
FieldsAlreadyPresent => ResponseError::from_msg(
|
||||
String::from("Fields were provided in the inline input but `insertFields` is set to `true`.\n Hint: Remove the `insertFields` parameter or set it to `false`."),
|
||||
Code::InvalidRenderInputFields,
|
||||
),
|
||||
FieldsWithoutDocument => ResponseError::from_msg(
|
||||
String::from("Fields were requested but no document was provided.\n Hint: Provide a document ID or inline document."),
|
||||
Code::InvalidRenderInputFields,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -324,8 +333,8 @@ impl From<RenderError> for ResponseError {
|
||||
async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, RenderError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let template = match (query.template.inline, query.template.id) {
|
||||
(Some(inline), None) => inline,
|
||||
let (template, fields_available) = match (query.template.inline, query.template.id) {
|
||||
(Some(inline), None) => (inline, true),
|
||||
(None, Some(id)) => {
|
||||
let mut parts = id.split('.');
|
||||
|
||||
@ -368,8 +377,11 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
|
||||
});
|
||||
}
|
||||
|
||||
(
|
||||
serde_json::Value::String(
|
||||
embedding_config.config.prompt.template.clone(),
|
||||
),
|
||||
true,
|
||||
)
|
||||
}
|
||||
"indexingFragments" | "indexingfragments" => {
|
||||
@ -396,7 +408,7 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
|
||||
.indexing_fragments(),
|
||||
})?;
|
||||
|
||||
fragment.clone()
|
||||
(fragment.clone(), false)
|
||||
}
|
||||
"searchFragments" | "searchfragments" => {
|
||||
let fragment_name = parts.next().ok_or_else(|| MissingFragment {
|
||||
@ -422,7 +434,7 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
|
||||
.search_fragments(),
|
||||
})?;
|
||||
|
||||
fragment.clone()
|
||||
(fragment.clone(), false)
|
||||
}
|
||||
found => {
|
||||
return Err(UnknownTemplatePrefix {
|
||||
@ -449,7 +461,7 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
|
||||
|
||||
let chat_config = index.chat_config(&rtxn)?;
|
||||
|
||||
serde_json::Value::String(chat_config.prompt.template.clone())
|
||||
(serde_json::Value::String(chat_config.prompt.template.clone()), true)
|
||||
}
|
||||
"" => return Err(EmptyTemplateId),
|
||||
unknown => {
|
||||
@ -467,30 +479,60 @@ async fn render(index: Index, query: RenderQuery) -> Result<RenderResult, Render
|
||||
(None, None) => return Err(MissingTemplate),
|
||||
};
|
||||
|
||||
let fields_required = query.input.as_ref().and_then(|i| i.insert_fields);
|
||||
let fields_already_present = query
|
||||
.input
|
||||
.as_ref()
|
||||
.is_some_and(|i| i.inline.as_ref().is_some_and(|i| i.get("fields").is_some()));
|
||||
let fields_probably_used = template.as_str().is_none_or(|s| s.contains("fields"));
|
||||
let has_inline_doc = query
|
||||
.input
|
||||
.as_ref()
|
||||
.is_some_and(|i| i.inline.as_ref().is_some_and(|i| i.get("doc").is_some()));
|
||||
let has_document_id = query.input.as_ref().is_some_and(|i| i.document_id.is_some());
|
||||
let has_doc = has_inline_doc || has_document_id;
|
||||
let insert_fields = match fields_required {
|
||||
Some(insert_fields) => insert_fields,
|
||||
None => fields_available && has_doc && fields_probably_used && !fields_already_present,
|
||||
};
|
||||
if insert_fields && !fields_available {
|
||||
return Err(FieldsUnavailable);
|
||||
}
|
||||
if insert_fields && fields_already_present {
|
||||
return Err(FieldsAlreadyPresent);
|
||||
}
|
||||
if insert_fields && !has_doc {
|
||||
return Err(FieldsWithoutDocument);
|
||||
}
|
||||
if has_inline_doc && has_document_id {
|
||||
return Err(BothInlineDocAndDocId);
|
||||
}
|
||||
|
||||
let mut rendered = Value::Null;
|
||||
if let Some(input) = query.input {
|
||||
let mut media = input.inline.unwrap_or_default();
|
||||
let media = input.inline.unwrap_or_default();
|
||||
let mut object = liquid::to_object(&media).unwrap();
|
||||
|
||||
if let Some(doc) = media.get("doc") {
|
||||
if insert_fields {
|
||||
let fields = get_inline_document_fields(&index, &rtxn, doc)?;
|
||||
object.insert("fields".into(), fields.to_value());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(document_id) = input.document_id {
|
||||
let internal_id = index
|
||||
.external_documents_ids()
|
||||
.get(&rtxn, &document_id)?
|
||||
.ok_or_else(|| DocumentNotFound(document_id.to_string()))?;
|
||||
let (document, fields) = get_document(&index, &rtxn, &document_id, insert_fields)?
|
||||
.ok_or_else(|| DocumentNotFound(document_id))?;
|
||||
|
||||
let document = index.document(&rtxn, internal_id)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||
let document = Value::Object(document);
|
||||
|
||||
if media.insert(String::from("doc"), document).is_some() {
|
||||
return Err(BothInlineDocAndDocId);
|
||||
object.insert("doc".into(), document);
|
||||
if let Some(fields) = fields {
|
||||
object.insert("fields".into(), fields);
|
||||
}
|
||||
}
|
||||
|
||||
let json_template = JsonTemplate::new(template.clone()).map_err(TemplateParsing)?;
|
||||
|
||||
rendered = json_template.render_serializable(&media).map_err(TemplateRendering)?;
|
||||
rendered = json_template.render(&object).map_err(TemplateRendering)?;
|
||||
}
|
||||
|
||||
Ok(RenderResult { template, rendered })
|
||||
@ -519,6 +561,8 @@ pub struct RenderQueryTemplate {
|
||||
pub struct RenderQueryInput {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidRenderInputDocumentId>)]
|
||||
document_id: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidRenderInputFields>)]
|
||||
insert_fields: Option<bool>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidRenderInputInline>)]
|
||||
inline: Option<BTreeMap<String, serde_json::Value>>,
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ use liquid::{ObjectView, ValueView};
|
||||
use rustc_hash::FxBuildHasher;
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
use crate::FieldsIdsMap;
|
||||
|
||||
@ -143,6 +144,112 @@ impl ValueView for Document<'_> {
|
||||
/// Implementation for any type that implements the Document trait
|
||||
use crate::update::new::document::Document as DocumentTrait;
|
||||
|
||||
pub struct JsonDocument {
|
||||
object: liquid::Object,
|
||||
cached: BTreeMap<String, Box<RawValue>>,
|
||||
}
|
||||
|
||||
impl JsonDocument {
|
||||
pub fn new(value: &serde_json::Value) -> Self {
|
||||
let to_string = serde_json::to_string(&value)
|
||||
.expect("JsonDocument should only be created with valid JSON"); // TODO: Remove panic
|
||||
let back_to_value: BTreeMap<String, Box<RawValue>> = serde_json::from_str(&to_string)
|
||||
.expect("JsonDocument should only be created with valid JSON");
|
||||
let object =
|
||||
liquid::to_object(&value).expect("JsonDocument should only be created with valid JSON");
|
||||
Self { object, cached: back_to_value }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for JsonDocument {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.object.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> DocumentTrait<'a> for &'a JsonDocument {
|
||||
fn iter_top_level_fields(
|
||||
&self,
|
||||
) -> impl Iterator<Item = crate::Result<(&'a str, &'a RawValue)>> {
|
||||
self.cached.iter().filter_map(|(k, v)| {
|
||||
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
|
||||
None
|
||||
} else {
|
||||
Some(Ok((k.as_str(), v.as_ref())))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn top_level_fields_count(&self) -> usize {
|
||||
self.cached.len()
|
||||
- self.cached.contains_key(RESERVED_VECTORS_FIELD_NAME) as usize
|
||||
- self.cached.contains_key(RESERVED_GEO_FIELD_NAME) as usize
|
||||
}
|
||||
|
||||
fn top_level_field(&self, k: &str) -> crate::Result<Option<&'a RawValue>> {
|
||||
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(self.cached.get(k).map(|r| r.as_ref()))
|
||||
}
|
||||
|
||||
fn vectors_field(&self) -> crate::Result<Option<&'a RawValue>> {
|
||||
Ok(self.cached.get(RESERVED_VECTORS_FIELD_NAME).map(|r| r.as_ref()))
|
||||
}
|
||||
|
||||
fn geo_field(&self) -> crate::Result<Option<&'a RawValue>> {
|
||||
Ok(self.cached.get(RESERVED_GEO_FIELD_NAME).map(|r| r.as_ref()))
|
||||
}
|
||||
}
|
||||
|
||||
impl ObjectView for JsonDocument {
|
||||
fn as_value(&self) -> &dyn ValueView {
|
||||
self.object.as_value()
|
||||
}
|
||||
fn size(&self) -> i64 {
|
||||
self.object.size()
|
||||
}
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(self.object.keys().map(|s| s.into()))
|
||||
}
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(self.object.values().map(|v| v.as_view()))
|
||||
}
|
||||
fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
|
||||
Box::new(self.object.iter().map(|(k, v)| (k.into(), v.as_view())))
|
||||
}
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
self.object.contains_key(index)
|
||||
}
|
||||
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
|
||||
self.object.get(index).map(|v| v.as_view())
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueView for JsonDocument {
|
||||
fn as_debug(&self) -> &dyn fmt::Debug {
|
||||
self.object.as_debug()
|
||||
}
|
||||
fn render(&self) -> DisplayCow<'_> {
|
||||
self.object.render()
|
||||
}
|
||||
fn source(&self) -> DisplayCow<'_> {
|
||||
self.object.source()
|
||||
}
|
||||
fn type_name(&self) -> &'static str {
|
||||
self.object.type_name()
|
||||
}
|
||||
fn query_state(&self, state: State) -> bool {
|
||||
self.object.query_state(state)
|
||||
}
|
||||
fn to_kstr(&self) -> KStringCow<'_> {
|
||||
self.object.to_kstr()
|
||||
}
|
||||
fn to_value(&self) -> LiquidValue {
|
||||
self.object.to_value()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ParseableDocument<'a, 'doc, D: DocumentTrait<'a> + Debug> {
|
||||
document: D,
|
||||
|
@ -12,11 +12,16 @@ use bumpalo::Bump;
|
||||
pub(crate) use document::{Document, ParseableDocument};
|
||||
use error::{NewPromptError, RenderPromptError};
|
||||
pub use fields::{BorrowedFields, OwnedFields};
|
||||
use heed::RoTxn;
|
||||
use liquid::model::Value as LiquidValue;
|
||||
use liquid::ValueView;
|
||||
|
||||
pub use self::context::Context;
|
||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||
use crate::prompt::document::JsonDocument;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::GlobalFieldsIdsMap;
|
||||
use crate::update::new::document::DocumentFromDb;
|
||||
use crate::{GlobalFieldsIdsMap, Index, MetadataBuilder};
|
||||
|
||||
pub struct Prompt {
|
||||
template: liquid::Template,
|
||||
@ -164,6 +169,47 @@ fn truncate(s: &mut String, max_bytes: usize) {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_inline_document_fields(
|
||||
index: &Index,
|
||||
rtxn: &RoTxn<'_>,
|
||||
inline_doc: &serde_json::Value,
|
||||
) -> Result<LiquidValue, crate::Error> {
|
||||
let fid_map_with_meta = index.fields_ids_map_with_metadata(rtxn)?;
|
||||
let inline_doc = JsonDocument::new(inline_doc);
|
||||
let fields = OwnedFields::new(&inline_doc, &fid_map_with_meta);
|
||||
|
||||
Ok(fields.to_value())
|
||||
}
|
||||
|
||||
pub fn get_document(
|
||||
index: &Index,
|
||||
rtxn: &RoTxn<'_>,
|
||||
external_id: &str,
|
||||
with_fields: bool,
|
||||
) -> Result<Option<(LiquidValue, Option<LiquidValue>)>, crate::Error> {
|
||||
let Some(internal_id) = index.external_documents_ids().get(rtxn, external_id)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let fid_map = index.fields_ids_map(rtxn)?;
|
||||
let Some(document_from_db) = DocumentFromDb::new(internal_id, rtxn, index, &fid_map)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let doc_alloc = Bump::new();
|
||||
let parseable_document = ParseableDocument::new(document_from_db, &doc_alloc);
|
||||
|
||||
if with_fields {
|
||||
let metadata_builder = MetadataBuilder::from_index(index, rtxn)?;
|
||||
let fid_map_with_meta = FieldIdMapWithMetadata::new(fid_map.clone(), metadata_builder);
|
||||
let fields = OwnedFields::new(&parseable_document, &fid_map_with_meta);
|
||||
|
||||
Ok(Some((parseable_document.to_value(), Some(fields.to_value()))))
|
||||
} else {
|
||||
Ok(Some((parseable_document.to_value(), None)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::Prompt;
|
||||
|
@ -143,14 +143,6 @@ impl JsonTemplate {
|
||||
self.render(&search_data)
|
||||
}
|
||||
|
||||
/// Renders any serializable value by converting it to a liquid object and rendering it with the template.
|
||||
/// If its a map, values inside can be accessed directly by their keys.
|
||||
pub fn render_serializable<T: Serialize>(&self, object: &T) -> Result<Value, Error> {
|
||||
let object = liquid::to_object(object)
|
||||
.map_err(|err| Error { template_error: err, path: ValuePath::new() })?;
|
||||
self.render(&object)
|
||||
}
|
||||
|
||||
/// The JSON value representing the underlying template
|
||||
pub fn template(&self) -> &Value {
|
||||
&self.value
|
||||
|
Reference in New Issue
Block a user