diff --git a/crates/dump/src/reader/compat/v5_to_v6.rs b/crates/dump/src/reader/compat/v5_to_v6.rs index 14570c258..f7bda81c6 100644 --- a/crates/dump/src/reader/compat/v5_to_v6.rs +++ b/crates/dump/src/reader/compat/v5_to_v6.rs @@ -405,6 +405,7 @@ impl From> for v6::Settings { search_cutoff_ms: v6::Setting::NotSet, facet_search: v6::Setting::NotSet, prefix_search: v6::Setting::NotSet, + chat: v6::Setting::NotSet, _kind: std::marker::PhantomData, } } diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 6c547d51e..172656237 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -387,7 +387,8 @@ VectorEmbeddingError , InvalidRequest , BAD_REQUEST ; NotFoundSimilarId , InvalidRequest , BAD_REQUEST ; InvalidDocumentEditionContext , InvalidRequest , BAD_REQUEST ; InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ; -EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST +EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ; +InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST } impl ErrorCode for JoinError { diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index edb136567..b2f0c2f5b 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -11,6 +11,7 @@ use fst::IntoStreamer; use milli::disabled_typos_terms::DisabledTyposTerms; use milli::index::{IndexEmbeddingConfig, PrefixSearch}; use milli::proximity::ProximityPrecision; +pub use milli::update::ChatSettings; use milli::update::Setting; use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; @@ -199,72 +200,86 @@ pub struct Settings { #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["id", "title", "description", "url"]))] pub displayed_attributes: WildcardSetting, + /// 
Fields in which to search for matching query words sorted by order of importance. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["title", "description"]))] pub searchable_attributes: WildcardSetting, + /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] pub filterable_attributes: Setting>, + /// Attributes to use when sorting search results. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["release_date"]))] pub sortable_attributes: Setting>, + /// List of ranking rules sorted by order of importance. The order is customizable. /// [A list of ordered built-in ranking rules](https://www.meilisearch.com/docs/learn/relevancy/relevancy). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!([RankingRuleView::Words, RankingRuleView::Typo, RankingRuleView::Proximity, RankingRuleView::Attribute, RankingRuleView::Exactness]))] pub ranking_rules: Setting>, + /// List of words ignored when present in search queries. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["the", "a", "them", "their"]))] pub stop_words: Setting>, + /// List of characters not delimiting where one term begins and ends. 
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!([" ", "\n"]))] pub non_separator_tokens: Setting>, + /// List of characters delimiting where one term begins and ends. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["S"]))] pub separator_tokens: Setting>, + /// List of strings Meilisearch should parse as a single term. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(["iPhone pro"]))] pub dictionary: Setting>, + /// List of associated words treated similarly. A word associated to an array of word as synonyms. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>>, example = json!({ "he": ["she", "they", "them"], "phone": ["iPhone", "android"]}))] pub synonyms: Setting>>, + /// Search returns documents with distinct (different) values of the given field. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!("sku"))] pub distinct_attribute: Setting, + /// Precision level when calculating the proximity ranking rule. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!(ProximityPrecisionView::ByAttribute))] pub proximity_precision: Setting, + /// Customize typo tolerance feature. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!({ "enabled": true, "disableOnAttributes": ["title"]}))] pub typo_tolerance: Setting, + /// Faceting settings. 
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!({ "maxValuesPerFacet": 10, "sortFacetValuesBy": { "genre": FacetValuesSort::Count }}))] pub faceting: Setting, + /// Pagination settings. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] @@ -276,24 +291,34 @@ pub struct Settings { #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>)] pub embedders: Setting>, + /// Maximum duration of a search query. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!(50))] pub search_cutoff_ms: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option>, example = json!(50))] pub localized_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!(true))] pub facet_search: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!("Hemlo"))] pub prefix_search: Setting, + /// Customize the chat prompting. 
+ #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option)] + pub chat: Setting, + #[serde(skip)] #[deserr(skip)] pub _kind: PhantomData, @@ -359,6 +384,7 @@ impl Settings { localized_attributes: Setting::Reset, facet_search: Setting::Reset, prefix_search: Setting::Reset, + chat: Setting::Reset, _kind: PhantomData, } } @@ -385,6 +411,7 @@ impl Settings { localized_attributes: localized_attributes_rules, facet_search, prefix_search, + chat, _kind, } = self; @@ -409,6 +436,7 @@ impl Settings { localized_attributes: localized_attributes_rules, facet_search, prefix_search, + chat, _kind: PhantomData, } } @@ -459,6 +487,7 @@ impl Settings { localized_attributes: self.localized_attributes, facet_search: self.facet_search, prefix_search: self.prefix_search, + chat: self.chat, _kind: PhantomData, } } @@ -533,8 +562,9 @@ impl Settings { Setting::Set(this) } }, - prefix_search: other.prefix_search.or(self.prefix_search), facet_search: other.facet_search.or(self.facet_search), + prefix_search: other.prefix_search.or(self.prefix_search), + chat: other.chat.clone().or(self.chat.clone()), _kind: PhantomData, } } @@ -573,6 +603,7 @@ pub fn apply_settings_to_builder( localized_attributes: localized_attributes_rules, facet_search, prefix_search, + chat, _kind, } = settings; @@ -783,6 +814,12 @@ pub fn apply_settings_to_builder( Setting::Reset => builder.reset_facet_search(), Setting::NotSet => (), } + + match chat { + Setting::Set(chat) => builder.set_chat(chat.clone()), + Setting::Reset => builder.reset_chat(), + Setting::NotSet => (), + } } pub enum SecretPolicy { @@ -880,14 +917,11 @@ pub fn settings( }) .collect(); let embedders = Setting::Set(embedders); - let search_cutoff_ms = index.search_cutoff(rtxn)?; - let localized_attributes_rules = index.localized_attributes_rules(rtxn)?; - let prefix_search = index.prefix_search(rtxn)?.map(PrefixSearchSettings::from); - let facet_search = 
index.facet_search(rtxn)?; + let chat = index.chat_config(rtxn).map(ChatSettings::from)?; let mut settings = Settings { displayed_attributes: match displayed_attributes { @@ -925,8 +959,9 @@ pub fn settings( Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()), None => Setting::Reset, }, - prefix_search: Setting::Set(prefix_search.unwrap_or_default()), facet_search: Setting::Set(facet_search), + prefix_search: Setting::Set(prefix_search.unwrap_or_default()), + chat: Setting::Set(chat), _kind: PhantomData, }; @@ -1154,6 +1189,7 @@ pub(crate) mod test { search_cutoff_ms: Setting::NotSet, facet_search: Setting::NotSet, prefix_search: Setting::NotSet, + chat: Setting::NotSet, _kind: PhantomData::, }; @@ -1185,6 +1221,8 @@ pub(crate) mod test { search_cutoff_ms: Setting::NotSet, facet_search: Setting::NotSet, prefix_search: Setting::NotSet, + chat: Setting::NotSet, + _kind: PhantomData::, }; diff --git a/crates/meilisearch/src/routes/chat.rs b/crates/meilisearch/src/routes/chat.rs index b3a67ff10..d85c14c36 100644 --- a/crates/meilisearch/src/routes/chat.rs +++ b/crates/meilisearch/src/routes/chat.rs @@ -30,7 +30,7 @@ use serde_json::json; use tokio::runtime::Handle; use tokio::sync::mpsc::error::SendError; -use super::settings::chat::{ChatPrompts, ChatSettings}; +use super::settings::chat::{ChatPrompts, GlobalChatSettings}; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _}; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; @@ -216,7 +216,7 @@ async fn non_streamed_chat( let chat_settings = match index_scheduler.chat_settings().unwrap() { Some(value) => serde_json::from_value(value).unwrap(), - None => ChatSettings::default(), + None => GlobalChatSettings::default(), }; let mut config = OpenAIConfig::default(); @@ -307,7 +307,7 @@ async fn streamed_chat( let chat_settings = match index_scheduler.chat_settings().unwrap() { Some(value) 
=> serde_json::from_value(value).unwrap(), - None => ChatSettings::default(), + None => GlobalChatSettings::default(), }; let mut config = OpenAIConfig::default(); diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index 92b018c8c..a35ae5136 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -6,7 +6,7 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::settings::{ - settings, SecretPolicy, SettingEmbeddingSettings, Settings, Unchecked, + settings, ChatSettings, SecretPolicy, SettingEmbeddingSettings, Settings, Unchecked, }; use meilisearch_types::tasks::KindWithContent; use tracing::debug; @@ -508,6 +508,17 @@ make_setting_routes!( camelcase_attr: "prefixSearch", analytics: PrefixSearchAnalytics }, + { + route: "/chat", + update_verb: put, + value_type: ChatSettings, + err_type: meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsIndexChat, + >, + attr: chat, + camelcase_attr: "chat", + analytics: ChatAnalytics + }, ); #[utoipa::path( @@ -597,6 +608,7 @@ pub async fn update_all( ), facet_search: FacetSearchAnalytics::new(new_settings.facet_search.as_ref().set()), prefix_search: PrefixSearchAnalytics::new(new_settings.prefix_search.as_ref().set()), + chat: ChatAnalytics::new(new_settings.chat.as_ref().set()), }, &req, ); diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index 41df91966..1b8d0e244 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -10,8 +10,8 @@ use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; use 
meilisearch_types::milli::FilterableAttributesRule; use meilisearch_types::settings::{ - FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView, - RankingRuleView, SettingEmbeddingSettings, TypoSettings, + ChatSettings, FacetingSettings, PaginationSettings, PrefixSearchSettings, + ProximityPrecisionView, RankingRuleView, SettingEmbeddingSettings, TypoSettings, }; use serde::Serialize; @@ -39,6 +39,7 @@ pub struct SettingsAnalytics { pub non_separator_tokens: NonSeparatorTokensAnalytics, pub facet_search: FacetSearchAnalytics, pub prefix_search: PrefixSearchAnalytics, + pub chat: ChatAnalytics, } impl Aggregate for SettingsAnalytics { @@ -198,6 +199,7 @@ impl Aggregate for SettingsAnalytics { set: new.prefix_search.set | self.prefix_search.set, value: new.prefix_search.value.or(self.prefix_search.value), }, + chat: ChatAnalytics { set: new.chat.set | self.chat.set }, }) } @@ -676,3 +678,18 @@ impl PrefixSearchAnalytics { SettingsAnalytics { prefix_search: self, ..Default::default() } } } + +#[derive(Serialize, Default)] +pub struct ChatAnalytics { + pub set: bool, +} + +impl ChatAnalytics { + pub fn new(settings: Option<&ChatSettings>) -> Self { + Self { set: settings.is_some() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { chat: self, ..Default::default() } + } +} diff --git a/crates/meilisearch/src/routes/settings/chat.rs b/crates/meilisearch/src/routes/settings/chat.rs index d8be27ab3..a971ad102 100644 --- a/crates/meilisearch/src/routes/settings/chat.rs +++ b/crates/meilisearch/src/routes/settings/chat.rs @@ -27,7 +27,7 @@ async fn get_settings( ) -> Result { let settings = match index_scheduler.chat_settings()? 
{ Some(value) => serde_json::from_value(value).unwrap(), - None => ChatSettings::default(), + None => GlobalChatSettings::default(), }; Ok(HttpResponse::Ok().json(settings)) } @@ -37,7 +37,7 @@ async fn patch_settings( ActionPolicy<{ actions::CHAT_SETTINGS_UPDATE }>, Data, >, - web::Json(chat_settings): web::Json, + web::Json(chat_settings): web::Json, ) -> Result { let chat_settings = serde_json::to_value(chat_settings).unwrap(); index_scheduler.put_chat_settings(&chat_settings)?; @@ -46,7 +46,7 @@ async fn patch_settings( #[derive(Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields, rename_all = "camelCase")] -pub struct ChatSettings { +pub struct GlobalChatSettings { pub source: String, pub base_api: Option, pub api_key: Option, @@ -91,9 +91,9 @@ const DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION: &str = "The name of the index to search within. An index is a collection of documents organized for search. \ Selecting the right index ensures the most relevant results for the user query"; -impl Default for ChatSettings { +impl Default for GlobalChatSettings { fn default() -> Self { - ChatSettings { + GlobalChatSettings { source: "openai".to_string(), base_api: None, api_key: None, diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index d0cd5c862..a5145cb0b 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -23,6 +23,7 @@ use crate::heed_codec::facet::{ use crate::heed_codec::version::VersionCodec; use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; +use crate::prompt::PromptData; use crate::proximity::ProximityPrecision; use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ @@ -79,6 +80,7 @@ pub mod main_key { pub const PREFIX_SEARCH: &str = "prefix_search"; pub const DOCUMENTS_STATS: &str = "documents_stats"; pub const DISABLED_TYPOS_TERMS: &str = "disabled_typos_terms"; + pub const CHAT: &str = "chat"; } 
pub mod db_name { @@ -1691,6 +1693,25 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::FACET_SEARCH) } + pub fn chat_config(&self, txn: &RoTxn<'_>) -> heed::Result { + self.main + .remap_types::>() + .get(txn, main_key::CHAT) + .map(|o| o.unwrap_or_default()) + } + + pub(crate) fn put_chat_config( + &self, + txn: &mut RwTxn<'_>, + val: &ChatConfig, + ) -> heed::Result<()> { + self.main.remap_types::>().put(txn, main_key::CHAT, &val) + } + + pub(crate) fn delete_chat_config(&self, txn: &mut RwTxn<'_>) -> heed::Result { + self.main.remap_key_type::().delete(txn, main_key::CHAT) + } + pub fn localized_attributes_rules( &self, rtxn: &RoTxn<'_>, @@ -1917,6 +1938,13 @@ pub struct IndexEmbeddingConfig { pub user_provided: RoaringBitmap, } +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct ChatConfig { + pub description: String, + /// Contains the document template and max template length. + pub prompt: PromptData, +} + #[derive(Debug, Deserialize, Serialize)] pub struct PrefixSettings { pub prefix_count_threshold: usize, diff --git a/crates/milli/src/update/chat.rs b/crates/milli/src/update/chat.rs new file mode 100644 index 000000000..44e646f6d --- /dev/null +++ b/crates/milli/src/update/chat.rs @@ -0,0 +1,45 @@ +use deserr::Deserr; +use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; + +use crate::index::ChatConfig; +use crate::prompt::{default_max_bytes, PromptData}; +use crate::update::Setting; + +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(deny_unknown_fields, rename_all = camelCase)] +pub struct ChatSettings { + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + pub description: Setting, + + /// A liquid template used to render documents to a text that can be embedded. 
+ /// + /// Meilisearch interpolates the template for each document and sends the resulting text to the embedder. + /// The embedder then generates document vectors based on this text. + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + pub document_template: Setting, + + /// Rendered texts are truncated to this size. Defaults to 400. + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + pub document_template_max_bytes: Setting, +} + +impl From for ChatSettings { + fn from(config: ChatConfig) -> Self { + let ChatConfig { description, prompt: PromptData { template, max_bytes } } = config; + ChatSettings { + description: Setting::Set(description), + document_template: Setting::Set(template), + document_template_max_bytes: Setting::Set( + max_bytes.unwrap_or(default_max_bytes()).get(), + ), + } + } +} diff --git a/crates/milli/src/update/mod.rs b/crates/milli/src/update/mod.rs index 9a783ffd2..5acb00113 100644 --- a/crates/milli/src/update/mod.rs +++ b/crates/milli/src/update/mod.rs @@ -1,4 +1,5 @@ pub use self::available_ids::AvailableIds; +pub use self::chat::ChatSettings; pub use self::clear_documents::ClearDocuments; pub use self::concurrent_available_ids::ConcurrentAvailableIds; pub use self::facet::bulk::FacetsUpdateBulk; @@ -13,6 +14,7 @@ pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids; pub use self::words_prefixes_fst::WordsPrefixesFst; mod available_ids; +mod chat; mod clear_documents; mod concurrent_available_ids; pub(crate) mod del_add; diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index fce2989b1..697bf8168 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -13,7 +13,7 @@ use time::OffsetDateTime; use super::del_add::{DelAdd, DelAddOperation}; use super::index_documents::{IndexDocumentsConfig, Transform}; -use 
super::IndexerConfig; +use super::{ChatSettings, IndexerConfig}; use crate::attribute_patterns::PatternMatch; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::criterion::Criterion; @@ -22,11 +22,11 @@ use crate::error::UserError; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::filterable_attributes_rules::match_faceted_field; use crate::index::{ - IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO, + ChatConfig, IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, }; use crate::order_by_map::OrderByMap; -use crate::prompt::default_max_bytes; +use crate::prompt::{default_max_bytes, PromptData}; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; @@ -185,6 +185,7 @@ pub struct Settings<'a, 't, 'i> { localized_attributes_rules: Setting>, prefix_search: Setting, facet_search: Setting, + chat: Setting, } impl<'a, 't, 'i> Settings<'a, 't, 'i> { @@ -223,6 +224,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { localized_attributes_rules: Setting::NotSet, prefix_search: Setting::NotSet, facet_search: Setting::NotSet, + chat: Setting::NotSet, indexer_config, } } @@ -453,6 +455,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.facet_search = Setting::Reset; } + pub fn set_chat(&mut self, value: ChatSettings) { + self.chat = Setting::Set(value); + } + + pub fn reset_chat(&mut self) { + self.chat = Setting::Reset; + } + #[tracing::instrument( level = "trace" skip(self, progress_callback, should_abort, settings_diff), @@ -1238,6 +1248,45 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(()) } + fn update_chat_config(&mut self) -> heed::Result { + match &mut self.chat { + Setting::Set(ChatSettings { + description: new_description, + document_template: new_document_template, + document_template_max_bytes: new_document_template_max_bytes, + }) => { + let mut old = 
self.index.chat_config(self.wtxn)?; + let ChatConfig { + ref mut description, + prompt: PromptData { ref mut template, ref mut max_bytes }, + } = old; + + match new_description { + Setting::Set(d) => *description = d.clone(), + Setting::Reset => *description = Default::default(), + Setting::NotSet => (), + } + + match new_document_template { + Setting::Set(dt) => *template = dt.clone(), + Setting::Reset => *template = Default::default(), + Setting::NotSet => (), + } + + match new_document_template_max_bytes { + Setting::Set(m) => *max_bytes = NonZeroUsize::new(*m), + Setting::Reset => *max_bytes = Some(default_max_bytes()), + Setting::NotSet => (), + } + + self.index.put_chat_config(self.wtxn, &old)?; + Ok(true) + } + Setting::Reset => self.index.delete_chat_config(self.wtxn), + Setting::NotSet => Ok(false), + } + } + pub fn execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, @@ -1275,6 +1324,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.update_facet_search()?; self.update_localized_attributes_rules()?; self.update_disabled_typos_terms()?; + self.update_chat_config()?; let embedding_config_updates = self.update_embedding_configs()?; diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index 3948ad4d8..712c1faa5 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -33,6 +33,7 @@ pub struct EmbeddingSettings { /// /// - Defaults to `openAi` pub source: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -55,6 +56,7 @@ pub struct EmbeddingSettings { /// - For source `openAi`, defaults to `text-embedding-3-small` /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5` pub model: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -75,6 +77,7 @@ pub struct 
EmbeddingSettings { /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e` /// - Otherwise, defaults to `null` pub revision: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -96,6 +99,7 @@ pub struct EmbeddingSettings { /// /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior. pub pooling: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -118,6 +122,7 @@ pub struct EmbeddingSettings { /// /// - This setting is partially hidden when returned by the settings pub api_key: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -141,6 +146,7 @@ pub struct EmbeddingSettings { /// - For source `openAi`, the dimensions is the maximum allowed by the model. /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text. pub dimensions: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -167,6 +173,7 @@ pub struct EmbeddingSettings { /// first enabling it. If you are unsure of whether the performance-relevancy tradeoff is right for you, /// we recommend to use this parameter on a test index first. pub binary_quantized: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -183,6 +190,7 @@ pub struct EmbeddingSettings { /// /// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text. 
pub document_template: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -201,6 +209,7 @@ pub struct EmbeddingSettings { /// /// - Defaults to 400 pub document_template_max_bytes: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -219,6 +228,7 @@ pub struct EmbeddingSettings { /// - 🌱 When modified for source `openAi`, embeddings are never regenerated /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated pub url: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -236,6 +246,7 @@ pub struct EmbeddingSettings { /// /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub request: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -253,6 +264,7 @@ pub struct EmbeddingSettings { /// /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub response: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option>)]