Expose new chat settings routes

This commit is contained in:
Clément Renault 2025-05-15 17:40:55 +02:00 committed by Kerollmops
parent e603e221d5
commit eb4445070f
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
6 changed files with 191 additions and 92 deletions

View File

@ -53,7 +53,7 @@ use flate2::Compression;
use meilisearch_types::batches::Batch; use meilisearch_types::batches::Batch;
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE; use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{Str, I128}; use meilisearch_types::heed::types::{SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls}; use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig; use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
@ -153,8 +153,8 @@ pub struct IndexScheduler {
/// In charge of fetching and setting the status of experimental features. /// In charge of fetching and setting the status of experimental features.
features: features::FeatureData, features: features::FeatureData,
/// Stores the custom prompts for the chat /// Stores the custom chat prompts and other settings of the indexes.
chat_prompts: Database<Str, Str>, chat_settings: Database<Str, SerdeJson<serde_json::Value>>,
/// Everything related to the processing of the tasks /// Everything related to the processing of the tasks
pub scheduler: scheduler::Scheduler, pub scheduler: scheduler::Scheduler,
@ -214,7 +214,7 @@ impl IndexScheduler {
#[cfg(test)] #[cfg(test)]
run_loop_iteration: self.run_loop_iteration.clone(), run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(), features: self.features.clone(),
chat_prompts: self.chat_prompts.clone(), chat_settings: self.chat_settings.clone(),
} }
} }
@ -277,7 +277,7 @@ impl IndexScheduler {
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?; let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let queue = Queue::new(&env, &mut wtxn, &options)?; let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?; let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_prompts = env.create_database(&mut wtxn, Some("chat-prompts"))?; let chat_settings = env.create_database(&mut wtxn, Some("chat-settings"))?;
wtxn.commit()?; wtxn.commit()?;
// allow unreachable_code to get rid of the warning in the case of a test build. // allow unreachable_code to get rid of the warning in the case of a test build.
@ -301,7 +301,7 @@ impl IndexScheduler {
#[cfg(test)] #[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)), run_loop_iteration: Arc::new(RwLock::new(0)),
features, features,
chat_prompts, chat_settings,
}; };
this.run(); this.run();
@ -875,8 +875,15 @@ impl IndexScheduler {
res.map(EmbeddingConfigs::new) res.map(EmbeddingConfigs::new)
} }
/// Reads the chat settings stored under the `"main"` key of the `chat_settings` database,
/// using a freshly opened read transaction. The `Option` in the return type is `None`
/// when no value is stored under that key.
pub fn chat_prompts<'t>(&self, rtxn: &'t RoTxn, name: &str) -> heed::Result<Option<&'t str>> { pub fn chat_settings(&self) -> Result<Option<serde_json::Value>> {
self.chat_prompts.get(rtxn, name) let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
self.chat_settings.get(&rtxn, &"main").map_err(Into::into)
}
pub fn put_chat_settings(&self, settings: &serde_json::Value) -> Result<()> {
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.chat_settings.put(&mut wtxn, &"main", &settings)?;
Ok(())
} }
} }

View File

@ -311,6 +311,12 @@ pub enum Action {
#[serde(rename = "chat.get")] #[serde(rename = "chat.get")]
#[deserr(rename = "chat.get")] #[deserr(rename = "chat.get")]
ChatGet, ChatGet,
#[serde(rename = "chatSettings.get")]
#[deserr(rename = "chatSettings.get")]
ChatSettingsGet,
#[serde(rename = "chatSettings.update")]
#[deserr(rename = "chatSettings.update")]
ChatSettingsUpdate,
} }
impl Action { impl Action {
@ -403,4 +409,6 @@ pub mod actions {
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr(); pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();
pub const CHAT_GET: u8 = ChatGet.repr(); pub const CHAT_GET: u8 = ChatGet.repr();
pub const CHAT_SETTINGS_GET: u8 = ChatSettingsGet.repr();
pub const CHAT_SETTINGS_UPDATE: u8 = ChatSettingsUpdate.repr();
} }

View File

@ -9,6 +9,7 @@ use async_openai::config::OpenAIConfig;
use async_openai::types::{ use async_openai::types::{
ChatCompletionMessageToolCall, ChatCompletionMessageToolCallChunk, ChatCompletionMessageToolCall, ChatCompletionMessageToolCallChunk,
ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestMessage, ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestMessage,
ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent, ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType,
CreateChatCompletionRequest, FinishReason, FunctionCall, FunctionCallStream, CreateChatCompletionRequest, FinishReason, FunctionCall, FunctionCallStream,
@ -27,6 +28,7 @@ use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use tokio::runtime::Handle; use tokio::runtime::Handle;
use super::settings::chat::{ChatPrompts, ChatSettings};
use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData; use crate::extractors::authentication::GuardedData;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
@ -36,33 +38,12 @@ use crate::search::{
}; };
use crate::search_queue::SearchQueue; use crate::search_queue::SearchQueue;
/// Fallback description of the `searchInIndex` tool sent to OpenAI.
const DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION: &str =
    concat!("Search the database for relevant JSON documents ", "using an optional query.");
/// Fallback description of the `q` parameter of the `searchInIndex` tool.
const DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION: &str = concat!(
    "The search query string used to find relevant documents in the index. ",
    "This should contain keywords or phrases that best represent what the user is looking for. ",
    "More specific queries will yield more precise results.",
);
/// Fallback description of the index parameter of the `searchInIndex` tool.
const DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION: &str = concat!(
    "The name of the index to search within. ",
    "An index is a collection of documents organized for search. ",
    "Selecting the right index ensures the most relevant results for the user query",
);
const EMBEDDER_NAME: &str = "openai"; const EMBEDDER_NAME: &str = "openai";
/// Registers the chat route: a `POST` on the resource root is handled by `chat`.
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(chat))); cfg.service(web::resource("").route(web::post().to(chat)));
} }
/// Builds an OpenAI client authenticated with the key found in the
/// `MEILI_OPENAI_API_KEY` environment variable.
///
/// # Panics
///
/// Panics when `MEILI_OPENAI_API_KEY` cannot be read from the environment.
fn create_openai_client() -> Client<OpenAIConfig> {
    let key = std::env::var("MEILI_OPENAI_API_KEY")
        .expect("cannot find OpenAI API Key (MEILI_OPENAI_API_KEY)");
    Client::with_config(OpenAIConfig::default().with_api_key(&key))
}
/// Get a chat completion /// Get a chat completion
async fn chat( async fn chat(
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>,
@ -86,12 +67,7 @@ async fn chat(
} }
/// Setup search tool in chat completion request /// Setup search tool in chat completion request
fn setup_search_tool( fn setup_search_tool(chat_completion: &mut CreateChatCompletionRequest, prompts: &ChatPrompts) {
chat_completion: &mut CreateChatCompletionRequest,
search_in_index_description: &str,
search_in_index_q_param_description: &str,
search_in_index_index_description: &str,
) {
let tools = chat_completion.tools.get_or_insert_default(); let tools = chat_completion.tools.get_or_insert_default();
tools.push( tools.push(
ChatCompletionToolArgs::default() ChatCompletionToolArgs::default()
@ -99,18 +75,18 @@ fn setup_search_tool(
.function( .function(
FunctionObjectArgs::default() FunctionObjectArgs::default()
.name("searchInIndex") .name("searchInIndex")
.description(search_in_index_description) .description(&prompts.search_description)
.parameters(json!({ .parameters(json!({
"type": "object", "type": "object",
"properties": { "properties": {
"index_uid": { "index_uid": {
"type": "string", "type": "string",
"enum": ["main"], "enum": ["main"],
"description": search_in_index_index_description, "description": prompts.search_index_uid_param,
}, },
"q": { "q": {
"type": ["string", "null"], "type": ["string", "null"],
"description": search_in_index_q_param_description, "description": prompts.search_q_param,
} }
}, },
"required": ["index_uid", "q"], "required": ["index_uid", "q"],
@ -125,6 +101,17 @@ fn setup_search_tool(
); );
} }
/// Inserts `system_prompt` as an unnamed system message at the very front of
/// the conversation carried by `chat_completion`.
fn prepend_system_message(chat_completion: &mut CreateChatCompletionRequest, system_prompt: &str) {
    let content = ChatCompletionRequestSystemMessageContent::Text(system_prompt.to_string());
    let system_message = ChatCompletionRequestSystemMessage { content, name: None };
    chat_completion.messages.insert(0, ChatCompletionRequestMessage::System(system_message));
}
/// Process search request and return formatted results /// Process search request and return formatted results
async fn process_search_request( async fn process_search_request(
index_scheduler: &GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>, index_scheduler: &GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>,
@ -187,56 +174,32 @@ async fn process_search_request(
Ok((index, text)) Ok((index, text))
} }
/// Loads the three `searchInIndex` prompt descriptions from the index scheduler.
///
/// Returns, in order, the tool description, the `q` parameter description and
/// the index parameter description. Each entry falls back to its built-in
/// default when no custom prompt is stored under the corresponding name.
///
/// # Panics
///
/// Panics when the read transaction cannot be opened or when a lookup fails.
fn get_prompt_descriptions(
    index_scheduler: &GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>,
) -> (String, String, String) {
    let rtxn = index_scheduler.read_txn().unwrap();

    let descriptions = {
        // Looks up one stored prompt, falling back to the provided default.
        let describe = |prompt_name: &str, fallback: &'static str| -> String {
            index_scheduler.chat_prompts(&rtxn, prompt_name).unwrap().unwrap_or(fallback).to_string()
        };

        (
            describe("searchInIndex-description", DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION),
            describe(
                "searchInIndex-q-param-description",
                DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION,
            ),
            describe(
                "searchInIndex-index-param-description",
                DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION,
            ),
        )
    };

    // Release the read transaction before handing the owned strings back.
    drop(rtxn);
    descriptions
}
async fn non_streamed_chat( async fn non_streamed_chat(
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT_GET }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>, search_queue: web::Data<SearchQueue>,
mut chat_completion: CreateChatCompletionRequest, mut chat_completion: CreateChatCompletionRequest,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let client = create_openai_client(); let chat_settings = match index_scheduler.chat_settings().unwrap() {
Some(value) => serde_json::from_value(value).unwrap(),
None => ChatSettings::default(),
};
let ( let mut config = OpenAIConfig::default();
search_in_index_description, if let Some(api_key) = chat_settings.api_key.as_ref() {
search_in_index_q_param_description, config = config.with_api_key(api_key);
search_in_index_index_description, }
) = get_prompt_descriptions(&index_scheduler); // We cannot change the endpoint
// if let Some(endpoint) = chat_settings.endpoint.as_ref() {
// config.with_api_base(&endpoint);
// }
let client = Client::with_config(config);
// Prepend system message to the conversation
prepend_system_message(&mut chat_completion, &chat_settings.prompts.system);
let mut response; let mut response;
loop { loop {
setup_search_tool( setup_search_tool(&mut chat_completion, &chat_settings.prompts);
&mut chat_completion,
&search_in_index_description,
&search_in_index_q_param_description,
&search_in_index_index_description,
);
response = client.chat().create(chat_completion.clone()).await.unwrap(); response = client.chat().create(chat_completion.clone()).await.unwrap();
@ -290,22 +253,29 @@ async fn streamed_chat(
search_queue: web::Data<SearchQueue>, search_queue: web::Data<SearchQueue>,
mut chat_completion: CreateChatCompletionRequest, mut chat_completion: CreateChatCompletionRequest,
) -> impl Responder { ) -> impl Responder {
let ( let chat_settings = match index_scheduler.chat_settings().unwrap() {
search_in_index_description, Some(value) => serde_json::from_value(value).unwrap(),
search_in_index_q_param_description, None => ChatSettings::default(),
search_in_index_index_description, };
) = get_prompt_descriptions(&index_scheduler);
setup_search_tool( let mut config = OpenAIConfig::default();
&mut chat_completion, if let Some(api_key) = chat_settings.api_key.as_ref() {
&search_in_index_description, config = config.with_api_key(api_key);
&search_in_index_q_param_description, }
&search_in_index_index_description, // We cannot change the endpoint
); // if let Some(endpoint) = chat_settings.endpoint.as_ref() {
// config.with_api_base(&endpoint);
// }
// Prepend system message to the conversation
prepend_system_message(&mut chat_completion, &chat_settings.prompts.system);
// Setup the search tool
setup_search_tool(&mut chat_completion, &chat_settings.prompts);
let (tx, rx) = tokio::sync::mpsc::channel(10); let (tx, rx) = tokio::sync::mpsc::channel(10);
let _join_handle = Handle::current().spawn(async move { let _join_handle = Handle::current().spawn(async move {
let client = create_openai_client(); let client = Client::with_config(config.clone());
let mut global_tool_calls = HashMap::<u32, Call>::new(); let mut global_tool_calls = HashMap::<u32, Call>::new();
'main: loop { 'main: loop {

View File

@ -62,6 +62,7 @@ mod multi_search;
mod multi_search_analytics; mod multi_search_analytics;
pub mod network; pub mod network;
mod open_api_utils; mod open_api_utils;
pub mod settings;
mod snapshot; mod snapshot;
mod swap_indexes; mod swap_indexes;
pub mod tasks; pub mod tasks;
@ -115,7 +116,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/metrics").configure(metrics::configure)) .service(web::scope("/metrics").configure(metrics::configure))
.service(web::scope("/experimental-features").configure(features::configure)) .service(web::scope("/experimental-features").configure(features::configure))
.service(web::scope("/network").configure(network::configure)) .service(web::scope("/network").configure(network::configure))
.service(web::scope("/chat").configure(chat::configure)); .service(web::scope("/chat").configure(chat::configure))
.service(web::scope("/settings/chat").configure(settings::chat::configure));
#[cfg(feature = "swagger")] #[cfg(feature = "swagger")]
{ {

View File

@ -0,0 +1,111 @@
use std::collections::BTreeMap;
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::{Deserialize, Serialize};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
/// Registers the chat settings routes on the resource root: `GET` is served
/// by `get_settings` and `PATCH` by `patch_settings` (wrapped in `SeqHandler`).
pub fn configure(cfg: &mut web::ServiceConfig) {
    let settings_resource = web::resource("")
        .route(web::get().to(get_settings))
        .route(web::patch().to(SeqHandler(patch_settings)));
    cfg.service(settings_resource);
}
async fn get_settings(
index_scheduler: GuardedData<
ActionPolicy<{ actions::CHAT_SETTINGS_GET }>,
Data<IndexScheduler>,
>,
) -> Result<HttpResponse, ResponseError> {
let settings = match index_scheduler.chat_settings()? {
Some(value) => serde_json::from_value(value).unwrap(),
None => ChatSettings::default(),
};
Ok(HttpResponse::Ok().json(settings))
}
/// Stores the chat settings received in the JSON request body and answers
/// with an empty `200 OK`.
///
/// The body is deserialized as a whole `ChatSettings` and the stored value is
/// overwritten with it.
async fn patch_settings(
    index_scheduler: GuardedData<
        ActionPolicy<{ actions::CHAT_SETTINGS_UPDATE }>,
        Data<IndexScheduler>,
    >,
    web::Json(chat_settings): web::Json<ChatSettings>,
) -> Result<HttpResponse, ResponseError> {
    let serialized = serde_json::to_value(chat_settings).unwrap();
    index_scheduler.put_chat_settings(&serialized)?;

    let response = HttpResponse::Ok().finish();
    Ok(response)
}
/// The chat settings read and written by the chat settings routes.
///
/// Field names are (de)serialized in camelCase and unknown fields are rejected
/// on deserialization.
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct ChatSettings {
/// Defaults to `"openai"`; presumably names the chat completion provider — TODO confirm.
pub source: String,
/// Optional endpoint, `None` by default.
/// NOTE(review): presumably a custom provider URL — confirm how it is consumed.
pub endpoint: Option<String>,
/// Optional API key, `None` by default.
/// NOTE(review): this field is serialized like the others, so the `GET` route
/// returns it in clear text — confirm this is intended.
pub api_key: Option<String>,
/// The prompts and tool descriptions, see `ChatPrompts`.
pub prompts: ChatPrompts,
/// Per-entry `ChatIndexSettings`, empty by default.
/// Presumably keyed by index uid — TODO confirm.
pub indexes: BTreeMap<String, ChatIndexSettings>,
}
/// The customizable prompts of the chat settings.
///
/// Field names are (de)serialized in camelCase and unknown fields are rejected
/// on deserialization.
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct ChatPrompts {
/// The system prompt; defaults to `DEFAULT_SYSTEM_MESSAGE`.
pub system: String,
/// Defaults to `DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION`.
pub search_description: String,
/// Defaults to `DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION`.
pub search_q_param: String,
/// Defaults to `DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION`.
pub search_index_uid_param: String,
/// Defaults to an empty string.
/// NOTE(review): no usage is visible in this file — confirm its purpose.
pub pre_query: String,
}
/// The chat settings attached to one entry of `ChatSettings::indexes`.
///
/// Field names are (de)serialized in camelCase and unknown fields are rejected
/// on deserialization.
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct ChatIndexSettings {
/// Presumably a human-readable description of the index — TODO confirm.
pub description: String,
/// Presumably the template used to render documents for the chat — TODO confirm.
pub document_template: String,
}
/// The default system prompt, used as `ChatPrompts::system` by `ChatSettings::default()`.
///
/// Each numbered instruction sits on its own line: a trailing `\` in a string
/// literal removes the line break and the following indentation, so the
/// separators must be written explicitly as `\n`.
const DEFAULT_SYSTEM_MESSAGE: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:\n\
    1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.\n\
    2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.\n\
    3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"\n\
    4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.\n\
    5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
/// Default for `ChatPrompts::search_description`: describes the `searchInIndex` tool.
const DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION: &str =
    concat!("Search the database for relevant JSON documents ", "using an optional query.");
/// Default for `ChatPrompts::search_q_param`: describes the `q` parameter of the tool.
const DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION: &str = concat!(
    "The search query string used to find relevant documents in the index. ",
    "This should contain keywords or phrases that best represent what the user is looking for. ",
    "More specific queries will yield more precise results.",
);
/// Default for `ChatPrompts::search_index_uid_param`: describes the index parameter of the tool.
const DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION: &str = concat!(
    "The name of the index to search within. ",
    "An index is a collection of documents organized for search. ",
    "Selecting the right index ensures the most relevant results for the user query",
);
impl Default for ChatSettings {
    /// Builds the fallback settings: the `"openai"` source, no endpoint, no API
    /// key, the built-in default prompts, an empty pre-query and no per-index
    /// settings.
    fn default() -> Self {
        let prompts = ChatPrompts {
            system: String::from(DEFAULT_SYSTEM_MESSAGE),
            search_description: String::from(DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION),
            search_q_param: String::from(DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION),
            search_index_uid_param: String::from(
                DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION,
            ),
            pre_query: String::new(),
        };

        Self {
            source: String::from("openai"),
            endpoint: None,
            api_key: None,
            prompts,
            indexes: BTreeMap::new(),
        }
    }
}

View File

@ -0,0 +1 @@
pub mod chat;