Compare commits

...

28 Commits

Author SHA1 Message Date
Clément Renault
a1a4082196 Support multiple indexes and not only main 2025-05-20 17:43:51 +02:00
Clément Renault
5c6b63df65 Limit the number of internal loop calls and change the function name 2025-05-20 16:44:28 +02:00
Clément Renault
7266aed770 Correctly support tenant tokens and filters 2025-05-20 16:15:49 +02:00
Clément Renault
bae6c98aa3 Stream errors 2025-05-20 12:23:22 +02:00
Clément Renault
42c95cf3c4 Stop the stream when the connexion stops and chnage the events 2025-05-20 12:05:51 +02:00
Clément Renault
4f919db344 Generate a new default chat API key 2025-05-20 11:00:19 +02:00
Clément Renault
295840d07a Change the /chat route to /chat/completions to be OpenAI-compatible 2025-05-20 10:14:56 +02:00
Clément Renault
c0c3bddda8 Better stop the stream 2025-05-16 17:12:48 +02:00
Clément Renault
10b5fcd4ba Update the streaming detection to work with Mistral 2025-05-16 15:17:01 +02:00
Clément Renault
8113d4a52e Make it compatible with the Mistral API 2025-05-16 14:33:53 +02:00
Clément Renault
5964289284 Support base_api in the settings 2025-05-15 18:28:02 +02:00
Clément Renault
6b81854d48 Make clippy happy 2025-05-15 18:16:06 +02:00
Clément Renault
9e5b466426 Display pre-query prompt in search tool response 2025-05-15 18:10:09 +02:00
Clément Renault
b43ffd8fac Commit when putting stuff in LMDB 2025-05-15 18:03:26 +02:00
Clément Renault
43da2bcb8c Remove useless function 2025-05-15 17:52:26 +02:00
Clément Renault
5e3b126d73 Expose new chat settings routes 2025-05-15 17:48:10 +02:00
Clément Renault
6c034754ca Factorise a bit the code 2025-05-15 15:39:38 +02:00
Clément Renault
6329cf7ed6 Display the different tool calls we need to do 2025-05-15 11:17:34 +02:00
Clément Renault
e0c8c11a94 Send an event with the content of the tool calling 2025-05-14 17:15:32 +02:00
Clément Renault
6e8b371111 Streaming supports tool calling 2025-05-14 14:58:01 +02:00
Clément Renault
da7d651f4b Nearly support tools on the streaming route 2025-05-14 14:29:41 +02:00
Clément Renault
24050f06e4 Return the right message format 2025-05-14 12:03:43 +02:00
Clément Renault
af482d8ee9 Aggregate tool calls and display the calls to make. 2025-05-14 11:53:03 +02:00
Clément Renault
7d62307739 Implement a first version of a streamed chat API 2025-05-14 11:18:21 +02:00
Clément Renault
3a71df7b5a Make it work by retrieving content from the index 2025-05-13 16:35:46 +02:00
Clément Renault
ac39a436d9 Support overwriten prompts of the search query 2025-05-13 16:33:58 +02:00
Clément Renault
e5c963a170 Support querying the index named main 2025-05-13 15:26:24 +02:00
Clément Renault
9baf2ce1a6 Introduce the first version of the /chat route that mimics the OpenAI API 2025-05-13 11:19:32 +02:00
12 changed files with 1248 additions and 97 deletions

559
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -31,7 +31,7 @@ anyhow = "1.0.95"
bytes = "1.9.0"
convert_case = "0.6.0"
flate2 = "1.0.35"
reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
reqwest = { version = "0.12.15", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@@ -53,8 +53,8 @@ use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::I128;
use meilisearch_types::heed::{self, Env, RoTxn, WithoutTls};
use meilisearch_types::heed::types::{SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
@@ -151,6 +151,9 @@ pub struct IndexScheduler {
/// In charge of fetching and setting the status of experimental features.
features: features::FeatureData,
/// Stores the custom chat prompts and other settings of the indexes.
chat_settings: Database<Str, SerdeJson<serde_json::Value>>,
/// Everything related to the processing of the tasks
pub scheduler: scheduler::Scheduler,
@@ -209,11 +212,16 @@ impl IndexScheduler {
#[cfg(test)]
run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(),
chat_settings: self.chat_settings,
}
}
pub(crate) const fn nb_db() -> u32 {
Versioning::nb_db() + Queue::nb_db() + IndexMapper::nb_db() + features::FeatureData::nb_db()
Versioning::nb_db()
+ Queue::nb_db()
+ IndexMapper::nb_db()
+ features::FeatureData::nb_db()
+ 1 // chat-prompts
}
/// Create an index scheduler and start its run loop.
@@ -267,6 +275,7 @@ impl IndexScheduler {
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_settings = env.create_database(&mut wtxn, Some("chat-settings"))?;
wtxn.commit()?;
// allow unreachable_code to get rids of the warning in the case of a test build.
@@ -290,6 +299,7 @@ impl IndexScheduler {
#[cfg(test)]
run_loop_iteration: Arc::new(RwLock::new(0)),
features,
chat_settings,
};
this.run();
@@ -857,6 +867,18 @@ impl IndexScheduler {
.collect();
res.map(EmbeddingConfigs::new)
}
pub fn chat_settings(&self) -> Result<Option<serde_json::Value>> {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
self.chat_settings.get(&rtxn, "main").map_err(Into::into)
}
pub fn put_chat_settings(&self, settings: &serde_json::Value) -> Result<()> {
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.chat_settings.put(&mut wtxn, "main", settings)?;
wtxn.commit().map_err(Error::HeedTransaction)?;
Ok(())
}
}
/// The outcome of calling the [`IndexScheduler::tick`] function.

View File

@@ -351,6 +351,7 @@ pub struct IndexSearchRules {
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;
store.put_api_key(Key::default_chat())?;
Ok(())
}

View File

@@ -158,6 +158,21 @@ impl Key {
updated_at: now,
}
}
pub fn default_chat() -> Self {
let now = OffsetDateTime::now_utc();
let uid = Uuid::new_v4();
Self {
name: Some("Default Chat API Key".to_string()),
description: Some("Use it to chat and search from the frontend".to_string()),
uid,
actions: vec![Action::Chat, Action::Search],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: now,
updated_at: now,
}
}
}
fn parse_expiration_date(
@@ -308,6 +323,15 @@ pub enum Action {
#[serde(rename = "network.update")]
#[deserr(rename = "network.update")]
NetworkUpdate,
#[serde(rename = "chat.get")]
#[deserr(rename = "chat.get")]
Chat,
#[serde(rename = "chatSettings.get")]
#[deserr(rename = "chatSettings.get")]
ChatSettingsGet,
#[serde(rename = "chatSettings.update")]
#[deserr(rename = "chatSettings.update")]
ChatSettingsUpdate,
}
impl Action {
@@ -349,6 +373,7 @@ impl Action {
EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
NETWORK_GET => Some(Self::NetworkGet),
NETWORK_UPDATE => Some(Self::NetworkUpdate),
CHAT => Some(Self::Chat),
_otherwise => None,
}
}
@@ -397,4 +422,8 @@ pub mod actions {
pub const NETWORK_GET: u8 = NetworkGet.repr();
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();
pub const CHAT: u8 = Chat.repr();
pub const CHAT_SETTINGS_GET: u8 = ChatSettingsGet.repr();
pub const CHAT_SETTINGS_UPDATE: u8 = ChatSettingsUpdate.repr();
}

View File

@@ -48,6 +48,7 @@ is-terminal = "0.4.13"
itertools = "0.14.0"
jsonwebtoken = "9.3.0"
lazy_static = "1.5.0"
liquid = "0.26.9"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
@@ -111,6 +112,8 @@ utoipa = { version = "5.3.1", features = [
"openapi_extensions",
] }
utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "optional-type-function" }
actix-web-lab = { version = "0.24.1", default-features = false }
[dev-dependencies]
actix-rt = "2.10.0"

View File

@@ -4,6 +4,7 @@ use std::marker::PhantomData;
use std::ops::Deref;
use std::pin::Pin;
use actix_web::http::header::AUTHORIZATION;
use actix_web::web::Data;
use actix_web::FromRequest;
pub use error::AuthenticationError;
@@ -94,36 +95,44 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
_payload: &mut actix_web::dev::Payload,
) -> Self::Future {
match req.app_data::<Data<AuthController>>().cloned() {
Some(auth) => match req
.headers()
.get("Authorization")
.map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' '))
{
Some(mut type_token) => match type_token.next() {
Some("Bearer") => {
// TODO: find a less hardcoded way?
let index = req.match_info().get("index_uid");
match type_token.next() {
Some(token) => Box::pin(Self::auth_bearer(
auth,
token.to_string(),
index.map(String::from),
req.app_data::<D>().cloned(),
)),
None => Box::pin(err(AuthenticationError::InvalidToken.into())),
}
}
_otherwise => {
Box::pin(err(AuthenticationError::MissingAuthorizationHeader.into()))
}
},
None => Box::pin(Self::auth_token(auth, req.app_data::<D>().cloned())),
Some(auth) => match extract_token_from_request(req) {
Ok(Some(token)) => {
// TODO: find a less hardcoded way?
let index = req.match_info().get("index_uid");
Box::pin(Self::auth_bearer(
auth,
token.to_string(),
index.map(String::from),
req.app_data::<D>().cloned(),
))
}
Ok(None) => Box::pin(Self::auth_token(auth, req.app_data::<D>().cloned())),
Err(e) => Box::pin(err(e.into())),
},
None => Box::pin(err(AuthenticationError::IrretrievableState.into())),
}
}
}
pub fn extract_token_from_request(
req: &actix_web::HttpRequest,
) -> Result<Option<&str>, AuthenticationError> {
match req
.headers()
.get(AUTHORIZATION)
.map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' '))
{
Some(mut type_token) => match type_token.next() {
Some("Bearer") => match type_token.next() {
Some(token) => Ok(Some(token)),
None => Err(AuthenticationError::InvalidToken),
},
_otherwise => Err(AuthenticationError::MissingAuthorizationHeader),
},
None => Ok(None),
}
}
pub trait Policy {
fn authenticate(
auth: Data<AuthController>,
@@ -299,8 +308,8 @@ pub mod policies {
auth: &AuthController,
token: &str,
) -> Result<TenantTokenOutcome, AuthError> {
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
// Only search and chat actions can be accessed by a tenant token.
if A != actions::SEARCH && A != actions::CHAT {
return Ok(TenantTokenOutcome::NotATenantToken);
}

View File

@@ -0,0 +1,528 @@
use std::collections::HashMap;
use std::mem;
use std::time::Duration;
use actix_web::web::{self, Data};
use actix_web::{Either, HttpRequest, HttpResponse, Responder};
use actix_web_lab::sse::{self, Event, Sse};
use async_openai::config::OpenAIConfig;
use async_openai::types::{
ChatCompletionMessageToolCall, ChatCompletionMessageToolCallChunk,
ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestMessage,
ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType,
CreateChatCompletionRequest, FinishReason, FunctionCall, FunctionCallStream,
FunctionObjectArgs,
};
use async_openai::Client;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::prompt::PromptData;
use meilisearch_types::milli::vector::EmbeddingConfig;
use meilisearch_types::{Document, Index};
use serde::{Deserialize, Serialize};
use serde_json::json;
use tokio::runtime::Handle;
use tokio::sync::mpsc::error::SendError;
use super::settings::chat::{ChatPrompts, ChatSettings};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_search, HybridQuery, RetrieveVectors, SearchQuery, SemanticRatio,
};
use crate::search_queue::SearchQueue;
const EMBEDDER_NAME: &str = "openai";
const SEARCH_IN_INDEX_FUNCTION_NAME: &str = "_meiliSearchInIndex";
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("/completions").route(web::post().to(chat)));
}
/// Get a chat completion
async fn chat(
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT }>, Data<IndexScheduler>>,
auth_ctrl: web::Data<AuthController>,
req: HttpRequest,
search_queue: web::Data<SearchQueue>,
web::Json(chat_completion): web::Json<CreateChatCompletionRequest>,
) -> impl Responder {
// To enable later on, when the feature will be experimental
// index_scheduler.features().check_chat("Using the /chat route")?;
assert_eq!(
chat_completion.n.unwrap_or(1),
1,
"Meilisearch /chat only support one completion at a time (n = 1, n = null)"
);
if chat_completion.stream.unwrap_or(false) {
Either::Right(
streamed_chat(index_scheduler, auth_ctrl, req, search_queue, chat_completion).await,
)
} else {
Either::Left(
non_streamed_chat(index_scheduler, auth_ctrl, req, search_queue, chat_completion).await,
)
}
}
/// Setup search tool in chat completion request
fn setup_search_tool(
index_scheduler: &Data<IndexScheduler>,
filters: &meilisearch_auth::AuthFilter,
chat_completion: &mut CreateChatCompletionRequest,
prompts: &ChatPrompts,
) -> Result<(), ResponseError> {
let tools = chat_completion.tools.get_or_insert_default();
if tools.iter().find(|t| t.function.name == SEARCH_IN_INDEX_FUNCTION_NAME).is_some() {
panic!("{SEARCH_IN_INDEX_FUNCTION_NAME} function already set");
}
let index_uids: Vec<_> = index_scheduler
.index_names()?
.into_iter()
.filter(|index_uid| filters.is_index_authorized(&index_uid))
.collect();
let tool = ChatCompletionToolArgs::default()
.r#type(ChatCompletionToolType::Function)
.function(
FunctionObjectArgs::default()
.name(SEARCH_IN_INDEX_FUNCTION_NAME)
.description(&prompts.search_description)
.parameters(json!({
"type": "object",
"properties": {
"index_uid": {
"type": "string",
"enum": index_uids,
"description": prompts.search_index_uid_param,
},
"q": {
// Unfortunately, Mistral does not support an array of types, here.
// "type": ["string", "null"],
"type": "string",
"description": prompts.search_q_param,
}
},
"required": ["index_uid", "q"],
"additionalProperties": false,
}))
.strict(true)
.build()
.unwrap(),
)
.build()
.unwrap();
tools.push(tool);
chat_completion.messages.insert(
0,
ChatCompletionRequestMessage::System(ChatCompletionRequestSystemMessage {
content: ChatCompletionRequestSystemMessageContent::Text(prompts.system.clone()),
name: None,
}),
);
Ok(())
}
/// Process search request and return formatted results
async fn process_search_request(
index_scheduler: &GuardedData<ActionPolicy<{ actions::CHAT }>, Data<IndexScheduler>>,
auth_ctrl: web::Data<AuthController>,
search_queue: &web::Data<SearchQueue>,
auth_token: &str,
index_uid: String,
q: Option<String>,
) -> Result<(Index, String), ResponseError> {
let mut query = SearchQuery {
q,
hybrid: Some(HybridQuery {
semantic_ratio: SemanticRatio::default(),
embedder: EMBEDDER_NAME.to_string(),
}),
limit: 20,
..Default::default()
};
let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
auth_ctrl,
auth_token,
Some(index_uid.as_str()),
)?;
// Tenant token search_rules.
if let Some(search_rules) = auth_filter.get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
}
// TBD
// let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query);
let index = index_scheduler.index(&index_uid)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let permit = search_queue.try_get_search_permit().await?;
let features = index_scheduler.features();
let index_cloned = index.clone();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
&index_cloned,
query,
search_kind,
RetrieveVectors::new(false),
features,
)
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
// aggregate.succeed(search_result);
if search_result.degraded {
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
}
}
// analytics.publish(aggregate, &req);
let search_result = search_result?;
let formatted =
format_documents(&index, search_result.hits.into_iter().map(|doc| doc.document));
let text = formatted.join("\n");
Ok((index, text))
}
async fn non_streamed_chat(
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT }>, Data<IndexScheduler>>,
auth_ctrl: web::Data<AuthController>,
req: HttpRequest,
search_queue: web::Data<SearchQueue>,
mut chat_completion: CreateChatCompletionRequest,
) -> Result<HttpResponse, ResponseError> {
let filters = index_scheduler.filters();
let chat_settings = match index_scheduler.chat_settings().unwrap() {
Some(value) => serde_json::from_value(value).unwrap(),
None => ChatSettings::default(),
};
let mut config = OpenAIConfig::default();
if let Some(api_key) = chat_settings.api_key.as_ref() {
config = config.with_api_key(api_key);
}
if let Some(base_api) = chat_settings.base_api.as_ref() {
config = config.with_api_base(base_api);
}
let client = Client::with_config(config);
let auth_token = extract_token_from_request(&req)?.unwrap();
setup_search_tool(&index_scheduler, filters, &mut chat_completion, &chat_settings.prompts)?;
let mut response;
loop {
response = client.chat().create(chat_completion.clone()).await.unwrap();
let choice = &mut response.choices[0];
match choice.finish_reason {
Some(FinishReason::ToolCalls) => {
let tool_calls = mem::take(&mut choice.message.tool_calls).unwrap_or_default();
let (meili_calls, other_calls): (Vec<_>, Vec<_>) = tool_calls
.into_iter()
.partition(|call| call.function.name == SEARCH_IN_INDEX_FUNCTION_NAME);
chat_completion.messages.push(
ChatCompletionRequestAssistantMessageArgs::default()
.tool_calls(meili_calls.clone())
.build()
.unwrap()
.into(),
);
for call in meili_calls {
let SearchInIndexParameters { index_uid, q } =
serde_json::from_str(&call.function.arguments).unwrap();
let (_, text) = process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
auth_token,
index_uid,
q,
)
.await?;
chat_completion.messages.push(ChatCompletionRequestMessage::Tool(
ChatCompletionRequestToolMessage {
tool_call_id: call.id,
content: ChatCompletionRequestToolMessageContent::Text(text),
},
));
}
// Let the client call other tools by themselves
if !other_calls.is_empty() {
response.choices[0].message.tool_calls = Some(other_calls);
break;
}
}
_ => break,
}
}
Ok(HttpResponse::Ok().json(response))
}
async fn streamed_chat(
index_scheduler: GuardedData<ActionPolicy<{ actions::CHAT }>, Data<IndexScheduler>>,
auth_ctrl: web::Data<AuthController>,
req: HttpRequest,
search_queue: web::Data<SearchQueue>,
mut chat_completion: CreateChatCompletionRequest,
) -> Result<impl Responder, ResponseError> {
let filters = index_scheduler.filters();
let chat_settings = match index_scheduler.chat_settings().unwrap() {
Some(value) => serde_json::from_value(value).unwrap(),
None => ChatSettings::default(),
};
let mut config = OpenAIConfig::default();
if let Some(api_key) = chat_settings.api_key.as_ref() {
config = config.with_api_key(api_key);
}
if let Some(base_api) = chat_settings.base_api.as_ref() {
config = config.with_api_base(base_api);
}
let auth_token = extract_token_from_request(&req)?.unwrap().to_string();
setup_search_tool(&index_scheduler, filters, &mut chat_completion, &chat_settings.prompts)?;
let (tx, rx) = tokio::sync::mpsc::channel(10);
let _join_handle = Handle::current().spawn(async move {
let client = Client::with_config(config.clone());
let mut global_tool_calls = HashMap::<u32, Call>::new();
let mut finish_reason = None;
// Limit the number of internal calls to satisfy the search requests of the LLM
'main: for _ in 0..20 {
let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap();
while let Some(result) = response.next().await {
match result {
Ok(resp) => {
let choice = &resp.choices[0];
finish_reason = choice.finish_reason;
#[allow(deprecated)]
let ChatCompletionStreamResponseDelta {
content,
// Using deprecated field but keeping for compatibility
function_call: _,
ref tool_calls,
role: _,
refusal: _,
} = &choice.delta;
if content.is_some() {
if let Err(SendError(_)) = tx.send(Event::Data(sse::Data::new_json(&resp).unwrap())).await {
return;
}
}
match tool_calls {
Some(tool_calls) => {
for chunk in tool_calls {
let ChatCompletionMessageToolCallChunk {
index,
id,
r#type: _,
function,
} = chunk;
let FunctionCallStream { name, arguments } =
function.as_ref().unwrap();
global_tool_calls
.entry(*index)
.and_modify(|call| call.append(arguments.as_ref().unwrap()))
.or_insert_with(|| Call {
id: id.as_ref().unwrap().clone(),
function_name: name.as_ref().unwrap().clone(),
arguments: arguments.as_ref().unwrap().clone(),
});
}
}
None if !global_tool_calls.is_empty() => {
let (meili_calls, _other_calls): (Vec<_>, Vec<_>) =
mem::take(&mut global_tool_calls)
.into_values()
.map(|call| ChatCompletionMessageToolCall {
id: call.id,
r#type: Some(ChatCompletionToolType::Function),
function: FunctionCall {
name: call.function_name,
arguments: call.arguments,
},
})
.partition(|call| call.function.name == SEARCH_IN_INDEX_FUNCTION_NAME);
chat_completion.messages.push(
ChatCompletionRequestAssistantMessageArgs::default()
.tool_calls(meili_calls.clone())
.build()
.unwrap()
.into(),
);
for call in meili_calls {
if let Err(SendError(_)) = tx.send(Event::Data(
sse::Data::new_json(json!({
"object": "chat.completion.tool.call",
"tool": call,
}))
.unwrap(),
))
.await {
return;
}
let SearchInIndexParameters { index_uid, q } =
serde_json::from_str(&call.function.arguments).unwrap();
let result = process_search_request(
&index_scheduler,
auth_ctrl.clone(),
&search_queue,
&auth_token,
index_uid,
q,
)
.await;
let is_error = result.is_err();
let text = match result {
Ok((_, text)) => text,
Err(err) => err.to_string(),
};
let tool = ChatCompletionRequestToolMessage {
tool_call_id: call.id.clone(),
content: ChatCompletionRequestToolMessageContent::Text(
format!("{}\n\n{text}", chat_settings.prompts.pre_query),
),
};
if let Err(SendError(_)) = tx.send(Event::Data(
sse::Data::new_json(json!({
"object": if is_error {
"chat.completion.tool.error"
} else {
"chat.completion.tool.output"
},
"tool": ChatCompletionRequestToolMessage {
tool_call_id: call.id,
content: ChatCompletionRequestToolMessageContent::Text(
text,
),
},
}))
.unwrap(),
))
.await {
return;
}
chat_completion.messages.push(ChatCompletionRequestMessage::Tool(tool));
}
}
None => (),
}
}
Err(err) => {
tracing::error!("{err:?}");
if let Err(SendError(_)) = tx.send(Event::Data(sse::Data::new_json(&json!({
"object": "chat.completion.error",
"tool": err.to_string(),
})).unwrap())).await {
return;
}
break 'main;
}
}
}
// We must stop if the finish reason is not something we can solve with Meilisearch
if finish_reason.map_or(true, |fr| fr != FinishReason::ToolCalls) {
break;
}
}
let _ = tx.send(Event::Data(sse::Data::new("[DONE]")));
});
Ok(Sse::from_infallible_receiver(rx).with_retry_duration(Duration::from_secs(10)))
}
/// The structure used to aggregate the function calls to make.
#[derive(Debug)]
struct Call {
id: String,
function_name: String,
arguments: String,
}
impl Call {
fn append(&mut self, arguments: &str) {
self.arguments.push_str(arguments);
}
}
#[derive(Deserialize)]
struct SearchInIndexParameters {
/// The index uid to search in.
index_uid: String,
/// The query parameter to use.
q: Option<String>,
}
fn format_documents(index: &Index, documents: impl Iterator<Item = Document>) -> Vec<String> {
let rtxn = index.read_txn().unwrap();
let IndexEmbeddingConfig { name: _, config, user_provided: _ } = index
.embedding_configs(&rtxn)
.unwrap()
.into_iter()
.find(|conf| conf.name == EMBEDDER_NAME)
.unwrap();
let EmbeddingConfig {
embedder_options: _,
prompt: PromptData { template, max_bytes: _ },
quantized: _,
} = config;
#[derive(Serialize)]
struct Doc<T: Serialize> {
doc: T,
}
let template = liquid::ParserBuilder::with_stdlib().build().unwrap().parse(&template).unwrap();
documents
.map(|doc| {
let object = liquid::to_object(&Doc { doc }).unwrap();
template.render(&object).unwrap()
})
.collect()
}

View File

@@ -52,6 +52,7 @@ const PAGINATION_DEFAULT_LIMIT_FN: fn() -> usize = || 20;
mod api_key;
pub mod batches;
pub mod chat;
mod dump;
pub mod features;
pub mod indexes;
@@ -61,6 +62,7 @@ mod multi_search;
mod multi_search_analytics;
pub mod network;
mod open_api_utils;
pub mod settings;
mod snapshot;
mod swap_indexes;
pub mod tasks;
@@ -113,7 +115,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
.service(web::scope("/metrics").configure(metrics::configure))
.service(web::scope("/experimental-features").configure(features::configure))
.service(web::scope("/network").configure(network::configure));
.service(web::scope("/network").configure(network::configure))
.service(web::scope("/chat").configure(chat::configure))
.service(web::scope("/settings/chat").configure(settings::chat::configure));
#[cfg(feature = "swagger")]
{

View File

@@ -0,0 +1,111 @@
use std::collections::BTreeMap;
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::{Deserialize, Serialize};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(get_settings))
.route(web::patch().to(SeqHandler(patch_settings))),
);
}
async fn get_settings(
index_scheduler: GuardedData<
ActionPolicy<{ actions::CHAT_SETTINGS_GET }>,
Data<IndexScheduler>,
>,
) -> Result<HttpResponse, ResponseError> {
let settings = match index_scheduler.chat_settings()? {
Some(value) => serde_json::from_value(value).unwrap(),
None => ChatSettings::default(),
};
Ok(HttpResponse::Ok().json(settings))
}
async fn patch_settings(
index_scheduler: GuardedData<
ActionPolicy<{ actions::CHAT_SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
web::Json(chat_settings): web::Json<ChatSettings>,
) -> Result<HttpResponse, ResponseError> {
let chat_settings = serde_json::to_value(chat_settings).unwrap();
index_scheduler.put_chat_settings(&chat_settings)?;
Ok(HttpResponse::Ok().finish())
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct ChatSettings {
pub source: String,
pub base_api: Option<String>,
pub api_key: Option<String>,
pub prompts: ChatPrompts,
pub indexes: BTreeMap<String, ChatIndexSettings>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct ChatPrompts {
pub system: String,
pub search_description: String,
pub search_q_param: String,
pub search_index_uid_param: String,
pub pre_query: String,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct ChatIndexSettings {
pub description: String,
pub document_template: String,
}
const DEFAULT_SYSTEM_MESSAGE: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:\
1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.\
2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.\
3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"\
4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.\
5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
/// The default description of the searchInIndex tool provided to OpenAI.
const DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION: &str =
"Search the database for relevant JSON documents using an optional query.";
/// The default description of the searchInIndex `q` parameter tool provided to OpenAI.
const DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION: &str =
"The search query string used to find relevant documents in the index. \
This should contain keywords or phrases that best represent what the user is looking for. \
More specific queries will yield more precise results.";
/// The default description of the searchInIndex `index` parameter tool provided to OpenAI.
const DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION: &str =
"The name of the index to search within. An index is a collection of documents organized for search. \
Selecting the right index ensures the most relevant results for the user query";
impl Default for ChatSettings {
fn default() -> Self {
ChatSettings {
source: "openai".to_string(),
base_api: None,
api_key: None,
prompts: ChatPrompts {
system: DEFAULT_SYSTEM_MESSAGE.to_string(),
search_description: DEFAULT_SEARCH_IN_INDEX_TOOL_DESCRIPTION.to_string(),
search_q_param: DEFAULT_SEARCH_IN_INDEX_Q_PARAMETER_TOOL_DESCRIPTION.to_string(),
search_index_uid_param: DEFAULT_SEARCH_IN_INDEX_INDEX_PARAMETER_TOOL_DESCRIPTION
.to_string(),
pre_query: "".to_string(),
},
indexes: BTreeMap::new(),
}
}
}

View File

@@ -0,0 +1 @@
pub mod chat;

View File

@@ -820,6 +820,22 @@ async fn list_api_keys() {
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Chat API Key",
"description": "Use it to chat and search from the frontend",
"key": "[ignored]",
"uid": "[ignored]",
"actions": [
"search",
"chat.get"
],
"indexes": [
"*"
],
"expiresAt": null,
"createdAt": "[ignored]",
"updatedAt": "[ignored]"
},
{
"name": "Default Search API Key",
"description": "Use it to search from the frontend",