mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-06 12:46:31 +00:00
Expose the number of tokens on the chat completions routes
This commit is contained in:
@ -21,6 +21,11 @@ lazy_static! {
|
|||||||
"Meilisearch number of search requests performed by the chat route itself"
|
"Meilisearch number of search requests performed by the chat route itself"
|
||||||
))
|
))
|
||||||
.expect("Can't create a metric");
|
.expect("Can't create a metric");
|
||||||
|
pub static ref MEILISEARCH_CHAT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
|
||||||
|
opts!("meilisearch_chat_tokens_usage", "Meilisearch Chat Tokens Usage"),
|
||||||
|
&["chat", "model", "type"]
|
||||||
|
)
|
||||||
|
.expect("Can't create a metric");
|
||||||
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
|
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
|
||||||
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
|
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
|
||||||
.expect("Can't create a metric");
|
.expect("Can't create a metric");
|
||||||
|
@ -13,9 +13,9 @@ use async_openai::types::{
|
|||||||
ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage,
|
ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage,
|
||||||
ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
|
ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
|
||||||
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
|
ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
|
||||||
ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType,
|
ChatCompletionStreamOptions, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs,
|
||||||
CreateChatCompletionRequest, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
|
ChatCompletionToolType, CreateChatCompletionRequest, CreateChatCompletionStreamResponse,
|
||||||
FunctionCallStream, FunctionObjectArgs,
|
FinishReason, FunctionCall, FunctionCallStream, FunctionObjectArgs,
|
||||||
};
|
};
|
||||||
use async_openai::Client;
|
use async_openai::Client;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
@ -49,7 +49,8 @@ use crate::error::MeilisearchHttpError;
|
|||||||
use crate::extractors::authentication::policies::ActionPolicy;
|
use crate::extractors::authentication::policies::ActionPolicy;
|
||||||
use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
|
use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
|
||||||
use crate::metrics::{
|
use crate::metrics::{
|
||||||
MEILISEARCH_CHAT_INTERNAL_SEARCH_REQUESTS, MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
|
MEILISEARCH_CHAT_INTERNAL_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOKENS_USAGE,
|
||||||
|
MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
|
||||||
};
|
};
|
||||||
use crate::routes::chats::utils::SseEventSender;
|
use crate::routes::chats::utils::SseEventSender;
|
||||||
use crate::routes::indexes::search::search_kind;
|
use crate::routes::indexes::search::search_kind;
|
||||||
@ -490,6 +491,7 @@ async fn streamed_chat(
|
|||||||
|
|
||||||
let (tx, rx) = tokio::sync::mpsc::channel(10);
|
let (tx, rx) = tokio::sync::mpsc::channel(10);
|
||||||
let tx = SseEventSender::new(tx);
|
let tx = SseEventSender::new(tx);
|
||||||
|
let workspace_uid = workspace_uid.to_string();
|
||||||
let _join_handle = Handle::current().spawn(async move {
|
let _join_handle = Handle::current().spawn(async move {
|
||||||
let client = Client::with_config(config.clone());
|
let client = Client::with_config(config.clone());
|
||||||
let mut global_tool_calls = HashMap::<u32, Call>::new();
|
let mut global_tool_calls = HashMap::<u32, Call>::new();
|
||||||
@ -499,6 +501,7 @@ async fn streamed_chat(
|
|||||||
let output = run_conversation(
|
let output = run_conversation(
|
||||||
&index_scheduler,
|
&index_scheduler,
|
||||||
&auth_ctrl,
|
&auth_ctrl,
|
||||||
|
&workspace_uid,
|
||||||
&search_queue,
|
&search_queue,
|
||||||
&auth_token,
|
&auth_token,
|
||||||
&client,
|
&client,
|
||||||
@ -536,6 +539,7 @@ async fn run_conversation<C: async_openai::config::Config>(
|
|||||||
Data<IndexScheduler>,
|
Data<IndexScheduler>,
|
||||||
>,
|
>,
|
||||||
auth_ctrl: &web::Data<AuthController>,
|
auth_ctrl: &web::Data<AuthController>,
|
||||||
|
workspace_uid: &str,
|
||||||
search_queue: &web::Data<SearchQueue>,
|
search_queue: &web::Data<SearchQueue>,
|
||||||
auth_token: &str,
|
auth_token: &str,
|
||||||
client: &Client<C>,
|
client: &Client<C>,
|
||||||
@ -546,12 +550,33 @@ async fn run_conversation<C: async_openai::config::Config>(
|
|||||||
function_support: FunctionSupport,
|
function_support: FunctionSupport,
|
||||||
) -> Result<ControlFlow<Option<FinishReason>, ()>, SendError<Event>> {
|
) -> Result<ControlFlow<Option<FinishReason>, ()>, SendError<Event>> {
|
||||||
let mut finish_reason = None;
|
let mut finish_reason = None;
|
||||||
|
chat_completion.stream_options = Some(ChatCompletionStreamOptions { include_usage: true });
|
||||||
// safety: unwrap: can only happens if `stream` was set to `false`
|
// safety: unwrap: can only happens if `stream` was set to `false`
|
||||||
let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap();
|
let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap();
|
||||||
while let Some(result) = response.next().await {
|
while let Some(result) = response.next().await {
|
||||||
match result {
|
match result {
|
||||||
Ok(resp) => {
|
Ok(resp) => {
|
||||||
let choice = &resp.choices[0];
|
let choice = match resp.choices.get(0) {
|
||||||
|
Some(choice) => choice,
|
||||||
|
None => {
|
||||||
|
if let Some(usage) = resp.usage.as_ref() {
|
||||||
|
for (r#type, value) in &[
|
||||||
|
("prompt", usage.prompt_tokens),
|
||||||
|
("completion", usage.completion_tokens),
|
||||||
|
("total", usage.total_tokens),
|
||||||
|
] {
|
||||||
|
MEILISEARCH_CHAT_TOKENS_USAGE
|
||||||
|
.with_label_values(&[
|
||||||
|
workspace_uid,
|
||||||
|
&chat_completion.model,
|
||||||
|
r#type,
|
||||||
|
])
|
||||||
|
.inc_by(*value as u64);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
};
|
||||||
finish_reason = choice.finish_reason;
|
finish_reason = choice.finish_reason;
|
||||||
|
|
||||||
let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta;
|
let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta;
|
||||||
|
Reference in New Issue
Block a user