From 662c5d98715c824200d74f8006fdf623d011af22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 24 Jun 2025 17:33:24 +0200 Subject: [PATCH 1/7] Introduce filters in the chat completions --- crates/meilisearch-types/src/error.rs | 1 + crates/meilisearch-types/src/features.rs | 3 +++ .../src/routes/chats/chat_completions.rs | 4 ++++ crates/meilisearch/src/routes/chats/settings.rs | 13 +++++++++++-- 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index c57e2d042..fb5bd4f18 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -415,6 +415,7 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ; InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ; InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ; +InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ; InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ; InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST } diff --git a/crates/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs index 3c78035e8..0fabec32f 100644 --- a/crates/meilisearch-types/src/features.rs +++ b/crates/meilisearch-types/src/features.rs @@ -8,6 +8,7 @@ pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str = "Search the database for relevant JSON documents using an optional query."; pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results."; +pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, AND, OR, NOT, EXISTS, IS EMPTY, IS NOT EMPTY. Here is an example: \"price > 100 AND category = 'electronics'\""; pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query."; #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] @@ -164,6 +165,7 @@ pub struct ChatCompletionPrompts { pub system: String, pub search_description: String, pub search_q_param: String, + pub search_filter_param: String, pub search_index_uid_param: String, } @@ -173,6 +175,7 @@ impl Default for ChatCompletionPrompts { system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(), search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(), search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(), + search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(), search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(), } } diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs index 4f7087ae8..161a1b851 100644 --- a/crates/meilisearch/src/routes/chats/chat_completions.rs +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -203,6 +203,10 @@ fn setup_search_tool( // "type": ["string", "null"], "type": "string", "description": prompts.search_q_param, + }, + "filter": { + "type": "string", + "description": prompts.search_filter_param, } }, "required": ["index_uid", "q"], diff --git a/crates/meilisearch/src/routes/chats/settings.rs b/crates/meilisearch/src/routes/chats/settings.rs index 38eb0d3c5..44c099c14 100644 --- a/crates/meilisearch/src/routes/chats/settings.rs +++ b/crates/meilisearch/src/routes/chats/settings.rs @@ -8,8 +8,8 @@ use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::features::{ ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSettings, ChatCompletionSource as DbChatCompletionSource, DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT, - DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, - DEFAULT_CHAT_SYSTEM_PROMPT, + DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT, DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT, + DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT, DEFAULT_CHAT_SYSTEM_PROMPT, }; use meilisearch_types::keys::actions; use meilisearch_types::milli::update::Setting; @@ -84,6 +84,11 @@ async fn patch_settings( Setting::Reset => DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(), Setting::NotSet => old_settings.prompts.search_q_param, }, + search_filter_param: match new_prompts.search_filter_param { + Setting::Set(new_description) => new_description, + Setting::Reset => DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(), + Setting::NotSet => old_settings.prompts.search_filter_param, + }, search_index_uid_param: match new_prompts.search_index_uid_param { Setting::Set(new_description) => new_description, Setting::Reset => DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(), @@ -252,6 +257,10 @@ pub struct ChatPrompts { #[schema(value_type = Option, example = json!("This is query parameter..."))] pub search_q_param: Setting, #[serde(default)] + #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = Option, example = json!("This is filter parameter..."))] + pub search_filter_param: Setting, + #[serde(default)] #[deserr(default, error = DeserrJsonError)] #[schema(value_type = Option, example = json!("This is index you want to search in..."))] pub search_index_uid_param: Setting, From 1a9dbd364eff68b17a43df1892bcdb3d27569a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 24 Jun 2025 18:43:09 +0200 Subject: [PATCH 2/7] Fix some issues --- crates/meilisearch-types/src/features.rs | 2 +- .../src/routes/chats/chat_completions.rs | 39 ++++++++++++------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/crates/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs index 0fabec32f..2fe4f7d43 100644 --- a/crates/meilisearch-types/src/features.rs +++ b/crates/meilisearch-types/src/features.rs @@ -8,7 +8,7 @@ pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str = "Search the database for relevant JSON documents using an optional query."; pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results."; -pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, AND, OR, NOT, EXISTS, IS EMPTY, IS NOT EMPTY. Here is an example: \"price > 100 AND category = 'electronics'\""; +pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\""; pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query."; #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs index 161a1b851..830efa844 100644 --- a/crates/meilisearch/src/routes/chats/chat_completions.rs +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -209,7 +209,7 @@ fn setup_search_tool( "description": prompts.search_filter_param, } }, - "required": ["index_uid", "q"], + "required": ["index_uid", "q", "filter"], "additionalProperties": false, })) .strict(true) @@ -251,11 +251,16 @@ async fn process_search_request( auth_token: &str, index_uid: String, q: Option, + filter: Option, ) -> Result<(Index, Vec, String), ResponseError> { let index = index_scheduler.index(&index_uid)?; let rtxn = index.static_read_txn()?; let ChatConfig { description: _, prompt: _, search_parameters } = index.chat_config(&rtxn)?; - let mut query = SearchQuery { q, ..SearchQuery::from(search_parameters) }; + let mut query = SearchQuery { + q, + filter: filter.map(serde_json::Value::from), + ..SearchQuery::from(search_parameters) + }; let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate( auth_ctrl, auth_token, @@ -399,16 +404,19 @@ async fn non_streamed_chat( for call in meili_calls { let result = match serde_json::from_str(&call.function.arguments) { - Ok(SearchInIndexParameters { index_uid, q }) => process_search_request( - &index_scheduler, - auth_ctrl.clone(), - &search_queue, - auth_token, - index_uid, - q, - ) - .await - .map_err(|e| e.to_string()), + Ok(SearchInIndexParameters { index_uid, q, filter }) => { + process_search_request( + &index_scheduler, + auth_ctrl.clone(), + &search_queue, + auth_token, + index_uid, + q, + filter, + ) + .await + .map_err(|e| e.to_string()) + } Err(err) => Err(err.to_string()), }; @@ -722,14 +730,15 @@ async fn handle_meili_tools( let mut error = None; - let result = match serde_json::from_str(&call.function.arguments) { - Ok(SearchInIndexParameters { index_uid, q }) => match process_search_request( + let answer = match serde_json::from_str(&call.function.arguments) { + Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request( index_scheduler, auth_ctrl.clone(), search_queue, auth_token, index_uid, q, + filter, ) .await { @@ -805,4 +814,6 @@ struct SearchInIndexParameters { index_uid: String, /// The query parameter to use. q: Option, + /// The filter parameter to use. + filter: Option, } From 34f2ab7093874ccf6617c5ff18c7e0a21e66afb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 26 Jun 2025 12:00:09 +0200 Subject: [PATCH 3/7] WIP report search errors to the LLM --- .../src/routes/chats/chat_completions.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs index 830efa844..1610419ed 100644 --- a/crates/meilisearch/src/routes/chats/chat_completions.rs +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -289,14 +289,23 @@ async fn process_search_request( let (search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?; - search_from_kind(index_uid, search_kind, search) - .map(|(search_results, _)| (rtxn, search_results)) - .map_err(ResponseError::from) + match search_from_kind(index_uid, search_kind, search) { + Ok((search_results, _)) => Ok((rtxn, Ok(search_results))), + Err(MeilisearchHttpError::Milli { + error: meilisearch_types::milli::Error::UserError(user_error), + index_name: _, + }) => Ok((rtxn, Err(user_error))), + Err(err) => Err(ResponseError::from(err)), + } }) .await; permit.drop().await; - let output = output?; + let output = match output? { + Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)), + Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())), + Err(err) => Err(ResponseError::from(err)), + }; let mut documents = Vec::new(); if let Ok((ref rtxn, ref search_result)) = output { MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc(); @@ -730,7 +739,7 @@ async fn handle_meili_tools( let mut error = None; - let answer = match serde_json::from_str(&call.function.arguments) { + let result = match serde_json::from_str(&call.function.arguments) { Ok(SearchInIndexParameters { index_uid, q, filter }) => match process_search_request( index_scheduler, auth_ctrl.clone(), From e654f662230448662b4ee14aaf7d75c1550ae85b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 1 Jul 2025 16:24:02 +0200 Subject: [PATCH 4/7] Support filtering --- crates/meilisearch-types/src/features.rs | 4 +- .../src/routes/chats/chat_completions.rs | 53 +++++++++++++++++-- crates/milli/src/attribute_patterns.rs | 6 +++ 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/crates/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs index 2fe4f7d43..006f39d15 100644 --- a/crates/meilisearch-types/src/features.rs +++ b/crates/meilisearch-types/src/features.rs @@ -4,11 +4,11 @@ use serde::{Deserialize, Serialize}; use crate::error::{Code, ResponseError}; -pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search."; +pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. "; pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str = "Search the database for relevant JSON documents using an optional query."; pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results."; -pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\""; +pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: "; pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query."; #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs index 1610419ed..b879f85f8 100644 --- a/crates/meilisearch/src/routes/chats/chat_completions.rs +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -27,9 +27,10 @@ use meilisearch_types::features::{ ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionSource as DbChatCompletionSource, SystemRole, }; +use meilisearch_types::heed::RoTxn; use meilisearch_types::keys::actions; use meilisearch_types::milli::index::ChatConfig; -use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget}; +use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, TimeBudget}; use meilisearch_types::{Document, Index}; use serde::Deserialize; use serde_json::json; @@ -169,6 +170,7 @@ fn setup_search_tool( let mut index_uids = Vec::new(); let mut function_description = prompts.search_description.clone(); + let mut filter_description = prompts.search_filter_param.clone(); index_scheduler.try_for_each_index::<_, ()>(|name, index| { // Make sure to skip unauthorized indexes if !filters.is_index_authorized(name) { @@ -180,16 +182,22 @@ fn setup_search_tool( let index_description = chat_config.description; let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n"); index_uids.push(name.to_string()); + let facet_distributions = format_facet_distributions(&index, &rtxn, 10).unwrap(); // TODO do not unwrap + let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index"); + let _ = writeln!(&mut filter_description, "{facet_distributions}"); Ok(()) })?; + tracing::debug!("LLM function description: {function_description}"); + tracing::debug!("LLM filter description: {filter_description}"); + let tool = ChatCompletionToolArgs::default() .r#type(ChatCompletionToolType::Function) .function( FunctionObjectArgs::default() .name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME) - .description(&function_description) + .description(function_description) .parameters(json!({ "type": "object", "properties": { @@ -206,7 +214,7 @@ fn setup_search_tool( }, "filter": { "type": "string", - "description": prompts.search_filter_param, + "description": filter_description, } }, "required": ["index_uid", "q", "filter"], @@ -261,6 +269,9 @@ async fn process_search_request( filter: filter.map(serde_json::Value::from), ..SearchQuery::from(search_parameters) }; + + tracing::debug!("LLM query: {:?}", query); + let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate( auth_ctrl, auth_token, @@ -826,3 +837,39 @@ struct SearchInIndexParameters { /// The filter parameter to use. filter: Option, } + +fn format_facet_distributions( + index: &Index, + rtxn: &RoTxn, + max_values_per_facet: usize, +) -> meilisearch_types::milli::Result { + let universe = index.documents_ids(&rtxn)?; + let rules = index.filterable_attributes_rules(&rtxn)?; + let fields_ids_map = index.fields_ids_map(&rtxn)?; + let filterable_attributes = fields_ids_map + .names() + .filter(|name| rules.iter().any(|rule| rule.match_str(name).matches())) + .map(|name| (name, OrderBy::Count)); + let facets_distribution = index + .facets_distribution(&rtxn) + .max_values_per_facet(max_values_per_facet) + .candidates(universe) + .facets(filterable_attributes) + .execute()?; + + let mut output = String::new(); + for (facet_name, entries) in facets_distribution { + let _ = write!(&mut output, "{}: ", facet_name); + let total_entries = entries.len(); + for (i, (value, count)) in entries.into_iter().enumerate() { + let _ = if total_entries.saturating_sub(1) == i { + write!(&mut output, "{} ({}).", value, count) + } else { + write!(&mut output, "{} ({}), ", value, count) + }; + } + let _ = writeln!(&mut output); + } + + Ok(output) +} diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs index 8da6942a3..d879cb2c3 100644 --- a/crates/milli/src/attribute_patterns.rs +++ b/crates/milli/src/attribute_patterns.rs @@ -130,6 +130,12 @@ pub enum PatternMatch { NoMatch, } +impl PatternMatch { + pub fn matches(&self) -> bool { + matches!(self, PatternMatch::Match) + } +} + #[cfg(test)] mod tests { use super::*; From d76dcc8998738dee3c072e1244a4668294f1cb3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 7 Jul 2025 11:40:52 +0200 Subject: [PATCH 5/7] Make clippy happy --- .../meilisearch/src/routes/chats/chat_completions.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs index b879f85f8..799f4c9f0 100644 --- a/crates/meilisearch/src/routes/chats/chat_completions.rs +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -182,7 +182,7 @@ fn setup_search_tool( let index_description = chat_config.description; let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n"); index_uids.push(name.to_string()); - let facet_distributions = format_facet_distributions(&index, &rtxn, 10).unwrap(); // TODO do not unwrap + let facet_distributions = format_facet_distributions(index, &rtxn, 10).unwrap(); // TODO do not unwrap let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index"); let _ = writeln!(&mut filter_description, "{facet_distributions}"); @@ -315,7 +315,7 @@ async fn process_search_request( let output = match output? { Ok((rtxn, Ok(search_results))) => Ok((rtxn, search_results)), Ok((_rtxn, Err(error))) => return Ok((index, Vec::new(), error.to_string())), - Err(err) => Err(ResponseError::from(err)), + Err(err) => Err(err), }; let mut documents = Vec::new(); if let Ok((ref rtxn, ref search_result)) = output { @@ -843,15 +843,15 @@ fn format_facet_distributions( rtxn: &RoTxn, max_values_per_facet: usize, ) -> meilisearch_types::milli::Result { - let universe = index.documents_ids(&rtxn)?; - let rules = index.filterable_attributes_rules(&rtxn)?; - let fields_ids_map = index.fields_ids_map(&rtxn)?; + let universe = index.documents_ids(rtxn)?; + let rules = index.filterable_attributes_rules(rtxn)?; + let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_attributes = fields_ids_map .names() .filter(|name| rules.iter().any(|rule| rule.match_str(name).matches())) .map(|name| (name, OrderBy::Count)); let facets_distribution = index - .facets_distribution(&rtxn) + .facets_distribution(rtxn) .max_values_per_facet(max_values_per_facet) .candidates(universe) .facets(filterable_attributes) From 2a015ac3b821545b3e2fab8c7424a6584d536f17 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jul 2025 14:50:10 +0200 Subject: [PATCH 6/7] Implement basic few shot prompting to improve the query capabilities --- crates/meilisearch-types/src/features.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs index 006f39d15..d1a6b61d8 100644 --- a/crates/meilisearch-types/src/features.rs +++ b/crates/meilisearch-types/src/features.rs @@ -6,7 +6,7 @@ use crate::error::{Code, ResponseError}; pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. "; pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str = - "Search the database for relevant JSON documents using an optional query."; + "Query: 'best story about Rust before 2018' with year: 2018 (334), 2020 (212), 2021 (210)\r\nlabel: analysis (500), golang (435), javascript (545)\r\ntype: story (760), link (989)\r\nvote: 300 (1), 298 (1), 278 (3)\r\n: {\"q\": \"\", \"filter\": \"category = Rust AND year < 2018 AND vote > 100\"}\r\nQuery: 'A black or green car that can go fast with red brakes'\r\nmaxspeed_kmh: 200 (100), 150 (300), 130 (330)\r\ncolor: black (300), grey (250), red (200), green (50)\r\nbrand: Toyota (300), Renault (100), Jeep (98), Ferrari (50)\r\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed > 150 AND color IN ['black', green]\"}\r\nQuery: 'Superman movie released in 2018 or after' with year: 2018 (334), 2020 (212), 2021 (210)\r\ngenres: Drama (218), Comedy (220), Adventure (210), Fiction (196)\r\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}"; pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results."; pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: "; pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query."; From 0791506124aa7aeff7f0fc715bab661eabe54fcd Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jul 2025 17:10:45 +0200 Subject: [PATCH 7/7] Fix some proposals --- crates/meilisearch-types/src/features.rs | 2 +- .../meilisearch/src/routes/chats/chat_completions.rs | 10 +++++----- crates/milli/src/attribute_patterns.rs | 6 ------ 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/crates/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs index d1a6b61d8..8878a8281 100644 --- a/crates/meilisearch-types/src/features.rs +++ b/crates/meilisearch-types/src/features.rs @@ -6,7 +6,7 @@ use crate::error::{Code, ResponseError}; pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. "; pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str = - "Query: 'best story about Rust before 2018' with year: 2018 (334), 2020 (212), 2021 (210)\r\nlabel: analysis (500), golang (435), javascript (545)\r\ntype: story (760), link (989)\r\nvote: 300 (1), 298 (1), 278 (3)\r\n: {\"q\": \"\", \"filter\": \"category = Rust AND year < 2018 AND vote > 100\"}\r\nQuery: 'A black or green car that can go fast with red brakes'\r\nmaxspeed_kmh: 200 (100), 150 (300), 130 (330)\r\ncolor: black (300), grey (250), red (200), green (50)\r\nbrand: Toyota (300), Renault (100), Jeep (98), Ferrari (50)\r\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed > 150 AND color IN ['black', green]\"}\r\nQuery: 'Superman movie released in 2018 or after' with year: 2018 (334), 2020 (212), 2021 (210)\r\ngenres: Drama (218), Comedy (220), Adventure (210), Fiction (196)\r\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}"; + "Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}"; pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results."; pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: "; pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query."; diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs index 799f4c9f0..b636678f5 100644 --- a/crates/meilisearch/src/routes/chats/chat_completions.rs +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -30,7 +30,7 @@ use meilisearch_types::features::{ use meilisearch_types::heed::RoTxn; use meilisearch_types::keys::actions; use meilisearch_types::milli::index::ChatConfig; -use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, TimeBudget}; +use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, PatternMatch, TimeBudget}; use meilisearch_types::{Document, Index}; use serde::Deserialize; use serde_json::json; @@ -848,7 +848,7 @@ fn format_facet_distributions( let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_attributes = fields_ids_map .names() - .filter(|name| rules.iter().any(|rule| rule.match_str(name).matches())) + .filter(|name| rules.iter().any(|rule| matches!(rule.match_str(name), PatternMatch::Match))) .map(|name| (name, OrderBy::Count)); let facets_distribution = index .facets_distribution(rtxn) @@ -861,11 +861,11 @@ fn format_facet_distributions( for (facet_name, entries) in facets_distribution { let _ = write!(&mut output, "{}: ", facet_name); let total_entries = entries.len(); - for (i, (value, count)) in entries.into_iter().enumerate() { + for (i, (value, _count)) in entries.into_iter().enumerate() { let _ = if total_entries.saturating_sub(1) == i { - write!(&mut output, "{} ({}).", value, count) + write!(&mut output, "{value}.") } else { - write!(&mut output, "{} ({}), ", value, count) + write!(&mut output, "{value}, ") }; } let _ = writeln!(&mut output); diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs index d879cb2c3..8da6942a3 100644 --- a/crates/milli/src/attribute_patterns.rs +++ b/crates/milli/src/attribute_patterns.rs @@ -130,12 +130,6 @@ pub enum PatternMatch { NoMatch, } -impl PatternMatch { - pub fn matches(&self) -> bool { - matches!(self, PatternMatch::Match) - } -} - #[cfg(test)] mod tests { use super::*;