Support filtering

This commit is contained in:
Clément Renault
2025-07-01 16:24:02 +02:00
committed by Kerollmops
parent 34f2ab7093
commit e654f66223
3 changed files with 58 additions and 5 deletions

View File

@@ -4,11 +4,11 @@ use serde::{Deserialize, Serialize};
use crate::error::{Code, ResponseError}; use crate::error::{Code, ResponseError};
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search."; pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str = pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
"Search the database for relevant JSON documents using an optional query."; "Search the database for relevant JSON documents using an optional query.";
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results."; pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\""; pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query."; pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]

View File

@@ -27,9 +27,10 @@ use meilisearch_types::features::{
ChatCompletionPrompts as DbChatCompletionPrompts, ChatCompletionPrompts as DbChatCompletionPrompts,
ChatCompletionSource as DbChatCompletionSource, SystemRole, ChatCompletionSource as DbChatCompletionSource, SystemRole,
}; };
use meilisearch_types::heed::RoTxn;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use meilisearch_types::milli::index::ChatConfig; use meilisearch_types::milli::index::ChatConfig;
use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, TimeBudget}; use meilisearch_types::milli::{all_obkv_to_json, obkv_to_json, OrderBy, TimeBudget};
use meilisearch_types::{Document, Index}; use meilisearch_types::{Document, Index};
use serde::Deserialize; use serde::Deserialize;
use serde_json::json; use serde_json::json;
@@ -169,6 +170,7 @@ fn setup_search_tool(
let mut index_uids = Vec::new(); let mut index_uids = Vec::new();
let mut function_description = prompts.search_description.clone(); let mut function_description = prompts.search_description.clone();
let mut filter_description = prompts.search_filter_param.clone();
index_scheduler.try_for_each_index::<_, ()>(|name, index| { index_scheduler.try_for_each_index::<_, ()>(|name, index| {
// Make sure to skip unauthorized indexes // Make sure to skip unauthorized indexes
if !filters.is_index_authorized(name) { if !filters.is_index_authorized(name) {
@@ -180,16 +182,22 @@ fn setup_search_tool(
let index_description = chat_config.description; let index_description = chat_config.description;
let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n"); let _ = writeln!(&mut function_description, "\n\n - {name}: {index_description}\n");
index_uids.push(name.to_string()); index_uids.push(name.to_string());
let facet_distributions = format_facet_distributions(&index, &rtxn, 10).unwrap(); // TODO do not unwrap
let _ = writeln!(&mut filter_description, "\n## Facet distributions of the {name} index");
let _ = writeln!(&mut filter_description, "{facet_distributions}");
Ok(()) Ok(())
})?; })?;
tracing::debug!("LLM function description: {function_description}");
tracing::debug!("LLM filter description: {filter_description}");
let tool = ChatCompletionToolArgs::default() let tool = ChatCompletionToolArgs::default()
.r#type(ChatCompletionToolType::Function) .r#type(ChatCompletionToolType::Function)
.function( .function(
FunctionObjectArgs::default() FunctionObjectArgs::default()
.name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME) .name(MEILI_SEARCH_IN_INDEX_FUNCTION_NAME)
.description(&function_description) .description(function_description)
.parameters(json!({ .parameters(json!({
"type": "object", "type": "object",
"properties": { "properties": {
@@ -206,7 +214,7 @@ fn setup_search_tool(
}, },
"filter": { "filter": {
"type": "string", "type": "string",
"description": prompts.search_filter_param, "description": filter_description,
} }
}, },
"required": ["index_uid", "q", "filter"], "required": ["index_uid", "q", "filter"],
@@ -261,6 +269,9 @@ async fn process_search_request(
filter: filter.map(serde_json::Value::from), filter: filter.map(serde_json::Value::from),
..SearchQuery::from(search_parameters) ..SearchQuery::from(search_parameters)
}; };
tracing::debug!("LLM query: {:?}", query);
let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate( let auth_filter = ActionPolicy::<{ actions::SEARCH }>::authenticate(
auth_ctrl, auth_ctrl,
auth_token, auth_token,
@@ -826,3 +837,39 @@ struct SearchInIndexParameters {
/// The filter parameter to use. /// The filter parameter to use.
filter: Option<String>, filter: Option<String>,
} }
/// Renders the facet distribution of every filterable attribute of `index`
/// as plain text.
///
/// A field is considered when its name, as listed in the index's fields-ids
/// map, matches at least one of the index's filterable-attributes rules.
/// Each considered facet is requested with `OrderBy::Count`, and at most
/// `max_values_per_facet` values are requested per facet.
///
/// The output holds one line per facet, shaped as:
///
/// ```text
/// facet_name: value_a (count_a), value_b (count_b).
/// ```
///
/// A facet that has no values still produces a `facet_name: ` line.
///
/// # Errors
///
/// Propagates any error returned while reading the document ids, the
/// filterable-attributes rules or the fields-ids map, or while executing
/// the facet distribution.
fn format_facet_distributions(
    index: &Index,
    rtxn: &RoTxn,
    max_values_per_facet: usize,
) -> meilisearch_types::milli::Result<String> {
    // `rtxn` is already a reference: pass it through as-is rather than
    // re-borrowing it into a `&&RoTxn` at every call site.
    let universe = index.documents_ids(rtxn)?;
    let rules = index.filterable_attributes_rules(rtxn)?;
    let fields_ids_map = index.fields_ids_map(rtxn)?;

    // Keep only the fields matched by at least one filterable rule.
    let filterable_attributes = fields_ids_map
        .names()
        .filter(|name| rules.iter().any(|rule| rule.match_str(name).matches()))
        .map(|name| (name, OrderBy::Count));

    let facets_distribution = index
        .facets_distribution(rtxn)
        .max_values_per_facet(max_values_per_facet)
        .candidates(universe)
        .facets(filterable_attributes)
        .execute()?;

    let mut output = String::new();
    for (facet_name, entries) in facets_distribution {
        // Writing into a `String` cannot fail, hence the ignored results.
        let _ = write!(output, "{facet_name}: ");

        // Peek ahead so the last entry is closed with a period while every
        // other entry is followed by a comma separator.
        let mut entries = entries.into_iter().peekable();
        while let Some((value, count)) = entries.next() {
            let terminator = if entries.peek().is_some() { ", " } else { "." };
            let _ = write!(output, "{value} ({count}){terminator}");
        }

        let _ = writeln!(output);
    }

    Ok(output)
}

View File

@@ -130,6 +130,12 @@ pub enum PatternMatch {
NoMatch, NoMatch,
} }
impl PatternMatch {
pub fn matches(&self) -> bool {
matches!(self, PatternMatch::Match)
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;