From da063062740862fb3db369f2749c8089411811ee Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 2 Oct 2025 11:04:19 +0200 Subject: [PATCH] Add header-based metadata opt-in for search responses - Add Meili-Include-Metadata header constant - Modify perform_search to conditionally include metadata based on header - Modify perform_federated_search to conditionally include metadata based on header - Update all search routes to check for header and pass include_metadata parameter - Forward Meili-Include-Metadata header to remote requests for federated search - Ensure remote queries include primaryKey metadata when header is present --- .../meilisearch/src/routes/indexes/search.rs | 14 ++- crates/meilisearch/src/routes/multi_search.rs | 13 ++- .../meilisearch/src/search/federated/mod.rs | 2 +- .../src/search/federated/perform.rs | 88 +++++++++++-------- .../meilisearch/src/search/federated/proxy.rs | 18 +++- crates/meilisearch/src/search/mod.rs | 24 +++-- 6 files changed, 109 insertions(+), 50 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 445a3bb54..f8ee912b9 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -26,7 +26,7 @@ use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, + DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; @@ -345,6 +345,11 @@ pub async fn search_with_url_query( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors); let permit = 
search_queue.try_get_search_permit().await?; + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); + let search_result = tokio::task::spawn_blocking(move || { perform_search( index_uid.to_string(), @@ -354,6 +359,7 @@ pub async fn search_with_url_query( retrieve_vector, index_scheduler.features(), request_uid, + include_metadata, ) }) .await; @@ -453,6 +459,11 @@ pub async fn search_with_post( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search( @@ -463,6 +474,7 @@ pub async fn search_with_post( retrieve_vectors, index_scheduler.features(), request_uid, + include_metadata, ) }) .await; diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index 15931644f..06a92971a 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -21,7 +21,7 @@ use crate::routes::indexes::search::search_kind; use crate::search::{ add_search_rules, perform_federated_search, perform_search, FederatedSearch, FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, - PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, + PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; @@ -202,6 +202,10 @@ pub async fn multi_search_with_post( .headers() .get(PROXY_SEARCH_HEADER) .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); let search_result = perform_federated_search( &index_scheduler, queries, @@ -209,6 +213,7 @@ pub async fn multi_search_with_post( 
features, is_proxy, request_uid, + include_metadata, ) .await; permit.drop().await; @@ -228,6 +233,11 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(search_result?) } None => { + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); + // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code // changes. @@ -286,6 +296,7 @@ pub async fn multi_search_with_post( retrieve_vector, features, request_uid, + include_metadata, ) }) .await diff --git a/crates/meilisearch/src/search/federated/mod.rs b/crates/meilisearch/src/search/federated/mod.rs index 40204c591..2b20f64cb 100644 --- a/crates/meilisearch/src/search/federated/mod.rs +++ b/crates/meilisearch/src/search/federated/mod.rs @@ -4,7 +4,7 @@ mod types; mod weighted_scores; pub use perform::perform_federated_search; -pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; +pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER}; pub use types::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, }; diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index 8aee62df6..5ca9f7148 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -42,6 +42,7 @@ pub async fn perform_federated_search( features: RoFeatures, is_proxy: bool, request_uid: Uuid, + include_metadata: bool, ) -> Result { if is_proxy { features.check_network("Performing a remote federated search")?; @@ -80,6 +81,7 @@ pub async fn perform_federated_search( partitioned_queries.remote_queries_by_host.clone(), &federation, deadline, + include_metadata, ); // 2.2. 
concurrently execute local queries @@ -127,48 +129,54 @@ pub async fn perform_federated_search( merge_metadata(&mut results_by_index, &remote_results); // 3.1.1. Build metadata in the same order as the original queries - let mut query_metadata = Vec::new(); + let query_metadata = if include_metadata { + let mut query_metadata = Vec::new(); - // Create a map of remote results by index_uid for quick lookup - let mut remote_results_by_index = std::collections::BTreeMap::new(); - for remote_result in &remote_results { - if let Some(remote_metadata) = &remote_result.metadata { - for remote_meta in remote_metadata { - remote_results_by_index.insert(remote_meta.index_uid.clone(), remote_meta.clone()); + // Create a map of remote results by index_uid for quick lookup + let mut remote_results_by_index = std::collections::BTreeMap::new(); + for remote_result in &remote_results { + if let Some(remote_metadata) = &remote_result.metadata { + for remote_meta in remote_metadata { + remote_results_by_index + .insert(remote_meta.index_uid.clone(), remote_meta.clone()); + } } } - } - // Build metadata in the same order as the original queries - for original_query in original_queries { - let query_uid = Uuid::now_v7(); - let index_uid = original_query.index_uid.to_string(); + // Build metadata in the same order as the original queries + for original_query in original_queries { + let query_uid = Uuid::now_v7(); + let index_uid = original_query.index_uid.to_string(); - // Determine if this is a remote query - let (_, _, federation_options) = original_query.into_index_query_federation(); - let remote = federation_options.and_then(|options| options.remote); + // Determine if this is a remote query + let (_, _, federation_options) = original_query.into_index_query_federation(); + let remote = federation_options.and_then(|options| options.remote); - // Get primary key for this index - let mut primary_key = None; + // Get primary key for this index + let mut primary_key = None; - if 
remote.is_some() { - // For remote queries, try to get primary key from remote results - if let Some(remote_meta) = remote_results_by_index.get(&index_uid) { - primary_key = remote_meta.primary_key.clone(); + if remote.is_some() { + // For remote queries, try to get primary key from remote results + if let Some(remote_meta) = remote_results_by_index.get(&index_uid) { + primary_key = remote_meta.primary_key.clone(); + } + } else { + // For local queries, get primary key from local index + primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| { + index.read_txn().ok().and_then(|rtxn| { + let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()); + drop(rtxn); + pk + }) + }); } - } else { - // For local queries, get primary key from local index - primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| { - index.read_txn().ok().and_then(|rtxn| { - let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()); - drop(rtxn); - pk - }) - }); - } - query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); - } + query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); + } + Some(query_metadata) + } else { + None + }; // 3.2. 
merge hits let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results) @@ -231,7 +239,7 @@ pub async fn perform_federated_search( facets_by_index, remote_errors: partitioned_queries.has_remote.then_some(remote_errors), request_uid: Some(request_uid), - metadata: Some(query_metadata), + metadata: query_metadata, }) } @@ -630,7 +638,12 @@ struct RemoteSearch { } impl RemoteSearch { - fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self { + fn start( + queries: RemoteQueriesByHost, + federation: &Federation, + deadline: Instant, + include_metadata: bool, + ) -> Self { let mut in_flight_remote_queries = BTreeMap::new(); let client = reqwest::ClientBuilder::new() .connect_timeout(std::time::Duration::from_millis(200)) @@ -650,7 +663,10 @@ impl RemoteSearch { // never merge distant facets proxy_federation.merge_facets = None; let params = params.clone(); - async move { proxy_search(&node, queries, proxy_federation, &params).await } + async move { + proxy_search(&node, queries, proxy_federation, &params, include_metadata) + .await + } }), ); } diff --git a/crates/meilisearch/src/search/federated/proxy.rs b/crates/meilisearch/src/search/federated/proxy.rs index 67594f645..26387030c 100644 --- a/crates/meilisearch/src/search/federated/proxy.rs +++ b/crates/meilisearch/src/search/federated/proxy.rs @@ -11,6 +11,7 @@ use crate::search::SearchQueryWithIndex; pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; +pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata"; mod error { use meilisearch_types::error::ResponseError; @@ -98,6 +99,7 @@ pub async fn proxy_search( queries: Vec, federation: Federation, params: &ProxySearchParams, + include_metadata: bool, ) -> Result { let url = format!("{}/multi-search", node.url); @@ -119,7 +121,16 @@ pub async fn proxy_search( }; for i in 0..params.try_count { - match try_proxy_search(&url, search_api_key, 
&federated, &params.client, deadline).await { + match try_proxy_search( + &url, + search_api_key, + &federated, + &params.client, + deadline, + include_metadata, + ) + .await + { Ok(response) => return Ok(response), Err(retry) => { let duration = retry.into_duration(i)?; @@ -127,7 +138,7 @@ pub async fn proxy_search( } } } - try_proxy_search(&url, search_api_key, &federated, &params.client, deadline) + try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata) .await .map_err(Retry::into_error) } @@ -138,6 +149,7 @@ async fn try_proxy_search( federated: &FederatedSearch, client: &Client, deadline: std::time::Instant, + include_metadata: bool, ) -> Result { let timeout = deadline.saturating_duration_since(std::time::Instant::now()); @@ -148,6 +160,8 @@ async fn try_proxy_search( request }; let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE); + let request = + if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request }; let response = request.send().await; let response = match response { diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 28797603f..87d6e5195 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -43,7 +43,7 @@ use crate::error::MeilisearchHttpError; mod federated; pub use federated::{ perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, - FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, + FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER, }; mod ranking_rules; @@ -1146,6 +1146,7 @@ pub fn perform_search( retrieve_vectors: RetrieveVectors, features: RoFeatures, request_uid: Uuid, + include_metadata: bool, ) -> Result { let before_search = Instant::now(); let index_uid_for_metadata = index_uid.clone(); @@ -1171,8 +1172,18 @@ pub fn perform_search( semantic_hit_count, ) = 
search_from_kind(index_uid.clone(), search_kind, search)?; - let query_uid = Uuid::now_v7(); - let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + let metadata = if include_metadata { + let query_uid = Uuid::now_v7(); + let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + Some(SearchMetadata { + query_uid, + index_uid: index_uid_for_metadata, + primary_key, + remote: None, // Local searches don't have a remote + }) + } else { + None + }; let SearchQuery { q, @@ -1267,12 +1278,7 @@ pub fn perform_search( used_negative_operator, semantic_hit_count, request_uid: Some(request_uid), - metadata: Some(SearchMetadata { - query_uid, - index_uid: index_uid_for_metadata, - primary_key, - remote: None, // Local searches don't have a remote - }), + metadata, }; Ok(result) }