Add header-based metadata opt-in for search responses

- Add Meili-Include-Metadata header constant
- Modify perform_search to conditionally include metadata based on header
- Modify perform_federated_search to conditionally include metadata based on header
- Update all search routes to check for header and pass include_metadata parameter
- Forward Meili-Include-Metadata header to remote requests for federated search
- Ensure remote queries include primaryKey metadata when header is present
This commit is contained in:
ManyTheFish
2025-10-02 11:04:19 +02:00
parent b93b803a2e
commit da06306274
6 changed files with 109 additions and 50 deletions

View File

@@ -26,7 +26,7 @@ use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, INCLUDE_METADATA_HEADER,
}; };
use crate::search_queue::SearchQueue; use crate::search_queue::SearchQueue;
@@ -345,6 +345,11 @@ pub async fn search_with_url_query(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors); let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let permit = search_queue.try_get_search_permit().await?; let permit = search_queue.try_get_search_permit().await?;
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
let search_result = tokio::task::spawn_blocking(move || { let search_result = tokio::task::spawn_blocking(move || {
perform_search( perform_search(
index_uid.to_string(), index_uid.to_string(),
@@ -354,6 +359,7 @@ pub async fn search_with_url_query(
retrieve_vector, retrieve_vector,
index_scheduler.features(), index_scheduler.features(),
request_uid, request_uid,
include_metadata,
) )
}) })
.await; .await;
@@ -453,6 +459,11 @@ pub async fn search_with_post(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
let permit = search_queue.try_get_search_permit().await?; let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || { let search_result = tokio::task::spawn_blocking(move || {
perform_search( perform_search(
@@ -463,6 +474,7 @@ pub async fn search_with_post(
retrieve_vectors, retrieve_vectors,
index_scheduler.features(), index_scheduler.features(),
request_uid, request_uid,
include_metadata,
) )
}) })
.await; .await;

View File

@@ -21,7 +21,7 @@ use crate::routes::indexes::search::search_kind;
use crate::search::{ use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch, add_search_rules, perform_federated_search, perform_search, FederatedSearch,
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
}; };
use crate::search_queue::SearchQueue; use crate::search_queue::SearchQueue;
@@ -202,6 +202,10 @@ pub async fn multi_search_with_post(
.headers() .headers()
.get(PROXY_SEARCH_HEADER) .get(PROXY_SEARCH_HEADER)
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
let search_result = perform_federated_search( let search_result = perform_federated_search(
&index_scheduler, &index_scheduler,
queries, queries,
@@ -209,6 +213,7 @@ pub async fn multi_search_with_post(
features, features,
is_proxy, is_proxy,
request_uid, request_uid,
include_metadata,
) )
.await; .await;
permit.drop().await; permit.drop().await;
@@ -228,6 +233,11 @@ pub async fn multi_search_with_post(
HttpResponse::Ok().json(search_result?) HttpResponse::Ok().json(search_result?)
} }
None => { None => {
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes. // changes.
@@ -286,6 +296,7 @@ pub async fn multi_search_with_post(
retrieve_vector, retrieve_vector,
features, features,
request_uid, request_uid,
include_metadata,
) )
}) })
.await .await

View File

@@ -4,7 +4,7 @@ mod types;
mod weighted_scores; mod weighted_scores;
pub use perform::perform_federated_search; pub use perform::perform_federated_search;
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER};
pub use types::{ pub use types::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
}; };

View File

@@ -42,6 +42,7 @@ pub async fn perform_federated_search(
features: RoFeatures, features: RoFeatures,
is_proxy: bool, is_proxy: bool,
request_uid: Uuid, request_uid: Uuid,
include_metadata: bool,
) -> Result<FederatedSearchResult, ResponseError> { ) -> Result<FederatedSearchResult, ResponseError> {
if is_proxy { if is_proxy {
features.check_network("Performing a remote federated search")?; features.check_network("Performing a remote federated search")?;
@@ -80,6 +81,7 @@ pub async fn perform_federated_search(
partitioned_queries.remote_queries_by_host.clone(), partitioned_queries.remote_queries_by_host.clone(),
&federation, &federation,
deadline, deadline,
include_metadata,
); );
// 2.2. concurrently execute local queries // 2.2. concurrently execute local queries
@@ -127,48 +129,54 @@ pub async fn perform_federated_search(
merge_metadata(&mut results_by_index, &remote_results); merge_metadata(&mut results_by_index, &remote_results);
// 3.1.1. Build metadata in the same order as the original queries // 3.1.1. Build metadata in the same order as the original queries
let mut query_metadata = Vec::new(); let query_metadata = if include_metadata {
let mut query_metadata = Vec::new();
// Create a map of remote results by index_uid for quick lookup // Create a map of remote results by index_uid for quick lookup
let mut remote_results_by_index = std::collections::BTreeMap::new(); let mut remote_results_by_index = std::collections::BTreeMap::new();
for remote_result in &remote_results { for remote_result in &remote_results {
if let Some(remote_metadata) = &remote_result.metadata { if let Some(remote_metadata) = &remote_result.metadata {
for remote_meta in remote_metadata { for remote_meta in remote_metadata {
remote_results_by_index.insert(remote_meta.index_uid.clone(), remote_meta.clone()); remote_results_by_index
.insert(remote_meta.index_uid.clone(), remote_meta.clone());
}
} }
} }
}
// Build metadata in the same order as the original queries // Build metadata in the same order as the original queries
for original_query in original_queries { for original_query in original_queries {
let query_uid = Uuid::now_v7(); let query_uid = Uuid::now_v7();
let index_uid = original_query.index_uid.to_string(); let index_uid = original_query.index_uid.to_string();
// Determine if this is a remote query // Determine if this is a remote query
let (_, _, federation_options) = original_query.into_index_query_federation(); let (_, _, federation_options) = original_query.into_index_query_federation();
let remote = federation_options.and_then(|options| options.remote); let remote = federation_options.and_then(|options| options.remote);
// Get primary key for this index // Get primary key for this index
let mut primary_key = None; let mut primary_key = None;
if remote.is_some() { if remote.is_some() {
// For remote queries, try to get primary key from remote results // For remote queries, try to get primary key from remote results
if let Some(remote_meta) = remote_results_by_index.get(&index_uid) { if let Some(remote_meta) = remote_results_by_index.get(&index_uid) {
primary_key = remote_meta.primary_key.clone(); primary_key = remote_meta.primary_key.clone();
}
} else {
// For local queries, get primary key from local index
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
index.read_txn().ok().and_then(|rtxn| {
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
drop(rtxn);
pk
})
});
} }
} else {
// For local queries, get primary key from local index
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
index.read_txn().ok().and_then(|rtxn| {
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
drop(rtxn);
pk
})
});
}
query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote });
} }
Some(query_metadata)
} else {
None
};
// 3.2. merge hits // 3.2. merge hits
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results) let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
@@ -231,7 +239,7 @@ pub async fn perform_federated_search(
facets_by_index, facets_by_index,
remote_errors: partitioned_queries.has_remote.then_some(remote_errors), remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
request_uid: Some(request_uid), request_uid: Some(request_uid),
metadata: Some(query_metadata), metadata: query_metadata,
}) })
} }
@@ -630,7 +638,12 @@ struct RemoteSearch {
} }
impl RemoteSearch { impl RemoteSearch {
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self { fn start(
queries: RemoteQueriesByHost,
federation: &Federation,
deadline: Instant,
include_metadata: bool,
) -> Self {
let mut in_flight_remote_queries = BTreeMap::new(); let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new() let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_millis(200)) .connect_timeout(std::time::Duration::from_millis(200))
@@ -650,7 +663,10 @@ impl RemoteSearch {
// never merge distant facets // never merge distant facets
proxy_federation.merge_facets = None; proxy_federation.merge_facets = None;
let params = params.clone(); let params = params.clone();
async move { proxy_search(&node, queries, proxy_federation, &params).await } async move {
proxy_search(&node, queries, proxy_federation, &params, include_metadata)
.await
}
}), }),
); );
} }

View File

@@ -11,6 +11,7 @@ use crate::search::SearchQueryWithIndex;
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
/// Name of the HTTP request header that opts a search request in to response metadata.
///
/// The search routes only test for the header's presence (`.get(..).is_some()`); the header
/// value itself is not inspected. When a federated search proxies queries to a remote node
/// with metadata enabled, this header is forwarded with the value `"true"`.
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
mod error { mod error {
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
@@ -98,6 +99,7 @@ pub async fn proxy_search(
queries: Vec<SearchQueryWithIndex>, queries: Vec<SearchQueryWithIndex>,
federation: Federation, federation: Federation,
params: &ProxySearchParams, params: &ProxySearchParams,
include_metadata: bool,
) -> Result<FederatedSearchResult, ProxySearchError> { ) -> Result<FederatedSearchResult, ProxySearchError> {
let url = format!("{}/multi-search", node.url); let url = format!("{}/multi-search", node.url);
@@ -119,7 +121,16 @@ pub async fn proxy_search(
}; };
for i in 0..params.try_count { for i in 0..params.try_count {
match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await { match try_proxy_search(
&url,
search_api_key,
&federated,
&params.client,
deadline,
include_metadata,
)
.await
{
Ok(response) => return Ok(response), Ok(response) => return Ok(response),
Err(retry) => { Err(retry) => {
let duration = retry.into_duration(i)?; let duration = retry.into_duration(i)?;
@@ -127,7 +138,7 @@ pub async fn proxy_search(
} }
} }
} }
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline) try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata)
.await .await
.map_err(Retry::into_error) .map_err(Retry::into_error)
} }
@@ -138,6 +149,7 @@ async fn try_proxy_search(
federated: &FederatedSearch, federated: &FederatedSearch,
client: &Client, client: &Client,
deadline: std::time::Instant, deadline: std::time::Instant,
include_metadata: bool,
) -> Result<FederatedSearchResult, Retry> { ) -> Result<FederatedSearchResult, Retry> {
let timeout = deadline.saturating_duration_since(std::time::Instant::now()); let timeout = deadline.saturating_duration_since(std::time::Instant::now());
@@ -148,6 +160,8 @@ async fn try_proxy_search(
request request
}; };
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE); let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
let request =
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
let response = request.send().await; let response = request.send().await;
let response = match response { let response = match response {

View File

@@ -43,7 +43,7 @@ use crate::error::MeilisearchHttpError;
mod federated; mod federated;
pub use federated::{ pub use federated::{
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
}; };
mod ranking_rules; mod ranking_rules;
@@ -1146,6 +1146,7 @@ pub fn perform_search(
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
features: RoFeatures, features: RoFeatures,
request_uid: Uuid, request_uid: Uuid,
include_metadata: bool,
) -> Result<SearchResult, ResponseError> { ) -> Result<SearchResult, ResponseError> {
let before_search = Instant::now(); let before_search = Instant::now();
let index_uid_for_metadata = index_uid.clone(); let index_uid_for_metadata = index_uid.clone();
@@ -1171,8 +1172,18 @@ pub fn perform_search(
semantic_hit_count, semantic_hit_count,
) = search_from_kind(index_uid.clone(), search_kind, search)?; ) = search_from_kind(index_uid.clone(), search_kind, search)?;
let query_uid = Uuid::now_v7(); let metadata = if include_metadata {
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); let query_uid = Uuid::now_v7();
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
Some(SearchMetadata {
query_uid,
index_uid: index_uid_for_metadata,
primary_key,
remote: None, // Local searches don't have a remote
})
} else {
None
};
let SearchQuery { let SearchQuery {
q, q,
@@ -1267,12 +1278,7 @@ pub fn perform_search(
used_negative_operator, used_negative_operator,
semantic_hit_count, semantic_hit_count,
request_uid: Some(request_uid), request_uid: Some(request_uid),
metadata: Some(SearchMetadata { metadata,
query_uid,
index_uid: index_uid_for_metadata,
primary_key,
remote: None, // Local searches don't have a remote
}),
}; };
Ok(result) Ok(result)
} }