Add header-based metadata opt-in for search responses

- Add Meili-Include-Metadata header constant
- Modify perform_search to conditionally include metadata based on header
- Modify perform_federated_search to conditionally include metadata based on header
- Update all search routes to check for the header's presence (any value opts in) and pass the resulting include_metadata parameter
- Forward Meili-Include-Metadata header to remote requests for federated search
- Ensure remote queries include primaryKey metadata when header is present
This commit is contained in:
ManyTheFish
2025-10-02 11:04:19 +02:00
parent 9fddbff916
commit ad804b60fc
6 changed files with 109 additions and 50 deletions

View File

@@ -26,7 +26,7 @@ use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, INCLUDE_METADATA_HEADER,
};
use crate::search_queue::SearchQueue;
@@ -345,6 +345,11 @@ pub async fn search_with_url_query(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let permit = search_queue.try_get_search_permit().await?;
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
@@ -354,6 +359,7 @@ pub async fn search_with_url_query(
retrieve_vector,
index_scheduler.features(),
request_uid,
include_metadata,
)
})
.await;
@@ -453,6 +459,11 @@ pub async fn search_with_post(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
@@ -463,6 +474,7 @@ pub async fn search_with_post(
retrieve_vectors,
index_scheduler.features(),
request_uid,
include_metadata,
)
})
.await;

View File

@@ -21,7 +21,7 @@ use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
};
use crate::search_queue::SearchQueue;
@@ -202,6 +202,10 @@ pub async fn multi_search_with_post(
.headers()
.get(PROXY_SEARCH_HEADER)
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
let search_result = perform_federated_search(
&index_scheduler,
queries,
@@ -209,6 +213,7 @@ pub async fn multi_search_with_post(
features,
is_proxy,
request_uid,
include_metadata,
)
.await;
permit.drop().await;
@@ -228,6 +233,11 @@ pub async fn multi_search_with_post(
HttpResponse::Ok().json(search_result?)
}
None => {
let include_metadata = req
.headers()
.get(INCLUDE_METADATA_HEADER)
.is_some();
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes.
@@ -286,6 +296,7 @@ pub async fn multi_search_with_post(
retrieve_vector,
features,
request_uid,
include_metadata,
)
})
.await

View File

@@ -4,7 +4,7 @@ mod types;
mod weighted_scores;
pub use perform::perform_federated_search;
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE};
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER};
pub use types::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
};

View File

@@ -42,6 +42,7 @@ pub async fn perform_federated_search(
features: RoFeatures,
is_proxy: bool,
request_uid: Uuid,
include_metadata: bool,
) -> Result<FederatedSearchResult, ResponseError> {
if is_proxy {
features.check_network("Performing a remote federated search")?;
@@ -74,6 +75,7 @@ pub async fn perform_federated_search(
partitioned_queries.remote_queries_by_host.clone(),
&federation,
deadline,
include_metadata,
);
// 2.2. concurrently execute local queries
@@ -121,48 +123,54 @@ pub async fn perform_federated_search(
merge_metadata(&mut results_by_index, &remote_results);
// 3.1.1. Build metadata in the same order as the original queries
let mut query_metadata = Vec::new();
let query_metadata = if include_metadata {
let mut query_metadata = Vec::new();
// Create a map of remote results by index_uid for quick lookup
let mut remote_results_by_index = std::collections::BTreeMap::new();
for remote_result in &remote_results {
if let Some(remote_metadata) = &remote_result.metadata {
for remote_meta in remote_metadata {
remote_results_by_index.insert(remote_meta.index_uid.clone(), remote_meta.clone());
// Create a map of remote results by index_uid for quick lookup
let mut remote_results_by_index = std::collections::BTreeMap::new();
for remote_result in &remote_results {
if let Some(remote_metadata) = &remote_result.metadata {
for remote_meta in remote_metadata {
remote_results_by_index
.insert(remote_meta.index_uid.clone(), remote_meta.clone());
}
}
}
}
// Build metadata in the same order as the original queries
for original_query in original_queries {
let query_uid = Uuid::now_v7();
let index_uid = original_query.index_uid.to_string();
// Build metadata in the same order as the original queries
for original_query in original_queries {
let query_uid = Uuid::now_v7();
let index_uid = original_query.index_uid.to_string();
// Determine if this is a remote query
let (_, _, federation_options) = original_query.into_index_query_federation();
let remote = federation_options.and_then(|options| options.remote);
// Determine if this is a remote query
let (_, _, federation_options) = original_query.into_index_query_federation();
let remote = federation_options.and_then(|options| options.remote);
// Get primary key for this index
let mut primary_key = None;
// Get primary key for this index
let mut primary_key = None;
if remote.is_some() {
// For remote queries, try to get primary key from remote results
if let Some(remote_meta) = remote_results_by_index.get(&index_uid) {
primary_key = remote_meta.primary_key.clone();
if remote.is_some() {
// For remote queries, try to get primary key from remote results
if let Some(remote_meta) = remote_results_by_index.get(&index_uid) {
primary_key = remote_meta.primary_key.clone();
}
} else {
// For local queries, get primary key from local index
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
index.read_txn().ok().and_then(|rtxn| {
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
drop(rtxn);
pk
})
});
}
} else {
// For local queries, get primary key from local index
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
index.read_txn().ok().and_then(|rtxn| {
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
drop(rtxn);
pk
})
});
}
query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote });
}
query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote });
}
Some(query_metadata)
} else {
None
};
// 3.2. merge hits
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
@@ -225,7 +233,7 @@ pub async fn perform_federated_search(
facets_by_index,
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
request_uid: Some(request_uid),
metadata: Some(query_metadata),
metadata: query_metadata,
})
}
@@ -624,7 +632,12 @@ struct RemoteSearch {
}
impl RemoteSearch {
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
fn start(
queries: RemoteQueriesByHost,
federation: &Federation,
deadline: Instant,
include_metadata: bool,
) -> Self {
let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_millis(200))
@@ -644,7 +657,10 @@ impl RemoteSearch {
// never merge distant facets
proxy_federation.merge_facets = None;
let params = params.clone();
async move { proxy_search(&node, queries, proxy_federation, &params).await }
async move {
proxy_search(&node, queries, proxy_federation, &params, include_metadata)
.await
}
}),
);
}

View File

@@ -11,6 +11,7 @@ use crate::search::SearchQueryWithIndex;
/// Name of the HTTP header set on search requests forwarded to a remote node.
///
/// The multi-search route compares the incoming header value against
/// [`PROXY_SEARCH_HEADER_VALUE`] to decide whether the request is a proxied one.
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
/// Value sent with [`PROXY_SEARCH_HEADER`] on forwarded search requests.
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
/// Name of the HTTP header a client sends to opt in to metadata in search responses.
///
/// The search routes only test for the header's presence; its value is not inspected.
/// When the flag is set, it is forwarded to remote nodes with the value `"true"`.
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
mod error {
use meilisearch_types::error::ResponseError;
@@ -98,6 +99,7 @@ pub async fn proxy_search(
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
params: &ProxySearchParams,
include_metadata: bool,
) -> Result<FederatedSearchResult, ProxySearchError> {
let url = format!("{}/multi-search", node.url);
@@ -114,7 +116,16 @@ pub async fn proxy_search(
};
for i in 0..params.try_count {
match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await {
match try_proxy_search(
&url,
search_api_key,
&federated,
&params.client,
deadline,
include_metadata,
)
.await
{
Ok(response) => return Ok(response),
Err(retry) => {
let duration = retry.into_duration(i)?;
@@ -122,7 +133,7 @@ pub async fn proxy_search(
}
}
}
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline)
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata)
.await
.map_err(Retry::into_error)
}
@@ -133,6 +144,7 @@ async fn try_proxy_search(
federated: &FederatedSearch,
client: &Client,
deadline: std::time::Instant,
include_metadata: bool,
) -> Result<FederatedSearchResult, Retry> {
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
@@ -143,6 +155,8 @@ async fn try_proxy_search(
request
};
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
let request =
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
let response = request.send().await;
let response = match response {

View File

@@ -43,7 +43,7 @@ use crate::error::MeilisearchHttpError;
mod federated;
pub use federated::{
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
};
mod ranking_rules;
@@ -1146,6 +1146,7 @@ pub fn perform_search(
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
request_uid: Uuid,
include_metadata: bool,
) -> Result<SearchResult, ResponseError> {
let before_search = Instant::now();
let index_uid_for_metadata = index_uid.clone();
@@ -1171,8 +1172,18 @@ pub fn perform_search(
semantic_hit_count,
) = search_from_kind(index_uid.clone(), search_kind, search)?;
let query_uid = Uuid::now_v7();
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
let metadata = if include_metadata {
let query_uid = Uuid::now_v7();
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
Some(SearchMetadata {
query_uid,
index_uid: index_uid_for_metadata,
primary_key,
remote: None, // Local searches don't have a remote
})
} else {
None
};
let SearchQuery {
q,
@@ -1267,12 +1278,7 @@ pub fn perform_search(
used_negative_operator,
semantic_hit_count,
request_uid: Some(request_uid),
metadata: Some(SearchMetadata {
query_uid,
index_uid: index_uid_for_metadata,
primary_key,
remote: None, // Local searches don't have a remote
}),
metadata,
};
Ok(result)
}