mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-10 05:36:35 +00:00
Add header-based metadata opt-in for search responses
- Add Meili-Include-Metadata header constant
- Modify perform_search to conditionally include metadata based on header
- Modify perform_federated_search to conditionally include metadata based on header
- Update all search routes to check for header and pass include_metadata parameter
- Forward Meili-Include-Metadata header to remote requests for federated search
- Ensure remote queries include primaryKey metadata when header is present
This commit is contained in:
@@ -26,7 +26,7 @@ use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, INCLUDE_METADATA_HEADER,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -345,6 +345,11 @@ pub async fn search_with_url_query(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let include_metadata = req
|
||||
.headers()
|
||||
.get(INCLUDE_METADATA_HEADER)
|
||||
.is_some();
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
@@ -354,6 +359,7 @@ pub async fn search_with_url_query(
|
||||
retrieve_vector,
|
||||
index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
@@ -453,6 +459,11 @@ pub async fn search_with_post(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let include_metadata = req
|
||||
.headers()
|
||||
.get(INCLUDE_METADATA_HEADER)
|
||||
.is_some();
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
@@ -463,6 +474,7 @@ pub async fn search_with_post(
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
@@ -21,7 +21,7 @@ use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
|
||||
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -202,6 +202,10 @@ pub async fn multi_search_with_post(
|
||||
.headers()
|
||||
.get(PROXY_SEARCH_HEADER)
|
||||
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
|
||||
let include_metadata = req
|
||||
.headers()
|
||||
.get(INCLUDE_METADATA_HEADER)
|
||||
.is_some();
|
||||
let search_result = perform_federated_search(
|
||||
&index_scheduler,
|
||||
queries,
|
||||
@@ -209,6 +213,7 @@ pub async fn multi_search_with_post(
|
||||
features,
|
||||
is_proxy,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
)
|
||||
.await;
|
||||
permit.drop().await;
|
||||
@@ -228,6 +233,11 @@ pub async fn multi_search_with_post(
|
||||
HttpResponse::Ok().json(search_result?)
|
||||
}
|
||||
None => {
|
||||
let include_metadata = req
|
||||
.headers()
|
||||
.get(INCLUDE_METADATA_HEADER)
|
||||
.is_some();
|
||||
|
||||
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
||||
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
||||
// changes.
|
||||
@@ -286,6 +296,7 @@ pub async fn multi_search_with_post(
|
||||
retrieve_vector,
|
||||
features,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
)
|
||||
})
|
||||
.await
|
||||
|
@@ -4,7 +4,7 @@ mod types;
|
||||
mod weighted_scores;
|
||||
|
||||
pub use perform::perform_federated_search;
|
||||
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE};
|
||||
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER};
|
||||
pub use types::{
|
||||
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
||||
};
|
||||
|
@@ -42,6 +42,7 @@ pub async fn perform_federated_search(
|
||||
features: RoFeatures,
|
||||
is_proxy: bool,
|
||||
request_uid: Uuid,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ResponseError> {
|
||||
if is_proxy {
|
||||
features.check_network("Performing a remote federated search")?;
|
||||
@@ -74,6 +75,7 @@ pub async fn perform_federated_search(
|
||||
partitioned_queries.remote_queries_by_host.clone(),
|
||||
&federation,
|
||||
deadline,
|
||||
include_metadata,
|
||||
);
|
||||
|
||||
// 2.2. concurrently execute local queries
|
||||
@@ -121,48 +123,54 @@ pub async fn perform_federated_search(
|
||||
merge_metadata(&mut results_by_index, &remote_results);
|
||||
|
||||
// 3.1.1. Build metadata in the same order as the original queries
|
||||
let mut query_metadata = Vec::new();
|
||||
let query_metadata = if include_metadata {
|
||||
let mut query_metadata = Vec::new();
|
||||
|
||||
// Create a map of remote results by index_uid for quick lookup
|
||||
let mut remote_results_by_index = std::collections::BTreeMap::new();
|
||||
for remote_result in &remote_results {
|
||||
if let Some(remote_metadata) = &remote_result.metadata {
|
||||
for remote_meta in remote_metadata {
|
||||
remote_results_by_index.insert(remote_meta.index_uid.clone(), remote_meta.clone());
|
||||
// Create a map of remote results by index_uid for quick lookup
|
||||
let mut remote_results_by_index = std::collections::BTreeMap::new();
|
||||
for remote_result in &remote_results {
|
||||
if let Some(remote_metadata) = &remote_result.metadata {
|
||||
for remote_meta in remote_metadata {
|
||||
remote_results_by_index
|
||||
.insert(remote_meta.index_uid.clone(), remote_meta.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build metadata in the same order as the original queries
|
||||
for original_query in original_queries {
|
||||
let query_uid = Uuid::now_v7();
|
||||
let index_uid = original_query.index_uid.to_string();
|
||||
// Build metadata in the same order as the original queries
|
||||
for original_query in original_queries {
|
||||
let query_uid = Uuid::now_v7();
|
||||
let index_uid = original_query.index_uid.to_string();
|
||||
|
||||
// Determine if this is a remote query
|
||||
let (_, _, federation_options) = original_query.into_index_query_federation();
|
||||
let remote = federation_options.and_then(|options| options.remote);
|
||||
// Determine if this is a remote query
|
||||
let (_, _, federation_options) = original_query.into_index_query_federation();
|
||||
let remote = federation_options.and_then(|options| options.remote);
|
||||
|
||||
// Get primary key for this index
|
||||
let mut primary_key = None;
|
||||
// Get primary key for this index
|
||||
let mut primary_key = None;
|
||||
|
||||
if remote.is_some() {
|
||||
// For remote queries, try to get primary key from remote results
|
||||
if let Some(remote_meta) = remote_results_by_index.get(&index_uid) {
|
||||
primary_key = remote_meta.primary_key.clone();
|
||||
if remote.is_some() {
|
||||
// For remote queries, try to get primary key from remote results
|
||||
if let Some(remote_meta) = remote_results_by_index.get(&index_uid) {
|
||||
primary_key = remote_meta.primary_key.clone();
|
||||
}
|
||||
} else {
|
||||
// For local queries, get primary key from local index
|
||||
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
|
||||
index.read_txn().ok().and_then(|rtxn| {
|
||||
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
|
||||
drop(rtxn);
|
||||
pk
|
||||
})
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// For local queries, get primary key from local index
|
||||
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
|
||||
index.read_txn().ok().and_then(|rtxn| {
|
||||
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
|
||||
drop(rtxn);
|
||||
pk
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote });
|
||||
}
|
||||
query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote });
|
||||
}
|
||||
Some(query_metadata)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// 3.2. merge hits
|
||||
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
|
||||
@@ -225,7 +233,7 @@ pub async fn perform_federated_search(
|
||||
facets_by_index,
|
||||
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
|
||||
request_uid: Some(request_uid),
|
||||
metadata: Some(query_metadata),
|
||||
metadata: query_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -624,7 +632,12 @@ struct RemoteSearch {
|
||||
}
|
||||
|
||||
impl RemoteSearch {
|
||||
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
|
||||
fn start(
|
||||
queries: RemoteQueriesByHost,
|
||||
federation: &Federation,
|
||||
deadline: Instant,
|
||||
include_metadata: bool,
|
||||
) -> Self {
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_millis(200))
|
||||
@@ -644,7 +657,10 @@ impl RemoteSearch {
|
||||
// never merge distant facets
|
||||
proxy_federation.merge_facets = None;
|
||||
let params = params.clone();
|
||||
async move { proxy_search(&node, queries, proxy_federation, &params).await }
|
||||
async move {
|
||||
proxy_search(&node, queries, proxy_federation, &params, include_metadata)
|
||||
.await
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
@@ -11,6 +11,7 @@ use crate::search::SearchQueryWithIndex;
|
||||
|
||||
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
|
||||
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
|
||||
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
|
||||
|
||||
mod error {
|
||||
use meilisearch_types::error::ResponseError;
|
||||
@@ -98,6 +99,7 @@ pub async fn proxy_search(
|
||||
queries: Vec<SearchQueryWithIndex>,
|
||||
federation: Federation,
|
||||
params: &ProxySearchParams,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ProxySearchError> {
|
||||
let url = format!("{}/multi-search", node.url);
|
||||
|
||||
@@ -114,7 +116,16 @@ pub async fn proxy_search(
|
||||
};
|
||||
|
||||
for i in 0..params.try_count {
|
||||
match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await {
|
||||
match try_proxy_search(
|
||||
&url,
|
||||
search_api_key,
|
||||
&federated,
|
||||
&params.client,
|
||||
deadline,
|
||||
include_metadata,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => return Ok(response),
|
||||
Err(retry) => {
|
||||
let duration = retry.into_duration(i)?;
|
||||
@@ -122,7 +133,7 @@ pub async fn proxy_search(
|
||||
}
|
||||
}
|
||||
}
|
||||
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline)
|
||||
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata)
|
||||
.await
|
||||
.map_err(Retry::into_error)
|
||||
}
|
||||
@@ -133,6 +144,7 @@ async fn try_proxy_search(
|
||||
federated: &FederatedSearch,
|
||||
client: &Client,
|
||||
deadline: std::time::Instant,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, Retry> {
|
||||
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
|
||||
|
||||
@@ -143,6 +155,8 @@ async fn try_proxy_search(
|
||||
request
|
||||
};
|
||||
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
|
||||
let request =
|
||||
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
|
@@ -43,7 +43,7 @@ use crate::error::MeilisearchHttpError;
|
||||
mod federated;
|
||||
pub use federated::{
|
||||
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
|
||||
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
|
||||
};
|
||||
|
||||
mod ranking_rules;
|
||||
@@ -1146,6 +1146,7 @@ pub fn perform_search(
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
request_uid: Uuid,
|
||||
include_metadata: bool,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
let before_search = Instant::now();
|
||||
let index_uid_for_metadata = index_uid.clone();
|
||||
@@ -1171,8 +1172,18 @@ pub fn perform_search(
|
||||
semantic_hit_count,
|
||||
) = search_from_kind(index_uid.clone(), search_kind, search)?;
|
||||
|
||||
let query_uid = Uuid::now_v7();
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
let metadata = if include_metadata {
|
||||
let query_uid = Uuid::now_v7();
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
Some(SearchMetadata {
|
||||
query_uid,
|
||||
index_uid: index_uid_for_metadata,
|
||||
primary_key,
|
||||
remote: None, // Local searches don't have a remote
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let SearchQuery {
|
||||
q,
|
||||
@@ -1267,12 +1278,7 @@ pub fn perform_search(
|
||||
used_negative_operator,
|
||||
semantic_hit_count,
|
||||
request_uid: Some(request_uid),
|
||||
metadata: Some(SearchMetadata {
|
||||
query_uid,
|
||||
index_uid: index_uid_for_metadata,
|
||||
primary_key,
|
||||
remote: None, // Local searches don't have a remote
|
||||
}),
|
||||
metadata,
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
Reference in New Issue
Block a user