mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-17 09:06:27 +00:00
Add header-based metadata opt-in for search responses
- Add the `Meili-Include-Metadata` header constant (`INCLUDE_METADATA_HEADER`)
- Modify `perform_search` to include metadata only when the header is present
- Modify `perform_federated_search` to include metadata only when the header is present
- Update all search routes to check for the header and pass the `include_metadata` parameter
- Forward the `Meili-Include-Metadata` header to remote requests for federated search
- Ensure remote queries include `primaryKey` metadata when the header is present
This commit is contained in:
@@ -26,7 +26,7 @@ use crate::search::{
|
|||||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||||
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, INCLUDE_METADATA_HEADER,
|
||||||
};
|
};
|
||||||
use crate::search_queue::SearchQueue;
|
use crate::search_queue::SearchQueue;
|
||||||
|
|
||||||
@@ -345,6 +345,11 @@ pub async fn search_with_url_query(
|
|||||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||||
let permit = search_queue.try_get_search_permit().await?;
|
let permit = search_queue.try_get_search_permit().await?;
|
||||||
|
let include_metadata = req
|
||||||
|
.headers()
|
||||||
|
.get(INCLUDE_METADATA_HEADER)
|
||||||
|
.is_some();
|
||||||
|
|
||||||
let search_result = tokio::task::spawn_blocking(move || {
|
let search_result = tokio::task::spawn_blocking(move || {
|
||||||
perform_search(
|
perform_search(
|
||||||
index_uid.to_string(),
|
index_uid.to_string(),
|
||||||
@@ -354,6 +359,7 @@ pub async fn search_with_url_query(
|
|||||||
retrieve_vector,
|
retrieve_vector,
|
||||||
index_scheduler.features(),
|
index_scheduler.features(),
|
||||||
request_uid,
|
request_uid,
|
||||||
|
include_metadata,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
@@ -453,6 +459,11 @@ pub async fn search_with_post(
|
|||||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||||
|
|
||||||
|
let include_metadata = req
|
||||||
|
.headers()
|
||||||
|
.get(INCLUDE_METADATA_HEADER)
|
||||||
|
.is_some();
|
||||||
|
|
||||||
let permit = search_queue.try_get_search_permit().await?;
|
let permit = search_queue.try_get_search_permit().await?;
|
||||||
let search_result = tokio::task::spawn_blocking(move || {
|
let search_result = tokio::task::spawn_blocking(move || {
|
||||||
perform_search(
|
perform_search(
|
||||||
@@ -463,6 +474,7 @@ pub async fn search_with_post(
|
|||||||
retrieve_vectors,
|
retrieve_vectors,
|
||||||
index_scheduler.features(),
|
index_scheduler.features(),
|
||||||
request_uid,
|
request_uid,
|
||||||
|
include_metadata,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
@@ -21,7 +21,7 @@ use crate::routes::indexes::search::search_kind;
|
|||||||
use crate::search::{
|
use crate::search::{
|
||||||
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
|
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
|
||||||
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
||||||
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
|
||||||
};
|
};
|
||||||
use crate::search_queue::SearchQueue;
|
use crate::search_queue::SearchQueue;
|
||||||
|
|
||||||
@@ -202,6 +202,10 @@ pub async fn multi_search_with_post(
|
|||||||
.headers()
|
.headers()
|
||||||
.get(PROXY_SEARCH_HEADER)
|
.get(PROXY_SEARCH_HEADER)
|
||||||
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
|
.is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes());
|
||||||
|
let include_metadata = req
|
||||||
|
.headers()
|
||||||
|
.get(INCLUDE_METADATA_HEADER)
|
||||||
|
.is_some();
|
||||||
let search_result = perform_federated_search(
|
let search_result = perform_federated_search(
|
||||||
&index_scheduler,
|
&index_scheduler,
|
||||||
queries,
|
queries,
|
||||||
@@ -209,6 +213,7 @@ pub async fn multi_search_with_post(
|
|||||||
features,
|
features,
|
||||||
is_proxy,
|
is_proxy,
|
||||||
request_uid,
|
request_uid,
|
||||||
|
include_metadata,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
permit.drop().await;
|
permit.drop().await;
|
||||||
@@ -228,6 +233,11 @@ pub async fn multi_search_with_post(
|
|||||||
HttpResponse::Ok().json(search_result?)
|
HttpResponse::Ok().json(search_result?)
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
let include_metadata = req
|
||||||
|
.headers()
|
||||||
|
.get(INCLUDE_METADATA_HEADER)
|
||||||
|
.is_some();
|
||||||
|
|
||||||
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
||||||
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
||||||
// changes.
|
// changes.
|
||||||
@@ -286,6 +296,7 @@ pub async fn multi_search_with_post(
|
|||||||
retrieve_vector,
|
retrieve_vector,
|
||||||
features,
|
features,
|
||||||
request_uid,
|
request_uid,
|
||||||
|
include_metadata,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
|
@@ -4,7 +4,7 @@ mod types;
|
|||||||
mod weighted_scores;
|
mod weighted_scores;
|
||||||
|
|
||||||
pub use perform::perform_federated_search;
|
pub use perform::perform_federated_search;
|
||||||
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE};
|
pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER};
|
||||||
pub use types::{
|
pub use types::{
|
||||||
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
||||||
};
|
};
|
||||||
|
@@ -42,6 +42,7 @@ pub async fn perform_federated_search(
|
|||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
is_proxy: bool,
|
is_proxy: bool,
|
||||||
request_uid: Uuid,
|
request_uid: Uuid,
|
||||||
|
include_metadata: bool,
|
||||||
) -> Result<FederatedSearchResult, ResponseError> {
|
) -> Result<FederatedSearchResult, ResponseError> {
|
||||||
if is_proxy {
|
if is_proxy {
|
||||||
features.check_network("Performing a remote federated search")?;
|
features.check_network("Performing a remote federated search")?;
|
||||||
@@ -80,6 +81,7 @@ pub async fn perform_federated_search(
|
|||||||
partitioned_queries.remote_queries_by_host.clone(),
|
partitioned_queries.remote_queries_by_host.clone(),
|
||||||
&federation,
|
&federation,
|
||||||
deadline,
|
deadline,
|
||||||
|
include_metadata,
|
||||||
);
|
);
|
||||||
|
|
||||||
// 2.2. concurrently execute local queries
|
// 2.2. concurrently execute local queries
|
||||||
@@ -127,48 +129,54 @@ pub async fn perform_federated_search(
|
|||||||
merge_metadata(&mut results_by_index, &remote_results);
|
merge_metadata(&mut results_by_index, &remote_results);
|
||||||
|
|
||||||
// 3.1.1. Build metadata in the same order as the original queries
|
// 3.1.1. Build metadata in the same order as the original queries
|
||||||
let mut query_metadata = Vec::new();
|
let query_metadata = if include_metadata {
|
||||||
|
let mut query_metadata = Vec::new();
|
||||||
|
|
||||||
// Create a map of remote results by index_uid for quick lookup
|
// Create a map of remote results by index_uid for quick lookup
|
||||||
let mut remote_results_by_index = std::collections::BTreeMap::new();
|
let mut remote_results_by_index = std::collections::BTreeMap::new();
|
||||||
for remote_result in &remote_results {
|
for remote_result in &remote_results {
|
||||||
if let Some(remote_metadata) = &remote_result.metadata {
|
if let Some(remote_metadata) = &remote_result.metadata {
|
||||||
for remote_meta in remote_metadata {
|
for remote_meta in remote_metadata {
|
||||||
remote_results_by_index.insert(remote_meta.index_uid.clone(), remote_meta.clone());
|
remote_results_by_index
|
||||||
|
.insert(remote_meta.index_uid.clone(), remote_meta.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Build metadata in the same order as the original queries
|
// Build metadata in the same order as the original queries
|
||||||
for original_query in original_queries {
|
for original_query in original_queries {
|
||||||
let query_uid = Uuid::now_v7();
|
let query_uid = Uuid::now_v7();
|
||||||
let index_uid = original_query.index_uid.to_string();
|
let index_uid = original_query.index_uid.to_string();
|
||||||
|
|
||||||
// Determine if this is a remote query
|
// Determine if this is a remote query
|
||||||
let (_, _, federation_options) = original_query.into_index_query_federation();
|
let (_, _, federation_options) = original_query.into_index_query_federation();
|
||||||
let remote = federation_options.and_then(|options| options.remote);
|
let remote = federation_options.and_then(|options| options.remote);
|
||||||
|
|
||||||
// Get primary key for this index
|
// Get primary key for this index
|
||||||
let mut primary_key = None;
|
let mut primary_key = None;
|
||||||
|
|
||||||
if remote.is_some() {
|
if remote.is_some() {
|
||||||
// For remote queries, try to get primary key from remote results
|
// For remote queries, try to get primary key from remote results
|
||||||
if let Some(remote_meta) = remote_results_by_index.get(&index_uid) {
|
if let Some(remote_meta) = remote_results_by_index.get(&index_uid) {
|
||||||
primary_key = remote_meta.primary_key.clone();
|
primary_key = remote_meta.primary_key.clone();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// For local queries, get primary key from local index
|
||||||
|
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
|
||||||
|
index.read_txn().ok().and_then(|rtxn| {
|
||||||
|
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
|
||||||
|
drop(rtxn);
|
||||||
|
pk
|
||||||
|
})
|
||||||
|
});
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// For local queries, get primary key from local index
|
|
||||||
primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| {
|
|
||||||
index.read_txn().ok().and_then(|rtxn| {
|
|
||||||
let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string());
|
|
||||||
drop(rtxn);
|
|
||||||
pk
|
|
||||||
})
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote });
|
query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote });
|
||||||
}
|
}
|
||||||
|
Some(query_metadata)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
// 3.2. merge hits
|
// 3.2. merge hits
|
||||||
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
|
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
|
||||||
@@ -231,7 +239,7 @@ pub async fn perform_federated_search(
|
|||||||
facets_by_index,
|
facets_by_index,
|
||||||
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
|
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
|
||||||
request_uid: Some(request_uid),
|
request_uid: Some(request_uid),
|
||||||
metadata: Some(query_metadata),
|
metadata: query_metadata,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -630,7 +638,12 @@ struct RemoteSearch {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl RemoteSearch {
|
impl RemoteSearch {
|
||||||
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
|
fn start(
|
||||||
|
queries: RemoteQueriesByHost,
|
||||||
|
federation: &Federation,
|
||||||
|
deadline: Instant,
|
||||||
|
include_metadata: bool,
|
||||||
|
) -> Self {
|
||||||
let mut in_flight_remote_queries = BTreeMap::new();
|
let mut in_flight_remote_queries = BTreeMap::new();
|
||||||
let client = reqwest::ClientBuilder::new()
|
let client = reqwest::ClientBuilder::new()
|
||||||
.connect_timeout(std::time::Duration::from_millis(200))
|
.connect_timeout(std::time::Duration::from_millis(200))
|
||||||
@@ -650,7 +663,10 @@ impl RemoteSearch {
|
|||||||
// never merge distant facets
|
// never merge distant facets
|
||||||
proxy_federation.merge_facets = None;
|
proxy_federation.merge_facets = None;
|
||||||
let params = params.clone();
|
let params = params.clone();
|
||||||
async move { proxy_search(&node, queries, proxy_federation, ¶ms).await }
|
async move {
|
||||||
|
proxy_search(&node, queries, proxy_federation, ¶ms, include_metadata)
|
||||||
|
.await
|
||||||
|
}
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@@ -11,6 +11,7 @@ use crate::search::SearchQueryWithIndex;
|
|||||||
|
|
||||||
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
|
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
|
||||||
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
|
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
|
||||||
|
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
|
||||||
|
|
||||||
mod error {
|
mod error {
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
@@ -98,6 +99,7 @@ pub async fn proxy_search(
|
|||||||
queries: Vec<SearchQueryWithIndex>,
|
queries: Vec<SearchQueryWithIndex>,
|
||||||
federation: Federation,
|
federation: Federation,
|
||||||
params: &ProxySearchParams,
|
params: &ProxySearchParams,
|
||||||
|
include_metadata: bool,
|
||||||
) -> Result<FederatedSearchResult, ProxySearchError> {
|
) -> Result<FederatedSearchResult, ProxySearchError> {
|
||||||
let url = format!("{}/multi-search", node.url);
|
let url = format!("{}/multi-search", node.url);
|
||||||
|
|
||||||
@@ -119,7 +121,16 @@ pub async fn proxy_search(
|
|||||||
};
|
};
|
||||||
|
|
||||||
for i in 0..params.try_count {
|
for i in 0..params.try_count {
|
||||||
match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await {
|
match try_proxy_search(
|
||||||
|
&url,
|
||||||
|
search_api_key,
|
||||||
|
&federated,
|
||||||
|
¶ms.client,
|
||||||
|
deadline,
|
||||||
|
include_metadata,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
Ok(response) => return Ok(response),
|
Ok(response) => return Ok(response),
|
||||||
Err(retry) => {
|
Err(retry) => {
|
||||||
let duration = retry.into_duration(i)?;
|
let duration = retry.into_duration(i)?;
|
||||||
@@ -127,7 +138,7 @@ pub async fn proxy_search(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline)
|
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline, include_metadata)
|
||||||
.await
|
.await
|
||||||
.map_err(Retry::into_error)
|
.map_err(Retry::into_error)
|
||||||
}
|
}
|
||||||
@@ -138,6 +149,7 @@ async fn try_proxy_search(
|
|||||||
federated: &FederatedSearch,
|
federated: &FederatedSearch,
|
||||||
client: &Client,
|
client: &Client,
|
||||||
deadline: std::time::Instant,
|
deadline: std::time::Instant,
|
||||||
|
include_metadata: bool,
|
||||||
) -> Result<FederatedSearchResult, Retry> {
|
) -> Result<FederatedSearchResult, Retry> {
|
||||||
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
|
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
|
||||||
|
|
||||||
@@ -148,6 +160,8 @@ async fn try_proxy_search(
|
|||||||
request
|
request
|
||||||
};
|
};
|
||||||
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
|
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
|
||||||
|
let request =
|
||||||
|
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
|
||||||
|
|
||||||
let response = request.send().await;
|
let response = request.send().await;
|
||||||
let response = match response {
|
let response = match response {
|
||||||
|
@@ -43,7 +43,7 @@ use crate::error::MeilisearchHttpError;
|
|||||||
mod federated;
|
mod federated;
|
||||||
pub use federated::{
|
pub use federated::{
|
||||||
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
|
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
|
||||||
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER,
|
||||||
};
|
};
|
||||||
|
|
||||||
mod ranking_rules;
|
mod ranking_rules;
|
||||||
@@ -1146,6 +1146,7 @@ pub fn perform_search(
|
|||||||
retrieve_vectors: RetrieveVectors,
|
retrieve_vectors: RetrieveVectors,
|
||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
request_uid: Uuid,
|
request_uid: Uuid,
|
||||||
|
include_metadata: bool,
|
||||||
) -> Result<SearchResult, ResponseError> {
|
) -> Result<SearchResult, ResponseError> {
|
||||||
let before_search = Instant::now();
|
let before_search = Instant::now();
|
||||||
let index_uid_for_metadata = index_uid.clone();
|
let index_uid_for_metadata = index_uid.clone();
|
||||||
@@ -1171,8 +1172,18 @@ pub fn perform_search(
|
|||||||
semantic_hit_count,
|
semantic_hit_count,
|
||||||
) = search_from_kind(index_uid.clone(), search_kind, search)?;
|
) = search_from_kind(index_uid.clone(), search_kind, search)?;
|
||||||
|
|
||||||
let query_uid = Uuid::now_v7();
|
let metadata = if include_metadata {
|
||||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
let query_uid = Uuid::now_v7();
|
||||||
|
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||||
|
Some(SearchMetadata {
|
||||||
|
query_uid,
|
||||||
|
index_uid: index_uid_for_metadata,
|
||||||
|
primary_key,
|
||||||
|
remote: None, // Local searches don't have a remote
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let SearchQuery {
|
let SearchQuery {
|
||||||
q,
|
q,
|
||||||
@@ -1267,12 +1278,7 @@ pub fn perform_search(
|
|||||||
used_negative_operator,
|
used_negative_operator,
|
||||||
semantic_hit_count,
|
semantic_hit_count,
|
||||||
request_uid: Some(request_uid),
|
request_uid: Some(request_uid),
|
||||||
metadata: Some(SearchMetadata {
|
metadata,
|
||||||
query_uid,
|
|
||||||
index_uid: index_uid_for_metadata,
|
|
||||||
primary_key,
|
|
||||||
remote: None, // Local searches don't have a remote
|
|
||||||
}),
|
|
||||||
};
|
};
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user