From 9795d98e77f65448181adf5c4e212a8fab7d4f28 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 30 Sep 2025 15:22:35 +0200 Subject: [PATCH 01/15] feat: add metadata field with queryUid to search responses - Add SearchMetadata struct with queryUid field (UUID v7) - Add metadata field to SearchResult for /search route - Add metadata field to FederatedSearchResult for /multi-search route - Update perform_search to generate queryUid and set metadata - Update federated search to generate queryUid for each query - Update multi-search non-federated path to include metadata - Fix pattern matching in analytics and other code The metadata field contains: - For /search: single object with queryUid - For /multi-search: array of objects, one per query - For federated search: array of objects, one per query All queryUid values are generated using Uuid::now_v7() for time-ordered uniqueness. --- .../src/routes/indexes/search_analytics.rs | 1 + .../meilisearch/src/search/federated/perform.rs | 6 ++++++ crates/meilisearch/src/search/federated/types.rs | 8 ++++++++ crates/meilisearch/src/search/mod.rs | 15 +++++++++++++++ 4 files changed, 30 insertions(+) diff --git a/crates/meilisearch/src/routes/indexes/search_analytics.rs b/crates/meilisearch/src/routes/indexes/search_analytics.rs index 9f095b007..09045fc4a 100644 --- a/crates/meilisearch/src/routes/indexes/search_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/search_analytics.rs @@ -235,6 +235,7 @@ impl SearchAggregator { degraded, used_negative_operator, request_uid: _, + metadata: _, } = result; self.total_succeeded = self.total_succeeded.saturating_add(1); diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index bef2a0614..d42b17c34 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -20,6 +20,7 @@ use tokio::task::JoinHandle; use uuid::Uuid; use 
super::super::ranking_rules::{self, RankingRules}; +use super::super::SearchMetadata; use super::super::{ compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, @@ -65,7 +66,10 @@ pub async fn perform_federated_search( // 1. partition queries by host and index let mut partitioned_queries = PartitionedQueries::new(); + let mut query_metadata = Vec::new(); for (query_index, federated_query) in queries.into_iter().enumerate() { + let query_uid = Uuid::now_v7(); + query_metadata.push(SearchMetadata { query_uid }); partitioned_queries.partition(federated_query, query_index, &network, features)? } @@ -179,6 +183,7 @@ pub async fn perform_federated_search( facets_by_index, remote_errors: partitioned_queries.has_remote.then_some(remote_errors), request_uid: Some(request_uid), + metadata: Some(query_metadata), }) } @@ -448,6 +453,7 @@ fn merge_metadata( degraded: degraded_for_host, used_negative_operator: host_used_negative_operator, remote_errors: _, + metadata: _, request_uid: _, } in remote_results { diff --git a/crates/meilisearch/src/search/federated/types.rs b/crates/meilisearch/src/search/federated/types.rs index db30314ee..b771223c8 100644 --- a/crates/meilisearch/src/search/federated/types.rs +++ b/crates/meilisearch/src/search/federated/types.rs @@ -18,6 +18,8 @@ use serde::{Deserialize, Serialize}; use utoipa::ToSchema; use uuid::Uuid; +use crate::search::SearchMetadata; + use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex}; use crate::milli::vector::Embedding; @@ -134,6 +136,8 @@ pub struct FederatedSearchResult { pub facets_by_index: FederatedFacets, #[serde(default, skip_serializing_if = "Option::is_none")] pub request_uid: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metadata: Option>, #[serde(default, skip_serializing_if = "Option::is_none")] pub remote_errors: Option>, 
@@ -160,6 +164,7 @@ impl fmt::Debug for FederatedSearchResult { facets_by_index, remote_errors, request_uid, + metadata, } = self; let mut debug = f.debug_struct("SearchResult"); @@ -195,6 +200,9 @@ impl fmt::Debug for FederatedSearchResult { if let Some(request_uid) = request_uid { debug.field("request_uid", &request_uid); } + if let Some(metadata) = metadata { + debug.field("metadata", &metadata); + } debug.finish() } diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 329263271..260cbb146 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -836,6 +836,13 @@ pub struct SearchHit { pub ranking_score_details: Option>, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct SearchMetadata { + pub query_uid: Uuid, +} + #[derive(Serialize, Clone, PartialEq, ToSchema)] #[serde(rename_all = "camelCase")] #[schema(rename_all = "camelCase")] @@ -854,6 +861,8 @@ pub struct SearchResult { pub facet_stats: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub request_uid: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, #[serde(skip_serializing_if = "Option::is_none")] pub semantic_hit_count: Option, @@ -876,6 +885,7 @@ impl fmt::Debug for SearchResult { facet_distribution, facet_stats, request_uid, + metadata, semantic_hit_count, degraded, used_negative_operator, @@ -908,6 +918,9 @@ impl fmt::Debug for SearchResult { if let Some(request_uid) = request_uid { debug.field("request_uid", &request_uid); } + if let Some(metadata) = metadata { + debug.field("metadata", &metadata); + } debug.finish() } @@ -1234,6 +1247,7 @@ pub fn perform_search( .map(|ComputedFacets { distribution, stats }| (distribution, stats)) .unzip(); + let query_uid = Uuid::now_v7(); let result = SearchResult { hits: documents, hits_info, @@ -1246,6 +1260,7 @@ pub fn 
perform_search( used_negative_operator, semantic_hit_count, request_uid: Some(request_uid), + metadata: Some(SearchMetadata { query_uid }), }; Ok(result) } From cf43ec4affc00b9f2f81596d315e08be34371567 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 30 Sep 2025 17:34:29 +0200 Subject: [PATCH 02/15] feat: add indexUid to SearchMetadata - Add indexUid field to SearchMetadata struct - Update perform_search to include indexUid in metadata - Update federated search to include indexUid for each query The metadata field now contains both queryUid and indexUid: - For /search: single object with queryUid and indexUid - For /multi-search: each result has metadata with both fields - For federated search: array of objects, each with queryUid and indexUid --- crates/meilisearch/src/search/federated/perform.rs | 3 ++- crates/meilisearch/src/search/mod.rs | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index d42b17c34..ed9fd5c59 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -69,7 +69,8 @@ pub async fn perform_federated_search( let mut query_metadata = Vec::new(); for (query_index, federated_query) in queries.into_iter().enumerate() { let query_uid = Uuid::now_v7(); - query_metadata.push(SearchMetadata { query_uid }); + query_metadata + .push(SearchMetadata { query_uid, index_uid: federated_query.index_uid.to_string() }); partitioned_queries.partition(federated_query, query_index, &network, features)? 
} diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 260cbb146..f6eb9347d 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -841,6 +841,7 @@ pub struct SearchHit { #[schema(rename_all = "camelCase")] pub struct SearchMetadata { pub query_uid: Uuid, + pub index_uid: String, } #[derive(Serialize, Clone, PartialEq, ToSchema)] @@ -1143,6 +1144,7 @@ pub fn perform_search( request_uid: Uuid, ) -> Result { let before_search = Instant::now(); + let index_uid_for_metadata = index_uid.clone(); let rtxn = index.read_txn()?; let time_budget = match index.search_cutoff(&rtxn)? { Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), @@ -1163,7 +1165,7 @@ pub fn perform_search( query_vector, }, semantic_hit_count, - ) = search_from_kind(index_uid, search_kind, search)?; + ) = search_from_kind(index_uid.clone(), search_kind, search)?; let SearchQuery { q, @@ -1260,7 +1262,7 @@ pub fn perform_search( used_negative_operator, semantic_hit_count, request_uid: Some(request_uid), - metadata: Some(SearchMetadata { query_uid }), + metadata: Some(SearchMetadata { query_uid, index_uid: index_uid_for_metadata }), }; Ok(result) } From b93b803a2ed5283bd2b019d2baf71d2230e54d50 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 1 Oct 2025 17:55:15 +0200 Subject: [PATCH 03/15] WIP: Add metadata field with queryUid, indexUid, primaryKey, and remote - Add SearchMetadata struct with queryUid, indexUid, primaryKey, and remote fields - Update SearchResult to include metadata field - Update FederatedSearchResult to include metadata array - Refactor federated search metadata building to maintain query order - Support primary key extraction from both local and remote results - Add remote field to identify remote instance for federated queries - Ensure metadata array matches query order in federated search Features: - queryUid: UUID v7 for each query - indexUid: Index identifier - primaryKey: Primary key 
field name (null if not available) - remote: Remote instance name (null for local queries) This provides complete traceability for search operations across local and remote instances. --- .../src/search/federated/perform.rs | 59 +++++++++++++++++-- crates/meilisearch/src/search/mod.rs | 16 ++++- 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index ed9fd5c59..8aee62df6 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -66,18 +66,21 @@ pub async fn perform_federated_search( // 1. partition queries by host and index let mut partitioned_queries = PartitionedQueries::new(); - let mut query_metadata = Vec::new(); + + // Store the original queries order for later metadata building + let original_queries = queries.clone(); + for (query_index, federated_query) in queries.into_iter().enumerate() { - let query_uid = Uuid::now_v7(); - query_metadata - .push(SearchMetadata { query_uid, index_uid: federated_query.index_uid.to_string() }); partitioned_queries.partition(federated_query, query_index, &network, features)? } // 2. perform queries, merge and make hits index by index // 2.1. start remote queries - let remote_search = - RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline); + let remote_search = RemoteSearch::start( + partitioned_queries.remote_queries_by_host.clone(), + &federation, + deadline, + ); // 2.2. concurrently execute local queries let params = SearchByIndexParams { @@ -123,6 +126,50 @@ pub async fn perform_federated_search( let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) = merge_metadata(&mut results_by_index, &remote_results); + // 3.1.1. 
Build metadata in the same order as the original queries + let mut query_metadata = Vec::new(); + + // Create a map of remote results by index_uid for quick lookup + let mut remote_results_by_index = std::collections::BTreeMap::new(); + for remote_result in &remote_results { + if let Some(remote_metadata) = &remote_result.metadata { + for remote_meta in remote_metadata { + remote_results_by_index.insert(remote_meta.index_uid.clone(), remote_meta.clone()); + } + } + } + + // Build metadata in the same order as the original queries + for original_query in original_queries { + let query_uid = Uuid::now_v7(); + let index_uid = original_query.index_uid.to_string(); + + // Determine if this is a remote query + let (_, _, federation_options) = original_query.into_index_query_federation(); + let remote = federation_options.and_then(|options| options.remote); + + // Get primary key for this index + let mut primary_key = None; + + if remote.is_some() { + // For remote queries, try to get primary key from remote results + if let Some(remote_meta) = remote_results_by_index.get(&index_uid) { + primary_key = remote_meta.primary_key.clone(); + } + } else { + // For local queries, get primary key from local index + primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| { + index.read_txn().ok().and_then(|rtxn| { + let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()); + drop(rtxn); + pk + }) + }); + } + + query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); + } + // 3.2. 
merge hits let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results) .skip(federation.offset) diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index f6eb9347d..28797603f 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -842,6 +842,10 @@ pub struct SearchHit { pub struct SearchMetadata { pub query_uid: Uuid, pub index_uid: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub primary_key: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub remote: Option, } #[derive(Serialize, Clone, PartialEq, ToSchema)] @@ -1167,6 +1171,9 @@ pub fn perform_search( semantic_hit_count, ) = search_from_kind(index_uid.clone(), search_kind, search)?; + let query_uid = Uuid::now_v7(); + let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + let SearchQuery { q, limit, @@ -1248,8 +1255,6 @@ pub fn perform_search( .transpose()? .map(|ComputedFacets { distribution, stats }| (distribution, stats)) .unzip(); - - let query_uid = Uuid::now_v7(); let result = SearchResult { hits: documents, hits_info, @@ -1262,7 +1267,12 @@ pub fn perform_search( used_negative_operator, semantic_hit_count, request_uid: Some(request_uid), - metadata: Some(SearchMetadata { query_uid, index_uid: index_uid_for_metadata }), + metadata: Some(SearchMetadata { + query_uid, + index_uid: index_uid_for_metadata, + primary_key, + remote: None, // Local searches don't have a remote + }), }; Ok(result) } From da063062740862fb3db369f2749c8089411811ee Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 2 Oct 2025 11:04:19 +0200 Subject: [PATCH 04/15] Add header-based metadata opt-in for search responses - Add Meili-Include-Metadata header constant - Modify perform_search to conditionally include metadata based on header - Modify perform_federated_search to conditionally include metadata based on header - Update all search routes to check for header and pass 
include_metadata parameter - Forward Meili-Include-Metadata header to remote requests for federated search - Ensure remote queries include primaryKey metadata when header is present --- .../meilisearch/src/routes/indexes/search.rs | 14 ++- crates/meilisearch/src/routes/multi_search.rs | 13 ++- .../meilisearch/src/search/federated/mod.rs | 2 +- .../src/search/federated/perform.rs | 88 +++++++++++-------- .../meilisearch/src/search/federated/proxy.rs | 18 +++- crates/meilisearch/src/search/mod.rs | 24 +++-- 6 files changed, 109 insertions(+), 50 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 445a3bb54..f8ee912b9 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -26,7 +26,7 @@ use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, + DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; @@ -345,6 +345,11 @@ pub async fn search_with_url_query( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors); let permit = search_queue.try_get_search_permit().await?; + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); + let search_result = tokio::task::spawn_blocking(move || { perform_search( index_uid.to_string(), @@ -354,6 +359,7 @@ pub async fn search_with_url_query( retrieve_vector, index_scheduler.features(), request_uid, + include_metadata, ) }) .await; @@ -453,6 +459,11 @@ pub async fn search_with_post( search_kind(&query, 
index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search( @@ -463,6 +474,7 @@ pub async fn search_with_post( retrieve_vectors, index_scheduler.features(), request_uid, + include_metadata, ) }) .await; diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index 15931644f..06a92971a 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -21,7 +21,7 @@ use crate::routes::indexes::search::search_kind; use crate::search::{ add_search_rules, perform_federated_search, perform_search, FederatedSearch, FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, - PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, + PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; @@ -202,6 +202,10 @@ pub async fn multi_search_with_post( .headers() .get(PROXY_SEARCH_HEADER) .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); let search_result = perform_federated_search( &index_scheduler, queries, @@ -209,6 +213,7 @@ pub async fn multi_search_with_post( features, is_proxy, request_uid, + include_metadata, ) .await; permit.drop().await; @@ -228,6 +233,11 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(search_result?) 
} None => { + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .is_some(); + // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code // changes. @@ -286,6 +296,7 @@ pub async fn multi_search_with_post( retrieve_vector, features, request_uid, + include_metadata, ) }) .await diff --git a/crates/meilisearch/src/search/federated/mod.rs b/crates/meilisearch/src/search/federated/mod.rs index 40204c591..2b20f64cb 100644 --- a/crates/meilisearch/src/search/federated/mod.rs +++ b/crates/meilisearch/src/search/federated/mod.rs @@ -4,7 +4,7 @@ mod types; mod weighted_scores; pub use perform::perform_federated_search; -pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; +pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER}; pub use types::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, }; diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index 8aee62df6..5ca9f7148 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -42,6 +42,7 @@ pub async fn perform_federated_search( features: RoFeatures, is_proxy: bool, request_uid: Uuid, + include_metadata: bool, ) -> Result { if is_proxy { features.check_network("Performing a remote federated search")?; @@ -80,6 +81,7 @@ pub async fn perform_federated_search( partitioned_queries.remote_queries_by_host.clone(), &federation, deadline, + include_metadata, ); // 2.2. concurrently execute local queries @@ -127,48 +129,54 @@ pub async fn perform_federated_search( merge_metadata(&mut results_by_index, &remote_results); // 3.1.1. 
Build metadata in the same order as the original queries - let mut query_metadata = Vec::new(); + let query_metadata = if include_metadata { + let mut query_metadata = Vec::new(); - // Create a map of remote results by index_uid for quick lookup - let mut remote_results_by_index = std::collections::BTreeMap::new(); - for remote_result in &remote_results { - if let Some(remote_metadata) = &remote_result.metadata { - for remote_meta in remote_metadata { - remote_results_by_index.insert(remote_meta.index_uid.clone(), remote_meta.clone()); + // Create a map of remote results by index_uid for quick lookup + let mut remote_results_by_index = std::collections::BTreeMap::new(); + for remote_result in &remote_results { + if let Some(remote_metadata) = &remote_result.metadata { + for remote_meta in remote_metadata { + remote_results_by_index + .insert(remote_meta.index_uid.clone(), remote_meta.clone()); + } } } - } - // Build metadata in the same order as the original queries - for original_query in original_queries { - let query_uid = Uuid::now_v7(); - let index_uid = original_query.index_uid.to_string(); + // Build metadata in the same order as the original queries + for original_query in original_queries { + let query_uid = Uuid::now_v7(); + let index_uid = original_query.index_uid.to_string(); - // Determine if this is a remote query - let (_, _, federation_options) = original_query.into_index_query_federation(); - let remote = federation_options.and_then(|options| options.remote); + // Determine if this is a remote query + let (_, _, federation_options) = original_query.into_index_query_federation(); + let remote = federation_options.and_then(|options| options.remote); - // Get primary key for this index - let mut primary_key = None; + // Get primary key for this index + let mut primary_key = None; - if remote.is_some() { - // For remote queries, try to get primary key from remote results - if let Some(remote_meta) = remote_results_by_index.get(&index_uid) { - 
primary_key = remote_meta.primary_key.clone(); + if remote.is_some() { + // For remote queries, try to get primary key from remote results + if let Some(remote_meta) = remote_results_by_index.get(&index_uid) { + primary_key = remote_meta.primary_key.clone(); + } + } else { + // For local queries, get primary key from local index + primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| { + index.read_txn().ok().and_then(|rtxn| { + let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()); + drop(rtxn); + pk + }) + }); } - } else { - // For local queries, get primary key from local index - primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| { - index.read_txn().ok().and_then(|rtxn| { - let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()); - drop(rtxn); - pk - }) - }); - } - query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); - } + query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); + } + Some(query_metadata) + } else { + None + }; // 3.2. 
merge hits let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results) @@ -231,7 +239,7 @@ pub async fn perform_federated_search( facets_by_index, remote_errors: partitioned_queries.has_remote.then_some(remote_errors), request_uid: Some(request_uid), - metadata: Some(query_metadata), + metadata: query_metadata, }) } @@ -630,7 +638,12 @@ struct RemoteSearch { } impl RemoteSearch { - fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self { + fn start( + queries: RemoteQueriesByHost, + federation: &Federation, + deadline: Instant, + include_metadata: bool, + ) -> Self { let mut in_flight_remote_queries = BTreeMap::new(); let client = reqwest::ClientBuilder::new() .connect_timeout(std::time::Duration::from_millis(200)) @@ -650,7 +663,10 @@ impl RemoteSearch { // never merge distant facets proxy_federation.merge_facets = None; let params = params.clone(); - async move { proxy_search(&node, queries, proxy_federation, &params).await } + async move { + proxy_search(&node, queries, proxy_federation, &params, include_metadata) + .await + } }), ); } diff --git a/crates/meilisearch/src/search/federated/proxy.rs b/crates/meilisearch/src/search/federated/proxy.rs index 67594f645..26387030c 100644 --- a/crates/meilisearch/src/search/federated/proxy.rs +++ b/crates/meilisearch/src/search/federated/proxy.rs @@ -11,6 +11,7 @@ use crate::search::SearchQueryWithIndex; pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; +pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata"; mod error { use meilisearch_types::error::ResponseError; @@ -98,6 +99,7 @@ pub async fn proxy_search( queries: Vec, federation: Federation, params: &ProxySearchParams, + include_metadata: bool, ) -> Result { let url = format!("{}/multi-search", node.url); @@ -119,7 +121,16 @@ pub async fn proxy_search( }; for i in 0..params.try_count { - match try_proxy_search(&url, search_api_key, 
&federated, &params.client, deadline).await { + match try_proxy_search( + &url, + search_api_key, + &federated, + &params.client, + deadline, + include_metadata, + ) + .await + { Ok(response) => return Ok(response), Err(retry) => { let duration = retry.into_duration(i)?; @@ -127,7 +138,7 @@ pub async fn proxy_search( } } } - try_proxy_search(&url, search_api_key, &federated, &params.client, deadline) + try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata) .await .map_err(Retry::into_error) } @@ -138,6 +149,7 @@ async fn try_proxy_search( federated: &FederatedSearch, client: &Client, deadline: std::time::Instant, + include_metadata: bool, ) -> Result { let timeout = deadline.saturating_duration_since(std::time::Instant::now()); @@ -148,6 +160,8 @@ async fn try_proxy_search( request }; let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE); + let request = + if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request }; let response = request.send().await; let response = match response { diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 28797603f..87d6e5195 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -43,7 +43,7 @@ use crate::error::MeilisearchHttpError; mod federated; pub use federated::{ perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, - FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, + FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER, }; mod ranking_rules; @@ -1146,6 +1146,7 @@ pub fn perform_search( retrieve_vectors: RetrieveVectors, features: RoFeatures, request_uid: Uuid, + include_metadata: bool, ) -> Result { let before_search = Instant::now(); let index_uid_for_metadata = index_uid.clone(); @@ -1171,8 +1172,18 @@ pub fn perform_search( semantic_hit_count, ) = 
search_from_kind(index_uid.clone(), search_kind, search)?; - let query_uid = Uuid::now_v7(); - let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + let metadata = if include_metadata { + let query_uid = Uuid::now_v7(); + let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + Some(SearchMetadata { + query_uid, + index_uid: index_uid_for_metadata, + primary_key, + remote: None, // Local searches don't have a remote + }) + } else { + None + }; let SearchQuery { q, @@ -1267,12 +1278,7 @@ pub fn perform_search( used_negative_operator, semantic_hit_count, request_uid: Some(request_uid), - metadata: Some(SearchMetadata { - query_uid, - index_uid: index_uid_for_metadata, - primary_key, - remote: None, // Local searches don't have a remote - }), + metadata, }; Ok(result) } From f242377d2bad68ded65db75cff21daa5b677931c Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 6 Oct 2025 11:19:39 +0200 Subject: [PATCH 05/15] Fix remote index collision in federated search metadata - Use composite key (indexUid, remote) instead of indexUid only for remote metadata lookup - Prevents collisions when multiple remotes have same indexUid but different primary keys - Ensures each remote query gets correct primaryKey from its specific remote instance --- crates/meilisearch/src/search/federated/perform.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index 5ca9f7148..e7f9bf4c8 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -132,13 +132,14 @@ pub async fn perform_federated_search( let query_metadata = if include_metadata { let mut query_metadata = Vec::new(); - // Create a map of remote results by index_uid for quick lookup - let mut remote_results_by_index = std::collections::BTreeMap::new(); + // Create a map of remote results by (index_uid, 
remote) for quick lookup + // This prevents collisions when multiple remotes have the same index_uid but different primary keys + let mut remote_results_by_index_and_remote = std::collections::BTreeMap::new(); for remote_result in &remote_results { if let Some(remote_metadata) = &remote_result.metadata { for remote_meta in remote_metadata { - remote_results_by_index - .insert(remote_meta.index_uid.clone(), remote_meta.clone()); + let key = (remote_meta.index_uid.clone(), remote_meta.remote.clone()); + remote_results_by_index_and_remote.insert(key, remote_meta.clone()); } } } @@ -157,7 +158,9 @@ pub async fn perform_federated_search( if remote.is_some() { // For remote queries, try to get primary key from remote results - if let Some(remote_meta) = remote_results_by_index.get(&index_uid) { + // Use composite key (index_uid, remote) to avoid collisions + let lookup_key = (index_uid.clone(), remote.clone()); + if let Some(remote_meta) = remote_results_by_index_and_remote.get(&lookup_key) { primary_key = remote_meta.primary_key.clone(); } } else { From 519905ef9c3c20a759555395ace1d04380e27c71 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 6 Oct 2025 12:10:48 +0200 Subject: [PATCH 06/15] Fix remote index collision with HashMap-based lookup - Replace BTreeMap with HashMap for (remote, index_uid) -> primary_key lookup - Prevents collisions when multiple remotes have same index_uid but different primary keys --- .../src/search/federated/perform.rs | 50 +++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index e7f9bf4c8..3db6fb43d 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -132,14 +132,18 @@ pub async fn perform_federated_search( let query_metadata = if include_metadata { let mut query_metadata = Vec::new(); - // Create a map of remote results by 
(index_uid, remote) for quick lookup + // Create a map of (remote, index_uid) -> primary_key for quick lookup // This prevents collisions when multiple remotes have the same index_uid but different primary keys - let mut remote_results_by_index_and_remote = std::collections::BTreeMap::new(); + let mut remote_primary_keys = std::collections::HashMap::new(); for remote_result in &remote_results { if let Some(remote_metadata) = &remote_result.metadata { for remote_meta in remote_metadata { - let key = (remote_meta.index_uid.clone(), remote_meta.remote.clone()); - remote_results_by_index_and_remote.insert(key, remote_meta.clone()); + if let Some(remote_name) = &remote_meta.remote { + let key = (remote_name.clone(), remote_meta.index_uid.clone()); + if let Some(primary_key) = &remote_meta.primary_key { + remote_primary_keys.insert(key, primary_key.clone()); + } + } } } } @@ -154,25 +158,22 @@ pub async fn perform_federated_search( let remote = federation_options.and_then(|options| options.remote); // Get primary key for this index - let mut primary_key = None; - - if remote.is_some() { - // For remote queries, try to get primary key from remote results - // Use composite key (index_uid, remote) to avoid collisions - let lookup_key = (index_uid.clone(), remote.clone()); - if let Some(remote_meta) = remote_results_by_index_and_remote.get(&lookup_key) { - primary_key = remote_meta.primary_key.clone(); + let primary_key = match &remote { + Some(remote_name) => { + // For remote queries, try to get primary key from remote results + // Use composite key (remote, index_uid) to avoid collisions + let lookup_key = (remote_name.clone(), index_uid.clone()); + remote_primary_keys.get(&lookup_key).cloned() } - } else { - // For local queries, get primary key from local index - primary_key = index_scheduler.index(&index_uid).ok().and_then(|index| { - index.read_txn().ok().and_then(|rtxn| { - let pk = index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()); - drop(rtxn); - pk 
+ None => { + // For local queries, get primary key from local index + index_scheduler.index(&index_uid).ok().and_then(|index| { + index.read_txn().ok().and_then(|rtxn| { + index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()) + }) }) - }); - } + } + }; query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); } @@ -713,6 +714,13 @@ impl RemoteSearch { continue 'remote_queries; } + // Add remote name to metadata + if let Some(metadata) = res.metadata.as_mut() { + for meta in metadata { + meta.remote = Some(node_name.clone()); + } + } + federation.insert( FEDERATION_REMOTE.to_string(), serde_json::Value::String(node_name.clone()), From c1c42e818ea4d7f409f341262d507326e626b47c Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 6 Oct 2025 14:34:17 +0200 Subject: [PATCH 07/15] refactor: group perform_search parameters into SearchParams struct - Create SearchParams struct to group related parameters - Update perform_search function to use SearchParams instead of 8 individual parameters - Fix clippy warning about too many arguments - Update all callers to use new SearchParams struct --- .../meilisearch/src/routes/indexes/search.rs | 49 +++++++++---------- crates/meilisearch/src/routes/multi_search.rs | 30 +++++------- .../meilisearch/src/search/federated/mod.rs | 2 +- .../src/search/federated/perform.rs | 22 +++++---- crates/meilisearch/src/search/mod.rs | 33 ++++++++----- 5 files changed, 73 insertions(+), 63 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index f8ee912b9..ccc3b5b9a 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -24,9 +24,10 @@ use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST}; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, 
RankingScoreThreshold, - RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH, - DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, INCLUDE_METADATA_HEADER, + RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult, SemanticRatio, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, + INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; @@ -345,21 +346,20 @@ pub async fn search_with_url_query( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors); let permit = search_queue.try_get_search_permit().await?; - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .is_some(); + let include_metadata = req.headers().get(INCLUDE_METADATA_HEADER).is_some(); let search_result = tokio::task::spawn_blocking(move || { perform_search( - index_uid.to_string(), + SearchParams { + index_uid: index_uid.to_string(), + query, + search_kind, + retrieve_vectors: retrieve_vector, + features: index_scheduler.features(), + request_uid, + include_metadata, + }, &index, - query, - search_kind, - retrieve_vector, - index_scheduler.features(), - request_uid, - include_metadata, ) }) .await; @@ -459,22 +459,21 @@ pub async fn search_with_post( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .is_some(); + let include_metadata = req.headers().get(INCLUDE_METADATA_HEADER).is_some(); let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search( - index_uid.to_string(), + 
SearchParams { + index_uid: index_uid.to_string(), + query, + search_kind, + retrieve_vectors, + features: index_scheduler.features(), + request_uid, + include_metadata, + }, &index, - query, - search_kind, - retrieve_vectors, - index_scheduler.features(), - request_uid, - include_metadata, ) }) .await; diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index 06a92971a..29cca8945 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -20,8 +20,8 @@ use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::indexes::search::search_kind; use crate::search::{ add_search_rules, perform_federated_search, perform_search, FederatedSearch, - FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, - PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER, + FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex, + SearchResultWithIndex, INCLUDE_METADATA_HEADER, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, }; use crate::search_queue::SearchQueue; @@ -202,10 +202,7 @@ pub async fn multi_search_with_post( .headers() .get(PROXY_SEARCH_HEADER) .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .is_some(); + let include_metadata = req.headers().get(INCLUDE_METADATA_HEADER).is_some(); let search_result = perform_federated_search( &index_scheduler, queries, @@ -233,10 +230,7 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(search_result?) 
} None => { - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .is_some(); + let include_metadata = req.headers().get(INCLUDE_METADATA_HEADER).is_some(); // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code @@ -289,14 +283,16 @@ pub async fn multi_search_with_post( let search_result = tokio::task::spawn_blocking(move || { perform_search( - index_uid_str.clone(), + SearchParams { + index_uid: index_uid_str.clone(), + query, + search_kind, + retrieve_vectors: retrieve_vector, + features, + request_uid, + include_metadata, + }, &index, - query, - search_kind, - retrieve_vector, - features, - request_uid, - include_metadata, ) }) .await diff --git a/crates/meilisearch/src/search/federated/mod.rs b/crates/meilisearch/src/search/federated/mod.rs index 2b20f64cb..c2a795c3f 100644 --- a/crates/meilisearch/src/search/federated/mod.rs +++ b/crates/meilisearch/src/search/federated/mod.rs @@ -4,7 +4,7 @@ mod types; mod weighted_scores; pub use perform::perform_federated_search; -pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, INCLUDE_METADATA_HEADER}; +pub use proxy::{INCLUDE_METADATA_HEADER, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; pub use types::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, }; diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index 3db6fb43d..2a8db5466 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -124,11 +124,11 @@ pub async fn perform_federated_search( let after_waiting_remote_results = std::time::Instant::now(); // 3. merge hits and metadata across indexes and hosts - // 3.1. merge metadata + // 3.1. 
merge federation metadata let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) = merge_metadata(&mut results_by_index, &remote_results); - // 3.1.1. Build metadata in the same order as the original queries + // 3.2. Build metadata in the same order as the original queries let query_metadata = if include_metadata { let mut query_metadata = Vec::new(); @@ -138,11 +138,15 @@ pub async fn perform_federated_search( for remote_result in &remote_results { if let Some(remote_metadata) = &remote_result.metadata { for remote_meta in remote_metadata { - if let Some(remote_name) = &remote_meta.remote { - let key = (remote_name.clone(), remote_meta.index_uid.clone()); - if let Some(primary_key) = &remote_meta.primary_key { - remote_primary_keys.insert(key, primary_key.clone()); - } + if let SearchMetadata { + remote: Some(remote_name), + primary_key: Some(primary_key), + index_uid, + .. + } = &remote_meta + { + let key = (remote_name, index_uid); + remote_primary_keys.insert(key, primary_key); } } } @@ -162,8 +166,8 @@ pub async fn perform_federated_search( Some(remote_name) => { // For remote queries, try to get primary key from remote results // Use composite key (remote, index_uid) to avoid collisions - let lookup_key = (remote_name.clone(), index_uid.clone()); - remote_primary_keys.get(&lookup_key).cloned() + let lookup_key = (remote_name, &index_uid); + remote_primary_keys.get(&lookup_key).map(|pk| pk.to_string()) } None => { // For local queries, get primary key from local index diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 87d6e5195..85cd9d678 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -43,7 +43,8 @@ use crate::error::MeilisearchHttpError; mod federated; pub use federated::{ perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, - FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, 
INCLUDE_METADATA_HEADER, + FederationOptions, MergeFacets, INCLUDE_METADATA_HEADER, PROXY_SEARCH_HEADER, + PROXY_SEARCH_HEADER_VALUE, }; mod ranking_rules; @@ -1138,16 +1139,26 @@ pub fn prepare_search<'t>( Ok((search, is_finite_pagination, max_total_hits, offset)) } -pub fn perform_search( - index_uid: String, - index: &Index, - query: SearchQuery, - search_kind: SearchKind, - retrieve_vectors: RetrieveVectors, - features: RoFeatures, - request_uid: Uuid, - include_metadata: bool, -) -> Result { +pub struct SearchParams { + pub index_uid: String, + pub query: SearchQuery, + pub search_kind: SearchKind, + pub retrieve_vectors: RetrieveVectors, + pub features: RoFeatures, + pub request_uid: Uuid, + pub include_metadata: bool, +} + +pub fn perform_search(params: SearchParams, index: &Index) -> Result { + let SearchParams { + index_uid, + query, + search_kind, + retrieve_vectors, + features, + request_uid, + include_metadata, + } = params; let before_search = Instant::now(); let index_uid_for_metadata = index_uid.clone(); let rtxn = index.read_txn()?; From 48dd9146e7ef2648137643fcf8e2bd07f458d94b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 7 Oct 2025 11:10:51 +0200 Subject: [PATCH 08/15] Add comprehensive metadata tests with insta snapshots - Add 9 test cases covering single search, multi-search, and federated search - Test metadata header opt-in functionality with case insensitivity - Test header false value handling - Test UUID format validation and consistency - Use insta snapshots for reliable, maintainable test assertions - Fix header parsing to properly handle 'false' values - Add helper methods for testing with custom headers --- .../meilisearch/src/routes/indexes/search.rs | 14 +- crates/meilisearch/src/routes/multi_search.rs | 14 +- crates/meilisearch/tests/common/index.rs | 27 ++ crates/meilisearch/tests/common/server.rs | 9 + crates/meilisearch/tests/search/metadata.rs | 387 ++++++++++++++++++ crates/meilisearch/tests/search/mod.rs | 1 + 6 files 
changed, 448 insertions(+), 4 deletions(-) create mode 100644 crates/meilisearch/tests/search/metadata.rs diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index ccc3b5b9a..91fef74fc 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -346,7 +346,12 @@ pub async fn search_with_url_query( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors); let permit = search_queue.try_get_search_permit().await?; - let include_metadata = req.headers().get(INCLUDE_METADATA_HEADER).is_some(); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) + .unwrap_or(false); let search_result = tokio::task::spawn_blocking(move || { perform_search( @@ -459,7 +464,12 @@ pub async fn search_with_post( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); - let include_metadata = req.headers().get(INCLUDE_METADATA_HEADER).is_some(); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) + .unwrap_or(false); let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index 29cca8945..30938298d 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -202,7 +202,12 @@ pub async fn multi_search_with_post( .headers() .get(PROXY_SEARCH_HEADER) .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); - let include_metadata = 
req.headers().get(INCLUDE_METADATA_HEADER).is_some(); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) + .unwrap_or(false); let search_result = perform_federated_search( &index_scheduler, queries, @@ -230,7 +235,12 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(search_result?) } None => { - let include_metadata = req.headers().get(INCLUDE_METADATA_HEADER).is_some(); + let include_metadata = req + .headers() + .get(INCLUDE_METADATA_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) + .unwrap_or(false); // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code diff --git a/crates/meilisearch/tests/common/index.rs b/crates/meilisearch/tests/common/index.rs index f8ff5ced9..81415d3b8 100644 --- a/crates/meilisearch/tests/common/index.rs +++ b/crates/meilisearch/tests/common/index.rs @@ -516,6 +516,33 @@ impl Index<'_, State> { self.service.post_encoded(url, query, self.encoder).await } + pub async fn search_with_headers( + &self, + query: Value, + headers: Vec<(&str, &str)>, + ) -> (Value, StatusCode) { + let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref())); + let body = serde_json::to_string(&query).unwrap(); + let mut all_headers = vec![("content-type", "application/json")]; + all_headers.extend(headers); + self.service.post_str(url, body, all_headers).await + } + + pub async fn multi_search_post(&self, queries: Value) -> (Value, StatusCode) { + self.service.post("/multi-search", queries).await + } + + pub async fn multi_search_post_with_headers( + &self, + queries: Value, + headers: Vec<(&str, &str)>, + ) -> (Value, StatusCode) { + let body = serde_json::to_string(&queries).unwrap(); + let mut all_headers = 
vec![("content-type", "application/json")]; + all_headers.extend(headers); + self.service.post_str("/multi-search", body, all_headers).await + } + pub async fn search_get(&self, query: &str) -> (Value, StatusCode) { let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query); self.service.get(url).await diff --git a/crates/meilisearch/tests/common/server.rs b/crates/meilisearch/tests/common/server.rs index b87dbe0ad..53c137644 100644 --- a/crates/meilisearch/tests/common/server.rs +++ b/crates/meilisearch/tests/common/server.rs @@ -390,6 +390,15 @@ impl Server { self.service.post("/multi-search", queries).await } + pub async fn multi_search_post_with_headers( + &self, + queries: Value, + headers: Vec<(&str, &str)>, + ) -> (Value, StatusCode) { + let body = serde_json::to_string(&queries).unwrap(); + self.service.post_str("/multi-search", body, headers).await + } + pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) { self.service.get(format!("/indexes{parameters}")).await } diff --git a/crates/meilisearch/tests/search/metadata.rs b/crates/meilisearch/tests/search/metadata.rs new file mode 100644 index 000000000..c4c70ea63 --- /dev/null +++ b/crates/meilisearch/tests/search/metadata.rs @@ -0,0 +1,387 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{shared_index_with_documents, Server, DOCUMENTS}; +use crate::json; + +#[actix_rt::test] +async fn search_without_metadata_header() { + let index = shared_index_with_documents().await; + + // Test that metadata is not included by default + index + .search(json!({"q": "glass"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + 
"requestUid": "[uuid]" + } + "###); + }) + .await; +} + +#[actix_rt::test] +async fn search_with_metadata_header() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test with Meili-Include-Metadata header + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + } + "###); +} + +#[actix_rt::test] +async fn search_with_metadata_header_and_primary_key() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, Some("id")).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test with Meili-Include-Metadata header + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "id": "450465", + "title": "Gläss", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + 
"indexUid": "[uuid]", + "primaryKey": "id" + } + } + "###); +} + +#[actix_rt::test] +async fn multi_search_without_metadata_header() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test multi-search without metadata header + let (response, code) = index + .multi_search_post(json!({ + "queries": [ + {"indexUid": index.uid, "q": "glass"}, + {"indexUid": index.uid, "q": "dragon"} + ] + })) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]" }), @r###" + { + "results": [ + { + "indexUid": "[uuid]", + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]" + }, + { + "indexUid": "[uuid]", + "hits": [ + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", + "color": [ + "green", + "red" + ] + } + ], + "query": "dragon", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]" + } + ] + } + "###); +} + +#[actix_rt::test] +async fn multi_search_with_metadata_header() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, Some("id")).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test multi-search with metadata header + let (response, code) = index + .multi_search_post_with_headers( + json!({ + "queries": [ + {"indexUid": index.uid, "q": "glass"}, + {"indexUid": index.uid, "q": "dragon"} + ] + }), + 
vec![("Meili-Include-Metadata", "true")], + ) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[0].metadata.queryUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]", ".results[1].metadata.queryUid" => "[uuid]" }), @r###" + { + "results": [ + { + "indexUid": "[uuid]", + "hits": [ + { + "id": "450465", + "title": "Gläss", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + }, + { + "indexUid": "[uuid]", + "hits": [ + { + "id": "166428", + "title": "How to Train Your Dragon: The Hidden World", + "color": [ + "green", + "red" + ] + } + ], + "query": "dragon", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + } + ] + } + "###); +} + +#[actix_rt::test] +async fn search_metadata_header_false_value() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test with header set to false + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "false")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + 
"estimatedTotalHits": 1, + "requestUid": "[uuid]" + } + "###); +} + +#[actix_rt::test] +async fn search_metadata_uuid_format() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + } + "###); +} + +#[actix_rt::test] +async fn search_metadata_consistency_across_requests() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, Some("id")).await; + server.wait_task(task.uid()).await.succeeded(); + + // Make multiple requests and check that metadata is consistent + for _i in 0..3 { + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "id": "450465", + "title": "Gläss", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", 
+ "indexUid": "[uuid]", + "primaryKey": "id" + } + } + "###); + } +} diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index 1f14a380e..69a69dee6 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -11,6 +11,7 @@ mod hybrid; #[cfg(not(feature = "chinese-pinyin"))] mod locales; mod matching_strategy; +mod metadata; mod multi; mod pagination; mod restrict_searchable; From 5c33fb090c7f1f021f85e9b21b08b3972dac07e9 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 9 Oct 2025 12:07:44 +0200 Subject: [PATCH 09/15] avoid openning each index twice and remove clones --- .../src/search/federated/perform.rs | 85 +++++++++---------- 1 file changed, 41 insertions(+), 44 deletions(-) diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index 2a8db5466..847379e9a 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -68,8 +68,13 @@ pub async fn perform_federated_search( // 1. partition queries by host and index let mut partitioned_queries = PartitionedQueries::new(); - // Store the original queries order for later metadata building - let original_queries = queries.clone(); + // Preconstruct metadata keeping the original queries order for later metadata building + let precomputed_query_metadata: Vec<_> = queries + .iter() + .map(|q| { + (q.index_uid.to_string(), q.federation_options.as_ref().and_then(|o| o.remote.clone())) + }) + .collect(); for (query_index, federated_query) in queries.into_iter().enumerate() { partitioned_queries.partition(federated_query, query_index, &network, features)? @@ -78,7 +83,7 @@ pub async fn perform_federated_search( // 2. perform queries, merge and make hits index by index // 2.1. 
start remote queries let remote_search = RemoteSearch::start( - partitioned_queries.remote_queries_by_host.clone(), + partitioned_queries.remote_queries_by_host, &federation, deadline, include_metadata, @@ -124,69 +129,57 @@ pub async fn perform_federated_search( let after_waiting_remote_results = std::time::Instant::now(); // 3. merge hits and metadata across indexes and hosts - // 3.1. merge federation metadata - let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) = - merge_metadata(&mut results_by_index, &remote_results); - // 3.2. Build metadata in the same order as the original queries + // 3.1. Build metadata in the same order as the original queries let query_metadata = if include_metadata { - let mut query_metadata = Vec::new(); - - // Create a map of (remote, index_uid) -> primary_key for quick lookup + // 3.1.1. Create a map of (remote, index_uid) -> primary_key for quick lookup // This prevents collisions when multiple remotes have the same index_uid but different primary keys - let mut remote_primary_keys = std::collections::HashMap::new(); + let mut primary_key_per_index = std::collections::HashMap::new(); + + // 3.1.1.1 Build metadata for remote results for remote_result in &remote_results { if let Some(remote_metadata) = &remote_result.metadata { for remote_meta in remote_metadata { if let SearchMetadata { remote: Some(remote_name), - primary_key: Some(primary_key), index_uid, + primary_key: Some(primary_key), .. } = &remote_meta { - let key = (remote_name, index_uid); - remote_primary_keys.insert(key, primary_key); + let key = (Some(remote_name), index_uid); + primary_key_per_index.insert(key, primary_key); } } } } - // Build metadata in the same order as the original queries - for original_query in original_queries { + // 3.1.1.2 Build metadata for local results + for local_meta in &results_by_index { + if let SearchResultByIndex { index, primary_key: Some(primary_key), .. 
} = &local_meta { + let key = (None, index); + primary_key_per_index.insert(key, primary_key); + } + } + + // 3.1.2 Build metadata in the same order as the original queries + let mut query_metadata = Vec::new(); + for (index_uid, remote) in precomputed_query_metadata { + let primary_key = + primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string()); let query_uid = Uuid::now_v7(); - let index_uid = original_query.index_uid.to_string(); - - // Determine if this is a remote query - let (_, _, federation_options) = original_query.into_index_query_federation(); - let remote = federation_options.and_then(|options| options.remote); - - // Get primary key for this index - let primary_key = match &remote { - Some(remote_name) => { - // For remote queries, try to get primary key from remote results - // Use composite key (remote, index_uid) to avoid collisions - let lookup_key = (remote_name, &index_uid); - remote_primary_keys.get(&lookup_key).map(|pk| pk.to_string()) - } - None => { - // For local queries, get primary key from local index - index_scheduler.index(&index_uid).ok().and_then(|index| { - index.read_txn().ok().and_then(|rtxn| { - index.primary_key(&rtxn).ok().flatten().map(|pk| pk.to_string()) - }) - }) - } - }; - - query_metadata.push(SearchMetadata { query_uid, index_uid, primary_key, remote }); + query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote }); } Some(query_metadata) } else { None }; - // 3.2. merge hits + // 3.2. merge federation metadata + let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) = + merge_metadata(&mut results_by_index, &remote_results); + + // 3.3. merge hits let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results) .skip(federation.offset) .take(federation.limit) @@ -201,7 +194,7 @@ pub async fn perform_federated_search( .map(|hit| hit.hit()) .collect(); - // 3.3. merge query vectors + // 3.4. 
merge query vectors let query_vectors = if retrieve_vectors { for remote_results in remote_results.iter_mut() { if let Some(remote_vectors) = remote_results.query_vectors.take() { @@ -220,7 +213,7 @@ pub async fn perform_federated_search( None }; - // 3.4. merge facets + // 3.5. merge facets let (facet_distribution, facet_stats, facets_by_index) = facet_order.merge(federation.merge_facets, remote_results, facets); @@ -471,6 +464,7 @@ struct SearchHitByIndex { struct SearchResultByIndex { index: String, + primary_key: Option, hits: Vec, estimated_total_hits: usize, degraded: bool, @@ -489,6 +483,7 @@ fn merge_metadata( let mut max_remote_duration = Duration::ZERO; for SearchResultByIndex { index, + primary_key: _, hits: _, estimated_total_hits: estimated_total_hits_by_index, facets: facets_by_index, @@ -820,6 +815,7 @@ impl SearchByIndex { } }; let rtxn = index.read_txn()?; + let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); let criteria = index.criteria(&rtxn)?; let dictionary = index.dictionary(&rtxn)?; let dictionary: Option> = @@ -1072,6 +1068,7 @@ impl SearchByIndex { })?; self.results_by_index.push(SearchResultByIndex { index: index_uid, + primary_key, hits: merged_result, estimated_total_hits, degraded, From e8d5228250580a651a955ca97dce54b610ee2683 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 9 Oct 2025 16:57:48 +0200 Subject: [PATCH 10/15] factorize metadata header --- crates/meilisearch/src/routes/indexes/search.rs | 16 +++------------- crates/meilisearch/src/routes/mod.rs | 13 +++++++++++++ crates/meilisearch/src/routes/multi_search.rs | 17 ++++------------- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 91fef74fc..8012f2302 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -22,12 +22,12 @@ use crate::extractors::authentication::GuardedData; 
use crate::extractors::sequential_extractor::SeqHandler; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST}; +use crate::routes::parse_include_metadata_header; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, - INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; @@ -346,12 +346,7 @@ pub async fn search_with_url_query( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors); let permit = search_queue.try_get_search_permit().await?; - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .and_then(|h| h.to_str().ok()) - .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) - .unwrap_or(false); + let include_metadata = parse_include_metadata_header(&req); let search_result = tokio::task::spawn_blocking(move || { perform_search( @@ -464,12 +459,7 @@ pub async fn search_with_post( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .and_then(|h| h.to_str().ok()) - .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) - .unwrap_or(false); + let include_metadata = parse_include_metadata_header(&req); let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs index 5a6780cbb..74b3215ac 100644 --- 
a/crates/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -42,6 +42,7 @@ use crate::routes::multi_search::SearchResults; use crate::routes::network::{Network, Remote}; use crate::routes::swap_indexes::SwapIndexesPayload; use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata}; +use crate::search::INCLUDE_METADATA_HEADER; use crate::search::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult, @@ -184,6 +185,18 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result { .is_some_and(|s| s.to_lowercase() == "true")) } +/// Parse the `Meili-Include-Metadata` header from an HTTP request. +/// +/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive). +/// Returns `false` if the header is not present or has any other value. +pub fn parse_include_metadata_header(req: &HttpRequest) -> bool { + req.headers() + .get(INCLUDE_METADATA_HEADER) + .and_then(|h| h.to_str().ok()) + .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) + .unwrap_or(false) +} + #[derive(Debug, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index 30938298d..50e00fef1 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -21,9 +21,10 @@ use crate::routes::indexes::search::search_kind; use crate::search::{ add_search_rules, perform_federated_search, perform_search, FederatedSearch, FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex, - SearchResultWithIndex, INCLUDE_METADATA_HEADER, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, + SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, }; use crate::search_queue::SearchQueue; +use 
crate::routes::parse_include_metadata_header; #[derive(OpenApi)] #[openapi( @@ -202,12 +203,7 @@ pub async fn multi_search_with_post( .headers() .get(PROXY_SEARCH_HEADER) .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .and_then(|h| h.to_str().ok()) - .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) - .unwrap_or(false); + let include_metadata = parse_include_metadata_header(&req); let search_result = perform_federated_search( &index_scheduler, queries, @@ -235,12 +231,7 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(search_result?) } None => { - let include_metadata = req - .headers() - .get(INCLUDE_METADATA_HEADER) - .and_then(|h| h.to_str().ok()) - .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1")) - .unwrap_or(false); + let include_metadata = parse_include_metadata_header(&req); // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code From 443cc24408a511696abae9b386f2157b4797bdd6 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 9 Oct 2025 16:59:27 +0200 Subject: [PATCH 11/15] --amend --- crates/meilisearch/src/routes/multi_search.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index 50e00fef1..4e833072a 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -18,13 +18,13 @@ use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::indexes::search::search_kind; +use crate::routes::parse_include_metadata_header; use crate::search::{ 
add_search_rules, perform_federated_search, perform_search, FederatedSearch, FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex, SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, }; use crate::search_queue::SearchQueue; -use crate::routes::parse_include_metadata_header; #[derive(OpenApi)] #[openapi( @@ -189,6 +189,7 @@ pub async fn multi_search_with_post( err })?; + let include_metadata = parse_include_metadata_header(&req); let response = match federation { Some(federation) => { debug!( @@ -203,7 +204,6 @@ pub async fn multi_search_with_post( .headers() .get(PROXY_SEARCH_HEADER) .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); - let include_metadata = parse_include_metadata_header(&req); let search_result = perform_federated_search( &index_scheduler, queries, @@ -231,8 +231,6 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(search_result?) } None => { - let include_metadata = parse_include_metadata_header(&req); - // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code // changes. 
From 24a92c2809e60106d912d301a6a142001ed70540 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 9 Oct 2025 17:08:39 +0200 Subject: [PATCH 12/15] move contant header in search/mod.rs --- crates/meilisearch/src/routes/mod.rs | 2 +- crates/meilisearch/src/search/federated/mod.rs | 2 +- crates/meilisearch/src/search/federated/proxy.rs | 3 +-- crates/meilisearch/src/search/mod.rs | 4 ++-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs index 74b3215ac..87a98fd24 100644 --- a/crates/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -42,10 +42,10 @@ use crate::routes::multi_search::SearchResults; use crate::routes::network::{Network, Remote}; use crate::routes::swap_indexes::SwapIndexesPayload; use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata}; -use crate::search::INCLUDE_METADATA_HEADER; use crate::search::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult, + INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; use crate::Opt; diff --git a/crates/meilisearch/src/search/federated/mod.rs b/crates/meilisearch/src/search/federated/mod.rs index c2a795c3f..40204c591 100644 --- a/crates/meilisearch/src/search/federated/mod.rs +++ b/crates/meilisearch/src/search/federated/mod.rs @@ -4,7 +4,7 @@ mod types; mod weighted_scores; pub use perform::perform_federated_search; -pub use proxy::{INCLUDE_METADATA_HEADER, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; +pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; pub use types::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, }; diff --git a/crates/meilisearch/src/search/federated/proxy.rs b/crates/meilisearch/src/search/federated/proxy.rs index 26387030c..113db90be 100644 --- 
a/crates/meilisearch/src/search/federated/proxy.rs +++ b/crates/meilisearch/src/search/federated/proxy.rs @@ -7,11 +7,10 @@ use serde::de::DeserializeOwned; use serde_json::Value; use super::types::{FederatedSearch, FederatedSearchResult, Federation}; -use crate::search::SearchQueryWithIndex; +use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER}; pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; -pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata"; mod error { use meilisearch_types::error::ResponseError; diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 85cd9d678..4a09df8fa 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -43,8 +43,7 @@ use crate::error::MeilisearchHttpError; mod federated; pub use federated::{ perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, - FederationOptions, MergeFacets, INCLUDE_METADATA_HEADER, PROXY_SEARCH_HEADER, - PROXY_SEARCH_HEADER_VALUE, + FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, }; mod ranking_rules; @@ -58,6 +57,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5); +pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata"; #[derive(Clone, Default, PartialEq, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] From f19f7124335fc3c4d7a707651fca983982f59339 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 9 Oct 2025 17:53:56 +0200 Subject: [PATCH 13/15] Add local remote name when a remote federated search is made --- crates/meilisearch/src/search/federated/perform.rs | 6 ++++++ 1 file 
changed, 6 insertions(+) diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index 847379e9a..ccb75e023 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -162,12 +162,18 @@ pub async fn perform_federated_search( } } + // if there are remote results, set the local remote name + let local_remote_name = + (!remote_results.is_empty()).then_some(network.local.clone()).flatten(); + // 3.1.2 Build metadata in the same order as the original queries let mut query_metadata = Vec::new(); for (index_uid, remote) in precomputed_query_metadata { let primary_key = primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string()); let query_uid = Uuid::now_v7(); + // if the remote is not set, use the local remote name + let remote = remote.or_else(|| local_remote_name.clone()); query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote }); } Some(query_metadata) From ed6f4799403615630ee28fc5dbd42563e426b210 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 13 Oct 2025 15:46:30 +0200 Subject: [PATCH 14/15] Remove irrelevant test index method --- crates/meilisearch/tests/common/index.rs | 15 --------------- crates/meilisearch/tests/common/server.rs | 6 ++++-- crates/meilisearch/tests/search/metadata.rs | 8 ++++---- 3 files changed, 8 insertions(+), 21 deletions(-) diff --git a/crates/meilisearch/tests/common/index.rs b/crates/meilisearch/tests/common/index.rs index 81415d3b8..2d7af1bd6 100644 --- a/crates/meilisearch/tests/common/index.rs +++ b/crates/meilisearch/tests/common/index.rs @@ -528,21 +528,6 @@ impl Index<'_, State> { self.service.post_str(url, body, all_headers).await } - pub async fn multi_search_post(&self, queries: Value) -> (Value, StatusCode) { - self.service.post("/multi-search", queries).await - } - - pub async fn multi_search_post_with_headers( - &self, - queries: Value, - headers: 
Vec<(&str, &str)>, - ) -> (Value, StatusCode) { - let body = serde_json::to_string(&queries).unwrap(); - let mut all_headers = vec![("content-type", "application/json")]; - all_headers.extend(headers); - self.service.post_str("/multi-search", body, all_headers).await - } - pub async fn search_get(&self, query: &str) -> (Value, StatusCode) { let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query); self.service.get(url).await diff --git a/crates/meilisearch/tests/common/server.rs b/crates/meilisearch/tests/common/server.rs index 53c137644..4dad90ff3 100644 --- a/crates/meilisearch/tests/common/server.rs +++ b/crates/meilisearch/tests/common/server.rs @@ -390,13 +390,15 @@ impl Server { self.service.post("/multi-search", queries).await } - pub async fn multi_search_post_with_headers( + pub async fn multi_search_with_headers( &self, queries: Value, headers: Vec<(&str, &str)>, ) -> (Value, StatusCode) { let body = serde_json::to_string(&queries).unwrap(); - self.service.post_str("/multi-search", body, headers).await + let mut all_headers = vec![("content-type", "application/json")]; + all_headers.extend(headers); + self.service.post_str("/multi-search", body, all_headers).await } pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) { diff --git a/crates/meilisearch/tests/search/metadata.rs b/crates/meilisearch/tests/search/metadata.rs index c4c70ea63..65af3df98 100644 --- a/crates/meilisearch/tests/search/metadata.rs +++ b/crates/meilisearch/tests/search/metadata.rs @@ -129,8 +129,8 @@ async fn multi_search_without_metadata_header() { server.wait_task(task.uid()).await.succeeded(); // Test multi-search without metadata header - let (response, code) = index - .multi_search_post(json!({ + let (response, code) = server + .multi_search(json!({ "queries": [ {"indexUid": index.uid, "q": "glass"}, {"indexUid": index.uid, "q": "dragon"} @@ -195,8 +195,8 @@ async fn multi_search_with_metadata_header() { 
server.wait_task(task.uid()).await.succeeded(); // Test multi-search with metadata header - let (response, code) = index - .multi_search_post_with_headers( + let (response, code) = server + .multi_search_with_headers( json!({ "queries": [ {"indexUid": index.uid, "q": "glass"}, From 22d529523a344ea3ed2345886249013e36df8f6a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 14 Oct 2025 14:39:07 +0200 Subject: [PATCH 15/15] refactor: extract query metadata building logic into separate function --- .../src/search/federated/perform.rs | 134 ++++++++++-------- 1 file changed, 78 insertions(+), 56 deletions(-) diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index ccb75e023..3098037a3 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -61,6 +61,19 @@ pub async fn perform_federated_search( let network = index_scheduler.network(); + // Preconstruct metadata keeping the original queries order for later metadata building + let precomputed_query_metadata: Option> = include_metadata.then(|| { + queries + .iter() + .map(|q| { + ( + q.index_uid.to_string(), + q.federation_options.as_ref().and_then(|o| o.remote.clone()), + ) + }) + .collect() + }); + // this implementation partition the queries by index to guarantee an important property: // - all the queries to a particular index use the same read transaction. // This is an important property, otherwise we cannot guarantee the self-consistency of the results. @@ -68,14 +81,6 @@ pub async fn perform_federated_search( // 1. 
partition queries by host and index let mut partitioned_queries = PartitionedQueries::new(); - // Preconstruct metadata keeping the original queries order for later metadata building - let precomputed_query_metadata: Vec<_> = queries - .iter() - .map(|q| { - (q.index_uid.to_string(), q.federation_options.as_ref().and_then(|o| o.remote.clone())) - }) - .collect(); - for (query_index, federated_query) in queries.into_iter().enumerate() { partitioned_queries.partition(federated_query, query_index, &network, features)? } @@ -131,55 +136,17 @@ pub async fn perform_federated_search( // 3. merge hits and metadata across indexes and hosts // 3.1. Build metadata in the same order as the original queries - let query_metadata = if include_metadata { - // 3.1.1. Create a map of (remote, index_uid) -> primary_key for quick lookup - // This prevents collisions when multiple remotes have the same index_uid but different primary keys - let mut primary_key_per_index = std::collections::HashMap::new(); + let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| { + // If a remote is present, set the local remote name + let local_remote_name = network.local.clone().filter(|_| partitioned_queries.has_remote); - // 3.1.1.1 Build metadata for remote results - for remote_result in &remote_results { - if let Some(remote_metadata) = &remote_result.metadata { - for remote_meta in remote_metadata { - if let SearchMetadata { - remote: Some(remote_name), - index_uid, - primary_key: Some(primary_key), - .. - } = &remote_meta - { - let key = (Some(remote_name), index_uid); - primary_key_per_index.insert(key, primary_key); - } - } - } - } - - // 3.1.1.2 Build metadata for local results - for local_meta in &results_by_index { - if let SearchResultByIndex { index, primary_key: Some(primary_key), .. 
} = &local_meta { - let key = (None, index); - primary_key_per_index.insert(key, primary_key); - } - } - - // if there are remote results, set the local remote name - let local_remote_name = - (!remote_results.is_empty()).then_some(network.local.clone()).flatten(); - - // 3.1.2 Build metadata in the same order as the original queries - let mut query_metadata = Vec::new(); - for (index_uid, remote) in precomputed_query_metadata { - let primary_key = - primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string()); - let query_uid = Uuid::now_v7(); - // if the remote is not set, use the local remote name - let remote = remote.or_else(|| local_remote_name.clone()); - query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote }); - } - Some(query_metadata) - } else { - None - }; + build_query_metadata( + precomputed_query_metadata, + local_remote_name, + &remote_results, + &results_by_index, + ) + }); // 3.2. merge federation metadata let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) = @@ -478,6 +445,61 @@ struct SearchResultByIndex { facets: Option, } +/// Builds query metadata for federated search results. +/// +/// This function creates metadata for each query in the same order as the original queries, +/// combining information from both local and remote search results. It handles the mapping +/// of primary keys to their respective indexes and remotes to prevent collisions when +/// multiple remotes have the same index_uid but different primary keys. 
+fn build_query_metadata( + precomputed_query_metadata: Vec<(String, Option)>, + local_remote_name: Option, + remote_results: &[FederatedSearchResult], + results_by_index: &[SearchResultByIndex], +) -> Vec { + // Create a map of (remote, index_uid) -> primary_key for quick lookup + // This prevents collisions when multiple remotes have the same index_uid but different primary keys + let mut primary_key_per_index = std::collections::HashMap::new(); + + // Build metadata for remote results + for remote_result in remote_results { + if let Some(remote_metadata) = &remote_result.metadata { + for remote_meta in remote_metadata { + if let SearchMetadata { + remote: Some(remote_name), + index_uid, + primary_key: Some(primary_key), + .. + } = remote_meta + { + let key = (Some(remote_name), index_uid); + primary_key_per_index.insert(key, primary_key); + } + } + } + } + + // Build metadata for local results + for local_meta in results_by_index { + if let SearchResultByIndex { index, primary_key: Some(primary_key), .. } = local_meta { + let key = (None, index); + primary_key_per_index.insert(key, primary_key); + } + } + + // Build metadata in the same order as the original queries + let mut query_metadata = Vec::new(); + for (index_uid, remote) in precomputed_query_metadata { + let primary_key = + primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string()); + let query_uid = Uuid::now_v7(); + // if the remote is not set, use the local remote name + let remote = remote.or_else(|| local_remote_name.clone()); + query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote }); + } + query_metadata +} + fn merge_metadata( results_by_index: &mut Vec, remote_results: &Vec,