diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 445a3bb54..8012f2302 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -22,11 +22,12 @@ use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST}; +use crate::routes::parse_include_metadata_header; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, - RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH, - DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, + RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult, SemanticRatio, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, }; use crate::search_queue::SearchQueue; @@ -345,15 +346,20 @@ pub async fn search_with_url_query( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors); let permit = search_queue.try_get_search_permit().await?; + let include_metadata = parse_include_metadata_header(&req); + let search_result = tokio::task::spawn_blocking(move || { perform_search( - index_uid.to_string(), + SearchParams { + index_uid: index_uid.to_string(), + query, + search_kind, + retrieve_vectors: retrieve_vector, + features: index_scheduler.features(), + request_uid, + include_metadata, + }, &index, - query, - search_kind, - retrieve_vector, - index_scheduler.features(), - request_uid, ) }) .await; @@ -453,16 +459,21 @@ pub async fn 
search_with_post( search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); + let include_metadata = parse_include_metadata_header(&req); + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search( - index_uid.to_string(), + SearchParams { + index_uid: index_uid.to_string(), + query, + search_kind, + retrieve_vectors, + features: index_scheduler.features(), + request_uid, + include_metadata, + }, &index, - query, - search_kind, - retrieve_vectors, - index_scheduler.features(), - request_uid, ) }) .await; diff --git a/crates/meilisearch/src/routes/indexes/search_analytics.rs b/crates/meilisearch/src/routes/indexes/search_analytics.rs index 9f095b007..09045fc4a 100644 --- a/crates/meilisearch/src/routes/indexes/search_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/search_analytics.rs @@ -235,6 +235,7 @@ impl SearchAggregator { degraded, used_negative_operator, request_uid: _, + metadata: _, } = result; self.total_succeeded = self.total_succeeded.saturating_add(1); diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs index 5a6780cbb..87a98fd24 100644 --- a/crates/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -45,6 +45,7 @@ use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetada use crate::search::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult, + INCLUDE_METADATA_HEADER, }; use crate::search_queue::SearchQueue; use crate::Opt; @@ -184,6 +185,17 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result { .is_some_and(|s| s.to_lowercase() == "true")) } +/// Parse the `Meili-Include-Metadata` header from an HTTP request.
+/// +/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive). +/// Returns `false` if the header is not present or has any other value. +pub fn parse_include_metadata_header(req: &HttpRequest) -> bool { + req.headers() + .get(INCLUDE_METADATA_HEADER) + .and_then(|h| h.to_str().ok()) + .is_some_and(|v| v == "1" || v.eq_ignore_ascii_case("true")) +} + #[derive(Debug, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index 15931644f..4e833072a 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -18,10 +18,11 @@ use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::indexes::search::search_kind; +use crate::routes::parse_include_metadata_header; use crate::search::{ add_search_rules, perform_federated_search, perform_search, FederatedSearch, - FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, - PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, + FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex, + SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, }; use crate::search_queue::SearchQueue; @@ -188,6 +189,7 @@ pub async fn multi_search_with_post( err })?; + let include_metadata = parse_include_metadata_header(&req); let response = match federation { Some(federation) => { debug!( @@ -209,6 +211,7 @@ pub async fn multi_search_with_post( features, is_proxy, request_uid, + include_metadata, ) .await; permit.drop().await; @@ -279,13 +282,16 @@ pub async fn multi_search_with_post( let search_result = tokio::task::spawn_blocking(move || { perform_search( - index_uid_str.clone(),
+ SearchParams { + index_uid: index_uid_str.clone(), + query, + search_kind, + retrieve_vectors: retrieve_vector, + features, + request_uid, + include_metadata, + }, &index, - query, - search_kind, - retrieve_vector, - features, - request_uid, ) }) .await diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index bef2a0614..3098037a3 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -20,6 +20,7 @@ use tokio::task::JoinHandle; use uuid::Uuid; use super::super::ranking_rules::{self, RankingRules}; +use super::super::SearchMetadata; use super::super::{ compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, @@ -41,6 +42,7 @@ pub async fn perform_federated_search( features: RoFeatures, is_proxy: bool, request_uid: Uuid, + include_metadata: bool, ) -> Result { if is_proxy { features.check_network("Performing a remote federated search")?; @@ -59,20 +61,38 @@ pub async fn perform_federated_search( let network = index_scheduler.network(); + // Preconstruct metadata keeping the original queries order for later metadata building + let precomputed_query_metadata: Option> = include_metadata.then(|| { + queries + .iter() + .map(|q| { + ( + q.index_uid.to_string(), + q.federation_options.as_ref().and_then(|o| o.remote.clone()), + ) + }) + .collect() + }); + // this implementation partition the queries by index to guarantee an important property: // - all the queries to a particular index use the same read transaction. // This is an important property, otherwise we cannot guarantee the self-consistency of the results. // 1. 
partition queries by host and index let mut partitioned_queries = PartitionedQueries::new(); + for (query_index, federated_query) in queries.into_iter().enumerate() { partitioned_queries.partition(federated_query, query_index, &network, features)? } // 2. perform queries, merge and make hits index by index // 2.1. start remote queries - let remote_search = - RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline); + let remote_search = RemoteSearch::start( + partitioned_queries.remote_queries_by_host, + &federation, + deadline, + include_metadata, + ); // 2.2. concurrently execute local queries let params = SearchByIndexParams { @@ -114,11 +134,25 @@ pub async fn perform_federated_search( let after_waiting_remote_results = std::time::Instant::now(); // 3. merge hits and metadata across indexes and hosts - // 3.1. merge metadata + + // 3.1. Build metadata in the same order as the original queries + let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| { + // If a remote is present, set the local remote name + let local_remote_name = network.local.clone().filter(|_| partitioned_queries.has_remote); + + build_query_metadata( + precomputed_query_metadata, + local_remote_name, + &remote_results, + &results_by_index, + ) + }); + + // 3.2. merge federation metadata let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) = merge_metadata(&mut results_by_index, &remote_results); - // 3.2. merge hits + // 3.3. merge hits let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results) .skip(federation.offset) .take(federation.limit) @@ -133,7 +167,7 @@ pub async fn perform_federated_search( .map(|hit| hit.hit()) .collect(); - // 3.3. merge query vectors + // 3.4. 
merge query vectors let query_vectors = if retrieve_vectors { for remote_results in remote_results.iter_mut() { if let Some(remote_vectors) = remote_results.query_vectors.take() { @@ -152,7 +186,7 @@ pub async fn perform_federated_search( None }; - // 3.4. merge facets + // 3.5. merge facets let (facet_distribution, facet_stats, facets_by_index) = facet_order.merge(federation.merge_facets, remote_results, facets); @@ -179,6 +213,7 @@ pub async fn perform_federated_search( facets_by_index, remote_errors: partitioned_queries.has_remote.then_some(remote_errors), request_uid: Some(request_uid), + metadata: query_metadata, }) } @@ -402,6 +437,7 @@ struct SearchHitByIndex { struct SearchResultByIndex { index: String, + primary_key: Option, hits: Vec, estimated_total_hits: usize, degraded: bool, @@ -409,6 +445,61 @@ struct SearchResultByIndex { facets: Option, } +/// Builds query metadata for federated search results. +/// +/// This function creates metadata for each query in the same order as the original queries, +/// combining information from both local and remote search results. It handles the mapping +/// of primary keys to their respective indexes and remotes to prevent collisions when +/// multiple remotes have the same index_uid but different primary keys. 
+fn build_query_metadata( + precomputed_query_metadata: Vec<(String, Option)>, + local_remote_name: Option, + remote_results: &[FederatedSearchResult], + results_by_index: &[SearchResultByIndex], +) -> Vec { + // Create a map of (remote, index_uid) -> primary_key for quick lookup + // This prevents collisions when multiple remotes have the same index_uid but different primary keys + let mut primary_key_per_index = std::collections::HashMap::new(); + + // Build metadata for remote results + for remote_result in remote_results { + if let Some(remote_metadata) = &remote_result.metadata { + for remote_meta in remote_metadata { + if let SearchMetadata { + remote: Some(remote_name), + index_uid, + primary_key: Some(primary_key), + .. + } = remote_meta + { + let key = (Some(remote_name), index_uid); + primary_key_per_index.insert(key, primary_key); + } + } + } + } + + // Build metadata for local results + for local_meta in results_by_index { + if let SearchResultByIndex { index, primary_key: Some(primary_key), .. 
} = local_meta { + let key = (None, index); + primary_key_per_index.insert(key, primary_key); + } + } + + // Build metadata in the same order as the original queries + let mut query_metadata = Vec::new(); + for (index_uid, remote) in precomputed_query_metadata { + let primary_key = + primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string()); + let query_uid = Uuid::now_v7(); + // if the remote is not set, use the local remote name + let remote = remote.or_else(|| local_remote_name.clone()); + query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote }); + } + query_metadata +} + fn merge_metadata( results_by_index: &mut Vec, remote_results: &Vec, @@ -420,6 +511,7 @@ fn merge_metadata( let mut max_remote_duration = Duration::ZERO; for SearchResultByIndex { index, + primary_key: _, hits: _, estimated_total_hits: estimated_total_hits_by_index, facets: facets_by_index, @@ -448,6 +540,7 @@ fn merge_metadata( degraded: degraded_for_host, used_negative_operator: host_used_negative_operator, remote_errors: _, + metadata: _, request_uid: _, } in remote_results { @@ -576,7 +669,12 @@ struct RemoteSearch { } impl RemoteSearch { - fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self { + fn start( + queries: RemoteQueriesByHost, + federation: &Federation, + deadline: Instant, + include_metadata: bool, + ) -> Self { let mut in_flight_remote_queries = BTreeMap::new(); let client = reqwest::ClientBuilder::new() .connect_timeout(std::time::Duration::from_millis(200)) @@ -596,7 +694,10 @@ impl RemoteSearch { // never merge distant facets proxy_federation.merge_facets = None; let params = params.clone(); - async move { proxy_search(&node, queries, proxy_federation, &params).await } + async move { + proxy_search(&node, queries, proxy_federation, &params, include_metadata) + .await + } }), ); } @@ -640,6 +741,13 @@ impl RemoteSearch { continue 'remote_queries; } + // Add remote name to metadata + if let
Some(metadata) = res.metadata.as_mut() { + for meta in metadata { + meta.remote = Some(node_name.clone()); + } + } + federation.insert( FEDERATION_REMOTE.to_string(), serde_json::Value::String(node_name.clone()), @@ -735,6 +843,7 @@ impl SearchByIndex { } }; let rtxn = index.read_txn()?; + let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); let criteria = index.criteria(&rtxn)?; let dictionary = index.dictionary(&rtxn)?; let dictionary: Option> = @@ -987,6 +1096,7 @@ impl SearchByIndex { })?; self.results_by_index.push(SearchResultByIndex { index: index_uid, + primary_key, hits: merged_result, estimated_total_hits, degraded, diff --git a/crates/meilisearch/src/search/federated/proxy.rs b/crates/meilisearch/src/search/federated/proxy.rs index 67594f645..113db90be 100644 --- a/crates/meilisearch/src/search/federated/proxy.rs +++ b/crates/meilisearch/src/search/federated/proxy.rs @@ -7,7 +7,7 @@ use serde::de::DeserializeOwned; use serde_json::Value; use super::types::{FederatedSearch, FederatedSearchResult, Federation}; -use crate::search::SearchQueryWithIndex; +use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER}; pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; @@ -98,6 +98,7 @@ pub async fn proxy_search( queries: Vec, federation: Federation, params: &ProxySearchParams, + include_metadata: bool, ) -> Result { let url = format!("{}/multi-search", node.url); @@ -119,7 +120,16 @@ pub async fn proxy_search( }; for i in 0..params.try_count { - match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await { + match try_proxy_search( + &url, + search_api_key, + &federated, + &params.client, + deadline, + include_metadata, + ) + .await + { Ok(response) => return Ok(response), Err(retry) => { let duration = retry.into_duration(i)?; @@ -127,7 +137,7 @@ pub async fn proxy_search( } } } - try_proxy_search(&url, search_api_key, &federated, &params.client, deadline) +
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata) .await .map_err(Retry::into_error) } @@ -138,6 +148,7 @@ async fn try_proxy_search( federated: &FederatedSearch, client: &Client, deadline: std::time::Instant, + include_metadata: bool, ) -> Result { let timeout = deadline.saturating_duration_since(std::time::Instant::now()); @@ -148,6 +159,8 @@ async fn try_proxy_search( request }; let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE); + let request = + if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request }; let response = request.send().await; let response = match response { diff --git a/crates/meilisearch/src/search/federated/types.rs b/crates/meilisearch/src/search/federated/types.rs index db30314ee..b771223c8 100644 --- a/crates/meilisearch/src/search/federated/types.rs +++ b/crates/meilisearch/src/search/federated/types.rs @@ -18,6 +18,8 @@ use serde::{Deserialize, Serialize}; use utoipa::ToSchema; use uuid::Uuid; +use crate::search::SearchMetadata; + use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex}; use crate::milli::vector::Embedding; @@ -134,6 +136,8 @@ pub struct FederatedSearchResult { pub facets_by_index: FederatedFacets, #[serde(default, skip_serializing_if = "Option::is_none")] pub request_uid: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metadata: Option>, #[serde(default, skip_serializing_if = "Option::is_none")] pub remote_errors: Option>, @@ -160,6 +164,7 @@ impl fmt::Debug for FederatedSearchResult { facets_by_index, remote_errors, request_uid, + metadata, } = self; let mut debug = f.debug_struct("SearchResult"); @@ -195,6 +200,9 @@ impl fmt::Debug for FederatedSearchResult { if let Some(request_uid) = request_uid { debug.field("request_uid", &request_uid); } + if let Some(metadata) = metadata { + debug.field("metadata", &metadata); + } debug.finish() } diff --git
a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 329263271..4a09df8fa 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -57,6 +57,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5); +pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata"; #[derive(Clone, Default, PartialEq, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] @@ -836,6 +837,18 @@ pub struct SearchHit { pub ranking_score_details: Option>, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct SearchMetadata { + pub query_uid: Uuid, + pub index_uid: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub primary_key: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub remote: Option, +} + #[derive(Serialize, Clone, PartialEq, ToSchema)] #[serde(rename_all = "camelCase")] #[schema(rename_all = "camelCase")] @@ -854,6 +867,8 @@ pub struct SearchResult { pub facet_stats: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub request_uid: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, #[serde(skip_serializing_if = "Option::is_none")] pub semantic_hit_count: Option, @@ -876,6 +891,7 @@ impl fmt::Debug for SearchResult { facet_distribution, facet_stats, request_uid, + metadata, semantic_hit_count, degraded, used_negative_operator, @@ -908,6 +924,9 @@ impl fmt::Debug for SearchResult { if let Some(request_uid) = request_uid { debug.field("request_uid", &request_uid); } + if let Some(metadata) = metadata { + debug.field("metadata", &metadata); + } 
debug.finish() } @@ -1120,16 +1139,28 @@ pub fn prepare_search<'t>( Ok((search, is_finite_pagination, max_total_hits, offset)) } -pub fn perform_search( - index_uid: String, - index: &Index, - query: SearchQuery, - search_kind: SearchKind, - retrieve_vectors: RetrieveVectors, - features: RoFeatures, - request_uid: Uuid, -) -> Result { +pub struct SearchParams { + pub index_uid: String, + pub query: SearchQuery, + pub search_kind: SearchKind, + pub retrieve_vectors: RetrieveVectors, + pub features: RoFeatures, + pub request_uid: Uuid, + pub include_metadata: bool, +} + +pub fn perform_search(params: SearchParams, index: &Index) -> Result { + let SearchParams { + index_uid, + query, + search_kind, + retrieve_vectors, + features, + request_uid, + include_metadata, + } = params; let before_search = Instant::now(); + let index_uid_for_metadata = index_uid.clone(); // `index_uid` itself is moved into `search_from_kind` below let rtxn = index.read_txn()?; let time_budget = match index.search_cutoff(&rtxn)? { Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), @@ -1150,7 +1181,20 @@ pub fn perform_search( query_vector, }, semantic_hit_count, ) = search_from_kind(index_uid, search_kind, search)?; + + let metadata = if include_metadata { + let query_uid = Uuid::now_v7(); + let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + Some(SearchMetadata { + query_uid, + index_uid: index_uid_for_metadata, + primary_key, + remote: None, // Local searches don't have a remote + }) + } else { + None + }; let SearchQuery { q, @@ -1233,7 +1277,6 @@ pub fn perform_search( .transpose()?
.map(|ComputedFacets { distribution, stats }| (distribution, stats)) .unzip(); - let result = SearchResult { hits: documents, hits_info, @@ -1246,6 +1289,7 @@ pub fn perform_search( used_negative_operator, semantic_hit_count, request_uid: Some(request_uid), + metadata, }; Ok(result) } diff --git a/crates/meilisearch/tests/common/index.rs b/crates/meilisearch/tests/common/index.rs index f8ff5ced9..2d7af1bd6 100644 --- a/crates/meilisearch/tests/common/index.rs +++ b/crates/meilisearch/tests/common/index.rs @@ -516,6 +516,18 @@ impl Index<'_, State> { self.service.post_encoded(url, query, self.encoder).await } + pub async fn search_with_headers( + &self, + query: Value, + headers: Vec<(&str, &str)>, + ) -> (Value, StatusCode) { + let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref())); + let body = serde_json::to_string(&query).unwrap(); + let mut all_headers = vec![("content-type", "application/json")]; + all_headers.extend(headers); + self.service.post_str(url, body, all_headers).await + } + pub async fn search_get(&self, query: &str) -> (Value, StatusCode) { let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query); self.service.get(url).await diff --git a/crates/meilisearch/tests/common/server.rs b/crates/meilisearch/tests/common/server.rs index b87dbe0ad..4dad90ff3 100644 --- a/crates/meilisearch/tests/common/server.rs +++ b/crates/meilisearch/tests/common/server.rs @@ -390,6 +390,17 @@ impl Server { self.service.post("/multi-search", queries).await } + pub async fn multi_search_with_headers( + &self, + queries: Value, + headers: Vec<(&str, &str)>, + ) -> (Value, StatusCode) { + let body = serde_json::to_string(&queries).unwrap(); + let mut all_headers = vec![("content-type", "application/json")]; + all_headers.extend(headers); + self.service.post_str("/multi-search", body, all_headers).await + } + pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) { 
self.service.get(format!("/indexes{parameters}")).await } diff --git a/crates/meilisearch/tests/search/metadata.rs b/crates/meilisearch/tests/search/metadata.rs new file mode 100644 index 000000000..65af3df98 --- /dev/null +++ b/crates/meilisearch/tests/search/metadata.rs @@ -0,0 +1,387 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{shared_index_with_documents, Server, DOCUMENTS}; +use crate::json; + +#[actix_rt::test] +async fn search_without_metadata_header() { + let index = shared_index_with_documents().await; + + // Test that metadata is not included by default + index + .search(json!({"q": "glass"}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]" + } + "###); + }) + .await; +} + +#[actix_rt::test] +async fn search_with_metadata_header() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test with Meili-Include-Metadata header + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + 
"indexUid": "[uuid]", + "primaryKey": "id" + } + } + "###); +} + +#[actix_rt::test] +async fn search_with_metadata_header_and_primary_key() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, Some("id")).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test with Meili-Include-Metadata header + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "id": "450465", + "title": "Gläss", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + } + "###); +} + +#[actix_rt::test] +async fn multi_search_without_metadata_header() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test multi-search without metadata header + let (response, code) = server + .multi_search(json!({ + "queries": [ + {"indexUid": index.uid, "q": "glass"}, + {"indexUid": index.uid, "q": "dragon"} + ] + })) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]" }), @r###" + { + "results": [ + { + "indexUid": "[uuid]", + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + 
"query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]" + }, + { + "indexUid": "[uuid]", + "hits": [ + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", + "color": [ + "green", + "red" + ] + } + ], + "query": "dragon", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]" + } + ] + } + "###); +} + +#[actix_rt::test] +async fn multi_search_with_metadata_header() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, Some("id")).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test multi-search with metadata header + let (response, code) = server + .multi_search_with_headers( + json!({ + "queries": [ + {"indexUid": index.uid, "q": "glass"}, + {"indexUid": index.uid, "q": "dragon"} + ] + }), + vec![("Meili-Include-Metadata", "true")], + ) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[0].metadata.queryUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]", ".results[1].metadata.queryUid" => "[uuid]" }), @r###" + { + "results": [ + { + "indexUid": "[uuid]", + "hits": [ + { + "id": "450465", + "title": "Gläss", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + }, + { + "indexUid": "[uuid]", + "hits": [ + { + "id": "166428", + "title": "How to Train Your Dragon: The Hidden World", + "color": [ + "green", + "red" + ] + } + ], + "query": "dragon", + "processingTimeMs": "[duration]", + 
"limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + } + ] + } + "###); +} + +#[actix_rt::test] +async fn search_metadata_header_false_value() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + // Test with header set to false + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "false")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]" + } + "###); +} + +#[actix_rt::test] +async fn search_metadata_uuid_format() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + } + 
"###); +} + +#[actix_rt::test] +async fn search_metadata_consistency_across_requests() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = DOCUMENTS.clone(); + let (task, _code) = index.add_documents(documents, Some("id")).await; + server.wait_task(task.uid()).await.succeeded(); + + // Make multiple requests and check that metadata is consistent + for _i in 0..3 { + let (response, code) = index + .search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")]) + .await; + + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###" + { + "hits": [ + { + "id": "450465", + "title": "Gläss", + "color": [ + "blue", + "red" + ] + } + ], + "query": "glass", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "requestUid": "[uuid]", + "metadata": { + "queryUid": "[uuid]", + "indexUid": "[uuid]", + "primaryKey": "id" + } + } + "###); + } +} diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index 1f14a380e..69a69dee6 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -11,6 +11,7 @@ mod hybrid; #[cfg(not(feature = "chinese-pinyin"))] mod locales; mod matching_strategy; +mod metadata; mod multi; mod pagination; mod restrict_searchable;