mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-16 16:46:29 +00:00
Merge pull request #5926 from meilisearch/search-metadata
Search metadata
This commit is contained in:
@@ -22,11 +22,12 @@ use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
|
||||
use crate::routes::parse_include_metadata_header;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult, SemanticRatio,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -345,15 +346,20 @@ pub async fn search_with_url_query(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
SearchParams {
|
||||
index_uid: index_uid.to_string(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors: retrieve_vector,
|
||||
features: index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
index_scheduler.features(),
|
||||
request_uid,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
@@ -453,16 +459,21 @@ pub async fn search_with_post(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid.to_string(),
|
||||
SearchParams {
|
||||
index_uid: index_uid.to_string(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
features: index_scheduler.features(),
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
request_uid,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
@@ -235,6 +235,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
request_uid: _,
|
||||
metadata: _,
|
||||
} = result;
|
||||
|
||||
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||
|
@@ -45,6 +45,7 @@ use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetada
|
||||
use crate::search::{
|
||||
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
|
||||
SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
|
||||
INCLUDE_METADATA_HEADER,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
use crate::Opt;
|
||||
@@ -184,6 +185,18 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
|
||||
.is_some_and(|s| s.to_lowercase() == "true"))
|
||||
}
|
||||
|
||||
/// Parse the `Meili-Include-Metadata` header from an HTTP request.
|
||||
///
|
||||
/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive).
|
||||
/// Returns `false` if the header is not present or has any other value.
|
||||
pub fn parse_include_metadata_header(req: &HttpRequest) -> bool {
|
||||
req.headers()
|
||||
.get(INCLUDE_METADATA_HEADER)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.map(|v| matches!(v.to_lowercase().as_str(), "true" | "1"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
|
@@ -18,10 +18,11 @@ use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::routes::parse_include_metadata_header;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
|
||||
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
||||
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex,
|
||||
SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -188,6 +189,7 @@ pub async fn multi_search_with_post(
|
||||
err
|
||||
})?;
|
||||
|
||||
let include_metadata = parse_include_metadata_header(&req);
|
||||
let response = match federation {
|
||||
Some(federation) => {
|
||||
debug!(
|
||||
@@ -209,6 +211,7 @@ pub async fn multi_search_with_post(
|
||||
features,
|
||||
is_proxy,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
)
|
||||
.await;
|
||||
permit.drop().await;
|
||||
@@ -279,13 +282,16 @@ pub async fn multi_search_with_post(
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
index_uid_str.clone(),
|
||||
SearchParams {
|
||||
index_uid: index_uid_str.clone(),
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors: retrieve_vector,
|
||||
features,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
},
|
||||
&index,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vector,
|
||||
features,
|
||||
request_uid,
|
||||
)
|
||||
})
|
||||
.await
|
||||
|
@@ -20,6 +20,7 @@ use tokio::task::JoinHandle;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::super::ranking_rules::{self, RankingRules};
|
||||
use super::super::SearchMetadata;
|
||||
use super::super::{
|
||||
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker,
|
||||
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
|
||||
@@ -41,6 +42,7 @@ pub async fn perform_federated_search(
|
||||
features: RoFeatures,
|
||||
is_proxy: bool,
|
||||
request_uid: Uuid,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ResponseError> {
|
||||
if is_proxy {
|
||||
features.check_network("Performing a remote federated search")?;
|
||||
@@ -59,20 +61,38 @@ pub async fn perform_federated_search(
|
||||
|
||||
let network = index_scheduler.network();
|
||||
|
||||
// Preconstruct metadata keeping the original queries order for later metadata building
|
||||
let precomputed_query_metadata: Option<Vec<_>> = include_metadata.then(|| {
|
||||
queries
|
||||
.iter()
|
||||
.map(|q| {
|
||||
(
|
||||
q.index_uid.to_string(),
|
||||
q.federation_options.as_ref().and_then(|o| o.remote.clone()),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
||||
// this implementation partitions the queries by index to guarantee an important property:
|
||||
// - all the queries to a particular index use the same read transaction.
|
||||
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
|
||||
|
||||
// 1. partition queries by host and index
|
||||
let mut partitioned_queries = PartitionedQueries::new();
|
||||
|
||||
for (query_index, federated_query) in queries.into_iter().enumerate() {
|
||||
partitioned_queries.partition(federated_query, query_index, &network, features)?
|
||||
}
|
||||
|
||||
// 2. perform queries, merge and make hits index by index
|
||||
// 2.1. start remote queries
|
||||
let remote_search =
|
||||
RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline);
|
||||
let remote_search = RemoteSearch::start(
|
||||
partitioned_queries.remote_queries_by_host,
|
||||
&federation,
|
||||
deadline,
|
||||
include_metadata,
|
||||
);
|
||||
|
||||
// 2.2. concurrently execute local queries
|
||||
let params = SearchByIndexParams {
|
||||
@@ -114,11 +134,25 @@ pub async fn perform_federated_search(
|
||||
let after_waiting_remote_results = std::time::Instant::now();
|
||||
|
||||
// 3. merge hits and metadata across indexes and hosts
|
||||
// 3.1. merge metadata
|
||||
|
||||
// 3.1. Build metadata in the same order as the original queries
|
||||
let query_metadata = precomputed_query_metadata.map(|precomputed_query_metadata| {
|
||||
// If a remote is present, set the local remote name
|
||||
let local_remote_name = network.local.clone().filter(|_| partitioned_queries.has_remote);
|
||||
|
||||
build_query_metadata(
|
||||
precomputed_query_metadata,
|
||||
local_remote_name,
|
||||
&remote_results,
|
||||
&results_by_index,
|
||||
)
|
||||
});
|
||||
|
||||
// 3.2. merge federation metadata
|
||||
let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) =
|
||||
merge_metadata(&mut results_by_index, &remote_results);
|
||||
|
||||
// 3.2. merge hits
|
||||
// 3.3. merge hits
|
||||
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
|
||||
.skip(federation.offset)
|
||||
.take(federation.limit)
|
||||
@@ -133,7 +167,7 @@ pub async fn perform_federated_search(
|
||||
.map(|hit| hit.hit())
|
||||
.collect();
|
||||
|
||||
// 3.3. merge query vectors
|
||||
// 3.4. merge query vectors
|
||||
let query_vectors = if retrieve_vectors {
|
||||
for remote_results in remote_results.iter_mut() {
|
||||
if let Some(remote_vectors) = remote_results.query_vectors.take() {
|
||||
@@ -152,7 +186,7 @@ pub async fn perform_federated_search(
|
||||
None
|
||||
};
|
||||
|
||||
// 3.4. merge facets
|
||||
// 3.5. merge facets
|
||||
let (facet_distribution, facet_stats, facets_by_index) =
|
||||
facet_order.merge(federation.merge_facets, remote_results, facets);
|
||||
|
||||
@@ -179,6 +213,7 @@ pub async fn perform_federated_search(
|
||||
facets_by_index,
|
||||
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
|
||||
request_uid: Some(request_uid),
|
||||
metadata: query_metadata,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -402,6 +437,7 @@ struct SearchHitByIndex {
|
||||
|
||||
struct SearchResultByIndex {
|
||||
index: String,
|
||||
primary_key: Option<String>,
|
||||
hits: Vec<SearchHitByIndex>,
|
||||
estimated_total_hits: usize,
|
||||
degraded: bool,
|
||||
@@ -409,6 +445,61 @@ struct SearchResultByIndex {
|
||||
facets: Option<ComputedFacets>,
|
||||
}
|
||||
|
||||
/// Builds query metadata for federated search results.
|
||||
///
|
||||
/// This function creates metadata for each query in the same order as the original queries,
|
||||
/// combining information from both local and remote search results. It handles the mapping
|
||||
/// of primary keys to their respective indexes and remotes to prevent collisions when
|
||||
/// multiple remotes have the same index_uid but different primary keys.
|
||||
fn build_query_metadata(
|
||||
precomputed_query_metadata: Vec<(String, Option<String>)>,
|
||||
local_remote_name: Option<String>,
|
||||
remote_results: &[FederatedSearchResult],
|
||||
results_by_index: &[SearchResultByIndex],
|
||||
) -> Vec<SearchMetadata> {
|
||||
// Create a map of (remote, index_uid) -> primary_key for quick lookup
|
||||
// This prevents collisions when multiple remotes have the same index_uid but different primary keys
|
||||
let mut primary_key_per_index = std::collections::HashMap::new();
|
||||
|
||||
// Build metadata for remote results
|
||||
for remote_result in remote_results {
|
||||
if let Some(remote_metadata) = &remote_result.metadata {
|
||||
for remote_meta in remote_metadata {
|
||||
if let SearchMetadata {
|
||||
remote: Some(remote_name),
|
||||
index_uid,
|
||||
primary_key: Some(primary_key),
|
||||
..
|
||||
} = remote_meta
|
||||
{
|
||||
let key = (Some(remote_name), index_uid);
|
||||
primary_key_per_index.insert(key, primary_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build metadata for local results
|
||||
for local_meta in results_by_index {
|
||||
if let SearchResultByIndex { index, primary_key: Some(primary_key), .. } = local_meta {
|
||||
let key = (None, index);
|
||||
primary_key_per_index.insert(key, primary_key);
|
||||
}
|
||||
}
|
||||
|
||||
// Build metadata in the same order as the original queries
|
||||
let mut query_metadata = Vec::new();
|
||||
for (index_uid, remote) in precomputed_query_metadata {
|
||||
let primary_key =
|
||||
primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string());
|
||||
let query_uid = Uuid::now_v7();
|
||||
// if the remote is not set, use the local remote name
|
||||
let remote = remote.or_else(|| local_remote_name.clone());
|
||||
query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote });
|
||||
}
|
||||
query_metadata
|
||||
}
|
||||
|
||||
fn merge_metadata(
|
||||
results_by_index: &mut Vec<SearchResultByIndex>,
|
||||
remote_results: &Vec<FederatedSearchResult>,
|
||||
@@ -420,6 +511,7 @@ fn merge_metadata(
|
||||
let mut max_remote_duration = Duration::ZERO;
|
||||
for SearchResultByIndex {
|
||||
index,
|
||||
primary_key: _,
|
||||
hits: _,
|
||||
estimated_total_hits: estimated_total_hits_by_index,
|
||||
facets: facets_by_index,
|
||||
@@ -448,6 +540,7 @@ fn merge_metadata(
|
||||
degraded: degraded_for_host,
|
||||
used_negative_operator: host_used_negative_operator,
|
||||
remote_errors: _,
|
||||
metadata: _,
|
||||
request_uid: _,
|
||||
} in remote_results
|
||||
{
|
||||
@@ -576,7 +669,12 @@ struct RemoteSearch {
|
||||
}
|
||||
|
||||
impl RemoteSearch {
|
||||
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
|
||||
fn start(
|
||||
queries: RemoteQueriesByHost,
|
||||
federation: &Federation,
|
||||
deadline: Instant,
|
||||
include_metadata: bool,
|
||||
) -> Self {
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_millis(200))
|
||||
@@ -596,7 +694,10 @@ impl RemoteSearch {
|
||||
// never merge distant facets
|
||||
proxy_federation.merge_facets = None;
|
||||
let params = params.clone();
|
||||
async move { proxy_search(&node, queries, proxy_federation, ¶ms).await }
|
||||
async move {
|
||||
proxy_search(&node, queries, proxy_federation, ¶ms, include_metadata)
|
||||
.await
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
@@ -640,6 +741,13 @@ impl RemoteSearch {
|
||||
continue 'remote_queries;
|
||||
}
|
||||
|
||||
// Add remote name to metadata
|
||||
if let Some(metadata) = res.metadata.as_mut() {
|
||||
for meta in metadata {
|
||||
meta.remote = Some(node_name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
federation.insert(
|
||||
FEDERATION_REMOTE.to_string(),
|
||||
serde_json::Value::String(node_name.clone()),
|
||||
@@ -735,6 +843,7 @@ impl SearchByIndex {
|
||||
}
|
||||
};
|
||||
let rtxn = index.read_txn()?;
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
let criteria = index.criteria(&rtxn)?;
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
@@ -987,6 +1096,7 @@ impl SearchByIndex {
|
||||
})?;
|
||||
self.results_by_index.push(SearchResultByIndex {
|
||||
index: index_uid,
|
||||
primary_key,
|
||||
hits: merged_result,
|
||||
estimated_total_hits,
|
||||
degraded,
|
||||
|
@@ -7,7 +7,7 @@ use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::types::{FederatedSearch, FederatedSearchResult, Federation};
|
||||
use crate::search::SearchQueryWithIndex;
|
||||
use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER};
|
||||
|
||||
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
|
||||
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
|
||||
@@ -98,6 +98,7 @@ pub async fn proxy_search(
|
||||
queries: Vec<SearchQueryWithIndex>,
|
||||
federation: Federation,
|
||||
params: &ProxySearchParams,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, ProxySearchError> {
|
||||
let url = format!("{}/multi-search", node.url);
|
||||
|
||||
@@ -119,7 +120,16 @@ pub async fn proxy_search(
|
||||
};
|
||||
|
||||
for i in 0..params.try_count {
|
||||
match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await {
|
||||
match try_proxy_search(
|
||||
&url,
|
||||
search_api_key,
|
||||
&federated,
|
||||
¶ms.client,
|
||||
deadline,
|
||||
include_metadata,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => return Ok(response),
|
||||
Err(retry) => {
|
||||
let duration = retry.into_duration(i)?;
|
||||
@@ -127,7 +137,7 @@ pub async fn proxy_search(
|
||||
}
|
||||
}
|
||||
}
|
||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline)
|
||||
try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline, include_metadata)
|
||||
.await
|
||||
.map_err(Retry::into_error)
|
||||
}
|
||||
@@ -138,6 +148,7 @@ async fn try_proxy_search(
|
||||
federated: &FederatedSearch,
|
||||
client: &Client,
|
||||
deadline: std::time::Instant,
|
||||
include_metadata: bool,
|
||||
) -> Result<FederatedSearchResult, Retry> {
|
||||
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
|
||||
|
||||
@@ -148,6 +159,8 @@ async fn try_proxy_search(
|
||||
request
|
||||
};
|
||||
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
|
||||
let request =
|
||||
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
|
@@ -18,6 +18,8 @@ use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::search::SearchMetadata;
|
||||
|
||||
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
|
||||
use crate::milli::vector::Embedding;
|
||||
|
||||
@@ -134,6 +136,8 @@ pub struct FederatedSearchResult {
|
||||
pub facets_by_index: FederatedFacets,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub request_uid: Option<Uuid>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<Vec<SearchMetadata>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub remote_errors: Option<BTreeMap<String, ResponseError>>,
|
||||
@@ -160,6 +164,7 @@ impl fmt::Debug for FederatedSearchResult {
|
||||
facets_by_index,
|
||||
remote_errors,
|
||||
request_uid,
|
||||
metadata,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchResult");
|
||||
@@ -195,6 +200,9 @@ impl fmt::Debug for FederatedSearchResult {
|
||||
if let Some(request_uid) = request_uid {
|
||||
debug.field("request_uid", &request_uid);
|
||||
}
|
||||
if let Some(metadata) = metadata {
|
||||
debug.field("metadata", &metadata);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
|
@@ -57,6 +57,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
|
||||
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
@@ -836,6 +837,18 @@ pub struct SearchHit {
|
||||
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
// Metadata describing one executed search query. It is attached to search
// results only when the caller asked for it (see the `include_metadata` flag).
// Field names are serialized in camelCase.
// NOTE(review): written as plain `//` comments so the output of the derives
// on this type is left untouched; promote to `///` if that is acceptable.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct SearchMetadata {
|
||||
// Identifier generated for this query (the visible constructors use `Uuid::now_v7()`).
pub query_uid: Uuid,
|
||||
// Uid of the index the query was run against.
pub index_uid: String,
|
||||
// Primary key of that index, when known; omitted from the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub primary_key: Option<String>,
|
||||
// Name of the remote associated with the query; `None` for plain local searches.
// Omitted from the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub remote: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, PartialEq, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
@@ -854,6 +867,8 @@ pub struct SearchResult {
|
||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub request_uid: Option<Uuid>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub metadata: Option<SearchMetadata>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub semantic_hit_count: Option<u32>,
|
||||
@@ -876,6 +891,7 @@ impl fmt::Debug for SearchResult {
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
request_uid,
|
||||
metadata,
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
@@ -908,6 +924,9 @@ impl fmt::Debug for SearchResult {
|
||||
if let Some(request_uid) = request_uid {
|
||||
debug.field("request_uid", &request_uid);
|
||||
}
|
||||
if let Some(metadata) = metadata {
|
||||
debug.field("metadata", &metadata);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
@@ -1120,16 +1139,28 @@ pub fn prepare_search<'t>(
|
||||
Ok((search, is_finite_pagination, max_total_hits, offset))
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index_uid: String,
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
search_kind: SearchKind,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
request_uid: Uuid,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
/// Arguments of `perform_search`, grouped into a single value.
///
/// `perform_search` destructures this struct on entry; the index itself is
/// passed separately by reference.
pub struct SearchParams {
|
||||
/// Uid of the searched index; forwarded to the search and copied into the metadata.
pub index_uid: String,
|
||||
/// The search query to execute.
pub query: SearchQuery,
|
||||
/// Kind of search to run; forwarded to `search_from_kind`.
pub search_kind: SearchKind,
|
||||
/// Vector retrieval setting; the visible callers build it from `query.retrieve_vectors`.
pub retrieve_vectors: RetrieveVectors,
|
||||
/// Feature set; the visible callers obtain it from `index_scheduler.features()`.
pub features: RoFeatures,
|
||||
/// Uid of the request, echoed back as `request_uid` in the result.
pub request_uid: Uuid,
|
||||
/// When `true`, a `SearchMetadata` is built and attached to the result.
pub include_metadata: bool,
|
||||
}
|
||||
|
||||
pub fn perform_search(params: SearchParams, index: &Index) -> Result<SearchResult, ResponseError> {
|
||||
let SearchParams {
|
||||
index_uid,
|
||||
query,
|
||||
search_kind,
|
||||
retrieve_vectors,
|
||||
features,
|
||||
request_uid,
|
||||
include_metadata,
|
||||
} = params;
|
||||
let before_search = Instant::now();
|
||||
let index_uid_for_metadata = index_uid.clone();
|
||||
let rtxn = index.read_txn()?;
|
||||
let time_budget = match index.search_cutoff(&rtxn)? {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
@@ -1150,7 +1181,20 @@ pub fn perform_search(
|
||||
query_vector,
|
||||
},
|
||||
semantic_hit_count,
|
||||
) = search_from_kind(index_uid, search_kind, search)?;
|
||||
) = search_from_kind(index_uid.clone(), search_kind, search)?;
|
||||
|
||||
let metadata = if include_metadata {
|
||||
let query_uid = Uuid::now_v7();
|
||||
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
|
||||
Some(SearchMetadata {
|
||||
query_uid,
|
||||
index_uid: index_uid_for_metadata,
|
||||
primary_key,
|
||||
remote: None, // Local searches don't have a remote
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let SearchQuery {
|
||||
q,
|
||||
@@ -1233,7 +1277,6 @@ pub fn perform_search(
|
||||
.transpose()?
|
||||
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
|
||||
.unzip();
|
||||
|
||||
let result = SearchResult {
|
||||
hits: documents,
|
||||
hits_info,
|
||||
@@ -1246,6 +1289,7 @@ pub fn perform_search(
|
||||
used_negative_operator,
|
||||
semantic_hit_count,
|
||||
request_uid: Some(request_uid),
|
||||
metadata,
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
@@ -516,6 +516,18 @@ impl<State> Index<'_, State> {
|
||||
self.service.post_encoded(url, query, self.encoder).await
|
||||
}
|
||||
|
||||
pub async fn search_with_headers(
|
||||
&self,
|
||||
query: Value,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref()));
|
||||
let body = serde_json::to_string(&query).unwrap();
|
||||
let mut all_headers = vec![("content-type", "application/json")];
|
||||
all_headers.extend(headers);
|
||||
self.service.post_str(url, body, all_headers).await
|
||||
}
|
||||
|
||||
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
|
||||
self.service.get(url).await
|
||||
|
@@ -390,6 +390,17 @@ impl<State> Server<State> {
|
||||
self.service.post("/multi-search", queries).await
|
||||
}
|
||||
|
||||
pub async fn multi_search_with_headers(
|
||||
&self,
|
||||
queries: Value,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> (Value, StatusCode) {
|
||||
let body = serde_json::to_string(&queries).unwrap();
|
||||
let mut all_headers = vec![("content-type", "application/json")];
|
||||
all_headers.extend(headers);
|
||||
self.service.post_str("/multi-search", body, all_headers).await
|
||||
}
|
||||
|
||||
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
|
||||
self.service.get(format!("/indexes{parameters}")).await
|
||||
}
|
||||
|
387
crates/meilisearch/tests/search/metadata.rs
Normal file
387
crates/meilisearch/tests/search/metadata.rs
Normal file
@@ -0,0 +1,387 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
use crate::common::{shared_index_with_documents, Server, DOCUMENTS};
|
||||
use crate::json;
|
||||
|
||||
// A plain search sent without the `Meili-Include-Metadata` header must return
// a response that has no `metadata` field.
#[actix_rt::test]
|
||||
async fn search_without_metadata_header() {
|
||||
let index = shared_index_with_documents().await;
|
||||
|
||||
// Test that metadata is not included by default
|
||||
index
|
||||
.search(json!({"q": "glass"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
}
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
// A search sent with `Meili-Include-Metadata: true` must return a `metadata`
// object (queryUid, indexUid, primaryKey). Documents are added without an
// explicit primary key (`None`); the snapshot still expects `primaryKey: "id"`.
#[actix_rt::test]
|
||||
async fn search_with_metadata_header() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test with Meili-Include-Metadata header
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
// Same as the header-enabled search test, but the documents are added with an
// explicit primary key (`Some("id")`), which must be reported as `primaryKey`
// in the returned `metadata`.
#[actix_rt::test]
|
||||
async fn search_with_metadata_header_and_primary_key() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, Some("id")).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test with Meili-Include-Metadata header
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "450465",
|
||||
"title": "Gläss",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
// A (non-federated) multi-search sent without the `Meili-Include-Metadata`
// header must return results that carry no `metadata` field.
#[actix_rt::test]
|
||||
async fn multi_search_without_metadata_header() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test multi-search without metadata header
|
||||
let (response, code) = server
|
||||
.multi_search(json!({
|
||||
"queries": [
|
||||
{"indexUid": index.uid, "q": "glass"},
|
||||
{"indexUid": index.uid, "q": "dragon"}
|
||||
]
|
||||
}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
},
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"id": "166428",
|
||||
"color": [
|
||||
"green",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "dragon",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
}
|
||||
]
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
// A (non-federated) multi-search sent with `Meili-Include-Metadata: true`
// must return one `metadata` object per result (queryUid, indexUid,
// primaryKey). Documents are added with the explicit primary key `"id"`.
#[actix_rt::test]
|
||||
async fn multi_search_with_metadata_header() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, Some("id")).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test multi-search with metadata header
|
||||
let (response, code) = server
|
||||
.multi_search_with_headers(
|
||||
json!({
|
||||
"queries": [
|
||||
{"indexUid": index.uid, "q": "glass"},
|
||||
{"indexUid": index.uid, "q": "dragon"}
|
||||
]
|
||||
}),
|
||||
vec![("Meili-Include-Metadata", "true")],
|
||||
)
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[0].metadata.queryUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]", ".results[1].metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"id": "450465",
|
||||
"title": "Gläss",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
},
|
||||
{
|
||||
"indexUid": "[uuid]",
|
||||
"hits": [
|
||||
{
|
||||
"id": "166428",
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"color": [
|
||||
"green",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "dragon",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_metadata_header_false_value() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Test with header set to false
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "false")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_metadata_uuid_format() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"title": "Gläss",
|
||||
"id": "450465",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_metadata_consistency_across_requests() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
let (task, _code) = index.add_documents(documents, Some("id")).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Make multiple requests and check that metadata is consistent
|
||||
for _i in 0..3 {
|
||||
let (response, code) = index
|
||||
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "450465",
|
||||
"title": "Gläss",
|
||||
"color": [
|
||||
"blue",
|
||||
"red"
|
||||
]
|
||||
}
|
||||
],
|
||||
"query": "glass",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"requestUid": "[uuid]",
|
||||
"metadata": {
|
||||
"queryUid": "[uuid]",
|
||||
"indexUid": "[uuid]",
|
||||
"primaryKey": "id"
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
}
|
@@ -11,6 +11,7 @@ mod hybrid;
|
||||
#[cfg(not(feature = "chinese-pinyin"))]
|
||||
mod locales;
|
||||
mod matching_strategy;
|
||||
mod metadata;
|
||||
mod multi;
|
||||
mod pagination;
|
||||
mod restrict_searchable;
|
||||
|
Reference in New Issue
Block a user