Compare commits

...

23 Commits

Author SHA1 Message Date
ManyTheFish
cc115894cd feat(metrics): add personalization count to metrics endpoint
- Add MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS metric to track personalized searches
- Increment metric directly in search analytics when personalization is used
- Metric automatically exposed in /metrics endpoint for monitoring
2025-10-14 12:25:08 +02:00
ManyTheFish
76632d7410 feat(analytics): add personalization tracking to segment analytics
- Add total_personalized field to SearchAggregator to track personalization usage
- Track when search requests include personalization parameters
- Include personalization data in analytics JSON output
- Maintain clean personalization service interface
2025-10-14 12:25:08 +02:00
ManyTheFish
be3ddc28eb refactor(personalization): improve Cohere reranking logic and error handling
- Replace and_then() with early return for missing personalization
- Simplify reranking by building new hits vector instead of swapping
- Add debug logging for reranked indices
- Fix potential index out-of-bounds issues in reranking
2025-10-14 12:25:08 +02:00
ManyTheFish
08ea9f728a refactor: split PersonalizationService into enum with CohereService
- Refactor PersonalizationService as enum with Cohere and Uninitialized variants
- Create dedicated CohereService struct with rerank_search_results method
- Split constructor into cohere() and uninitialized() methods
- Move all Cohere logic into CohereService for better separation of concerns
- Update tests and lib.rs to use new API
- Improve code organization and maintainability
2025-10-14 12:25:08 +02:00
ManyTheFish
6af8e919cb feat: refine personalization query by merging with user context
- Merge initial query with user context to create a comprehensive prompt
- Only skip reranking if both query and user_context are None
- Support reranking with query-only, user_context-only, or both
- Use 'let else' pattern for cleaner error handling
- Add comprehensive tests for different parameter combinations
- Improve prompt format for better reranking effectiveness
2025-10-14 12:25:08 +02:00
ManyTheFish
3e907586a3 refactor: rename personalization API fields and move checks inside service
- Rename 'personalization' field to 'personalize' in API
- Rename 'userProfile' to 'userContext' in personalization object
- Remove 'personalized' boolean field (activation now based on non-null 'personalize')
- Move personalization checks inside rerank_search_results function
- Use 'let else' pattern for better error handling
- Update error types and messages to reflect new field names
- Update all search routes and analytics to use new field names
2025-10-14 12:25:08 +02:00
ManyTheFish
b7266d871e feat: add personalization service with EnglishV3-only reranking
- Add new personalization module with Cohere integration
- Implement rerank_search_results method using EnglishV3 model
- Remove fallback logic to EnglishV2 for simplified behavior
- Add comprehensive error handling and logging
- Include unit tests for service behavior
- Update search route to support personalization feature
2025-10-14 12:22:34 +02:00
ManyTheFish
e3974634af feat: add personalization parameters to /search route
- Add Personalization struct with personalized boolean and user_profile string
- Add personalizationPersonalized and personalizationUserProfile query parameters to SearchQueryGet
- Follow same pattern as hybrid parameters (hybridEmbedder, hybridSemanticRatio)
- Add validation: personalizationUserProfile requires personalizationPersonalized
- Add error codes for personalization parameters
- Update analytics and facet search to handle new personalization field
- Remove serde dependencies from Personalization struct, use Deserr only
2025-10-14 12:21:07 +02:00
ManyTheFish
36b77283d6 feat: add experimental_personalization_api_key feature to RoFeatures
- Add MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY environment variable
- Add experimental_personalization_api_key field to Opt struct with CLI and env support
- Add experimental_personalization_api_key field to InstanceTogglableFeatures
- Store personalization API key in FeatureData for access through IndexScheduler
- Add experimental_personalization_api_key() method to IndexScheduler
- Update analytics destructuring to include new field
- Maintain RoFeatures Copy trait while properly handling Option<String>
2025-10-14 12:16:35 +02:00
ManyTheFish
ed6f479940 Remove irrelevant test index method 2025-10-14 12:10:17 +02:00
ManyTheFish
f19f712433 Add local remote name when a remote federated search is made 2025-10-14 12:10:17 +02:00
ManyTheFish
24a92c2809 move contant header in search/mod.rs 2025-10-14 12:10:17 +02:00
ManyTheFish
443cc24408 --amend 2025-10-14 12:10:17 +02:00
ManyTheFish
e8d5228250 factorize metadata header 2025-10-14 12:10:17 +02:00
ManyTheFish
5c33fb090c avoid openning each index twice and remove clones 2025-10-14 12:10:17 +02:00
ManyTheFish
48dd9146e7 Add comprehensive metadata tests with insta snapshots
- Add 9 test cases covering single search, multi-search, and federated search
- Test metadata header opt-in functionality with case insensitivity
- Test header false value handling
- Test UUID format validation and consistency
- Use insta snapshots for reliable, maintainable test assertions
- Fix header parsing to properly handle 'false' values
- Add helper methods for testing with custom headers
2025-10-14 12:10:17 +02:00
ManyTheFish
c1c42e818e refactor: group perform_search parameters into SearchParams struct
- Create SearchParams struct to group related parameters
- Update perform_search function to use SearchParams instead of 8 individual parameters
- Fix clippy warning about too many arguments
- Update all callers to use new SearchParams struct
2025-10-14 12:10:17 +02:00
ManyTheFish
519905ef9c Fix remote index collision with HashMap-based lookup
- Replace BTreeMap with HashMap for (remote, index_uid) -> primary_key lookup
- Prevents collisions when multiple remotes have same index_uid but different primary keys
2025-10-14 12:10:17 +02:00
ManyTheFish
f242377d2b Fix remote index collision in federated search metadata
- Use composite key (indexUid, remote) instead of indexUid only for remote metadata lookup
- Prevents collisions when multiple remotes have same indexUid but different primary keys
- Ensures each remote query gets correct primaryKey from its specific remote instance
2025-10-14 12:10:17 +02:00
ManyTheFish
da06306274 Add header-based metadata opt-in for search responses
- Add Meili-Include-Metadata header constant
- Modify perform_search to conditionally include metadata based on header
- Modify perform_federated_search to conditionally include metadata based on header
- Update all search routes to check for header and pass include_metadata parameter
- Forward Meili-Include-Metadata header to remote requests for federated search
- Ensure remote queries include primaryKey metadata when header is present
2025-10-14 12:10:17 +02:00
ManyTheFish
b93b803a2e WIP: Add metadata field with queryUid, indexUid, primaryKey, and remote
- Add SearchMetadata struct with queryUid, indexUid, primaryKey, and remote fields
- Update SearchResult to include metadata field
- Update FederatedSearchResult to include metadata array
- Refactor federated search metadata building to maintain query order
- Support primary key extraction from both local and remote results
- Add remote field to identify remote instance for federated queries
- Ensure metadata array matches query order in federated search

Features:
- queryUid: UUID v7 for each query
- indexUid: Index identifier
- primaryKey: Primary key field name (null if not available)
- remote: Remote instance name (null for local queries)

This provides complete traceability for search operations across local and remote instances.
2025-10-14 12:10:17 +02:00
ManyTheFish
cf43ec4aff feat: add indexUid to SearchMetadata
- Add indexUid field to SearchMetadata struct
- Update perform_search to include indexUid in metadata
- Update federated search to include indexUid for each query

The metadata field now contains both queryUid and indexUid:
- For /search: single object with queryUid and indexUid
- For /multi-search: each result has metadata with both fields
- For federated search: array of objects, each with queryUid and indexUid
2025-10-14 12:10:17 +02:00
ManyTheFish
9795d98e77 feat: add metadata field with queryUid to search responses
- Add SearchMetadata struct with queryUid field (UUID v7)
- Add metadata field to SearchResult for /search route
- Add metadata field to FederatedSearchResult for /multi-search route
- Update perform_search to generate queryUid and set metadata
- Update federated search to generate queryUid for each query
- Update multi-search non-federated path to include metadata
- Fix pattern matching in analytics and other code

The metadata field contains:
- For /search: single object with queryUid
- For /multi-search: array of objects, one per query
- For federated search: array of objects, one per query

All queryUid values are generated using Uuid::now_v7() for time-ordered uniqueness.
2025-10-14 12:10:17 +02:00
25 changed files with 1056 additions and 54 deletions

33
Cargo.lock generated
View File

@@ -1236,6 +1236,21 @@ version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
[[package]]
name = "cohere-rust"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b553b385b0f2562138baea705b5707335314f8e91a58e7d1a03c3a6c332423"
dependencies = [
"bytes",
"reqwest",
"serde",
"serde_json",
"strum_macros 0.26.4",
"thiserror 1.0.69",
"tokio",
]
[[package]]
name = "color-spantrace"
version = "0.3.0"
@@ -3708,7 +3723,7 @@ dependencies = [
"serde_json",
"serde_yaml",
"strum",
"strum_macros",
"strum_macros 0.27.2",
"unicode-blocks",
"unicode-normalization",
"unicode-segmentation",
@@ -4026,6 +4041,7 @@ dependencies = [
"bytes",
"cargo_toml",
"clap",
"cohere-rust",
"crossbeam-channel",
"deserr",
"dump",
@@ -6219,7 +6235,20 @@ version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
dependencies = [
"strum_macros",
"strum_macros 0.27.2",
]
[[package]]
name = "strum_macros"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.106",
]
[[package]]

View File

@@ -25,6 +25,7 @@ pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
experimental_personalization_api_key: Option<String>,
}
#[derive(Debug, Clone, Copy)]
@@ -188,7 +189,12 @@ impl FeatureData {
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let InstanceTogglableFeatures {
metrics,
logs_route,
contains_filter,
experimental_personalization_api_key,
} = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics,
logs_route: logs_route || persisted_features.logs_route,
@@ -204,6 +210,7 @@ impl FeatureData {
persisted: runtime_features_db,
runtime,
network: Arc::new(RwLock::new(network)),
experimental_personalization_api_key,
})
}
@@ -234,6 +241,10 @@ impl FeatureData {
RoFeatures::new(self)
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.experimental_personalization_api_key.as_ref()
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
&mut wtxn,

View File

@@ -303,7 +303,8 @@ impl IndexScheduler {
let mut wtxn = env.write_txn()?;
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let features =
features::FeatureData::new(&env, &mut wtxn, options.instance_features.clone())?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_settings = env.create_database(&mut wtxn, Some(db_name::CHAT_SETTINGS))?;
@@ -886,6 +887,10 @@ impl IndexScheduler {
self.features.features()
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.features.experimental_personalization_api_key()
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.features.put_runtime_features(wtxn, features)?;

View File

@@ -315,6 +315,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -679,6 +681,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `personalize` is invalid, expected a JSON object with optional `userContext` string.")
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `userContext` is invalid, expected a string.")
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@@ -24,11 +24,12 @@ pub struct RuntimeTogglableFeatures {
pub vector_store_setting: bool,
}
#[derive(Default, Debug, Clone, Copy)]
#[derive(Default, Debug, Clone)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
pub contains_filter: bool,
pub experimental_personalization_api_key: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]

View File

@@ -95,6 +95,7 @@ uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.4", features = ["serde"] }
cohere-rust = "0.6.0"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.20", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }

View File

@@ -285,6 +285,7 @@ impl Infos {
indexer_options,
config_file_path,
no_analytics: _,
experimental_personalization_api_key: _,
} = options;
let schedule_snapshot = match schedule_snapshot {

View File

@@ -11,6 +11,7 @@ pub mod middleware;
pub mod option;
#[cfg(test)]
mod option_test;
pub mod personalization;
pub mod routes;
pub mod search;
pub mod search_queue;
@@ -684,12 +685,19 @@ pub fn configure_data(
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
) {
// Create personalization service with API key from options
let personalization_service = index_scheduler
.experimental_personalization_api_key()
.cloned()
.map(personalization::PersonalizationService::cohere)
.unwrap_or_else(personalization::PersonalizationService::uninitialized);
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(search_queue)
.app_data(analytics)
.app_data(web::Data::new(personalization_service))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))

View File

@@ -114,4 +114,9 @@ lazy_static! {
"Meilisearch Task Queue Size Until Stop Registering",
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
"meilisearch_personalized_search_requests",
"Meilisearch number of search requests with personalization"
))
.expect("Can't create a metric");
}

View File

@@ -74,6 +74,8 @@ const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -475,6 +477,12 @@ pub struct Opt {
#[serde(default)]
pub experimental_no_snapshot_compaction: bool,
/// Experimental personalization API key feature.
///
/// Sets the API key for personalization features.
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
pub experimental_personalization_api_key: Option<String>,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -580,6 +588,7 @@ impl Opt {
experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_personalization_api_key,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -680,6 +689,12 @@ impl Opt {
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
experimental_no_snapshot_compaction.to_string(),
);
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
experimental_personalization_api_key,
);
}
indexer_options.export_to_env();
}
@@ -732,6 +747,7 @@ impl Opt {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
contains_filter: self.experimental_contains_filter,
experimental_personalization_api_key: self.experimental_personalization_api_key.clone(),
}
}
}

View File

@@ -0,0 +1,244 @@
use crate::search::{Personalize, SearchResult};
use cohere_rust::{
api::rerank::{ReRankModel, ReRankRequest},
Cohere,
};
use meilisearch_types::error::ResponseError;
use tracing::{debug, error, info};
pub struct CohereService {
cohere: Cohere,
}
impl CohereService {
pub fn new(api_key: String) -> Self {
info!("Personalization service initialized with Cohere API");
Self { cohere: Cohere::new("https://api.cohere.ai", api_key) }
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
// Extract user context from personalization
let Some(user_context) = personalize.and_then(|p| p.user_context.as_deref()) else {
return Ok(search_result);
};
// Build the prompt by merging query and user context
let prompt = match query {
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
None => format!("User Context: {user_context}"),
};
// Extract documents for reranking
let documents: Vec<String> = search_result
.hits
.iter()
.map(|hit| {
// Convert the document to a string representation for reranking
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
})
.collect();
if documents.is_empty() {
return Ok(search_result);
}
// Prepare the rerank request
let rerank_request = ReRankRequest {
query: &prompt,
documents: &documents,
model: ReRankModel::EnglishV3, // Use the default and more recent model
top_n: None,
max_chunks_per_doc: None,
};
// Call Cohere's rerank API
match self.cohere.rerank(&rerank_request).await {
Ok(rerank_response) => {
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
// Create a mapping from original index to new rank
let reranked_indices: Vec<usize> =
rerank_response.iter().map(|result| result.index as usize).collect();
// Reorder the hits based on Cohere's reranking
let mut reranked_hits = Vec::new();
for index in reranked_indices.iter() {
reranked_hits.push(search_result.hits[*index].clone());
}
Ok(SearchResult { hits: reranked_hits, ..search_result })
}
Err(e) => {
error!("Cohere rerank failed with model EnglishV3: {}", e);
// Return original results on error
Ok(search_result)
}
}
}
}
pub enum PersonalizationService {
Cohere(CohereService),
Uninitialized,
}
impl PersonalizationService {
pub fn cohere(api_key: String) -> Self {
Self::Cohere(CohereService::new(api_key))
}
pub fn uninitialized() -> Self {
debug!("Personalization service uninitialized");
Self::Uninitialized
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
cohere_service.rerank_search_results(search_result, personalize, query).await
}
Self::Uninitialized => Ok(search_result),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::search::{HitsInfo, SearchHit};
#[tokio::test]
async fn test_personalization_service_without_api_key() {
let service = PersonalizationService::uninitialized();
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service
.rerank_search_results(search_result.clone(), Some(&personalize), Some("test"))
.await;
assert!(result.is_ok());
// Should return original results when no API key is provided
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_user_context_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result =
service.rerank_search_results(search_result.clone(), Some(&personalize), None).await;
assert!(result.is_ok());
// Should attempt reranking with user context only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_query_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, Some("test")).await;
assert!(result.is_ok());
// Should attempt reranking with query only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_both_none() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, None).await;
assert!(result.is_ok());
// Should return original results when both query and user_context are None
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
}

View File

@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid,
ranking_score_threshold,
locales,
personalize: None,
}
}
}

View File

@@ -22,11 +22,12 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
use crate::routes::parse_include_metadata_header;
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
RankingScoreThreshold, RetrieveVectors, SearchKind, SearchParams, SearchQuery, SearchResult,
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@@ -133,6 +134,8 @@ pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
#[param(value_type = Vec<Locale>, explode = false)]
pub locales: Option<CS<Locale>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
pub personalize_user_context: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@@ -204,6 +207,10 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
));
}
let personalize = other
.personalize_user_context
.map(|user_context| Personalize { user_context: Some(user_context) });
Ok(Self {
q: other.q,
// `media` not supported for `GET`
@@ -233,6 +240,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
personalize,
})
}
}
@@ -321,6 +329,7 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
@@ -344,16 +353,26 @@ pub async fn search_with_url_query(
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let include_metadata = parse_include_metadata_header(&req);
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
SearchParams {
index_uid: index_uid.to_string(),
query,
search_kind,
retrieve_vectors: retrieve_vector,
features: index_scheduler.features(),
request_uid,
include_metadata,
},
&index,
query,
search_kind,
retrieve_vector,
index_scheduler.features(),
request_uid,
)
})
.await;
@@ -364,7 +383,12 @@ pub async fn search_with_url_query(
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
debug!(request_uid = ?request_uid, returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result))
@@ -429,6 +453,7 @@ pub async fn search_with_url_query(
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
@@ -453,16 +478,25 @@ pub async fn search_with_post(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let include_metadata = parse_include_metadata_header(&req);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
SearchParams {
index_uid: index_uid.to_string(),
query,
search_kind,
retrieve_vectors,
features: index_scheduler.features(),
request_uid,
include_metadata,
},
&index,
query,
search_kind,
retrieve_vectors,
index_scheduler.features(),
request_uid,
)
})
.await;
@@ -476,7 +510,12 @@ pub async fn search_with_post(
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
debug!(request_uid = ?request_uid, returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result))

View File

@@ -7,6 +7,7 @@ use serde_json::{json, Value};
use crate::aggregate_methods;
use crate::analytics::{Aggregate, AggregateMethod};
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
@@ -95,6 +96,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
show_ranking_score_details: bool,
ranking_score_threshold: bool,
// personalization
total_personalized: usize,
marker: std::marker::PhantomData<Method>,
}
@@ -129,6 +133,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = query;
let mut ret = Self::default();
@@ -204,6 +209,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
ret.locales = locales.iter().copied().collect();
}
// personalization
if personalize.is_some() {
ret.total_personalized = 1;
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
}
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@@ -235,6 +246,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
degraded,
used_negative_operator,
request_uid: _,
metadata: _,
} = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
@@ -295,6 +307,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
mut locales,
total_personalized,
marker: _,
} = *new;
@@ -380,6 +393,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
// locales
self.locales.append(&mut locales);
// personalization
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
self
}
@@ -425,6 +441,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
locales,
total_personalized,
marker: _,
} = *self;
@@ -498,6 +515,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
"personalization": {
"total_personalized": total_personalized,
},
})
}
}

View File

@@ -45,6 +45,7 @@ use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetada
use crate::search::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
INCLUDE_METADATA_HEADER,
};
use crate::search_queue::SearchQueue;
use crate::Opt;
@@ -184,6 +185,18 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
.is_some_and(|s| s.to_lowercase() == "true"))
}
/// Parse the `Meili-Include-Metadata` header from an HTTP request.
///
/// Returns `true` if the header is present and set to "true" or "1" (case-insensitive).
/// Returns `false` if the header is not present or has any other value.
pub fn parse_include_metadata_header(req: &HttpRequest) -> bool {
req.headers()
.get(INCLUDE_METADATA_HEADER)
.and_then(|h| h.to_str().ok())
.map(|v| matches!(v.to_lowercase().as_str(), "true" | "1"))
.unwrap_or(false)
}
#[derive(Debug, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {

View File

@@ -18,10 +18,11 @@ use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind;
use crate::routes::parse_include_metadata_header;
use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch,
FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
FederatedSearchResult, RetrieveVectors, SearchParams, SearchQueryWithIndex,
SearchResultWithIndex, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE,
};
use crate::search_queue::SearchQueue;
@@ -188,6 +189,7 @@ pub async fn multi_search_with_post(
err
})?;
let include_metadata = parse_include_metadata_header(&req);
let response = match federation {
Some(federation) => {
debug!(
@@ -209,6 +211,7 @@ pub async fn multi_search_with_post(
features,
is_proxy,
request_uid,
include_metadata,
)
.await;
permit.drop().await;
@@ -279,13 +282,16 @@ pub async fn multi_search_with_post(
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid_str.clone(),
SearchParams {
index_uid: index_uid_str.clone(),
query,
search_kind,
retrieve_vectors: retrieve_vector,
features,
request_uid,
include_metadata,
},
&index,
query,
search_kind,
retrieve_vector,
features,
request_uid,
)
})
.await

View File

@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
hybrid: _,
ranking_score_threshold: _,
locales: _,
personalize: _,
} in &federated_search.queries
{
if let Some(federation_options) = federation_options {

View File

@@ -20,6 +20,7 @@ use tokio::task::JoinHandle;
use uuid::Uuid;
use super::super::ranking_rules::{self, RankingRules};
use super::super::SearchMetadata;
use super::super::{
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker,
HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
@@ -41,6 +42,7 @@ pub async fn perform_federated_search(
features: RoFeatures,
is_proxy: bool,
request_uid: Uuid,
include_metadata: bool,
) -> Result<FederatedSearchResult, ResponseError> {
if is_proxy {
features.check_network("Performing a remote federated search")?;
@@ -65,14 +67,27 @@ pub async fn perform_federated_search(
// 1. partition queries by host and index
let mut partitioned_queries = PartitionedQueries::new();
// Preconstruct metadata keeping the original queries order for later metadata building
let precomputed_query_metadata: Vec<_> = queries
.iter()
.map(|q| {
(q.index_uid.to_string(), q.federation_options.as_ref().and_then(|o| o.remote.clone()))
})
.collect();
for (query_index, federated_query) in queries.into_iter().enumerate() {
partitioned_queries.partition(federated_query, query_index, &network, features)?
}
// 2. perform queries, merge and make hits index by index
// 2.1. start remote queries
let remote_search =
RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline);
let remote_search = RemoteSearch::start(
partitioned_queries.remote_queries_by_host,
&federation,
deadline,
include_metadata,
);
// 2.2. concurrently execute local queries
let params = SearchByIndexParams {
@@ -114,11 +129,63 @@ pub async fn perform_federated_search(
let after_waiting_remote_results = std::time::Instant::now();
// 3. merge hits and metadata across indexes and hosts
// 3.1. merge metadata
// 3.1. Build metadata in the same order as the original queries
let query_metadata = if include_metadata {
// 3.1.1. Create a map of (remote, index_uid) -> primary_key for quick lookup
// This prevents collisions when multiple remotes have the same index_uid but different primary keys
let mut primary_key_per_index = std::collections::HashMap::new();
// 3.1.1.1 Build metadata for remote results
for remote_result in &remote_results {
if let Some(remote_metadata) = &remote_result.metadata {
for remote_meta in remote_metadata {
if let SearchMetadata {
remote: Some(remote_name),
index_uid,
primary_key: Some(primary_key),
..
} = &remote_meta
{
let key = (Some(remote_name), index_uid);
primary_key_per_index.insert(key, primary_key);
}
}
}
}
// 3.1.1.2 Build metadata for local results
for local_meta in &results_by_index {
if let SearchResultByIndex { index, primary_key: Some(primary_key), .. } = &local_meta {
let key = (None, index);
primary_key_per_index.insert(key, primary_key);
}
}
// if there are remote results, set the local remote name
let local_remote_name =
(!remote_results.is_empty()).then_some(network.local.clone()).flatten();
// 3.1.2 Build metadata in the same order as the original queries
let mut query_metadata = Vec::new();
for (index_uid, remote) in precomputed_query_metadata {
let primary_key =
primary_key_per_index.get(&(remote.as_ref(), &index_uid)).map(|pk| pk.to_string());
let query_uid = Uuid::now_v7();
// if the remote is not set, use the local remote name
let remote = remote.or_else(|| local_remote_name.clone());
query_metadata.push(SearchMetadata { query_uid, primary_key, index_uid, remote });
}
Some(query_metadata)
} else {
None
};
// 3.2. merge federation metadata
let (estimated_total_hits, degraded, used_negative_operator, facets, max_remote_duration) =
merge_metadata(&mut results_by_index, &remote_results);
// 3.2. merge hits
// 3.3. merge hits
let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results)
.skip(federation.offset)
.take(federation.limit)
@@ -133,7 +200,7 @@ pub async fn perform_federated_search(
.map(|hit| hit.hit())
.collect();
// 3.3. merge query vectors
// 3.4. merge query vectors
let query_vectors = if retrieve_vectors {
for remote_results in remote_results.iter_mut() {
if let Some(remote_vectors) = remote_results.query_vectors.take() {
@@ -152,7 +219,7 @@ pub async fn perform_federated_search(
None
};
// 3.4. merge facets
// 3.5. merge facets
let (facet_distribution, facet_stats, facets_by_index) =
facet_order.merge(federation.merge_facets, remote_results, facets);
@@ -179,6 +246,7 @@ pub async fn perform_federated_search(
facets_by_index,
remote_errors: partitioned_queries.has_remote.then_some(remote_errors),
request_uid: Some(request_uid),
metadata: query_metadata,
})
}
@@ -402,6 +470,7 @@ struct SearchHitByIndex {
struct SearchResultByIndex {
index: String,
primary_key: Option<String>,
hits: Vec<SearchHitByIndex>,
estimated_total_hits: usize,
degraded: bool,
@@ -420,6 +489,7 @@ fn merge_metadata(
let mut max_remote_duration = Duration::ZERO;
for SearchResultByIndex {
index,
primary_key: _,
hits: _,
estimated_total_hits: estimated_total_hits_by_index,
facets: facets_by_index,
@@ -448,6 +518,7 @@ fn merge_metadata(
degraded: degraded_for_host,
used_negative_operator: host_used_negative_operator,
remote_errors: _,
metadata: _,
request_uid: _,
} in remote_results
{
@@ -576,7 +647,12 @@ struct RemoteSearch {
}
impl RemoteSearch {
fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self {
fn start(
queries: RemoteQueriesByHost,
federation: &Federation,
deadline: Instant,
include_metadata: bool,
) -> Self {
let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_millis(200))
@@ -596,7 +672,10 @@ impl RemoteSearch {
// never merge distant facets
proxy_federation.merge_facets = None;
let params = params.clone();
async move { proxy_search(&node, queries, proxy_federation, &params).await }
async move {
proxy_search(&node, queries, proxy_federation, &params, include_metadata)
.await
}
}),
);
}
@@ -640,6 +719,13 @@ impl RemoteSearch {
continue 'remote_queries;
}
// Add remote name to metadata
if let Some(metadata) = res.metadata.as_mut() {
for meta in metadata {
meta.remote = Some(node_name.clone());
}
}
federation.insert(
FEDERATION_REMOTE.to_string(),
serde_json::Value::String(node_name.clone()),
@@ -735,6 +821,7 @@ impl SearchByIndex {
}
};
let rtxn = index.read_txn()?;
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
let criteria = index.criteria(&rtxn)?;
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
@@ -987,6 +1074,7 @@ impl SearchByIndex {
})?;
self.results_by_index.push(SearchResultByIndex {
index: index_uid,
primary_key,
hits: merged_result,
estimated_total_hits,
degraded,

View File

@@ -7,7 +7,7 @@ use serde::de::DeserializeOwned;
use serde_json::Value;
use super::types::{FederatedSearch, FederatedSearchResult, Federation};
use crate::search::SearchQueryWithIndex;
use crate::search::{SearchQueryWithIndex, INCLUDE_METADATA_HEADER};
pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search";
pub const PROXY_SEARCH_HEADER_VALUE: &str = "true";
@@ -98,6 +98,7 @@ pub async fn proxy_search(
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
params: &ProxySearchParams,
include_metadata: bool,
) -> Result<FederatedSearchResult, ProxySearchError> {
let url = format!("{}/multi-search", node.url);
@@ -119,7 +120,16 @@ pub async fn proxy_search(
};
for i in 0..params.try_count {
match try_proxy_search(&url, search_api_key, &federated, &params.client, deadline).await {
match try_proxy_search(
&url,
search_api_key,
&federated,
&params.client,
deadline,
include_metadata,
)
.await
{
Ok(response) => return Ok(response),
Err(retry) => {
let duration = retry.into_duration(i)?;
@@ -127,7 +137,7 @@ pub async fn proxy_search(
}
}
}
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline)
try_proxy_search(&url, search_api_key, &federated, &params.client, deadline, include_metadata)
.await
.map_err(Retry::into_error)
}
@@ -138,6 +148,7 @@ async fn try_proxy_search(
federated: &FederatedSearch,
client: &Client,
deadline: std::time::Instant,
include_metadata: bool,
) -> Result<FederatedSearchResult, Retry> {
let timeout = deadline.saturating_duration_since(std::time::Instant::now());
@@ -148,6 +159,8 @@ async fn try_proxy_search(
request
};
let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE);
let request =
if include_metadata { request.header(INCLUDE_METADATA_HEADER, "true") } else { request };
let response = request.send().await;
let response = match response {

View File

@@ -18,6 +18,8 @@ use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;
use crate::search::SearchMetadata;
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
use crate::milli::vector::Embedding;
@@ -134,6 +136,8 @@ pub struct FederatedSearchResult {
pub facets_by_index: FederatedFacets,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub request_uid: Option<Uuid>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata: Option<Vec<SearchMetadata>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub remote_errors: Option<BTreeMap<String, ResponseError>>,
@@ -160,6 +164,7 @@ impl fmt::Debug for FederatedSearchResult {
facets_by_index,
remote_errors,
request_uid,
metadata,
} = self;
let mut debug = f.debug_struct("SearchResult");
@@ -195,6 +200,9 @@ impl fmt::Debug for FederatedSearchResult {
if let Some(request_uid) = request_uid {
debug.field("request_uid", &request_uid);
}
if let Some(metadata) = metadata {
debug.field("metadata", &metadata);
}
debug.finish()
}

View File

@@ -57,6 +57,14 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
pub const INCLUDE_METADATA_HEADER: &str = "Meili-Include-Metadata";
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
pub struct Personalize {
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
pub user_context: Option<String>,
}
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
@@ -121,6 +129,8 @@ pub struct SearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
pub personalize: Option<Personalize>,
}
impl From<SearchParameters> for SearchQuery {
@@ -168,6 +178,7 @@ impl From<SearchParameters> for SearchQuery {
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(),
locales: None,
personalize: None,
}
}
}
@@ -249,6 +260,7 @@ impl fmt::Debug for SearchQuery {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@@ -337,6 +349,10 @@ impl fmt::Debug for SearchQuery {
debug.field("locales", &locales);
}
if let Some(personalize) = personalize {
debug.field("personalize", &personalize);
}
debug.finish()
}
}
@@ -542,6 +558,9 @@ pub struct SearchQueryWithIndex {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
#[serde(skip)]
pub personalize: Option<Personalize>,
#[deserr(default)]
pub federation_options: Option<FederationOptions>,
@@ -599,6 +618,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = query;
SearchQueryWithIndex {
@@ -630,6 +650,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
federation_options,
}
}
@@ -665,6 +686,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = self;
(
index_uid,
@@ -696,6 +718,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -836,6 +859,18 @@ pub struct SearchHit {
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct SearchMetadata {
pub query_uid: Uuid,
pub index_uid: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub remote: Option<String>,
}
#[derive(Serialize, Clone, PartialEq, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
@@ -854,6 +889,8 @@ pub struct SearchResult {
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub request_uid: Option<Uuid>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<SearchMetadata>,
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
@@ -876,6 +913,7 @@ impl fmt::Debug for SearchResult {
facet_distribution,
facet_stats,
request_uid,
metadata,
semantic_hit_count,
degraded,
used_negative_operator,
@@ -908,6 +946,9 @@ impl fmt::Debug for SearchResult {
if let Some(request_uid) = request_uid {
debug.field("request_uid", &request_uid);
}
if let Some(metadata) = metadata {
debug.field("metadata", &metadata);
}
debug.finish()
}
@@ -932,7 +973,7 @@ pub struct SearchResultWithIndex {
pub result: SearchResult,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
#[serde(untagged)]
pub enum HitsInfo {
#[serde(rename_all = "camelCase")]
@@ -1120,16 +1161,28 @@ pub fn prepare_search<'t>(
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub fn perform_search(
index_uid: String,
index: &Index,
query: SearchQuery,
search_kind: SearchKind,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
request_uid: Uuid,
) -> Result<SearchResult, ResponseError> {
pub struct SearchParams {
pub index_uid: String,
pub query: SearchQuery,
pub search_kind: SearchKind,
pub retrieve_vectors: RetrieveVectors,
pub features: RoFeatures,
pub request_uid: Uuid,
pub include_metadata: bool,
}
pub fn perform_search(params: SearchParams, index: &Index) -> Result<SearchResult, ResponseError> {
let SearchParams {
index_uid,
query,
search_kind,
retrieve_vectors,
features,
request_uid,
include_metadata,
} = params;
let before_search = Instant::now();
let index_uid_for_metadata = index_uid.clone();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
@@ -1150,7 +1203,20 @@ pub fn perform_search(
query_vector,
},
semantic_hit_count,
) = search_from_kind(index_uid, search_kind, search)?;
) = search_from_kind(index_uid.clone(), search_kind, search)?;
let metadata = if include_metadata {
let query_uid = Uuid::now_v7();
let primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
Some(SearchMetadata {
query_uid,
index_uid: index_uid_for_metadata,
primary_key,
remote: None, // Local searches don't have a remote
})
} else {
None
};
let SearchQuery {
q,
@@ -1182,6 +1248,7 @@ pub fn perform_search(
attributes_to_search_on: _,
filter: _,
distinct: _,
personalize: _,
} = query;
let format = AttributesFormat {
@@ -1233,7 +1300,6 @@ pub fn perform_search(
.transpose()?
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
let result = SearchResult {
hits: documents,
hits_info,
@@ -1246,6 +1312,7 @@ pub fn perform_search(
used_negative_operator,
semantic_hit_count,
request_uid: Some(request_uid),
metadata,
};
Ok(result)
}

View File

@@ -516,6 +516,18 @@ impl<State> Index<'_, State> {
self.service.post_encoded(url, query, self.encoder).await
}
pub async fn search_with_headers(
&self,
query: Value,
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref()));
let body = serde_json::to_string(&query).unwrap();
let mut all_headers = vec![("content-type", "application/json")];
all_headers.extend(headers);
self.service.post_str(url, body, all_headers).await
}
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await

View File

@@ -390,6 +390,17 @@ impl<State> Server<State> {
self.service.post("/multi-search", queries).await
}
pub async fn multi_search_with_headers(
&self,
queries: Value,
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let body = serde_json::to_string(&queries).unwrap();
let mut all_headers = vec![("content-type", "application/json")];
all_headers.extend(headers);
self.service.post_str("/multi-search", body, all_headers).await
}
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
self.service.get(format!("/indexes{parameters}")).await
}

View File

@@ -0,0 +1,387 @@
use meili_snap::{json_string, snapshot};
use crate::common::{shared_index_with_documents, Server, DOCUMENTS};
use crate::json;
#[actix_rt::test]
async fn search_without_metadata_header() {
let index = shared_index_with_documents().await;
// Test that metadata is not included by default
index
.search(json!({"q": "glass"}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###"
{
"hits": [
{
"title": "Gläss",
"id": "450465",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]"
}
"###);
})
.await;
}
#[actix_rt::test]
async fn search_with_metadata_header() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
// Test with Meili-Include-Metadata header
let (response, code) = index
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
{
"hits": [
{
"title": "Gläss",
"id": "450465",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]",
"metadata": {
"queryUid": "[uuid]",
"indexUid": "[uuid]",
"primaryKey": "id"
}
}
"###);
}
#[actix_rt::test]
async fn search_with_metadata_header_and_primary_key() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, Some("id")).await;
server.wait_task(task.uid()).await.succeeded();
// Test with Meili-Include-Metadata header
let (response, code) = index
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
{
"hits": [
{
"id": "450465",
"title": "Gläss",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]",
"metadata": {
"queryUid": "[uuid]",
"indexUid": "[uuid]",
"primaryKey": "id"
}
}
"###);
}
#[actix_rt::test]
async fn multi_search_without_metadata_header() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
// Test multi-search without metadata header
let (response, code) = server
.multi_search(json!({
"queries": [
{"indexUid": index.uid, "q": "glass"},
{"indexUid": index.uid, "q": "dragon"}
]
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]" }), @r###"
{
"results": [
{
"indexUid": "[uuid]",
"hits": [
{
"title": "Gläss",
"id": "450465",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]"
},
{
"indexUid": "[uuid]",
"hits": [
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
"color": [
"green",
"red"
]
}
],
"query": "dragon",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]"
}
]
}
"###);
}
#[actix_rt::test]
async fn multi_search_with_metadata_header() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, Some("id")).await;
server.wait_task(task.uid()).await.succeeded();
// Test multi-search with metadata header
let (response, code) = server
.multi_search_with_headers(
json!({
"queries": [
{"indexUid": index.uid, "q": "glass"},
{"indexUid": index.uid, "q": "dragon"}
]
}),
vec![("Meili-Include-Metadata", "true")],
)
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".results[0].processingTimeMs" => "[duration]", ".results[0].requestUid" => "[uuid]", ".results[0].metadata.queryUid" => "[uuid]", ".results[1].processingTimeMs" => "[duration]", ".results[1].requestUid" => "[uuid]", ".results[1].metadata.queryUid" => "[uuid]" }), @r###"
{
"results": [
{
"indexUid": "[uuid]",
"hits": [
{
"id": "450465",
"title": "Gläss",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]",
"metadata": {
"queryUid": "[uuid]",
"indexUid": "[uuid]",
"primaryKey": "id"
}
},
{
"indexUid": "[uuid]",
"hits": [
{
"id": "166428",
"title": "How to Train Your Dragon: The Hidden World",
"color": [
"green",
"red"
]
}
],
"query": "dragon",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]",
"metadata": {
"queryUid": "[uuid]",
"indexUid": "[uuid]",
"primaryKey": "id"
}
}
]
}
"###);
}
#[actix_rt::test]
async fn search_metadata_header_false_value() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
// Test with header set to false
let (response, code) = index
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "false")])
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]" }), @r###"
{
"hits": [
{
"title": "Gläss",
"id": "450465",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]"
}
"###);
}
#[actix_rt::test]
async fn search_metadata_uuid_format() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
{
"hits": [
{
"title": "Gläss",
"id": "450465",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]",
"metadata": {
"queryUid": "[uuid]",
"indexUid": "[uuid]",
"primaryKey": "id"
}
}
"###);
}
#[actix_rt::test]
async fn search_metadata_consistency_across_requests() {
let server = Server::new_shared();
let index = server.unique_index();
let documents = DOCUMENTS.clone();
let (task, _code) = index.add_documents(documents, Some("id")).await;
server.wait_task(task.uid()).await.succeeded();
// Make multiple requests and check that metadata is consistent
for _i in 0..3 {
let (response, code) = index
.search_with_headers(json!({"q": "glass"}), vec![("Meili-Include-Metadata", "true")])
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".requestUid" => "[uuid]", ".metadata.queryUid" => "[uuid]" }), @r###"
{
"hits": [
{
"id": "450465",
"title": "Gläss",
"color": [
"blue",
"red"
]
}
],
"query": "glass",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"requestUid": "[uuid]",
"metadata": {
"queryUid": "[uuid]",
"indexUid": "[uuid]",
"primaryKey": "id"
}
}
"###);
}
}

View File

@@ -11,6 +11,7 @@ mod hybrid;
#[cfg(not(feature = "chinese-pinyin"))]
mod locales;
mod matching_strategy;
mod metadata;
mod multi;
mod pagination;
mod restrict_searchable;