Compare commits

...

9 Commits

Author SHA1 Message Date
ManyTheFish
1197ec32e6 feat(metrics): add personalization count to metrics endpoint
- Add MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS metric to track personalized searches
- Increment metric directly in search analytics when personalization is used
- Metric automatically exposed in /metrics endpoint for monitoring
2025-07-28 13:53:37 +02:00
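Below is a minimal, self-contained sketch of the counter this commit describes, using the `prometheus` and `lazy_static` crates; the gauge name and help text are copied from the metrics hunk further down, while the `main` driver is purely illustrative.

```rust
use lazy_static::lazy_static;
use prometheus::{opts, register_int_gauge, IntGauge};

lazy_static! {
    // Registered once and exposed through the /metrics endpoint alongside the
    // other Meilisearch gauges (name and help text taken from the diff below).
    pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge =
        register_int_gauge!(opts!(
            "meilisearch_personalized_search_requests",
            "Meilisearch number of search requests with personalization"
        ))
        .expect("Can't create a metric");
}

fn main() {
    // Incremented wherever a search request carries a personalization payload.
    MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
    assert_eq!(MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.get(), 1);
}
```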
ManyTheFish
0afab11e25 feat(analytics): add personalization tracking to segment analytics
- Add total_personalized field to SearchAggregator to track personalization usage
- Track when search requests include personalization parameters
- Include personalization data in analytics JSON output
- Maintain clean personalization service interface
2025-07-28 13:53:37 +02:00
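A trimmed-down sketch of the counting pattern this commit adds; the real `SearchAggregator` has many more fields and implements Meilisearch's `Aggregate` trait, so the struct and methods below are simplified stand-ins.

```rust
#[derive(Default)]
struct SearchAggregator {
    total_received: usize,
    // Counts requests whose query carried a personalization payload.
    total_personalized: usize,
}

impl SearchAggregator {
    // One aggregate is built per incoming search request.
    fn from_query(personalized: bool) -> Self {
        Self { total_received: 1, total_personalized: personalized as usize }
    }

    // Fold a per-request aggregate into the running totals before they are
    // shipped to the analytics backend.
    fn aggregate(mut self, new: Self) -> Self {
        self.total_received = self.total_received.saturating_add(new.total_received);
        self.total_personalized = self.total_personalized.saturating_add(new.total_personalized);
        self
    }

    fn into_event(self) -> serde_json::Value {
        serde_json::json!({
            "requests": { "total_received": self.total_received },
            "personalization": { "total_personalized": self.total_personalized }
        })
    }
}

fn main() {
    let total = SearchAggregator::from_query(true).aggregate(SearchAggregator::from_query(false));
    println!("{}", total.into_event());
}
```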
ManyTheFish
e3062c4070 refactor(personalization): improve Cohere reranking logic and error handling
- Replace and_then() with early return for missing personalization
- Simplify reranking by building new hits vector instead of swapping
- Add debug logging for reranked indices
- Fix potential index out-of-bounds issues in reranking
2025-07-28 13:53:37 +02:00
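The reorder step this commit describes can be sketched as follows: given the indices returned by the reranker (best match first), build a fresh vector instead of swapping in place, and skip any index that falls outside the hit list. The function below is a simplified illustration, not the exact code from the diff.

```rust
// Build a reordered copy of the hits from the reranker's index list.
fn reorder_hits<T: Clone>(hits: &[T], reranked_indices: &[usize]) -> Vec<T> {
    reranked_indices
        .iter()
        .filter_map(|&index| hits.get(index).cloned()) // out-of-bounds indices are dropped
        .collect()
}

fn main() {
    let hits = vec!["doc_a", "doc_b", "doc_c"];
    // e.g. the reranker judged doc_c most relevant, then doc_a, then doc_b
    assert_eq!(reorder_hits(&hits, &[2, 0, 1]), vec!["doc_c", "doc_a", "doc_b"]);
}
```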
ManyTheFish
c56be3d820 refactor: split PersonalizationService into enum with CohereService
- Refactor PersonalizationService as enum with Cohere and Uninitialized variants
- Create dedicated CohereService struct with rerank_search_results method
- Split constructor into cohere() and uninitialized() methods
- Move all Cohere logic into CohereService for better separation of concerns
- Update tests and lib.rs to use new API
- Improve code organization and maintainability
2025-07-28 13:53:37 +02:00
ManyTheFish
be68dd6785 feat: refine personalization query by merging with user context
- Merge initial query with user context to create a comprehensive prompt
- Only skip reranking if both query and user_context are None
- Support reranking with query-only, user_context-only, or both
- Use 'let else' pattern for cleaner error handling
- Add comprehensive tests for different parameter combinations
- Improve prompt format for better reranking effectiveness
2025-07-28 13:53:37 +02:00
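The merge rule described in this commit boils down to the small helper below: skip only when both pieces are missing, otherwise combine whatever is present into the prompt format used for reranking. This is a sketch of the rule, not the function from the diff.

```rust
fn build_prompt(query: Option<&str>, user_context: Option<&str>) -> Option<String> {
    match (query, user_context) {
        // Nothing to rerank on: the caller keeps the original results.
        (None, None) => None,
        (Some(q), None) => Some(format!("Query: {q}")),
        (None, Some(ctx)) => Some(format!("User Context: {ctx}")),
        (Some(q), Some(ctx)) => Some(format!("User Context: {ctx}\nQuery: {q}")),
    }
}

fn main() {
    assert_eq!(build_prompt(None, None), None);
    assert_eq!(
        build_prompt(Some("sci-fi books"), Some("prefers space operas")),
        Some("User Context: prefers space operas\nQuery: sci-fi books".to_string())
    );
}
```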
ManyTheFish
bae231fb91 refactor: rename personalization API fields and move checks inside service
- Rename 'personalization' field to 'personalize' in API
- Rename 'userProfile' to 'userContext' in personalization object
- Remove 'personalized' boolean field (activation now based on non-null 'personalize')
- Move personalization checks inside rerank_search_results function
- Use 'let else' pattern for better error handling
- Update error types and messages to reflect new field names
- Update all search routes and analytics to use new field names
2025-07-28 13:53:37 +02:00
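After this rename, a personalized search request body looks roughly like the JSON built below: activation hinges on a non-null `personalize` object carrying an optional `userContext` string. The query values are invented for illustration.

```rust
fn main() {
    let body = serde_json::json!({
        "q": "best sci-fi books",
        "personalize": {
            "userContext": "likes space operas, reads mostly in French"
        }
    });
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
```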
ManyTheFish
34f18ad3a8 feat: add personalization service with EnglishV3-only reranking
- Add new personalization module with Cohere integration
- Implement rerank_search_results method using EnglishV3 model
- Remove fallback logic to EnglishV2 for simplified behavior
- Add comprehensive error handling and logging
- Include unit tests for service behavior
- Update search route to support personalization feature
2025-07-28 13:53:37 +02:00
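The sketch below mirrors the `cohere-rust` rerank call this commit wires in, as visible in the personalization module diff further down; the API key, documents, and prompt are placeholders, and a tokio runtime with the `macros` and `rt-multi-thread` features is assumed.

```rust
use cohere_rust::{
    api::rerank::{ReRankModel, ReRankRequest},
    Cohere,
};

#[tokio::main]
async fn main() {
    let cohere = Cohere::new("https://api.cohere.ai", "COHERE_API_KEY".to_string());
    let documents = vec![
        r#"{"title":"Dune"}"#.to_string(),
        r#"{"title":"Neuromancer"}"#.to_string(),
    ];
    let request = ReRankRequest {
        query: "User Context: likes space operas\nQuery: sci-fi",
        documents: &documents,
        model: ReRankModel::EnglishV3, // the EnglishV2 fallback was removed by this commit
        top_n: None,
        max_chunks_per_doc: None,
    };
    match cohere.rerank(&request).await {
        Ok(results) => {
            for result in results.iter() {
                // `index` points back into `documents`, best match first.
                println!("reranked index: {}", result.index);
            }
        }
        Err(err) => eprintln!("rerank failed: {err}"),
    }
}
```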
ManyTheFish
a7cdcad4b2 feat: add personalization parameters to /search route
- Add Personalization struct with personalized boolean and user_profile string
- Add personalizationPersonalized and personalizationUserProfile query parameters to SearchQueryGet
- Follow same pattern as hybrid parameters (hybridEmbedder, hybridSemanticRatio)
- Add validation: personalizationUserProfile requires personalizationPersonalized
- Add error codes for personalization parameters
- Update analytics and facet search to handle new personalization field
- Remove serde dependencies from Personalization struct, use Deserr only
2025-07-28 13:53:37 +02:00
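The validation rule from this commit ("personalizationUserProfile requires personalizationPersonalized") can be sketched as below. The struct is a simplified stand-in for `SearchQueryGet`, the error is a plain string rather than the typed `ResponseError`, and note that a later commit (bae231fb91) renames these parameters.

```rust
struct SearchQueryGet {
    personalization_personalized: Option<bool>,
    personalization_user_profile: Option<String>,
}

// A user profile only makes sense when personalization is explicitly enabled.
fn validate(query: &SearchQueryGet) -> Result<(), String> {
    if query.personalization_user_profile.is_some() && query.personalization_personalized.is_none()
    {
        return Err(
            "`personalizationUserProfile` requires `personalizationPersonalized`".to_string(),
        );
    }
    Ok(())
}

fn main() {
    let bad = SearchQueryGet {
        personalization_personalized: None,
        personalization_user_profile: Some("likes sci-fi".to_string()),
    };
    assert!(validate(&bad).is_err());
}
```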
ManyTheFish
c0cd05e5f6 feat: add experimental_personalization_api_key feature to RoFeatures
- Add MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY environment variable
- Add experimental_personalization_api_key field to Opt struct with CLI and env support
- Add experimental_personalization_api_key field to InstanceTogglableFeatures
- Store personalization API key in FeatureData for access through IndexScheduler
- Add experimental_personalization_api_key() method to IndexScheduler
- Update analytics destructuring to include new field
- Maintain RoFeatures Copy trait while properly handling Option<String>
2025-07-28 13:53:36 +02:00
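A rough sketch of the CLI/env wiring this commit describes, using clap's derive API with the `env` feature enabled; the constant and field names follow the options hunk further down, while the real `Opt` struct carries many more fields.

```rust
use clap::Parser;

const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
    "MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";

#[derive(Parser, Debug)]
struct Opt {
    /// Experimental personalization API key feature.
    ///
    /// Sets the API key for personalization features.
    #[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
    experimental_personalization_api_key: Option<String>,
}

fn main() {
    // Reads --experimental-personalization-api-key or the environment variable.
    let opt = Opt::parse();
    println!("personalization key set: {}", opt.experimental_personalization_api_key.is_some());
}
```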
16 changed files with 419 additions and 12 deletions

Cargo.lock (generated, 33 changed lines)
View File

@@ -1194,6 +1194,21 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "cohere-rust"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b553b385b0f2562138baea705b5707335314f8e91a58e7d1a03c3a6c332423"
dependencies = [
"bytes",
"reqwest",
"serde",
"serde_json",
"strum_macros 0.26.4",
"thiserror 1.0.69",
"tokio",
]
[[package]]
name = "color-spantrace"
version = "0.3.0"
@@ -3445,7 +3460,7 @@ dependencies = [
"serde_json",
"serde_yaml",
"strum",
"strum_macros",
"strum_macros 0.27.1",
"unicode-blocks",
"unicode-normalization",
"unicode-segmentation",
@@ -3753,6 +3768,7 @@ dependencies = [
"bytes",
"cargo_toml",
"clap",
"cohere-rust",
"crossbeam-channel",
"deserr",
"dump",
@@ -5822,7 +5838,20 @@ version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
dependencies = [
"strum_macros",
"strum_macros 0.27.1",
]
[[package]]
name = "strum_macros"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.101",
]
[[package]]

View File

@@ -24,6 +24,7 @@ pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
experimental_personalization_api_key: Option<String>,
}
#[derive(Debug, Clone, Copy)]
@@ -174,7 +175,12 @@ impl FeatureData {
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
- let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
+ let InstanceTogglableFeatures {
+     metrics,
+     logs_route,
+     contains_filter,
+     experimental_personalization_api_key,
+ } = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics,
logs_route: logs_route || persisted_features.logs_route,
@@ -189,6 +195,7 @@ impl FeatureData {
persisted: runtime_features_db,
runtime,
network: Arc::new(RwLock::new(network)),
experimental_personalization_api_key,
})
}
@@ -219,6 +226,10 @@ impl FeatureData {
RoFeatures::new(self)
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.experimental_personalization_api_key.as_ref()
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
&mut wtxn,

View File

@@ -284,7 +284,8 @@ impl IndexScheduler {
let version = versioning::Versioning::new(&env, from_db_version)?;
let mut wtxn = env.write_txn()?;
- let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
+ let features =
+     features::FeatureData::new(&env, &mut wtxn, options.instance_features.clone())?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
@@ -846,6 +847,10 @@ impl IndexScheduler {
self.features.features()
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.features.experimental_personalization_api_key()
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.features.put_runtime_features(wtxn, features)?;

View File

@@ -310,6 +310,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -650,6 +652,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `personalize` is invalid, expected a JSON object with optional `userContext` string.")
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `userContext` is invalid, expected a string.")
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@@ -25,11 +25,12 @@ pub struct RuntimeTogglableFeatures {
pub multimodal: bool,
}
- #[derive(Default, Debug, Clone, Copy)]
+ #[derive(Default, Debug, Clone)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
pub contains_filter: bool,
pub experimental_personalization_api_key: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]

View File

@@ -95,6 +95,7 @@ uuid = { version = "1.17.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.4", features = ["serde"] }
cohere-rust = "0.6.0"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }

View File

@@ -282,6 +282,7 @@ impl Infos {
indexer_options,
config_file_path,
no_analytics: _,
experimental_personalization_api_key: _,
} = options;
let schedule_snapshot = match schedule_snapshot {

View File

@@ -9,6 +9,7 @@ pub mod middleware;
pub mod option;
#[cfg(test)]
mod option_test;
pub mod personalization;
pub mod routes;
pub mod search;
pub mod search_queue;
@@ -676,12 +677,19 @@ pub fn configure_data(
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
) {
// Create personalization service with API key from options
let personalization_service = index_scheduler
.experimental_personalization_api_key()
.cloned()
.map(personalization::PersonalizationService::cohere)
.unwrap_or_else(personalization::PersonalizationService::uninitialized);
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(search_queue)
.app_data(analytics)
.app_data(web::Data::new(personalization_service))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))

View File

@@ -111,4 +111,9 @@ lazy_static! {
"Meilisearch Task Queue Size Until Stop Registering",
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
"meilisearch_personalized_search_requests",
"Meilisearch number of search requests with personalization"
))
.expect("Can't create a metric");
}

View File

@@ -70,6 +70,8 @@ const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -469,6 +471,12 @@ pub struct Opt {
#[serde(default)]
pub experimental_no_snapshot_compaction: bool,
/// Experimental personalization API key feature.
///
/// Sets the API key for personalization features.
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
pub experimental_personalization_api_key: Option<String>,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -574,6 +582,7 @@ impl Opt {
experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_personalization_api_key,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -674,6 +683,12 @@ impl Opt {
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
experimental_no_snapshot_compaction.to_string(),
);
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
experimental_personalization_api_key,
);
}
indexer_options.export_to_env();
}
@@ -726,6 +741,7 @@ impl Opt {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
contains_filter: self.experimental_contains_filter,
experimental_personalization_api_key: self.experimental_personalization_api_key.clone(),
}
}
}

View File

@@ -0,0 +1,244 @@
use crate::search::{Personalize, SearchResult};
use cohere_rust::{
api::rerank::{ReRankModel, ReRankRequest},
Cohere,
};
use meilisearch_types::error::ResponseError;
use tracing::{debug, error, info};
pub struct CohereService {
cohere: Cohere,
}
impl CohereService {
pub fn new(api_key: String) -> Self {
info!("Personalization service initialized with Cohere API");
Self { cohere: Cohere::new("https://api.cohere.ai", api_key) }
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
// Extract user context from personalization
let Some(user_context) = personalize.and_then(|p| p.user_context.as_deref()) else {
return Ok(search_result);
};
// Build the prompt by merging query and user context
let prompt = match query {
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
None => format!("User Context: {user_context}"),
};
// Extract documents for reranking
let documents: Vec<String> = search_result
.hits
.iter()
.map(|hit| {
// Convert the document to a string representation for reranking
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
})
.collect();
if documents.is_empty() {
return Ok(search_result);
}
// Prepare the rerank request
let rerank_request = ReRankRequest {
query: &prompt,
documents: &documents,
model: ReRankModel::EnglishV3, // Use the default and more recent model
top_n: None,
max_chunks_per_doc: None,
};
// Call Cohere's rerank API
match self.cohere.rerank(&rerank_request).await {
Ok(rerank_response) => {
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
// Create a mapping from original index to new rank
let reranked_indices: Vec<usize> =
rerank_response.iter().map(|result| result.index as usize).collect();
// Reorder the hits based on Cohere's reranking
let mut reranked_hits = Vec::new();
for index in reranked_indices.iter() {
reranked_hits.push(search_result.hits[*index].clone());
}
Ok(SearchResult { hits: reranked_hits, ..search_result })
}
Err(e) => {
error!("Cohere rerank failed with model EnglishV3: {}", e);
// Return original results on error
Ok(search_result)
}
}
}
}
pub enum PersonalizationService {
Cohere(CohereService),
Uninitialized,
}
impl PersonalizationService {
pub fn cohere(api_key: String) -> Self {
Self::Cohere(CohereService::new(api_key))
}
pub fn uninitialized() -> Self {
debug!("Personalization service uninitialized");
Self::Uninitialized
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
cohere_service.rerank_search_results(search_result, personalize, query).await
}
Self::Uninitialized => Ok(search_result),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::search::{HitsInfo, SearchHit};
#[tokio::test]
async fn test_personalization_service_without_api_key() {
let service = PersonalizationService::uninitialized();
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service
.rerank_search_results(search_result.clone(), Some(&personalize), Some("test"))
.await;
assert!(result.is_ok());
// Should return original results when no API key is provided
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_user_context_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result =
service.rerank_search_results(search_result.clone(), Some(&personalize), None).await;
assert!(result.is_ok());
// Should attempt reranking with user context only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_query_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, Some("test")).await;
assert!(result.is_ok());
// Should attempt reranking with query only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_both_none() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, None).await;
assert!(result.is_ok());
// Should return original results when both query and user_context are None
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
}
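A hypothetical driver (not part of the diff) showing how the service above is meant to be used: pick the variant from an optional API key, exactly as `configure_data` does in an earlier hunk of this compare view, then hand the results to it. All types come from the module shown here, and an async runtime is assumed.

```rust
async fn personalized_search(
    api_key: Option<String>,
    personalize: Option<&Personalize>,
    query: Option<&str>,
    search_result: SearchResult,
) -> Result<SearchResult, ResponseError> {
    // Variant selection mirrors the `configure_data` hunk shown earlier.
    let service = match api_key {
        Some(key) => PersonalizationService::cohere(key),
        None => PersonalizationService::uninitialized(),
    };
    // With the Uninitialized variant this is a no-op and the hits come back untouched.
    service.rerank_search_results(search_result, personalize, query).await
}
```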

View File

@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid,
ranking_score_threshold,
locales,
personalize: None,
}
}
}

View File

@@ -22,10 +22,10 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
use crate::search::{
- add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
- RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
- DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
- DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
+ add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
+ RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio,
+ DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+ DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@@ -132,6 +132,8 @@ pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
#[param(value_type = Vec<Locale>, explode = false)]
pub locales: Option<CS<Locale>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
pub personalize_user_context: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@@ -203,6 +205,10 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
));
}
let personalize = other
.personalize_user_context
.map(|user_context| Personalize { user_context: Some(user_context) });
Ok(Self {
q: other.q,
// `media` not supported for `GET`
@@ -232,6 +238,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
personalize,
})
}
}
@@ -320,6 +327,7 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
@@ -342,6 +350,11 @@ pub async fn search_with_url_query(
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
@@ -361,7 +374,12 @@ pub async fn search_with_url_query(
}
analytics.publish(aggregate, &req);
- let search_result = search_result?;
+ let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
debug!(returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result))
@@ -426,6 +444,7 @@ pub async fn search_with_url_query(
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
@@ -449,6 +468,10 @@ pub async fn search_with_post(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
@@ -471,7 +494,12 @@ pub async fn search_with_post(
}
analytics.publish(aggregate, &req);
- let search_result = search_result?;
+ let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
debug!(returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result))

View File

@@ -7,6 +7,7 @@ use serde_json::{json, Value};
use crate::aggregate_methods;
use crate::analytics::{Aggregate, AggregateMethod};
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
@@ -94,6 +95,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
show_ranking_score_details: bool,
ranking_score_threshold: bool,
// personalization
total_personalized: usize,
marker: std::marker::PhantomData<Method>,
}
@@ -128,6 +132,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = query;
let mut ret = Self::default();
@@ -202,6 +207,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
ret.locales = locales.iter().copied().collect();
}
// personalization
if personalize.is_some() {
ret.total_personalized = 1;
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
}
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@@ -290,6 +301,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
mut locales,
total_personalized,
marker: _,
} = *new;
@@ -374,6 +386,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
// locales
self.locales.append(&mut locales);
// personalization
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
self
}
@@ -418,6 +433,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
locales,
total_personalized,
marker: _,
} = *self;
@@ -490,6 +506,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
"personalization": {
"total_personalized": total_personalized,
},
})
}
}

View File

@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
hybrid: _,
ranking_score_threshold: _,
locales: _,
personalize: _,
} in &federated_search.queries
{
if let Some(federation_options) = federation_options {

View File

@@ -57,6 +57,13 @@ pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
pub struct Personalize {
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
pub user_context: Option<String>,
}
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQuery {
@@ -120,6 +127,8 @@ pub struct SearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
pub personalize: Option<Personalize>,
}
impl From<SearchParameters> for SearchQuery {
@@ -167,6 +176,7 @@ impl From<SearchParameters> for SearchQuery {
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(),
locales: None,
personalize: None,
}
}
}
@@ -248,6 +258,7 @@ impl fmt::Debug for SearchQuery {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@@ -336,6 +347,10 @@ impl fmt::Debug for SearchQuery {
debug.field("locales", &locales);
}
if let Some(personalize) = personalize {
debug.field("personalize", &personalize);
}
debug.finish()
}
}
@@ -541,6 +556,9 @@ pub struct SearchQueryWithIndex {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
#[serde(skip)]
pub personalize: Option<Personalize>,
#[deserr(default)]
pub federation_options: Option<FederationOptions>,
@@ -598,6 +616,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = query;
SearchQueryWithIndex {
@@ -629,6 +648,7 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
federation_options,
}
}
@@ -664,6 +684,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = self;
(
index_uid,
@@ -695,6 +716,7 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -919,7 +941,7 @@ pub struct SearchResultWithIndex {
pub result: SearchResult,
}
- #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
+ #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
#[serde(untagged)]
pub enum HitsInfo {
#[serde(rename_all = "camelCase")]
@@ -1166,6 +1188,7 @@ pub fn perform_search(
attributes_to_search_on: _,
filter: _,
distinct: _,
personalize: _,
} = query;
let format = AttributesFormat {