mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-22 04:36:32 +00:00
Compare commits
9 Commits
v1.22.3
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aac2983acb | ||
|
|
e8fe43ec29 | ||
|
|
79f7e68a4d | ||
|
|
f3a0969c61 | ||
|
|
0cf7ed9135 | ||
|
|
59eaf1875f | ||
|
|
decd4df5a8 | ||
|
|
2b2bbad3f8 | ||
|
|
a6eb5ec9b0 |
33
Cargo.lock
generated
33
Cargo.lock
generated
@@ -1194,6 +1194,21 @@ version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
|
||||
|
||||
[[package]]
|
||||
name = "cohere-rust"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8b553b385b0f2562138baea705b5707335314f8e91a58e7d1a03c3a6c332423"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"strum_macros 0.26.4",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "color-spantrace"
|
||||
version = "0.3.0"
|
||||
@@ -3445,7 +3460,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"strum_macros 0.27.1",
|
||||
"unicode-blocks",
|
||||
"unicode-normalization",
|
||||
"unicode-segmentation",
|
||||
@@ -3753,6 +3768,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"cargo_toml",
|
||||
"clap",
|
||||
"cohere-rust",
|
||||
"crossbeam-channel",
|
||||
"deserr",
|
||||
"dump",
|
||||
@@ -5821,7 +5837,20 @@ version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
"strum_macros 0.27.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.26.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.101",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -24,6 +24,7 @@ pub(crate) struct FeatureData {
|
||||
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
|
||||
network: Arc<RwLock<Network>>,
|
||||
experimental_personalization_api_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@@ -174,7 +175,12 @@ impl FeatureData {
|
||||
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
|
||||
let InstanceTogglableFeatures {
|
||||
metrics,
|
||||
logs_route,
|
||||
contains_filter,
|
||||
experimental_personalization_api_key,
|
||||
} = instance_features;
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: metrics || persisted_features.metrics,
|
||||
logs_route: logs_route || persisted_features.logs_route,
|
||||
@@ -189,6 +195,7 @@ impl FeatureData {
|
||||
persisted: runtime_features_db,
|
||||
runtime,
|
||||
network: Arc::new(RwLock::new(network)),
|
||||
experimental_personalization_api_key,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -219,6 +226,10 @@ impl FeatureData {
|
||||
RoFeatures::new(self)
|
||||
}
|
||||
|
||||
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
|
||||
self.experimental_personalization_api_key.as_ref()
|
||||
}
|
||||
|
||||
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
|
||||
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
|
||||
&mut wtxn,
|
||||
|
||||
@@ -280,7 +280,8 @@ impl IndexScheduler {
|
||||
let version = versioning::Versioning::new(&env, from_db_version)?;
|
||||
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
|
||||
let features =
|
||||
features::FeatureData::new(&env, &mut wtxn, options.instance_features.clone())?;
|
||||
let queue = Queue::new(&env, &mut wtxn, &options)?;
|
||||
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
|
||||
let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
|
||||
@@ -834,6 +835,10 @@ impl IndexScheduler {
|
||||
self.features.features()
|
||||
}
|
||||
|
||||
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
|
||||
self.features.experimental_personalization_api_key()
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
|
||||
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
self.features.put_runtime_features(wtxn, features)?;
|
||||
|
||||
@@ -310,6 +310,8 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
|
||||
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -650,6 +652,18 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `personalize` is invalid, expected a JSON object with optional `userContext` string.")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `userContext` is invalid, expected a string.")
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
|
||||
@@ -25,11 +25,12 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub multimodal: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
/// Feature switches and settings built from the instance's launch options.
///
/// `Debug` is implemented by hand (see below) because this struct carries an API key.
#[derive(Default, Clone)]
pub struct InstanceTogglableFeatures {
    pub metrics: bool,
    pub logs_route: bool,
    pub contains_filter: bool,
    /// API key handed to the personalization service; `None` when not configured.
    pub experimental_personalization_api_key: Option<String>,
}

// A derived `Debug` would print the API key verbatim wherever this struct is logged
// with `{:?}`. This implementation keeps the same shape but only reveals whether a
// key is present.
impl std::fmt::Debug for InstanceTogglableFeatures {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let redacted_key =
            self.experimental_personalization_api_key.as_ref().map(|_| "[REDACTED]");

        f.debug_struct("InstanceTogglableFeatures")
            .field("metrics", &self.metrics)
            .field("logs_route", &self.logs_route)
            .field("contains_filter", &self.contains_filter)
            .field("experimental_personalization_api_key", &redacted_key)
            .finish()
    }
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
|
||||
@@ -94,6 +94,7 @@ uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
cohere-rust = "0.6.0"
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.19", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
|
||||
@@ -281,6 +281,7 @@ impl Infos {
|
||||
indexer_options,
|
||||
config_file_path,
|
||||
no_analytics: _,
|
||||
experimental_personalization_api_key: _,
|
||||
} = options;
|
||||
|
||||
let schedule_snapshot = match schedule_snapshot {
|
||||
|
||||
@@ -9,6 +9,7 @@ pub mod middleware;
|
||||
pub mod option;
|
||||
#[cfg(test)]
|
||||
mod option_test;
|
||||
pub mod personalization;
|
||||
pub mod routes;
|
||||
pub mod search;
|
||||
pub mod search_queue;
|
||||
@@ -622,12 +623,19 @@ pub fn configure_data(
|
||||
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
|
||||
analytics: Data<Analytics>,
|
||||
) {
|
||||
// Create personalization service with API key from options
|
||||
let personalization_service = index_scheduler
|
||||
.experimental_personalization_api_key()
|
||||
.cloned()
|
||||
.map(personalization::PersonalizationService::cohere)
|
||||
.unwrap_or_else(personalization::PersonalizationService::uninitialized);
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
|
||||
config
|
||||
.app_data(index_scheduler)
|
||||
.app_data(auth)
|
||||
.app_data(search_queue)
|
||||
.app_data(analytics)
|
||||
.app_data(web::Data::new(personalization_service))
|
||||
.app_data(web::Data::new(logs_route))
|
||||
.app_data(web::Data::new(logs_stderr))
|
||||
.app_data(web::Data::new(opt.clone()))
|
||||
|
||||
@@ -111,4 +111,9 @@ lazy_static! {
|
||||
"Meilisearch Task Queue Size Until Stop Registering",
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
|
||||
"meilisearch_personalized_search_requests",
|
||||
"Meilisearch number of search requests with personalization"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
}
|
||||
|
||||
@@ -68,6 +68,8 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
|
||||
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
|
||||
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
|
||||
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
|
||||
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
|
||||
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
|
||||
@@ -467,6 +469,12 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_no_snapshot_compaction: bool,
|
||||
|
||||
/// Experimental personalization API key feature.
|
||||
///
|
||||
/// Sets the API key for personalization features.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
|
||||
pub experimental_personalization_api_key: Option<String>,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@@ -572,6 +580,7 @@ impl Opt {
|
||||
experimental_limit_batched_tasks_total_size,
|
||||
experimental_embedding_cache_entries,
|
||||
experimental_no_snapshot_compaction,
|
||||
experimental_personalization_api_key,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -672,6 +681,12 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
|
||||
experimental_no_snapshot_compaction.to_string(),
|
||||
);
|
||||
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
|
||||
experimental_personalization_api_key,
|
||||
);
|
||||
}
|
||||
indexer_options.export_to_env();
|
||||
}
|
||||
|
||||
@@ -724,6 +739,7 @@ impl Opt {
|
||||
metrics: self.experimental_enable_metrics,
|
||||
logs_route: self.experimental_enable_logs_route,
|
||||
contains_filter: self.experimental_contains_filter,
|
||||
experimental_personalization_api_key: self.experimental_personalization_api_key.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
244
crates/meilisearch/src/personalization/mod.rs
Normal file
244
crates/meilisearch/src/personalization/mod.rs
Normal file
@@ -0,0 +1,244 @@
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
use cohere_rust::{
|
||||
api::rerank::{ReRankModel, ReRankRequest},
|
||||
Cohere,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use tracing::{debug, error, info};
|
||||
|
||||
pub struct CohereService {
|
||||
cohere: Cohere,
|
||||
}
|
||||
|
||||
impl CohereService {
|
||||
pub fn new(api_key: String) -> Self {
|
||||
info!("Personalization service initialized with Cohere API");
|
||||
Self { cohere: Cohere::new("https://api.cohere.ai", api_key) }
|
||||
}
|
||||
|
||||
pub async fn rerank_search_results(
|
||||
&self,
|
||||
search_result: SearchResult,
|
||||
personalize: Option<&Personalize>,
|
||||
query: Option<&str>,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
// Extract user context from personalization
|
||||
let Some(user_context) = personalize.and_then(|p| p.user_context.as_deref()) else {
|
||||
return Ok(search_result);
|
||||
};
|
||||
|
||||
// Build the prompt by merging query and user context
|
||||
let prompt = match query {
|
||||
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
|
||||
None => format!("User Context: {user_context}"),
|
||||
};
|
||||
|
||||
// Extract documents for reranking
|
||||
let documents: Vec<String> = search_result
|
||||
.hits
|
||||
.iter()
|
||||
.map(|hit| {
|
||||
// Convert the document to a string representation for reranking
|
||||
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
|
||||
})
|
||||
.collect();
|
||||
|
||||
if documents.is_empty() {
|
||||
return Ok(search_result);
|
||||
}
|
||||
|
||||
// Prepare the rerank request
|
||||
let rerank_request = ReRankRequest {
|
||||
query: &prompt,
|
||||
documents: &documents,
|
||||
model: ReRankModel::EnglishV3, // Use the default and more recent model
|
||||
top_n: None,
|
||||
max_chunks_per_doc: None,
|
||||
};
|
||||
|
||||
// Call Cohere's rerank API
|
||||
match self.cohere.rerank(&rerank_request).await {
|
||||
Ok(rerank_response) => {
|
||||
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
|
||||
|
||||
// Create a mapping from original index to new rank
|
||||
let reranked_indices: Vec<usize> =
|
||||
rerank_response.iter().map(|result| result.index as usize).collect();
|
||||
|
||||
// Reorder the hits based on Cohere's reranking
|
||||
let mut reranked_hits = Vec::new();
|
||||
for index in reranked_indices.iter() {
|
||||
reranked_hits.push(search_result.hits[*index].clone());
|
||||
}
|
||||
|
||||
Ok(SearchResult { hits: reranked_hits, ..search_result })
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Cohere rerank failed with model EnglishV3: {}", e);
|
||||
// Return original results on error
|
||||
Ok(search_result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum PersonalizationService {
|
||||
Cohere(CohereService),
|
||||
Uninitialized,
|
||||
}
|
||||
|
||||
impl PersonalizationService {
|
||||
pub fn cohere(api_key: String) -> Self {
|
||||
Self::Cohere(CohereService::new(api_key))
|
||||
}
|
||||
|
||||
pub fn uninitialized() -> Self {
|
||||
debug!("Personalization service uninitialized");
|
||||
Self::Uninitialized
|
||||
}
|
||||
|
||||
pub async fn rerank_search_results(
|
||||
&self,
|
||||
search_result: SearchResult,
|
||||
personalize: Option<&Personalize>,
|
||||
query: Option<&str>,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
match self {
|
||||
Self::Cohere(cohere_service) => {
|
||||
cohere_service.rerank_search_results(search_result, personalize, query).await
|
||||
}
|
||||
Self::Uninitialized => Ok(search_result),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::search::{HitsInfo, SearchHit};

    /// Builds the one-hit search result shared by every test in this module.
    fn single_hit_result() -> SearchResult {
        SearchResult {
            hits: vec![SearchHit {
                document: serde_json::Map::new(),
                formatted: serde_json::Map::new(),
                matches_position: None,
                ranking_score: Some(1.0),
                ranking_score_details: None,
            }],
            query: "test".to_string(),
            processing_time_ms: 10,
            hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
            facet_distribution: None,
            facet_stats: None,
            semantic_hit_count: None,
            degraded: false,
            used_negative_operator: false,
        }
    }

    /// Builds a `Personalize` carrying the given user context.
    fn with_user_context(user_context: &str) -> Personalize {
        Personalize { user_context: Some(user_context.to_string()) }
    }

    #[tokio::test]
    async fn test_personalization_service_without_api_key() {
        let service = PersonalizationService::uninitialized();
        let personalize = with_user_context("test user");

        let search_result = single_hit_result();
        let expected_hits = search_result.hits.len();

        let result =
            service.rerank_search_results(search_result, Some(&personalize), Some("test")).await;
        assert!(result.is_ok());

        // Should return original results when no API key is provided
        let reranked_result = result.unwrap();
        assert_eq!(reranked_result.hits.len(), expected_hits);
    }

    #[tokio::test]
    async fn test_personalization_service_with_user_context_only() {
        let service = PersonalizationService::cohere("fake_key".to_string());
        let personalize = with_user_context("test user");

        let search_result = single_hit_result();
        let expected_hits = search_result.hits.len();

        // A user context and a hit are present, so a rerank call is issued.
        // NOTE(review): the key is fake, so this presumably exercises the error
        // fallback and depends on how the outgoing request fails — confirm, and
        // consider mocking the backend so the test does not need the network.
        let result = service.rerank_search_results(search_result, Some(&personalize), None).await;
        assert!(result.is_ok());

        let reranked_result = result.unwrap();
        assert_eq!(reranked_result.hits.len(), expected_hits);
    }

    #[tokio::test]
    async fn test_personalization_service_with_query_only() {
        let service = PersonalizationService::cohere("fake_key".to_string());

        let search_result = single_hit_result();
        let expected_hits = search_result.hits.len();

        let result = service.rerank_search_results(search_result, None, Some("test")).await;
        assert!(result.is_ok());

        // Without a user context no rerank is attempted, even though a query is
        // given: the original results come back as they are.
        let reranked_result = result.unwrap();
        assert_eq!(reranked_result.hits.len(), expected_hits);
    }

    #[tokio::test]
    async fn test_personalization_service_both_none() {
        let service = PersonalizationService::cohere("fake_key".to_string());

        let search_result = single_hit_result();
        let expected_hits = search_result.hits.len();

        let result = service.rerank_search_results(search_result, None, None).await;
        assert!(result.is_ok());

        // Should return original results when there is neither a query nor a user context
        let reranked_result = result.unwrap();
        assert_eq!(reranked_result.hits.len(), expected_hits);
    }
}
|
||||
@@ -343,6 +343,7 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,10 +22,10 @@ use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
|
||||
RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -132,6 +132,8 @@ pub struct SearchQueryGet {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
|
||||
#[param(value_type = Vec<Locale>, explode = false)]
|
||||
pub locales: Option<CS<Locale>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
|
||||
pub personalize_user_context: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
@@ -203,6 +205,10 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
));
|
||||
}
|
||||
|
||||
let personalize = other
|
||||
.personalize_user_context
|
||||
.map(|user_context| Personalize { user_context: Some(user_context) });
|
||||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
// `media` not supported for `GET`
|
||||
@@ -232,6 +238,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
hybrid,
|
||||
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
||||
locales: other.locales.map(|o| o.into_iter().collect()),
|
||||
personalize,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -320,6 +327,7 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
|
||||
pub async fn search_with_url_query(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
personalization_service: web::Data<crate::personalization::PersonalizationService>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
@@ -342,6 +350,11 @@ pub async fn search_with_url_query(
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
// Extract personalization and query string before moving query
|
||||
let personalize = query.personalize.clone();
|
||||
let query_str = query.q.clone();
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
@@ -361,7 +374,12 @@ pub async fn search_with_url_query(
|
||||
}
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
let search_result = search_result?;
|
||||
let mut search_result = search_result?;
|
||||
|
||||
// Apply personalization if requested
|
||||
search_result = personalization_service
|
||||
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
|
||||
.await?;
|
||||
|
||||
debug!(returns = ?search_result, "Search get");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
@@ -426,6 +444,7 @@ pub async fn search_with_url_query(
|
||||
pub async fn search_with_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
personalization_service: web::Data<crate::personalization::PersonalizationService>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<SearchQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
@@ -449,6 +468,10 @@ pub async fn search_with_post(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
// Extract personalization and query string before moving query
|
||||
let personalize = query.personalize.clone();
|
||||
let query_str = query.q.clone();
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
@@ -471,7 +494,12 @@ pub async fn search_with_post(
|
||||
}
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
let search_result = search_result?;
|
||||
let mut search_result = search_result?;
|
||||
|
||||
// Apply personalization if requested
|
||||
search_result = personalization_service
|
||||
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
|
||||
.await?;
|
||||
|
||||
debug!(returns = ?search_result, "Search post");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
|
||||
@@ -7,6 +7,7 @@ use serde_json::{json, Value};
|
||||
|
||||
use crate::aggregate_methods;
|
||||
use crate::analytics::{Aggregate, AggregateMethod};
|
||||
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
|
||||
use crate::search::{
|
||||
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
@@ -94,6 +95,9 @@ pub struct SearchAggregator<Method: AggregateMethod> {
|
||||
show_ranking_score_details: bool,
|
||||
ranking_score_threshold: bool,
|
||||
|
||||
// personalization
|
||||
total_personalized: usize,
|
||||
|
||||
marker: std::marker::PhantomData<Method>,
|
||||
}
|
||||
|
||||
@@ -128,6 +132,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@@ -202,6 +207,12 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
ret.locales = locales.iter().copied().collect();
|
||||
}
|
||||
|
||||
// personalization
|
||||
if personalize.is_some() {
|
||||
ret.total_personalized = 1;
|
||||
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
|
||||
}
|
||||
|
||||
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
|
||||
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
|
||||
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
|
||||
@@ -290,6 +301,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
mut locales,
|
||||
total_personalized,
|
||||
marker: _,
|
||||
} = *new;
|
||||
|
||||
@@ -374,6 +386,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
// locales
|
||||
self.locales.append(&mut locales);
|
||||
|
||||
// personalization
|
||||
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
@@ -418,6 +433,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
total_personalized,
|
||||
marker: _,
|
||||
} = *self;
|
||||
|
||||
@@ -490,6 +506,9 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
"show_ranking_score_details": show_ranking_score_details,
|
||||
"ranking_score_threshold": ranking_score_threshold,
|
||||
},
|
||||
"personalization": {
|
||||
"total_personalized": total_personalized,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,6 +67,7 @@ impl MultiSearchAggregator {
|
||||
hybrid: _,
|
||||
ranking_score_threshold: _,
|
||||
locales: _,
|
||||
personalize: _,
|
||||
} in &federated_search.queries
|
||||
{
|
||||
if let Some(federation_options) = federation_options {
|
||||
|
||||
@@ -57,6 +57,13 @@ pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
|
||||
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct Personalize {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
|
||||
pub user_context: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct SearchQuery {
|
||||
@@ -120,6 +127,8 @@ pub struct SearchQuery {
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
|
||||
pub personalize: Option<Personalize>,
|
||||
}
|
||||
|
||||
impl From<SearchParameters> for SearchQuery {
|
||||
@@ -167,6 +176,7 @@ impl From<SearchParameters> for SearchQuery {
|
||||
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
||||
crop_marker: DEFAULT_CROP_MARKER(),
|
||||
locales: None,
|
||||
personalize: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -248,6 +258,7 @@ impl fmt::Debug for SearchQuery {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchQuery");
|
||||
@@ -336,6 +347,10 @@ impl fmt::Debug for SearchQuery {
|
||||
debug.field("locales", &locales);
|
||||
}
|
||||
|
||||
if let Some(personalize) = personalize {
|
||||
debug.field("personalize", &personalize);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
@@ -541,6 +556,9 @@ pub struct SearchQueryWithIndex {
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
|
||||
#[serde(skip)]
|
||||
pub personalize: Option<Personalize>,
|
||||
|
||||
#[deserr(default)]
|
||||
pub federation_options: Option<FederationOptions>,
|
||||
@@ -598,6 +616,7 @@ impl SearchQueryWithIndex {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = query;
|
||||
|
||||
SearchQueryWithIndex {
|
||||
@@ -629,6 +648,7 @@ impl SearchQueryWithIndex {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
federation_options,
|
||||
}
|
||||
}
|
||||
@@ -664,6 +684,7 @@ impl SearchQueryWithIndex {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = self;
|
||||
(
|
||||
index_uid,
|
||||
@@ -695,6 +716,7 @@ impl SearchQueryWithIndex {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
// do not use ..Default::default() here,
|
||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||
},
|
||||
@@ -919,7 +941,7 @@ pub struct SearchResultWithIndex {
|
||||
pub result: SearchResult,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
|
||||
#[serde(untagged)]
|
||||
pub enum HitsInfo {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -1166,6 +1188,7 @@ pub fn perform_search(
|
||||
attributes_to_search_on: _,
|
||||
filter: _,
|
||||
distinct: _,
|
||||
personalize: _,
|
||||
} = query;
|
||||
|
||||
let format = AttributesFormat {
|
||||
|
||||
Reference in New Issue
Block a user