mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-01 02:05:36 +00:00
Compare commits
17 Commits
add-ollama
...
aggregate_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a1a79389fc | ||
|
|
3115af9baf | ||
|
|
da138deaf7 | ||
|
|
fafe432eb1 | ||
|
|
5e99f16859 | ||
|
|
fc0eb3901d | ||
|
|
4e740f4c5f | ||
|
|
efc3371b6f | ||
|
|
73085d6b03 | ||
|
|
0ee35ede86 | ||
|
|
16898c661e | ||
|
|
4a2a6dc529 | ||
|
|
63ddea8ae4 | ||
|
|
df749d424c | ||
|
|
0cfecf4e9a | ||
|
|
b8f4e2b3e4 | ||
|
|
daafbc88d6 |
@@ -236,10 +236,13 @@ InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidMultiSearchMergeStrategy , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||||
|
|||||||
@@ -56,6 +56,10 @@ pub struct SearchQueryGet {
|
|||||||
sort: Option<String>,
|
sort: Option<String>,
|
||||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
|
||||||
show_matches_position: Param<bool>,
|
show_matches_position: Param<bool>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
|
||||||
|
show_ranking_score: Param<bool>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
|
||||||
|
show_ranking_score_details: Param<bool>,
|
||||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
|
||||||
facets: Option<CS<String>>,
|
facets: Option<CS<String>>,
|
||||||
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
|
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
|
||||||
@@ -91,6 +95,8 @@ impl From<SearchQueryGet> for SearchQuery {
|
|||||||
filter,
|
filter,
|
||||||
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
|
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
|
||||||
show_matches_position: other.show_matches_position.0,
|
show_matches_position: other.show_matches_position.0,
|
||||||
|
show_ranking_score: other.show_ranking_score.0,
|
||||||
|
show_ranking_score_details: other.show_ranking_score_details.0,
|
||||||
facets: other.facets.map(|o| o.into_iter().collect()),
|
facets: other.facets.map(|o| o.into_iter().collect()),
|
||||||
highlight_pre_tag: other.highlight_pre_tag,
|
highlight_pre_tag: other.highlight_pre_tag,
|
||||||
highlight_post_tag: other.highlight_post_tag,
|
highlight_post_tag: other.highlight_post_tag,
|
||||||
|
|||||||
@@ -1,20 +1,26 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use actix_http::StatusCode;
|
use actix_http::StatusCode;
|
||||||
use actix_web::web::{self, Data};
|
use actix_web::web::{self, Data};
|
||||||
use actix_web::{HttpRequest, HttpResponse};
|
use actix_web::{HttpRequest, HttpResponse};
|
||||||
use deserr::actix_web::AwebJson;
|
use deserr::actix_web::AwebJson;
|
||||||
|
use deserr::Deserr;
|
||||||
use index_scheduler::IndexScheduler;
|
use index_scheduler::IndexScheduler;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use meilisearch_types::deserr::DeserrJsonError;
|
use meilisearch_types::deserr::DeserrJsonError;
|
||||||
|
use meilisearch_types::error::deserr_codes::InvalidMultiSearchMergeStrategy;
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
use meilisearch_types::keys::actions;
|
use meilisearch_types::keys::actions;
|
||||||
|
use meilisearch_types::milli::score_details::NotComparable;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::analytics::{Analytics, MultiSearchAggregator};
|
use crate::analytics::{Analytics, MultiSearchAggregator};
|
||||||
use crate::extractors::authentication::policies::ActionPolicy;
|
use crate::extractors::authentication::policies::ActionPolicy;
|
||||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||||
use crate::extractors::sequential_extractor::SeqHandler;
|
use crate::extractors::sequential_extractor::SeqHandler;
|
||||||
|
use crate::milli::score_details::ScoreDetails;
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
|
add_search_rules, perform_search, SearchHit, SearchQueryWithIndex, SearchResultWithIndex,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||||
@@ -23,13 +29,34 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct SearchResults {
|
struct SearchResults {
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
aggregate_hits: Option<Vec<SearchHitWithIndex>>,
|
||||||
results: Vec<SearchResultWithIndex>,
|
results: Vec<SearchResultWithIndex>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
struct SearchHitWithIndex {
|
||||||
|
pub index_uid: String,
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub hit: SearchHit,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, deserr::Deserr)]
|
#[derive(Debug, deserr::Deserr)]
|
||||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
pub struct SearchQueries {
|
pub struct SearchQueries {
|
||||||
queries: Vec<SearchQueryWithIndex>,
|
queries: Vec<SearchQueryWithIndex>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeStrategy>, default)]
|
||||||
|
merge_strategy: MergeStrategy,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Deserr, Default)]
|
||||||
|
#[deserr(rename_all = camelCase)]
|
||||||
|
pub enum MergeStrategy {
|
||||||
|
#[default]
|
||||||
|
None,
|
||||||
|
ByNormalizedScore,
|
||||||
|
ByScoreDetails,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn multi_search_with_post(
|
pub async fn multi_search_with_post(
|
||||||
@@ -38,7 +65,13 @@ pub async fn multi_search_with_post(
|
|||||||
req: HttpRequest,
|
req: HttpRequest,
|
||||||
analytics: web::Data<dyn Analytics>,
|
analytics: web::Data<dyn Analytics>,
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
let queries = params.into_inner().queries;
|
let SearchQueries { queries, merge_strategy } = params.into_inner();
|
||||||
|
// FIXME: REMOVE UNWRAP
|
||||||
|
let max_hits = queries
|
||||||
|
.iter()
|
||||||
|
.map(|SearchQueryWithIndex { limit, hits_per_page, .. }| hits_per_page.unwrap_or(*limit))
|
||||||
|
.max()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
|
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
|
||||||
|
|
||||||
@@ -104,7 +137,117 @@ pub async fn multi_search_with_post(
|
|||||||
|
|
||||||
debug!("returns: {:?}", search_results);
|
debug!("returns: {:?}", search_results);
|
||||||
|
|
||||||
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
|
let aggregate_hits = match merge_strategy {
|
||||||
|
MergeStrategy::None => None,
|
||||||
|
MergeStrategy::ByScoreDetails => Some(merge_by_score_details(&search_results, max_hits)),
|
||||||
|
MergeStrategy::ByNormalizedScore => {
|
||||||
|
Some(merge_by_normalized_score(&search_results, max_hits))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(HttpResponse::Ok().json(SearchResults { aggregate_hits, results: search_results }))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn merge_by_score_details(
|
||||||
|
search_results: &[SearchResultWithIndex],
|
||||||
|
max_hits: usize,
|
||||||
|
) -> Vec<SearchHitWithIndex> {
|
||||||
|
let mut iterators: Vec<_> = search_results
|
||||||
|
.iter()
|
||||||
|
.filter_map(|SearchResultWithIndex { index_uid, result }| {
|
||||||
|
let mut it = result.hits.iter();
|
||||||
|
let next = it.next()?;
|
||||||
|
Some((index_uid, it, next))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut hits = Vec::with_capacity(max_hits);
|
||||||
|
|
||||||
|
let mut inconsistent_indexes = HashMap::new();
|
||||||
|
|
||||||
|
for _ in 0..max_hits {
|
||||||
|
iterators.sort_by(|(left_uid, _, left_hit), (right_uid, _, right_hit)| {
|
||||||
|
let error = match ScoreDetails::partial_cmp_iter(
|
||||||
|
left_hit.ranking_score_raw.iter(),
|
||||||
|
right_hit.ranking_score_raw.iter(),
|
||||||
|
) {
|
||||||
|
Ok(ord) => return ord,
|
||||||
|
Err(NotComparable(incomparable_index)) => incomparable_index,
|
||||||
|
};
|
||||||
|
inconsistent_indexes.entry((left_uid.to_owned(), right_uid.to_owned())).or_insert_with(
|
||||||
|
|| {
|
||||||
|
format!(
|
||||||
|
"Detailed score {:?} is not comparable with {:?}: (left: {:#?}, right: {:#?})",
|
||||||
|
left_hit.ranking_score_raw.get(error),
|
||||||
|
right_hit.ranking_score_raw.get(error),
|
||||||
|
left_hit.ranking_score_raw,
|
||||||
|
right_hit.ranking_score_raw
|
||||||
|
)
|
||||||
|
},
|
||||||
|
);
|
||||||
|
std::cmp::Ordering::Less
|
||||||
|
});
|
||||||
|
if !inconsistent_indexes.is_empty() {
|
||||||
|
let mut s = String::new();
|
||||||
|
for ((left_uid, right_uid), error) in &inconsistent_indexes {
|
||||||
|
use std::fmt::Write;
|
||||||
|
writeln!(s, "Indexes {} and {} are inconsistent: {}", left_uid, right_uid, error)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
// Replace panic with proper error
|
||||||
|
panic!("{}", s);
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some((index_uid, it, next)) = iterators.last_mut()
|
||||||
|
else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
let hit = SearchHitWithIndex { index_uid: index_uid.clone(), hit: next.clone() };
|
||||||
|
if let Some(next_hit) = it.next() {
|
||||||
|
*next = next_hit;
|
||||||
|
} else {
|
||||||
|
iterators.pop();
|
||||||
|
}
|
||||||
|
hits.push(hit);
|
||||||
|
}
|
||||||
|
hits
|
||||||
|
}
|
||||||
|
|
||||||
|
fn merge_by_normalized_score(
|
||||||
|
search_results: &[SearchResultWithIndex],
|
||||||
|
max_hits: usize,
|
||||||
|
) -> Vec<SearchHitWithIndex> {
|
||||||
|
let mut iterators: Vec<_> = search_results
|
||||||
|
.iter()
|
||||||
|
.filter_map(|SearchResultWithIndex { index_uid, result }| {
|
||||||
|
let mut it = result.hits.iter();
|
||||||
|
let next = it.next()?;
|
||||||
|
Some((index_uid, it, next))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut hits = Vec::with_capacity(max_hits);
|
||||||
|
|
||||||
|
for _ in 0..max_hits {
|
||||||
|
iterators.sort_by_key(|(_, _, hit)| {
|
||||||
|
ScoreDetails::global_score_linear_scale(hit.ranking_score_raw.iter())
|
||||||
|
});
|
||||||
|
|
||||||
|
let Some((index_uid, it, next)) = iterators.last_mut()
|
||||||
|
else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
let hit = SearchHitWithIndex { index_uid: index_uid.clone(), hit: next.clone() };
|
||||||
|
if let Some(next_hit) = it.next() {
|
||||||
|
*next = next_hit;
|
||||||
|
} else {
|
||||||
|
iterators.pop();
|
||||||
|
}
|
||||||
|
hits.push(hit);
|
||||||
|
}
|
||||||
|
hits
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Local `Result` extension trait to avoid `map_err` boilerplate.
|
/// Local `Result` extension trait to avoid `map_err` boilerplate.
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ use meilisearch_auth::IndexSearchRules;
|
|||||||
use meilisearch_types::deserr::DeserrJsonError;
|
use meilisearch_types::deserr::DeserrJsonError;
|
||||||
use meilisearch_types::error::deserr_codes::*;
|
use meilisearch_types::error::deserr_codes::*;
|
||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
|
use meilisearch_types::milli::score_details::ScoreDetails;
|
||||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||||
use meilisearch_types::{milli, Document};
|
use meilisearch_types::{milli, Document};
|
||||||
use milli::tokenizer::TokenizerBuilder;
|
use milli::tokenizer::TokenizerBuilder;
|
||||||
@@ -54,6 +55,10 @@ pub struct SearchQuery {
|
|||||||
pub attributes_to_highlight: Option<HashSet<String>>,
|
pub attributes_to_highlight: Option<HashSet<String>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
|
||||||
pub show_matches_position: bool,
|
pub show_matches_position: bool,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
|
||||||
|
pub show_ranking_score: bool,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
|
||||||
|
pub show_ranking_score_details: bool,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||||
pub filter: Option<Value>,
|
pub filter: Option<Value>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
||||||
@@ -103,6 +108,10 @@ pub struct SearchQueryWithIndex {
|
|||||||
pub crop_length: usize,
|
pub crop_length: usize,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
|
||||||
pub attributes_to_highlight: Option<HashSet<String>>,
|
pub attributes_to_highlight: Option<HashSet<String>>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
|
||||||
|
pub show_ranking_score: bool,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
|
||||||
|
pub show_ranking_score_details: bool,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
|
||||||
pub show_matches_position: bool,
|
pub show_matches_position: bool,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||||
@@ -134,6 +143,8 @@ impl SearchQueryWithIndex {
|
|||||||
attributes_to_crop,
|
attributes_to_crop,
|
||||||
crop_length,
|
crop_length,
|
||||||
attributes_to_highlight,
|
attributes_to_highlight,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
show_matches_position,
|
show_matches_position,
|
||||||
filter,
|
filter,
|
||||||
sort,
|
sort,
|
||||||
@@ -155,6 +166,8 @@ impl SearchQueryWithIndex {
|
|||||||
attributes_to_crop,
|
attributes_to_crop,
|
||||||
crop_length,
|
crop_length,
|
||||||
attributes_to_highlight,
|
attributes_to_highlight,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
show_matches_position,
|
show_matches_position,
|
||||||
filter,
|
filter,
|
||||||
sort,
|
sort,
|
||||||
@@ -194,7 +207,7 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
|
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||||
pub struct SearchHit {
|
pub struct SearchHit {
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
pub document: Document,
|
pub document: Document,
|
||||||
@@ -202,6 +215,12 @@ pub struct SearchHit {
|
|||||||
pub formatted: Document,
|
pub formatted: Document,
|
||||||
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
|
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
|
||||||
pub matches_position: Option<MatchesPosition>,
|
pub matches_position: Option<MatchesPosition>,
|
||||||
|
#[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
|
||||||
|
pub ranking_score: Option<u64>,
|
||||||
|
#[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
|
||||||
|
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
|
||||||
|
#[serde(skip)]
|
||||||
|
pub ranking_score_raw: Vec<ScoreDetails>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
@@ -320,7 +339,8 @@ pub fn perform_search(
|
|||||||
search.sort_criteria(sort);
|
search.sort_criteria(sort);
|
||||||
}
|
}
|
||||||
|
|
||||||
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
|
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
|
||||||
|
search.execute()?;
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
|
||||||
@@ -392,7 +412,7 @@ pub fn perform_search(
|
|||||||
|
|
||||||
let documents_iter = index.documents(&rtxn, documents_ids)?;
|
let documents_iter = index.documents(&rtxn, documents_ids)?;
|
||||||
|
|
||||||
for (_id, obkv) in documents_iter {
|
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
||||||
// First generate a document with all the displayed fields
|
// First generate a document with all the displayed fields
|
||||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
||||||
|
|
||||||
@@ -416,7 +436,19 @@ pub fn perform_search(
|
|||||||
insert_geo_distance(sort, &mut document);
|
insert_geo_distance(sort, &mut document);
|
||||||
}
|
}
|
||||||
|
|
||||||
let hit = SearchHit { document, formatted, matches_position };
|
let ranking_score =
|
||||||
|
query.show_ranking_score.then(|| ScoreDetails::global_score_linear_scale(score.iter()));
|
||||||
|
let ranking_score_details =
|
||||||
|
query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
|
||||||
|
|
||||||
|
let hit = SearchHit {
|
||||||
|
document,
|
||||||
|
formatted,
|
||||||
|
matches_position,
|
||||||
|
ranking_score_details,
|
||||||
|
ranking_score,
|
||||||
|
ranking_score_raw: score,
|
||||||
|
};
|
||||||
documents.push(hit);
|
documents.push(hit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
use insta::{allow_duplicates, assert_json_snapshot};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -18,30 +19,45 @@ async fn formatted_contain_wildcard() {
|
|||||||
|response, code|
|
|response, code|
|
||||||
{
|
{
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"_formatted": {
|
@r###"
|
||||||
"id": "852",
|
{
|
||||||
"cattos": "<em>pésti</em>",
|
"_formatted": {
|
||||||
},
|
"id": "852",
|
||||||
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
"cattos": "<em>pésti</em>"
|
||||||
})
|
},
|
||||||
);
|
"_matchesPosition": {
|
||||||
}
|
"cattos": [
|
||||||
|
{
|
||||||
|
"start": 0,
|
||||||
|
"length": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
}
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| {
|
.search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
"cattos": "pésti",
|
{
|
||||||
})
|
"id": 852,
|
||||||
);
|
"cattos": "pésti",
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -50,20 +66,30 @@ async fn formatted_contain_wildcard() {
|
|||||||
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
"cattos": "pésti",
|
{
|
||||||
"_formatted": {
|
"id": 852,
|
||||||
"id": "852",
|
"cattos": "pésti",
|
||||||
"cattos": "pésti",
|
"_formatted": {
|
||||||
},
|
"id": "852",
|
||||||
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
|
"cattos": "pésti"
|
||||||
})
|
},
|
||||||
);
|
"_matchesPosition": {
|
||||||
}
|
"cattos": [
|
||||||
)
|
{
|
||||||
|
"start": 0,
|
||||||
|
"length": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
@@ -71,17 +97,21 @@ async fn formatted_contain_wildcard() {
|
|||||||
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
"cattos": "pésti",
|
{
|
||||||
"_formatted": {
|
"id": 852,
|
||||||
"id": "852",
|
"cattos": "pésti",
|
||||||
"cattos": "pésti",
|
"_formatted": {
|
||||||
}
|
"id": "852",
|
||||||
})
|
"cattos": "pésti"
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -89,17 +119,21 @@ async fn formatted_contain_wildcard() {
|
|||||||
index
|
index
|
||||||
.search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| {
|
.search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
"cattos": "pésti",
|
{
|
||||||
"_formatted": {
|
"id": 852,
|
||||||
"id": "852",
|
"cattos": "pésti",
|
||||||
"cattos": "pésti",
|
"_formatted": {
|
||||||
}
|
"id": "852",
|
||||||
})
|
"cattos": "pésti"
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
@@ -116,21 +150,25 @@ async fn format_nested() {
|
|||||||
index
|
index
|
||||||
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"doggos": [
|
@r###"
|
||||||
{
|
{
|
||||||
"name": "bobby",
|
"doggos": [
|
||||||
"age": 2,
|
{
|
||||||
},
|
"name": "bobby",
|
||||||
{
|
"age": 2
|
||||||
"name": "buddy",
|
},
|
||||||
"age": 4,
|
{
|
||||||
},
|
"name": "buddy",
|
||||||
],
|
"age": 4
|
||||||
})
|
}
|
||||||
);
|
],
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -139,19 +177,23 @@ async fn format_nested() {
|
|||||||
json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }),
|
json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"doggos": [
|
@r###"
|
||||||
{
|
{
|
||||||
"name": "bobby",
|
"doggos": [
|
||||||
},
|
{
|
||||||
{
|
"name": "bobby"
|
||||||
"name": "buddy",
|
},
|
||||||
},
|
{
|
||||||
],
|
"name": "buddy"
|
||||||
})
|
}
|
||||||
);
|
],
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -161,20 +203,31 @@ async fn format_nested() {
|
|||||||
json!({ "q": "bobby", "attributesToRetrieve": ["doggos.name"], "showMatchesPosition": true }),
|
json!({ "q": "bobby", "attributesToRetrieve": ["doggos.name"], "showMatchesPosition": true }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"doggos": [
|
@r###"
|
||||||
{
|
{
|
||||||
"name": "bobby",
|
"doggos": [
|
||||||
},
|
{
|
||||||
{
|
"name": "bobby"
|
||||||
"name": "buddy",
|
},
|
||||||
},
|
{
|
||||||
],
|
"name": "buddy"
|
||||||
"_matchesPosition": {"doggos.name": [{"start": 0, "length": 5}]},
|
}
|
||||||
})
|
],
|
||||||
);
|
"_matchesPosition": {
|
||||||
|
"doggos.name": [
|
||||||
|
{
|
||||||
|
"start": 0,
|
||||||
|
"length": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -183,21 +236,25 @@ async fn format_nested() {
|
|||||||
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"_formatted": {
|
@r###"
|
||||||
"doggos": [
|
{
|
||||||
{
|
"_formatted": {
|
||||||
"name": "bobby",
|
"doggos": [
|
||||||
},
|
{
|
||||||
{
|
"name": "bobby"
|
||||||
"name": "buddy",
|
},
|
||||||
},
|
{
|
||||||
],
|
"name": "buddy"
|
||||||
},
|
}
|
||||||
})
|
]
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -205,21 +262,25 @@ async fn format_nested() {
|
|||||||
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"_formatted": {
|
@r###"
|
||||||
"doggos": [
|
{
|
||||||
{
|
"_formatted": {
|
||||||
"name": "bobby",
|
"doggos": [
|
||||||
},
|
{
|
||||||
{
|
"name": "bobby"
|
||||||
"name": "buddy",
|
},
|
||||||
},
|
{
|
||||||
],
|
"name": "buddy"
|
||||||
},
|
}
|
||||||
})
|
]
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -227,55 +288,63 @@ async fn format_nested() {
|
|||||||
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"doggos": [
|
@r###"
|
||||||
{
|
{
|
||||||
"name": "bobby",
|
"doggos": [
|
||||||
},
|
{
|
||||||
{
|
"name": "bobby"
|
||||||
"name": "buddy",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"_formatted": {
|
|
||||||
"doggos": [
|
|
||||||
{
|
|
||||||
"name": "bobby",
|
|
||||||
"age": "2",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "buddy",
|
|
||||||
"age": "4",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
},
|
||||||
})
|
{
|
||||||
);
|
"name": "buddy"
|
||||||
})
|
}
|
||||||
|
],
|
||||||
|
"_formatted": {
|
||||||
|
"doggos": [
|
||||||
|
{
|
||||||
|
"name": "bobby",
|
||||||
|
"age": "2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "buddy",
|
||||||
|
"age": "4"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
|
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"_formatted": {
|
@r###"
|
||||||
"doggos": [
|
|
||||||
{
|
{
|
||||||
"name": "bobby",
|
"_formatted": {
|
||||||
"age": "2",
|
"doggos": [
|
||||||
},
|
{
|
||||||
{
|
"name": "bobby",
|
||||||
"name": "buddy",
|
"age": "2"
|
||||||
"age": "4",
|
},
|
||||||
},
|
{
|
||||||
],
|
"name": "buddy",
|
||||||
},
|
"age": "4"
|
||||||
})
|
}
|
||||||
);
|
]
|
||||||
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -297,54 +366,70 @@ async fn displayedattr_2_smol() {
|
|||||||
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
})
|
{
|
||||||
);
|
"id": 852,
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "attributesToRetrieve": ["id"] }), |response, code| {
|
.search(json!({ "attributesToRetrieve": ["id"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
})
|
{
|
||||||
);
|
"id": 852,
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "attributesToHighlight": ["id"] }), |response, code| {
|
.search(json!({ "attributesToHighlight": ["id"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
"_formatted": {
|
{
|
||||||
"id": "852",
|
"id": 852,
|
||||||
}
|
"_formatted": {
|
||||||
})
|
"id": "852"
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "attributesToCrop": ["id"] }), |response, code| {
|
.search(json!({ "attributesToCrop": ["id"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
"_formatted": {
|
{
|
||||||
"id": "852",
|
"id": 852,
|
||||||
}
|
"_formatted": {
|
||||||
})
|
"id": "852"
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -353,15 +438,19 @@ async fn displayedattr_2_smol() {
|
|||||||
json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }),
|
json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
"_formatted": {
|
{
|
||||||
"id": "852",
|
"id": 852,
|
||||||
}
|
"_formatted": {
|
||||||
})
|
"id": "852"
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -369,31 +458,47 @@ async fn displayedattr_2_smol() {
|
|||||||
index
|
index
|
||||||
.search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| {
|
.search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
})
|
{
|
||||||
);
|
"id": 852,
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "attributesToCrop": ["cattos"] }), |response, code| {
|
.search(json!({ "attributesToCrop": ["cattos"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"id": 852,
|
@r###"
|
||||||
})
|
{
|
||||||
);
|
"id": 852,
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| {
|
.search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(response["hits"][0], json!({}));
|
allow_duplicates! {
|
||||||
|
assert_json_snapshot!(response["hits"][0],
|
||||||
|
{ "._rankingScore" => "[score]" },
|
||||||
|
@r###"
|
||||||
|
{
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -402,7 +507,15 @@ async fn displayedattr_2_smol() {
|
|||||||
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
|
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(response["hits"][0], json!({}));
|
allow_duplicates! {
|
||||||
|
assert_json_snapshot!(response["hits"][0],
|
||||||
|
{ "._rankingScore" => "[score]" },
|
||||||
|
@r###"
|
||||||
|
{
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -413,14 +526,18 @@ async fn displayedattr_2_smol() {
|
|||||||
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }),
|
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"_formatted": {
|
@r###"
|
||||||
"id": "852",
|
{
|
||||||
}
|
"_formatted": {
|
||||||
})
|
"id": "852"
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
@@ -430,14 +547,18 @@ async fn displayedattr_2_smol() {
|
|||||||
json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }),
|
json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
assert_eq!(
|
allow_duplicates! {
|
||||||
response["hits"][0],
|
assert_json_snapshot!(response["hits"][0],
|
||||||
json!({
|
{ "._rankingScore" => "[score]" },
|
||||||
"_formatted": {
|
@r###"
|
||||||
"id": "852",
|
{
|
||||||
}
|
"_formatted": {
|
||||||
})
|
"id": "852"
|
||||||
);
|
},
|
||||||
|
"_rankingScore": "[score]"
|
||||||
|
}
|
||||||
|
"###)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|||||||
@@ -65,14 +65,15 @@ async fn simple_search_single_index() {
|
|||||||
]}))
|
]}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
|
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"indexUid": "test",
|
"indexUid": "test",
|
||||||
"hits": [
|
"hits": [
|
||||||
{
|
{
|
||||||
"title": "Gläss",
|
"title": "Gläss",
|
||||||
"id": "450465"
|
"id": "450465",
|
||||||
|
"_rankingScore": "[score]"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"query": "glass",
|
"query": "glass",
|
||||||
@@ -86,7 +87,8 @@ async fn simple_search_single_index() {
|
|||||||
"hits": [
|
"hits": [
|
||||||
{
|
{
|
||||||
"title": "Captain Marvel",
|
"title": "Captain Marvel",
|
||||||
"id": "299537"
|
"id": "299537",
|
||||||
|
"_rankingScore": "[score]"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"query": "captain",
|
"query": "captain",
|
||||||
@@ -170,14 +172,15 @@ async fn simple_search_two_indexes() {
|
|||||||
]}))
|
]}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
|
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"indexUid": "test",
|
"indexUid": "test",
|
||||||
"hits": [
|
"hits": [
|
||||||
{
|
{
|
||||||
"title": "Gläss",
|
"title": "Gläss",
|
||||||
"id": "450465"
|
"id": "450465",
|
||||||
|
"_rankingScore": "[score]"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"query": "glass",
|
"query": "glass",
|
||||||
@@ -203,7 +206,8 @@ async fn simple_search_two_indexes() {
|
|||||||
"age": 4
|
"age": 4
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"cattos": "pésti"
|
"cattos": "pésti",
|
||||||
|
"_rankingScore": "[score]"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 654,
|
"id": 654,
|
||||||
@@ -218,7 +222,8 @@ async fn simple_search_two_indexes() {
|
|||||||
"cattos": [
|
"cattos": [
|
||||||
"simba",
|
"simba",
|
||||||
"pestiféré"
|
"pestiféré"
|
||||||
]
|
],
|
||||||
|
"_rankingScore": "[score]"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"query": "pésti",
|
"query": "pésti",
|
||||||
|
|||||||
@@ -2488,8 +2488,12 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let search = Search::new(&rtxn, &index);
|
let search = Search::new(&rtxn, &index);
|
||||||
let SearchResult { matching_words: _, candidates: _, mut documents_ids } =
|
let SearchResult {
|
||||||
search.execute().unwrap();
|
matching_words: _,
|
||||||
|
candidates: _,
|
||||||
|
document_scores: _,
|
||||||
|
mut documents_ids,
|
||||||
|
} = search.execute().unwrap();
|
||||||
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
|
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
|
||||||
documents_ids.sort_unstable();
|
documents_ids.sort_unstable();
|
||||||
let docs = index.documents(&rtxn, documents_ids).unwrap();
|
let docs = index.documents(&rtxn, documents_ids).unwrap();
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ mod fields_ids_map;
|
|||||||
pub mod heed_codec;
|
pub mod heed_codec;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod proximity;
|
pub mod proximity;
|
||||||
|
pub mod score_details;
|
||||||
mod search;
|
mod search;
|
||||||
pub mod update;
|
pub mod update;
|
||||||
|
|
||||||
|
|||||||
544
milli/src/score_details.rs
Normal file
544
milli/src/score_details.rs
Normal file
@@ -0,0 +1,544 @@
|
|||||||
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::distance_between_two_points;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub enum ScoreDetails {
|
||||||
|
Words(Words),
|
||||||
|
Typo(Typo),
|
||||||
|
Proximity(Rank),
|
||||||
|
Fid(Rank),
|
||||||
|
Position(Rank),
|
||||||
|
ExactAttribute(ExactAttribute),
|
||||||
|
Exactness(Rank),
|
||||||
|
Sort(Sort),
|
||||||
|
GeoSort(GeoSort),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for ScoreDetails {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
use ScoreDetails::*;
|
||||||
|
match (self, other) {
|
||||||
|
// matching left and right hands => defer to sub impl
|
||||||
|
(Words(left), Words(right)) => left.partial_cmp(right),
|
||||||
|
(Typo(left), Typo(right)) => left.partial_cmp(right),
|
||||||
|
(Proximity(left), Proximity(right)) => left.partial_cmp(right),
|
||||||
|
(Fid(left), Fid(right)) => left.partial_cmp(right),
|
||||||
|
(Position(left), Position(right)) => left.partial_cmp(right),
|
||||||
|
(ExactAttribute(left), ExactAttribute(right)) => left.partial_cmp(right),
|
||||||
|
(Exactness(left), Exactness(right)) => left.partial_cmp(right),
|
||||||
|
(Sort(left), Sort(right)) => left.partial_cmp(right),
|
||||||
|
(GeoSort(left), GeoSort(right)) => left.partial_cmp(right),
|
||||||
|
// non matching left and right hands => None
|
||||||
|
// written this way rather than with a single `_` arm, so that adding a new variant
|
||||||
|
// still results in a compile error
|
||||||
|
(Words(_), _) => None,
|
||||||
|
(Typo(_), _) => None,
|
||||||
|
(Proximity(_), _) => None,
|
||||||
|
(Fid(_), _) => None,
|
||||||
|
(Position(_), _) => None,
|
||||||
|
(ExactAttribute(_), _) => None,
|
||||||
|
(Exactness(_), _) => None,
|
||||||
|
(Sort(_), _) => None,
|
||||||
|
(GeoSort(_), _) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ScoreDetails {
|
||||||
|
pub fn local_score(&self) -> Option<f64> {
|
||||||
|
self.rank().map(Rank::local_score)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn rank(&self) -> Option<Rank> {
|
||||||
|
match self {
|
||||||
|
ScoreDetails::Words(details) => Some(details.rank()),
|
||||||
|
ScoreDetails::Typo(details) => Some(details.rank()),
|
||||||
|
ScoreDetails::Proximity(details) => Some(*details),
|
||||||
|
ScoreDetails::Fid(details) => Some(*details),
|
||||||
|
ScoreDetails::Position(details) => Some(*details),
|
||||||
|
ScoreDetails::ExactAttribute(details) => Some(details.rank()),
|
||||||
|
ScoreDetails::Exactness(details) => Some(*details),
|
||||||
|
ScoreDetails::Sort(_) => None,
|
||||||
|
ScoreDetails::GeoSort(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn global_score<'a>(details: impl Iterator<Item = &'a Self>) -> f64 {
|
||||||
|
Rank::global_score(details.filter_map(Self::rank))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn global_score_linear_scale<'a>(details: impl Iterator<Item = &'a Self>) -> u64 {
|
||||||
|
(Self::global_score(details) * LINEAR_SCALE_FACTOR).round() as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Panics
|
||||||
|
///
|
||||||
|
/// - If Position is not preceded by Fid
|
||||||
|
/// - If Exactness is not preceded by ExactAttribute
|
||||||
|
/// - If a sort fid is not contained in the passed `fields_ids_map`.
|
||||||
|
pub fn to_json_map<'a>(
|
||||||
|
details: impl Iterator<Item = &'a Self>,
|
||||||
|
) -> serde_json::Map<String, serde_json::Value> {
|
||||||
|
let mut order = 0;
|
||||||
|
let mut details_map = serde_json::Map::default();
|
||||||
|
for details in details {
|
||||||
|
match details {
|
||||||
|
ScoreDetails::Words(words) => {
|
||||||
|
let words_details = serde_json::json!({
|
||||||
|
"order": order,
|
||||||
|
"matchingWords": words.matching_words,
|
||||||
|
"maxMatchingWords": words.max_matching_words,
|
||||||
|
"score": words.rank().local_score_linear_scale(),
|
||||||
|
});
|
||||||
|
details_map.insert("words".into(), words_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
|
ScoreDetails::Typo(typo) => {
|
||||||
|
let typo_details = serde_json::json!({
|
||||||
|
"order": order,
|
||||||
|
"typoCount": typo.typo_count,
|
||||||
|
"maxTypoCount": typo.max_typo_count,
|
||||||
|
"score": typo.rank().local_score_linear_scale(),
|
||||||
|
});
|
||||||
|
details_map.insert("typo".into(), typo_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
|
ScoreDetails::Proximity(proximity) => {
|
||||||
|
let proximity_details = serde_json::json!({
|
||||||
|
"order": order,
|
||||||
|
"score": proximity.local_score_linear_scale(),
|
||||||
|
});
|
||||||
|
details_map.insert("proximity".into(), proximity_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
|
ScoreDetails::Fid(fid) => {
|
||||||
|
// For now, fid is a virtual rule always followed by the "position" rule
|
||||||
|
let fid_details = serde_json::json!({
|
||||||
|
"order": order,
|
||||||
|
"attributes_ranking_order": fid.local_score_linear_scale(),
|
||||||
|
});
|
||||||
|
details_map.insert("attribute".into(), fid_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
|
ScoreDetails::Position(position) => {
|
||||||
|
// For now, position is a virtual rule always preceded by the "fid" rule
|
||||||
|
let attribute_details = details_map
|
||||||
|
.get_mut("attribute")
|
||||||
|
.expect("position not preceded by attribute");
|
||||||
|
let attribute_details = attribute_details
|
||||||
|
.as_object_mut()
|
||||||
|
.expect("attribute details was not an object");
|
||||||
|
attribute_details.insert(
|
||||||
|
"attributes_query_word_order".into(),
|
||||||
|
position.local_score_linear_scale().into(),
|
||||||
|
);
|
||||||
|
// do not update the order since this was already done by fid
|
||||||
|
}
|
||||||
|
ScoreDetails::ExactAttribute(exact_attribute) => {
|
||||||
|
let exactness_details = serde_json::json!({
|
||||||
|
"order": order,
|
||||||
|
"exactIn": exact_attribute,
|
||||||
|
"score": exact_attribute.rank().local_score_linear_scale(),
|
||||||
|
});
|
||||||
|
details_map.insert("exactness".into(), exactness_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
|
ScoreDetails::Exactness(details) => {
|
||||||
|
// For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
|
||||||
|
let exactness_details = details_map
|
||||||
|
.get_mut("exactness")
|
||||||
|
.expect("Exactness not preceded by exactAttribute");
|
||||||
|
let exactness_details = exactness_details
|
||||||
|
.as_object_mut()
|
||||||
|
.expect("exactness details was not an object");
|
||||||
|
if exactness_details.get("exactIn").expect("missing 'exactIn'")
|
||||||
|
== &serde_json::json!(ExactAttribute::NoExactMatch)
|
||||||
|
{
|
||||||
|
let score = Rank::global_score_linear_scale(
|
||||||
|
[ExactAttribute::NoExactMatch.rank(), *details].iter().copied(),
|
||||||
|
);
|
||||||
|
*exactness_details.get_mut("score").expect("missing score") = score.into();
|
||||||
|
}
|
||||||
|
// do not update the order since this was already done by exactAttribute
|
||||||
|
}
|
||||||
|
ScoreDetails::Sort(details) => {
|
||||||
|
let sort = format!(
|
||||||
|
"{}:{}",
|
||||||
|
details.field_name,
|
||||||
|
if details.ascending { "asc" } else { "desc" }
|
||||||
|
);
|
||||||
|
let sort_details = serde_json::json!({
|
||||||
|
"order": order,
|
||||||
|
"value": details.value,
|
||||||
|
});
|
||||||
|
details_map.insert(sort, sort_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
|
ScoreDetails::GeoSort(details) => {
|
||||||
|
let sort = format!(
|
||||||
|
"_geoPoint({}, {}):{}",
|
||||||
|
details.target_point[0],
|
||||||
|
details.target_point[1],
|
||||||
|
if details.ascending { "asc" } else { "desc" }
|
||||||
|
);
|
||||||
|
let point = if let Some(value) = details.value {
|
||||||
|
serde_json::json!({ "lat": value[0], "lng": value[1]})
|
||||||
|
} else {
|
||||||
|
serde_json::Value::Null
|
||||||
|
};
|
||||||
|
let sort_details = serde_json::json!({
|
||||||
|
"order": order,
|
||||||
|
"value": point,
|
||||||
|
"distance": details.distance(),
|
||||||
|
});
|
||||||
|
details_map.insert(sort, sort_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
details_map
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn partial_cmp_iter<'a>(
|
||||||
|
mut left: impl Iterator<Item = &'a Self>,
|
||||||
|
mut right: impl Iterator<Item = &'a Self>,
|
||||||
|
) -> Result<Ordering, NotComparable> {
|
||||||
|
let mut index = 0;
|
||||||
|
let mut order = match (left.next(), right.next()) {
|
||||||
|
(Some(left), Some(right)) => left.partial_cmp(right).incomparable(index)?,
|
||||||
|
_ => return Ok(Ordering::Equal),
|
||||||
|
};
|
||||||
|
for (left, right) in left.zip(right) {
|
||||||
|
if order != Ordering::Equal {
|
||||||
|
return Ok(order);
|
||||||
|
};
|
||||||
|
|
||||||
|
index += 1;
|
||||||
|
order = left.partial_cmp(right).incomparable(index)?;
|
||||||
|
}
|
||||||
|
Ok(order)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Error signalling that two sequences of score details could not be compared.
///
/// The wrapped value is the index of the first pair that was not comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NotComparable(pub usize);
|
||||||
|
|
||||||
|
trait OptionToNotComparable<T> {
|
||||||
|
fn incomparable(self, index: usize) -> Result<T, NotComparable>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> OptionToNotComparable<T> for Option<T> {
|
||||||
|
fn incomparable(self, index: usize) -> Result<T, NotComparable> {
|
||||||
|
match self {
|
||||||
|
Some(t) => Ok(t),
|
||||||
|
None => Err(NotComparable(index)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Payload of the `Words` score detail.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Words {
    /// Number of matching words; used as the rank of the rule.
    pub matching_words: u32,
    /// Maximum number of matching words; used as the maximum rank of the rule.
    pub max_matching_words: u32,
}
|
||||||
|
|
||||||
|
impl PartialOrd for Words {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
(self.max_matching_words == other.max_matching_words)
|
||||||
|
.then(|| self.matching_words.cmp(&other.matching_words))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Words {
|
||||||
|
pub fn rank(&self) -> Rank {
|
||||||
|
Rank { rank: self.matching_words, max_rank: self.max_matching_words }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn from_rank(rank: Rank) -> Words {
|
||||||
|
Words { matching_words: rank.rank, max_matching_words: rank.max_rank }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Payload of the `Typo` score detail.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Typo {
    /// Number of typos; the fewer, the better the rank.
    pub typo_count: u32,
    /// Maximum number of typos.
    pub max_typo_count: u32,
}
|
||||||
|
|
||||||
|
impl PartialOrd for Typo {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
(self.max_typo_count == other.max_typo_count).then(|| {
|
||||||
|
// the order is reverted as having fewer typos gives a better score
|
||||||
|
self.typo_count.cmp(&other.typo_count).reverse()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Typo {
|
||||||
|
pub fn rank(&self) -> Rank {
|
||||||
|
Rank {
|
||||||
|
rank: self.max_typo_count - self.typo_count + 1,
|
||||||
|
max_rank: (self.max_typo_count + 1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// max_rank = max_typo + 1
|
||||||
|
// max_typo = max_rank - 1
|
||||||
|
//
|
||||||
|
// rank = max_typo - typo + 1
|
||||||
|
// rank = max_rank - 1 - typo + 1
|
||||||
|
// rank + typo = max_rank
|
||||||
|
// typo = max_rank - rank
|
||||||
|
pub fn from_rank(rank: Rank) -> Typo {
|
||||||
|
Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An ordinal position among `max_rank` possible positions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Rank {
    /// The ordinal rank: `max_rank` is the first (best) rank and 0 is the last one.
    ///
    /// Higher is better. A rank of 0 gives a score of 0; documents with such a rank
    /// typically do not match the query and are never returned.
    pub rank: u32,
    /// The best rank that can be reached. A document at this rank has a score of 1.
    ///
    /// The max rank should not be 0.
    pub max_rank: u32,
}
|
||||||
|
|
||||||
|
impl PartialOrd for Rank {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
(self.max_rank == other.max_rank).then(|| self.rank.cmp(&other.rank))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Rank {
|
||||||
|
pub fn local_score(self) -> f64 {
|
||||||
|
self.rank as f64 / self.max_rank as f64
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn local_score_linear_scale(self) -> u64 {
|
||||||
|
(self.local_score() * LINEAR_SCALE_FACTOR).round() as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
|
||||||
|
let mut rank = Rank { rank: 1, max_rank: 1 };
|
||||||
|
for inner_rank in details {
|
||||||
|
rank.rank -= 1;
|
||||||
|
|
||||||
|
rank.rank *= inner_rank.max_rank;
|
||||||
|
rank.max_rank *= inner_rank.max_rank;
|
||||||
|
|
||||||
|
rank.rank += inner_rank.rank;
|
||||||
|
}
|
||||||
|
rank.local_score()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn global_score_linear_scale(details: impl Iterator<Item = Self>) -> u64 {
|
||||||
|
(Self::global_score(details) * LINEAR_SCALE_FACTOR).round() as u64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub enum ExactAttribute {
|
||||||
|
// Do not reorder as the order is significant, from least relevant to most relevant
|
||||||
|
NoExactMatch,
|
||||||
|
MatchesStart,
|
||||||
|
MatchesFull,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExactAttribute {
|
||||||
|
pub fn rank(&self) -> Rank {
|
||||||
|
let rank = match self {
|
||||||
|
ExactAttribute::MatchesFull => 3,
|
||||||
|
ExactAttribute::MatchesStart => 2,
|
||||||
|
ExactAttribute::NoExactMatch => 1,
|
||||||
|
};
|
||||||
|
Rank { rank, max_rank: 3 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct Sort {
|
||||||
|
pub field_name: String,
|
||||||
|
pub ascending: bool,
|
||||||
|
pub value: serde_json::Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for Sort {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
if self.field_name != other.field_name {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if self.ascending != other.ascending {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
match (&self.value, &other.value) {
|
||||||
|
(serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal),
|
||||||
|
(serde_json::Value::Null, _) => Some(Ordering::Less),
|
||||||
|
(_, serde_json::Value::Null) => Some(Ordering::Greater),
|
||||||
|
// numbers are always before strings
|
||||||
|
(serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater),
|
||||||
|
(serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less),
|
||||||
|
(serde_json::Value::Number(left), serde_json::Value::Number(right)) => {
|
||||||
|
//FIXME: unwrap permitted here?
|
||||||
|
let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?;
|
||||||
|
// always reverted, as bigger is better
|
||||||
|
Some(if self.ascending { order.reverse() } else { order })
|
||||||
|
}
|
||||||
|
(serde_json::Value::String(left), serde_json::Value::String(right)) => {
|
||||||
|
let order = left.cmp(right);
|
||||||
|
Some(if self.ascending { order.reverse() } else { order })
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Payload of the `GeoSort` score detail.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GeoSort {
    /// Point the distance is measured from; rendered as `_geoPoint(p[0], p[1])`.
    pub target_point: [f64; 2],
    /// `true` for an ascending (`asc`) sort, `false` for a descending (`desc`) one.
    pub ascending: bool,
    /// Point of the document, rendered as `lat` (index 0) and `lng` (index 1), if any.
    pub value: Option<[f64; 2]>,
}
|
||||||
|
|
||||||
|
impl PartialOrd for GeoSort {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
if self.target_point != other.target_point {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if self.ascending != other.ascending {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(match (self.distance(), other.distance()) {
|
||||||
|
(None, None) => Ordering::Equal,
|
||||||
|
(None, Some(_)) => Ordering::Less,
|
||||||
|
(Some(_), None) => Ordering::Greater,
|
||||||
|
(Some(left), Some(right)) => {
|
||||||
|
let order = left.partial_cmp(&right)?;
|
||||||
|
if self.ascending {
|
||||||
|
// when ascending, the one with the smallest distance has the best score
|
||||||
|
order.reverse()
|
||||||
|
} else {
|
||||||
|
order
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GeoSort {
|
||||||
|
pub fn distance(&self) -> Option<f64> {
|
||||||
|
self.value.map(|value| distance_between_two_points(&self.target_point, &value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Multiplier applied to a floating-point score before it is rounded to an integer by
/// the `*_linear_scale` functions.
const LINEAR_SCALE_FACTOR: f64 = 1000.0;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
#[test]
|
||||||
|
fn compare() {
|
||||||
|
let left = [
|
||||||
|
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
|
||||||
|
ScoreDetails::Sort(Sort {
|
||||||
|
field_name: "doggo".into(),
|
||||||
|
ascending: true,
|
||||||
|
value: "Intel the Beagle".into(),
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
let right = [
|
||||||
|
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
|
||||||
|
ScoreDetails::Sort(Sort {
|
||||||
|
field_name: "doggo".into(),
|
||||||
|
ascending: true,
|
||||||
|
value: "Max the Labrador".into(),
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
assert_eq!(
|
||||||
|
Ok(Ordering::Greater),
|
||||||
|
ScoreDetails::partial_cmp_iter(left.iter(), right.iter())
|
||||||
|
);
|
||||||
|
// equal when all the common components are equal
|
||||||
|
assert_eq!(
|
||||||
|
Ok(Ordering::Equal),
|
||||||
|
ScoreDetails::partial_cmp_iter(left[0..1].iter(), right.iter())
|
||||||
|
);
|
||||||
|
|
||||||
|
let right = [
|
||||||
|
ScoreDetails::Words(Words { matching_words: 4, max_matching_words: 4 }),
|
||||||
|
ScoreDetails::Sort(Sort {
|
||||||
|
field_name: "doggo".into(),
|
||||||
|
ascending: true,
|
||||||
|
value: "Max the Labrador".into(),
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
assert_eq!(Ok(Ordering::Less), ScoreDetails::partial_cmp_iter(left.iter(), right.iter()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks that `ScoreDetails::partial_cmp_iter` returns `Err(NotComparable(1))` when the two
// sequences hold `Sort` entries that differ in field name or in sort direction.
// In both cases the `Words` entries (position 0) are identical and the `Sort` entries sit at
// position 1; the `1` carried by the error is presumably that position -- TODO confirm against
// the definition of `NotComparable`.
#[test]
|
||||||
|
fn sort_not_comparable() {
|
||||||
|
// First case: the two `Sort` entries use different field names.
let left = [
|
||||||
|
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
|
||||||
|
ScoreDetails::Sort(Sort {
|
||||||
|
// not the same field name as on the right (`catto` vs `doggo`)
|
||||||
|
field_name: "catto".into(),
|
||||||
|
ascending: true,
|
||||||
|
value: "Sylver the cat".into(),
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
let right = [
|
||||||
|
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
|
||||||
|
ScoreDetails::Sort(Sort {
|
||||||
|
field_name: "doggo".into(),
|
||||||
|
ascending: true,
|
||||||
|
value: "Max the Labrador".into(),
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
assert_eq!(
|
||||||
|
Err(NotComparable(1)),
|
||||||
|
ScoreDetails::partial_cmp_iter(left.iter(), right.iter())
|
||||||
|
);
|
||||||
|
// Second case: same field name on both sides, but different `ascending` flags.
let left = [
|
||||||
|
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
|
||||||
|
ScoreDetails::Sort(Sort {
|
||||||
|
field_name: "doggo".into(),
|
||||||
|
// not the same sort direction as on the right (`ascending: false` vs `ascending: true`)
|
||||||
|
ascending: false,
|
||||||
|
value: "Intel the Beagle".into(),
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
let right = [
|
||||||
|
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
|
||||||
|
ScoreDetails::Sort(Sort {
|
||||||
|
field_name: "doggo".into(),
|
||||||
|
ascending: true,
|
||||||
|
value: "Max the Labrador".into(),
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
assert_eq!(
|
||||||
|
Err(NotComparable(1)),
|
||||||
|
ScoreDetails::partial_cmp_iter(left.iter(), right.iter())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks how `partial_cmp` on `Sort` orders a string value against a numeric value for the
// same field: the string-valued side compares as `Less`, whatever the `ascending` flag is.
#[test]
|
||||||
|
fn sort_behavior() {
|
||||||
|
let left = Sort { field_name: "price".into(), ascending: true, value: "5400".into() };
|
||||||
|
let right = Sort { field_name: "price".into(), ascending: true, value: 53.into() };
|
||||||
|
// a number is always a better match than a string, so the string-valued `left` is `Less`
|
||||||
|
assert_eq!(Some(Ordering::Less), left.partial_cmp(&right));
|
||||||
|
|
||||||
|
let left = Sort { field_name: "price".into(), ascending: false, value: "5400".into() };
|
||||||
|
let right = Sort { field_name: "price".into(), ascending: false, value: 53.into() };
|
||||||
|
// the same holds regardless of the sort direction (here `ascending: false`)
|
||||||
|
assert_eq!(Some(Ordering::Less), left.partial_cmp(&right));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,6 +7,7 @@ use roaring::bitmap::RoaringBitmap;
|
|||||||
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
|
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
|
||||||
use self::new::PartialSearchResult;
|
use self::new::PartialSearchResult;
|
||||||
|
use crate::score_details::ScoreDetails;
|
||||||
use crate::{
|
use crate::{
|
||||||
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
|
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
|
||||||
};
|
};
|
||||||
@@ -93,7 +94,7 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Force the search to exhastivelly compute the number of candidates,
|
/// Forces the search to exhaustively compute the number of candidates,
|
||||||
/// this will increase the search time but allows finite pagination.
|
/// this will increase the search time but allows finite pagination.
|
||||||
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
|
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
|
||||||
self.exhaustive_number_hits = exhaustive_number_hits;
|
self.exhaustive_number_hits = exhaustive_number_hits;
|
||||||
@@ -102,7 +103,7 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
pub fn execute(&self) -> Result<SearchResult> {
|
pub fn execute(&self) -> Result<SearchResult> {
|
||||||
let mut ctx = SearchContext::new(self.index, self.rtxn);
|
let mut ctx = SearchContext::new(self.index, self.rtxn);
|
||||||
let PartialSearchResult { located_query_terms, candidates, documents_ids } =
|
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
|
||||||
execute_search(
|
execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
&self.query,
|
&self.query,
|
||||||
@@ -124,7 +125,7 @@ impl<'a> Search<'a> {
|
|||||||
None => MatchingWords::default(),
|
None => MatchingWords::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(SearchResult { matching_words, candidates, documents_ids })
|
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -160,8 +161,8 @@ impl fmt::Debug for Search<'_> {
|
|||||||
pub struct SearchResult {
|
pub struct SearchResult {
|
||||||
pub matching_words: MatchingWords,
|
pub matching_words: MatchingWords,
|
||||||
pub candidates: RoaringBitmap,
|
pub candidates: RoaringBitmap,
|
||||||
// TODO those documents ids should be associated with their criteria scores.
|
|
||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
|
pub document_scores: Vec<Vec<ScoreDetails>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
|||||||
@@ -3,11 +3,13 @@ use roaring::RoaringBitmap;
|
|||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
|
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
|
||||||
use super::SearchContext;
|
use super::SearchContext;
|
||||||
|
use crate::score_details::ScoreDetails;
|
||||||
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
|
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub struct BucketSortOutput {
|
pub struct BucketSortOutput {
|
||||||
pub docids: Vec<u32>,
|
pub docids: Vec<u32>,
|
||||||
|
pub scores: Vec<Vec<ScoreDetails>>,
|
||||||
pub all_candidates: RoaringBitmap,
|
pub all_candidates: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -31,7 +33,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
};
|
};
|
||||||
|
|
||||||
if universe.len() < from as u64 {
|
if universe.len() < from as u64 {
|
||||||
return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() });
|
return Ok(BucketSortOutput {
|
||||||
|
docids: vec![],
|
||||||
|
scores: vec![],
|
||||||
|
all_candidates: universe.clone(),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
if ranking_rules.is_empty() {
|
if ranking_rules.is_empty() {
|
||||||
if let Some(distinct_fid) = distinct_fid {
|
if let Some(distinct_fid) = distinct_fid {
|
||||||
@@ -49,22 +55,32 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
}
|
}
|
||||||
let mut all_candidates = universe - excluded;
|
let mut all_candidates = universe - excluded;
|
||||||
all_candidates.extend(results.iter().copied());
|
all_candidates.extend(results.iter().copied());
|
||||||
return Ok(BucketSortOutput { docids: results, all_candidates });
|
return Ok(BucketSortOutput {
|
||||||
|
scores: vec![Default::default(); results.len()],
|
||||||
|
docids: results,
|
||||||
|
all_candidates,
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
let docids = universe.iter().skip(from).take(length).collect();
|
let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
|
||||||
return Ok(BucketSortOutput { docids, all_candidates: universe.clone() });
|
return Ok(BucketSortOutput {
|
||||||
|
scores: vec![Default::default(); docids.len()],
|
||||||
|
docids,
|
||||||
|
all_candidates: universe.clone(),
|
||||||
|
});
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let ranking_rules_len = ranking_rules.len();
|
let ranking_rules_len = ranking_rules.len();
|
||||||
|
|
||||||
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
|
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
|
||||||
|
|
||||||
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
|
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
|
||||||
|
|
||||||
|
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
|
||||||
|
|
||||||
let mut ranking_rule_universes: Vec<RoaringBitmap> =
|
let mut ranking_rule_universes: Vec<RoaringBitmap> =
|
||||||
vec![RoaringBitmap::default(); ranking_rules_len];
|
vec![RoaringBitmap::default(); ranking_rules_len];
|
||||||
ranking_rule_universes[0] = universe.clone();
|
ranking_rule_universes[0] = universe.clone();
|
||||||
|
|
||||||
let mut cur_ranking_rule_index = 0;
|
let mut cur_ranking_rule_index = 0;
|
||||||
|
|
||||||
/// Finish iterating over the current ranking rule, yielding
|
/// Finish iterating over the current ranking rule, yielding
|
||||||
@@ -89,11 +105,16 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
} else {
|
} else {
|
||||||
cur_ranking_rule_index -= 1;
|
cur_ranking_rule_index -= 1;
|
||||||
}
|
}
|
||||||
|
// FIXME: check off by one
|
||||||
|
if ranking_rule_scores.len() > cur_ranking_rule_index {
|
||||||
|
ranking_rule_scores.pop();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut all_candidates = universe.clone();
|
let mut all_candidates = universe.clone();
|
||||||
let mut valid_docids = vec![];
|
let mut valid_docids = vec![];
|
||||||
|
let mut valid_scores = vec![];
|
||||||
let mut cur_offset = 0usize;
|
let mut cur_offset = 0usize;
|
||||||
|
|
||||||
macro_rules! maybe_add_to_results {
|
macro_rules! maybe_add_to_results {
|
||||||
@@ -104,23 +125,23 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
length,
|
length,
|
||||||
logger,
|
logger,
|
||||||
&mut valid_docids,
|
&mut valid_docids,
|
||||||
|
&mut valid_scores,
|
||||||
&mut all_candidates,
|
&mut all_candidates,
|
||||||
&mut ranking_rule_universes,
|
&mut ranking_rule_universes,
|
||||||
&mut ranking_rules,
|
&mut ranking_rules,
|
||||||
cur_ranking_rule_index,
|
cur_ranking_rule_index,
|
||||||
&mut cur_offset,
|
&mut cur_offset,
|
||||||
distinct_fid,
|
distinct_fid,
|
||||||
|
&ranking_rule_scores,
|
||||||
$candidates,
|
$candidates,
|
||||||
)?;
|
)?;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
while valid_docids.len() < length {
|
while valid_docids.len() < length {
|
||||||
// The universe for this bucket is zero or one element, so we don't need to sort
|
// The universe for this bucket is empty, so we don't need to sort
|
||||||
// anything, just extend the results and go back to the parent ranking rule.
|
// anything, just go back to the parent ranking rule.
|
||||||
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
|
if ranking_rule_universes[cur_ranking_rule_index].is_empty() {
|
||||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
|
||||||
maybe_add_to_results!(bucket);
|
|
||||||
back!();
|
back!();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -130,6 +151,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
ranking_rule_scores.push(next_bucket.score);
|
||||||
|
|
||||||
logger.next_bucket_ranking_rule(
|
logger.next_bucket_ranking_rule(
|
||||||
cur_ranking_rule_index,
|
cur_ranking_rule_index,
|
||||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||||
@@ -143,10 +166,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
|
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
|
||||||
|
|
||||||
if cur_ranking_rule_index == ranking_rules_len - 1
|
if cur_ranking_rule_index == ranking_rules_len - 1
|
||||||
|| next_bucket.candidates.len() <= 1
|
|
||||||
|| cur_offset + (next_bucket.candidates.len() as usize) < from
|
|| cur_offset + (next_bucket.candidates.len() as usize) < from
|
||||||
{
|
{
|
||||||
maybe_add_to_results!(next_bucket.candidates);
|
maybe_add_to_results!(next_bucket.candidates);
|
||||||
|
// FIXME: use index based logic like all the other rules so that you don't have to maintain the pop/push?
|
||||||
|
ranking_rule_scores.pop();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,7 +190,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(BucketSortOutput { docids: valid_docids, all_candidates })
|
Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
|
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
|
||||||
@@ -179,14 +203,18 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
logger: &mut dyn SearchLogger<Q>,
|
logger: &mut dyn SearchLogger<Q>,
|
||||||
|
|
||||||
valid_docids: &mut Vec<u32>,
|
valid_docids: &mut Vec<u32>,
|
||||||
|
valid_scores: &mut Vec<Vec<ScoreDetails>>,
|
||||||
all_candidates: &mut RoaringBitmap,
|
all_candidates: &mut RoaringBitmap,
|
||||||
|
|
||||||
ranking_rule_universes: &mut [RoaringBitmap],
|
ranking_rule_universes: &mut [RoaringBitmap],
|
||||||
ranking_rules: &mut [BoxRankingRule<'ctx, Q>],
|
ranking_rules: &mut [BoxRankingRule<'ctx, Q>],
|
||||||
|
|
||||||
cur_ranking_rule_index: usize,
|
cur_ranking_rule_index: usize,
|
||||||
|
|
||||||
cur_offset: &mut usize,
|
cur_offset: &mut usize,
|
||||||
|
|
||||||
distinct_fid: Option<u16>,
|
distinct_fid: Option<u16>,
|
||||||
|
ranking_rule_scores: &[ScoreDetails],
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// First apply the distinct rule on the candidates, reducing the universes if necessary
|
// First apply the distinct rule on the candidates, reducing the universes if necessary
|
||||||
@@ -231,13 +259,17 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
let candidates =
|
let candidates =
|
||||||
candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>();
|
candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>();
|
||||||
logger.add_to_results(&candidates);
|
logger.add_to_results(&candidates);
|
||||||
valid_docids.extend(&candidates);
|
valid_docids.extend_from_slice(&candidates);
|
||||||
|
valid_scores
|
||||||
|
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// if we have passed the offset already, add some of the documents (up to the limit)
|
// if we have passed the offset already, add some of the documents (up to the limit)
|
||||||
let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
|
let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
|
||||||
logger.add_to_results(&candidates);
|
logger.add_to_results(&candidates);
|
||||||
valid_docids.extend(&candidates);
|
valid_docids.extend_from_slice(&candidates);
|
||||||
|
valid_scores
|
||||||
|
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
|
||||||
}
|
}
|
||||||
|
|
||||||
*cur_offset += candidates.len() as usize;
|
*cur_offset += candidates.len() as usize;
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ use roaring::{MultiOps, RoaringBitmap};
|
|||||||
|
|
||||||
use super::query_graph::QueryGraph;
|
use super::query_graph::QueryGraph;
|
||||||
use super::ranking_rules::{RankingRule, RankingRuleOutput};
|
use super::ranking_rules::{RankingRule, RankingRuleOutput};
|
||||||
|
use crate::score_details::{self, ScoreDetails};
|
||||||
use crate::search::new::query_graph::QueryNodeData;
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::ExactTerm;
|
use crate::search::new::query_term::ExactTerm;
|
||||||
use crate::{Result, SearchContext, SearchLogger};
|
use crate::{Result, SearchContext, SearchLogger};
|
||||||
@@ -244,7 +245,13 @@ impl State {
|
|||||||
candidates &= universe;
|
candidates &= universe;
|
||||||
(
|
(
|
||||||
State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
|
State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
|
||||||
Some(RankingRuleOutput { query: query_graph, candidates }),
|
Some(RankingRuleOutput {
|
||||||
|
query: query_graph,
|
||||||
|
candidates,
|
||||||
|
score: ScoreDetails::ExactAttribute(
|
||||||
|
score_details::ExactAttribute::MatchesFull,
|
||||||
|
),
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
State::AttributeStarts(query_graph, candidates_per_attribute) => {
|
State::AttributeStarts(query_graph, candidates_per_attribute) => {
|
||||||
@@ -257,12 +264,24 @@ impl State {
|
|||||||
candidates &= universe;
|
candidates &= universe;
|
||||||
(
|
(
|
||||||
State::Empty(query_graph.clone()),
|
State::Empty(query_graph.clone()),
|
||||||
Some(RankingRuleOutput { query: query_graph, candidates }),
|
Some(RankingRuleOutput {
|
||||||
|
query: query_graph,
|
||||||
|
candidates,
|
||||||
|
score: ScoreDetails::ExactAttribute(
|
||||||
|
score_details::ExactAttribute::MatchesStart,
|
||||||
|
),
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
State::Empty(query_graph) => (
|
State::Empty(query_graph) => (
|
||||||
State::Empty(query_graph.clone()),
|
State::Empty(query_graph.clone()),
|
||||||
Some(RankingRuleOutput { query: query_graph, candidates: universe.clone() }),
|
Some(RankingRuleOutput {
|
||||||
|
query: query_graph,
|
||||||
|
candidates: universe.clone(),
|
||||||
|
score: ScoreDetails::ExactAttribute(
|
||||||
|
score_details::ExactAttribute::NoExactMatch,
|
||||||
|
),
|
||||||
|
}),
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
(state, output)
|
(state, output)
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ use rstar::RTree;
|
|||||||
|
|
||||||
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
|
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
|
||||||
|
use crate::score_details::{self, ScoreDetails};
|
||||||
use crate::{
|
use crate::{
|
||||||
distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
|
distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
|
||||||
SearchLogger,
|
SearchLogger,
|
||||||
@@ -80,7 +81,7 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
|
|||||||
field_ids: Option<[u16; 2]>,
|
field_ids: Option<[u16; 2]>,
|
||||||
rtree: Option<RTree<GeoPoint>>,
|
rtree: Option<RTree<GeoPoint>>,
|
||||||
|
|
||||||
cached_sorted_docids: VecDeque<u32>,
|
cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
|
||||||
geo_candidates: RoaringBitmap,
|
geo_candidates: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -130,7 +131,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||||||
let point = lat_lng_to_xyz(&self.point);
|
let point = lat_lng_to_xyz(&self.point);
|
||||||
for point in rtree.nearest_neighbor_iter(&point) {
|
for point in rtree.nearest_neighbor_iter(&point) {
|
||||||
if self.geo_candidates.contains(point.data.0) {
|
if self.geo_candidates.contains(point.data.0) {
|
||||||
self.cached_sorted_docids.push_back(point.data.0);
|
self.cached_sorted_docids.push_back(point.data);
|
||||||
if self.cached_sorted_docids.len() >= cache_size {
|
if self.cached_sorted_docids.len() >= cache_size {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -142,7 +143,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||||||
let point = lat_lng_to_xyz(&opposite_of(self.point));
|
let point = lat_lng_to_xyz(&opposite_of(self.point));
|
||||||
for point in rtree.nearest_neighbor_iter(&point) {
|
for point in rtree.nearest_neighbor_iter(&point) {
|
||||||
if self.geo_candidates.contains(point.data.0) {
|
if self.geo_candidates.contains(point.data.0) {
|
||||||
self.cached_sorted_docids.push_front(point.data.0);
|
self.cached_sorted_docids.push_front(point.data);
|
||||||
if self.cached_sorted_docids.len() >= cache_size {
|
if self.cached_sorted_docids.len() >= cache_size {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -177,7 +178,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||||||
// computing the distance between two points is expensive thus we cache the result
|
// computing the distance between two points is expensive thus we cache the result
|
||||||
documents
|
documents
|
||||||
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
|
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
|
||||||
self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id));
|
self.cached_sorted_docids.extend(documents.into_iter());
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -220,12 +221,19 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
|||||||
logger: &mut dyn SearchLogger<Q>,
|
logger: &mut dyn SearchLogger<Q>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<Option<RankingRuleOutput<Q>>> {
|
) -> Result<Option<RankingRuleOutput<Q>>> {
|
||||||
assert!(universe.len() > 1);
|
|
||||||
let query = self.query.as_ref().unwrap().clone();
|
let query = self.query.as_ref().unwrap().clone();
|
||||||
self.geo_candidates &= universe;
|
self.geo_candidates &= universe;
|
||||||
|
|
||||||
if self.geo_candidates.is_empty() {
|
if self.geo_candidates.is_empty() {
|
||||||
return Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }));
|
return Ok(Some(RankingRuleOutput {
|
||||||
|
query,
|
||||||
|
candidates: universe.clone(),
|
||||||
|
score: ScoreDetails::GeoSort(score_details::GeoSort {
|
||||||
|
target_point: self.point,
|
||||||
|
ascending: self.ascending,
|
||||||
|
value: None,
|
||||||
|
}),
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
let ascending = self.ascending;
|
let ascending = self.ascending;
|
||||||
@@ -236,11 +244,16 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
|||||||
cache.pop_back()
|
cache.pop_back()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
while let Some(id) = next(&mut self.cached_sorted_docids) {
|
while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
|
||||||
if self.geo_candidates.contains(id) {
|
if self.geo_candidates.contains(id) {
|
||||||
return Ok(Some(RankingRuleOutput {
|
return Ok(Some(RankingRuleOutput {
|
||||||
query,
|
query,
|
||||||
candidates: RoaringBitmap::from_iter([id]),
|
candidates: RoaringBitmap::from_iter([id]),
|
||||||
|
score: ScoreDetails::GeoSort(score_details::GeoSort {
|
||||||
|
target_point: self.point,
|
||||||
|
ascending: self.ascending,
|
||||||
|
value: Some(point),
|
||||||
|
}),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,6 +50,7 @@ use super::ranking_rule_graph::{
|
|||||||
};
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
|
use crate::score_details::Rank;
|
||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::ranking_rule_graph::PathVisitor;
|
use crate::search::new::ranking_rule_graph::PathVisitor;
|
||||||
use crate::{Result, TermsMatchingStrategy};
|
use crate::{Result, TermsMatchingStrategy};
|
||||||
@@ -118,6 +119,8 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
all_costs: MappedInterner<QueryNode, Vec<u64>>,
|
all_costs: MappedInterner<QueryNode, Vec<u64>>,
|
||||||
/// An index in the first element of `all_distances`, giving the cost of the next bucket
|
/// An index in the first element of `all_distances`, giving the cost of the next bucket
|
||||||
cur_cost: u64,
|
cur_cost: u64,
|
||||||
|
/// One above the highest possible cost for this rule
|
||||||
|
next_max_cost: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
|
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
|
||||||
@@ -139,13 +142,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
let mut forbidden_nodes =
|
let mut forbidden_nodes =
|
||||||
SmallBitmap::for_interned_values_in(&query_graph.nodes);
|
SmallBitmap::for_interned_values_in(&query_graph.nodes);
|
||||||
let mut costs = query_graph.nodes.map(|_| None);
|
let mut costs = query_graph.nodes.map(|_| None);
|
||||||
let mut cost = 100;
|
// FIXME: this works because only the `words` ranking rule uses `TermsMatchingStrategy` at the moment.
|
||||||
for ns in removal_order {
|
for ns in removal_order {
|
||||||
for n in ns.iter() {
|
for n in ns.iter() {
|
||||||
*costs.get_mut(n) = Some((cost, forbidden_nodes.clone()));
|
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
|
||||||
}
|
}
|
||||||
forbidden_nodes.union(&ns);
|
forbidden_nodes.union(&ns);
|
||||||
cost += 100;
|
|
||||||
}
|
}
|
||||||
costs
|
costs
|
||||||
}
|
}
|
||||||
@@ -162,12 +164,16 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// Then pre-compute the cost of all paths from each node to the end node
|
// Then pre-compute the cost of all paths from each node to the end node
|
||||||
let all_costs = graph.find_all_costs_to_end();
|
let all_costs = graph.find_all_costs_to_end();
|
||||||
|
|
||||||
|
let next_max_cost =
|
||||||
|
all_costs.get(graph.query_graph.root_node).iter().copied().max().unwrap_or(0) + 1;
|
||||||
|
|
||||||
let state = GraphBasedRankingRuleState {
|
let state = GraphBasedRankingRuleState {
|
||||||
graph,
|
graph,
|
||||||
conditions_cache: condition_docids_cache,
|
conditions_cache: condition_docids_cache,
|
||||||
dead_ends_cache,
|
dead_ends_cache,
|
||||||
all_costs,
|
all_costs,
|
||||||
cur_cost: 0,
|
cur_cost: 0,
|
||||||
|
next_max_cost,
|
||||||
};
|
};
|
||||||
|
|
||||||
self.state = Some(state);
|
self.state = Some(state);
|
||||||
@@ -181,17 +187,13 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||||
// If universe.len() <= 1, the bucket sort algorithm
|
|
||||||
// should not have called this function.
|
|
||||||
assert!(universe.len() > 1);
|
|
||||||
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
|
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
|
||||||
// should never happen
|
// should never happen
|
||||||
let mut state = self.state.take().unwrap();
|
let mut state = self.state.take().unwrap();
|
||||||
|
|
||||||
|
let all_costs = state.all_costs.get(state.graph.query_graph.root_node);
|
||||||
// Retrieve the cost of the paths to compute
|
// Retrieve the cost of the paths to compute
|
||||||
let Some(&cost) = state
|
let Some(&cost) = all_costs
|
||||||
.all_costs
|
|
||||||
.get(state.graph.query_graph.root_node)
|
|
||||||
.iter()
|
.iter()
|
||||||
.find(|c| **c >= state.cur_cost) else {
|
.find(|c| **c >= state.cur_cost) else {
|
||||||
self.state = None;
|
self.state = None;
|
||||||
@@ -207,8 +209,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
dead_ends_cache,
|
dead_ends_cache,
|
||||||
all_costs,
|
all_costs,
|
||||||
cur_cost: _,
|
cur_cost: _,
|
||||||
|
next_max_cost,
|
||||||
} = &mut state;
|
} = &mut state;
|
||||||
|
|
||||||
|
let rank = *next_max_cost - cost;
|
||||||
|
let score = G::rank_to_score(Rank { rank: rank as u32, max_rank: *next_max_cost as u32 });
|
||||||
|
|
||||||
let mut universe = universe.clone();
|
let mut universe = universe.clone();
|
||||||
|
|
||||||
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
|
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
|
||||||
@@ -325,7 +331,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
self.state = Some(state);
|
self.state = Some(state);
|
||||||
|
|
||||||
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
|
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn end_iteration(
|
fn end_iteration(
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ use self::geo_sort::GeoSort;
|
|||||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||||
use self::graph_based_ranking_rule::Words;
|
use self::graph_based_ranking_rule::Words;
|
||||||
use self::interner::Interned;
|
use self::interner::Interned;
|
||||||
|
use crate::score_details::ScoreDetails;
|
||||||
use crate::search::new::distinct::apply_distinct_rule;
|
use crate::search::new::distinct::apply_distinct_rule;
|
||||||
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
|
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
|
||||||
|
|
||||||
@@ -426,13 +427,15 @@ pub fn execute_search(
|
|||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output;
|
let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
|
||||||
|
|
||||||
|
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
|
||||||
|
|
||||||
// The candidates is the universe unless the exhaustive number of hits
|
// The candidates is the universe unless the exhaustive number of hits
|
||||||
// is requested and a distinct attribute is set.
|
// is requested and a distinct attribute is set.
|
||||||
if exhaustive_number_hits {
|
if exhaustive_number_hits {
|
||||||
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
|
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
|
||||||
if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) {
|
if let Some(distinct_fid) = fields_ids_map.id(f) {
|
||||||
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
|
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -440,6 +443,7 @@ pub fn execute_search(
|
|||||||
|
|
||||||
Ok(PartialSearchResult {
|
Ok(PartialSearchResult {
|
||||||
candidates: all_candidates,
|
candidates: all_candidates,
|
||||||
|
document_scores: scores,
|
||||||
documents_ids: docids,
|
documents_ids: docids,
|
||||||
located_query_terms,
|
located_query_terms,
|
||||||
})
|
})
|
||||||
@@ -491,4 +495,5 @@ pub struct PartialSearchResult {
|
|||||||
pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
|
pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
|
||||||
pub candidates: RoaringBitmap,
|
pub candidates: RoaringBitmap,
|
||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
|
pub document_scores: Vec<Vec<ScoreDetails>>,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,10 +49,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
if let Some((cost_of_ignoring, forbidden_nodes)) =
|
if let Some((cost_of_ignoring, forbidden_nodes)) =
|
||||||
cost_of_ignoring_node.get(dest_idx)
|
cost_of_ignoring_node.get(dest_idx)
|
||||||
{
|
{
|
||||||
|
let dest = graph_nodes.get(dest_idx);
|
||||||
|
let dest_size = match &dest.data {
|
||||||
|
QueryNodeData::Term(term) => term.term_ids.len(),
|
||||||
|
_ => panic!(),
|
||||||
|
};
|
||||||
let new_edge_id = edges_store.insert(Some(Edge {
|
let new_edge_id = edges_store.insert(Some(Edge {
|
||||||
source_node: source_id,
|
source_node: source_id,
|
||||||
dest_node: dest_idx,
|
dest_node: dest_idx,
|
||||||
cost: *cost_of_ignoring,
|
cost: *cost_of_ignoring * dest_size as u32,
|
||||||
condition: None,
|
condition: None,
|
||||||
nodes_to_skip: forbidden_nodes.clone(),
|
nodes_to_skip: forbidden_nodes.clone(),
|
||||||
}));
|
}));
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
|
use crate::score_details::{Rank, ScoreDetails};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
|
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||||
@@ -84,4 +85,8 @@ impl RankingRuleGraphTrait for ExactnessGraph {
|
|||||||
|
|
||||||
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
|
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||||
|
ScoreDetails::Exactness(rank)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ use fxhash::FxHashSet;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
|
use crate::score_details::{Rank, ScoreDetails};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
||||||
@@ -68,7 +69,7 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
for fid in all_fields {
|
for fid in all_fields.iter().copied() {
|
||||||
// TODO: We can improve performances and relevancy by storing
|
// TODO: We can improve performances and relevancy by storing
|
||||||
// the term subsets associated to each field ids fetched.
|
// the term subsets associated to each field ids fetched.
|
||||||
edges.push((
|
edges.push((
|
||||||
@@ -80,6 +81,35 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
||||||
|
let max_fid: Option<u16> = {
|
||||||
|
if let Some(max_fid) = ctx
|
||||||
|
.index
|
||||||
|
.searchable_fields_ids(ctx.txn)?
|
||||||
|
.map(|field_ids| field_ids.into_iter().max())
|
||||||
|
{
|
||||||
|
max_fid
|
||||||
|
} else {
|
||||||
|
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(max_fid) = max_fid {
|
||||||
|
if !all_fields.contains(&max_fid) {
|
||||||
|
edges.push((
|
||||||
|
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
||||||
|
conditions_interner.insert(FidCondition {
|
||||||
|
term: term.clone(), // TODO remove this ugly clone
|
||||||
|
fid: max_fid,
|
||||||
|
}),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||||
|
ScoreDetails::Fid(rank)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner
|
|||||||
use super::query_term::LocatedQueryTermSubset;
|
use super::query_term::LocatedQueryTermSubset;
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
|
use crate::score_details::{Rank, ScoreDetails};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub struct ComputedCondition {
|
pub struct ComputedCondition {
|
||||||
@@ -110,6 +111,9 @@ pub trait RankingRuleGraphTrait: Sized + 'static {
|
|||||||
source_node: Option<&LocatedQueryTermSubset>,
|
source_node: Option<&LocatedQueryTermSubset>,
|
||||||
dest_node: &LocatedQueryTermSubset,
|
dest_node: &LocatedQueryTermSubset,
|
||||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
|
) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
|
||||||
|
|
||||||
|
/// Convert the rank of a path to its corresponding score for the ranking rule
|
||||||
|
fn rank_to_score(rank: Rank) -> ScoreDetails;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The graph used by graph-based ranking rules.
|
/// The graph used by graph-based ranking rules.
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ use fxhash::{FxHashMap, FxHashSet};
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
|
use crate::score_details::{Rank, ScoreDetails};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
|
||||||
@@ -105,8 +106,20 @@ impl RankingRuleGraphTrait for PositionGraph {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// artificial empty condition for computing max cost
|
||||||
|
let max_cost = term.term_ids.len() as u32 * 10;
|
||||||
|
edges.push((
|
||||||
|
max_cost,
|
||||||
|
conditions_interner
|
||||||
|
.insert(PositionCondition { term: term.clone(), positions: Vec::default() }),
|
||||||
|
));
|
||||||
|
|
||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||||
|
ScoreDetails::Position(rank)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cost_from_position(sum_positions: u32) -> u32 {
|
fn cost_from_position(sum_positions: u32) -> u32 {
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ pub mod compute_docids;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
|
use crate::score_details::{Rank, ScoreDetails};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::SearchContext;
|
use crate::search::new::SearchContext;
|
||||||
@@ -36,4 +37,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||||
build::build_edges(ctx, conditions_interner, source_term, dest_term)
|
build::build_edges(ctx, conditions_interner, source_term, dest_term)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||||
|
ScoreDetails::Proximity(rank)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
|
use crate::score_details::{self, Rank, ScoreDetails};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||||
@@ -75,4 +76,8 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
}
|
}
|
||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||||
|
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
|
use crate::score_details::{self, Rank, ScoreDetails};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||||
@@ -41,9 +42,10 @@ impl RankingRuleGraphTrait for WordsGraph {
|
|||||||
_from: Option<&LocatedQueryTermSubset>,
|
_from: Option<&LocatedQueryTermSubset>,
|
||||||
to_term: &LocatedQueryTermSubset,
|
to_term: &LocatedQueryTermSubset,
|
||||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||||
Ok(vec![(
|
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
|
||||||
to_term.term_ids.len() as u32,
|
}
|
||||||
conditions_interner.insert(WordsCondition { term: to_term.clone() }),
|
|
||||||
)])
|
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||||
|
ScoreDetails::Words(score_details::Words::from_rank(rank))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::{QueryGraph, SearchContext};
|
use super::{QueryGraph, SearchContext};
|
||||||
|
use crate::score_details::ScoreDetails;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
|
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
|
||||||
@@ -66,4 +67,6 @@ pub struct RankingRuleOutput<Q> {
|
|||||||
pub query: Q,
|
pub query: Q,
|
||||||
/// The allowed candidates for the child ranking rule
|
/// The allowed candidates for the child ranking rule
|
||||||
pub candidates: RoaringBitmap,
|
pub candidates: RoaringBitmap,
|
||||||
|
/// The score for the candidates of the current bucket
|
||||||
|
pub score: ScoreDetails,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
|
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
|
||||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||||
use crate::heed_codec::ByteSliceRefCodec;
|
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
|
||||||
|
use crate::score_details::{self, ScoreDetails};
|
||||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
@@ -67,7 +69,7 @@ impl<'ctx, Query> Sort<'ctx, Query> {
|
|||||||
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
|
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
|
||||||
fn id(&self) -> String {
|
fn id(&self) -> String {
|
||||||
let Self { field_name, is_ascending, .. } = self;
|
let Self { field_name, is_ascending, .. } = self;
|
||||||
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
|
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
|
||||||
}
|
}
|
||||||
fn start_iteration(
|
fn start_iteration(
|
||||||
&mut self,
|
&mut self,
|
||||||
@@ -118,12 +120,43 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
|
|||||||
|
|
||||||
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
|
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
|
||||||
};
|
};
|
||||||
|
let number_iter = number_iter.map(|r| -> Result<_> {
|
||||||
|
let (docids, bytes) = r?;
|
||||||
|
Ok((
|
||||||
|
docids,
|
||||||
|
serde_json::Value::Number(
|
||||||
|
serde_json::Number::from_f64(
|
||||||
|
OrderedF64Codec::bytes_decode(bytes).expect("some number"),
|
||||||
|
)
|
||||||
|
.expect("too big float"),
|
||||||
|
),
|
||||||
|
))
|
||||||
|
});
|
||||||
|
let string_iter = string_iter.map(|r| -> Result<_> {
|
||||||
|
let (docids, bytes) = r?;
|
||||||
|
Ok((
|
||||||
|
docids,
|
||||||
|
serde_json::Value::String(
|
||||||
|
StrRefCodec::bytes_decode(bytes).expect("some string").to_owned(),
|
||||||
|
),
|
||||||
|
))
|
||||||
|
});
|
||||||
|
|
||||||
let query_graph = parent_query.clone();
|
let query_graph = parent_query.clone();
|
||||||
|
let ascending = self.is_ascending;
|
||||||
|
let field_name = self.field_name.clone();
|
||||||
RankingRuleOutputIterWrapper::new(Box::new(number_iter.chain(string_iter).map(
|
RankingRuleOutputIterWrapper::new(Box::new(number_iter.chain(string_iter).map(
|
||||||
move |r| {
|
move |r| {
|
||||||
let (docids, _) = r?;
|
let (docids, value) = r?;
|
||||||
Ok(RankingRuleOutput { query: query_graph.clone(), candidates: docids })
|
Ok(RankingRuleOutput {
|
||||||
|
query: query_graph.clone(),
|
||||||
|
candidates: docids,
|
||||||
|
score: ScoreDetails::Sort(score_details::Sort {
|
||||||
|
field_name: field_name.clone(),
|
||||||
|
ascending,
|
||||||
|
value,
|
||||||
|
}),
|
||||||
|
})
|
||||||
},
|
},
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
@@ -150,7 +183,15 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
|
|||||||
Ok(Some(bucket))
|
Ok(Some(bucket))
|
||||||
} else {
|
} else {
|
||||||
let query = self.original_query.as_ref().unwrap().clone();
|
let query = self.original_query.as_ref().unwrap().clone();
|
||||||
Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }))
|
Ok(Some(RankingRuleOutput {
|
||||||
|
query,
|
||||||
|
candidates: universe.clone(),
|
||||||
|
score: ScoreDetails::Sort(score_details::Sort {
|
||||||
|
field_name: self.field_name.clone(),
|
||||||
|
ascending: self.is_ascending,
|
||||||
|
value: serde_json::Value::Null,
|
||||||
|
}),
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user