Compare commits

..

1 Commit

Author          SHA1        Message         Date
Louis Dureuil   2d1b25388c  Fix test again  2023-06-09 08:38:26 +02:00
6 changed files with 36 additions and 466 deletions

View File

@@ -236,7 +236,6 @@ InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMergeStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;

View File

@@ -1,26 +1,20 @@
use std::collections::HashMap;
use actix_http::StatusCode;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::InvalidMultiSearchMergeStrategy;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::score_details::NotComparable;
use serde::Serialize;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::milli::score_details::ScoreDetails;
use crate::search::{
add_search_rules, perform_search, SearchHit, SearchQueryWithIndex, SearchResultWithIndex,
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -29,34 +23,13 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
#[derive(Serialize)]
struct SearchResults {
#[serde(skip_serializing_if = "Option::is_none")]
aggregate_hits: Option<Vec<SearchHitWithIndex>>,
results: Vec<SearchResultWithIndex>,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
struct SearchHitWithIndex {
pub index_uid: String,
#[serde(flatten)]
pub hit: SearchHit,
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueries {
queries: Vec<SearchQueryWithIndex>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeStrategy>)]
merge_strategy: MergeStrategy,
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr, Default)]
#[deserr(rename_all = camelCase)]
pub enum MergeStrategy {
#[default]
None,
ByNormalizedScore,
ByScoreDetails,
}
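The removed SearchQueries/MergeStrategy types above were the request-side surface of this feature: with rename_all = camelCase and the #[default] on None, a client would have picked a strategy through a mergeStrategy field with values such as byScoreDetails. The sketch below only illustrates that name mapping, using serde as a stand-in for deserr; the serde derive, the trimmed-down SearchQueries, and the round-trip in main are assumptions for illustration, not code from this repository.

use serde::Deserialize;

// Illustrative serde stand-in: `rename_all = "camelCase"` mirrors the deserr attribute
// above, so `ByScoreDetails` is spelled `byScoreDetails` on the wire and a missing
// field falls back to `MergeStrategy::None`.
#[derive(Debug, Default, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
enum MergeStrategy {
    #[default]
    None,
    ByNormalizedScore,
    ByScoreDetails,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SearchQueries {
    #[serde(default)]
    merge_strategy: MergeStrategy,
}

fn main() {
    let body = r#"{ "mergeStrategy": "byScoreDetails" }"#;
    let parsed: SearchQueries = serde_json::from_str(body).unwrap();
    assert_eq!(parsed.merge_strategy, MergeStrategy::ByScoreDetails);
}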
pub async fn multi_search_with_post(
@@ -65,13 +38,7 @@ pub async fn multi_search_with_post(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let SearchQueries { queries, merge_strategy } = params.into_inner();
// FIXME: REMOVE UNWRAP
let max_hits = queries
.iter()
.map(|SearchQueryWithIndex { limit, hits_per_page, .. }| hits_per_page.unwrap_or(*limit))
.max()
.unwrap();
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
@@ -137,117 +104,7 @@ pub async fn multi_search_with_post(
debug!("returns: {:?}", search_results);
let aggregate_hits = match merge_strategy {
MergeStrategy::None => None,
MergeStrategy::ByScoreDetails => Some(merge_by_score_details(&search_results, max_hits)),
MergeStrategy::ByNormalizedScore => {
Some(merge_by_normalized_score(&search_results, max_hits))
}
};
Ok(HttpResponse::Ok().json(SearchResults { aggregate_hits, results: search_results }))
}
fn merge_by_score_details(
search_results: &[SearchResultWithIndex],
max_hits: usize,
) -> Vec<SearchHitWithIndex> {
let mut iterators: Vec<_> = search_results
.iter()
.filter_map(|SearchResultWithIndex { index_uid, result }| {
let mut it = result.hits.iter();
let next = it.next()?;
Some((index_uid, it, next))
})
.collect();
let mut hits = Vec::with_capacity(max_hits);
let mut inconsistent_indexes = HashMap::new();
for _ in 0..max_hits {
iterators.sort_by(|(left_uid, _, left_hit), (right_uid, _, right_hit)| {
let error = match ScoreDetails::partial_cmp_iter(
left_hit.ranking_score_raw.iter(),
right_hit.ranking_score_raw.iter(),
) {
Ok(ord) => return ord,
Err(NotComparable(incomparable_index)) => incomparable_index,
};
inconsistent_indexes.entry((left_uid.to_owned(), right_uid.to_owned())).or_insert_with(
|| {
format!(
"Detailed score {:?} is not comparable with {:?}: (left: {:#?}, right: {:#?})",
left_hit.ranking_score_raw.get(error),
right_hit.ranking_score_raw.get(error),
left_hit.ranking_score_raw,
right_hit.ranking_score_raw
)
},
);
std::cmp::Ordering::Less
});
if !inconsistent_indexes.is_empty() {
let mut s = String::new();
for ((left_uid, right_uid), error) in &inconsistent_indexes {
use std::fmt::Write;
writeln!(s, "Indexes {} and {} are inconsistent: {}", left_uid, right_uid, error)
.unwrap();
}
// Replace panic with proper error
panic!("{}", s);
}
let Some((index_uid, it, next)) = iterators.last_mut()
else {
break;
};
let hit = SearchHitWithIndex { index_uid: index_uid.clone(), hit: next.clone() };
if let Some(next_hit) = it.next() {
*next = next_hit;
} else {
iterators.pop();
}
hits.push(hit);
}
hits
}
fn merge_by_normalized_score(
search_results: &[SearchResultWithIndex],
max_hits: usize,
) -> Vec<SearchHitWithIndex> {
let mut iterators: Vec<_> = search_results
.iter()
.filter_map(|SearchResultWithIndex { index_uid, result }| {
let mut it = result.hits.iter();
let next = it.next()?;
Some((index_uid, it, next))
})
.collect();
let mut hits = Vec::with_capacity(max_hits);
for _ in 0..max_hits {
iterators.sort_by_key(|(_, _, hit)| {
ScoreDetails::global_score_linear_scale(hit.ranking_score_raw.iter())
});
let Some((index_uid, it, next)) = iterators.last_mut()
else {
break;
};
let hit = SearchHitWithIndex { index_uid: index_uid.clone(), hit: next.clone() };
if let Some(next_hit) = it.next() {
*next = next_hit;
} else {
iterators.pop();
}
hits.push(hit);
}
hits
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}
/// Local `Result` extension trait to avoid `map_err` boilerplate.
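Both removed merge functions follow the same k-way merge: every index contributes hits that are already ordered best-first, the per-index iterators are re-sorted by the score of their current head hit, and the best head is popped on each round until max_hits results (the largest limit/hitsPerPage across the queries) have been collected. Below is a minimal, self-contained sketch of that pattern over plain numeric scores; the merge_best_first helper and the (String, Vec<f64>) shape are hypothetical simplifications, not the Meilisearch types.

// Hypothetical simplification: each entry is (index_uid, hit scores sorted best-first,
// i.e. descending). Returns at most `max_hits` (index_uid, score) pairs, best first.
fn merge_best_first(results: &[(String, Vec<f64>)], max_hits: usize) -> Vec<(String, f64)> {
    // One (index_uid, remaining hits, current head) entry per index with at least one hit.
    let mut iterators: Vec<_> = results
        .iter()
        .filter_map(|(uid, scores)| {
            let mut it = scores.iter();
            let head = it.next()?;
            Some((uid.as_str(), it, head))
        })
        .collect();

    let mut hits = Vec::with_capacity(max_hits);
    for _ in 0..max_hits {
        // Sort ascending by the current head score so the best head ends up last.
        iterators.sort_by(|a, b| a.2.total_cmp(b.2));
        let Some((uid, it, head)) = iterators.last_mut() else { break };
        hits.push((uid.to_string(), **head));
        // Advance that index's iterator, dropping it once exhausted.
        if let Some(next) = it.next() {
            *head = next;
        } else {
            iterators.pop();
        }
    }
    hits
}

merge_by_normalized_score is this loop with global_score_linear_scale as the sort key, while merge_by_score_details replaces the scalar comparison with ScoreDetails::partial_cmp_iter and, per the FIXME-style comment, still panics when two indexes report score details that are not comparable.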

View File

@@ -219,8 +219,6 @@ pub struct SearchHit {
pub ranking_score: Option<u64>,
#[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
#[serde(skip)]
pub ranking_score_raw: Vec<ScoreDetails>,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
@@ -447,7 +445,6 @@ pub fn perform_search(
matches_position,
ranking_score_details,
ranking_score,
ranking_score_raw: score,
};
documents.push(hit);
}

View File

@@ -35,8 +35,7 @@ async fn formatted_contain_wildcard() {
"length": 5
}
]
},
"_rankingScore": "[score]"
}
}
"###);
}
@@ -53,8 +52,7 @@ async fn formatted_contain_wildcard() {
@r###"
{
"id": 852,
"cattos": "pésti",
"_rankingScore": "[score]"
"cattos": "pésti"
}
"###)
}
@@ -84,8 +82,7 @@ async fn formatted_contain_wildcard() {
"length": 5
}
]
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -107,8 +104,7 @@ async fn formatted_contain_wildcard() {
"_formatted": {
"id": "852",
"cattos": "pésti"
},
"_rankingScore": "[score]"
}
}
"###);
}
@@ -129,8 +125,7 @@ async fn formatted_contain_wildcard() {
"_formatted": {
"id": "852",
"cattos": "pésti"
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -164,8 +159,7 @@ async fn format_nested() {
"name": "buddy",
"age": 4
}
],
"_rankingScore": "[score]"
]
}
"###)
}
@@ -189,8 +183,7 @@ async fn format_nested() {
{
"name": "buddy"
}
],
"_rankingScore": "[score]"
]
}
"###)
}
@@ -223,8 +216,7 @@ async fn format_nested() {
"length": 5
}
]
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -250,8 +242,7 @@ async fn format_nested() {
"name": "buddy"
}
]
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -276,8 +267,7 @@ async fn format_nested() {
"name": "buddy"
}
]
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -312,8 +302,7 @@ async fn format_nested() {
"age": "4"
}
]
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -340,8 +329,7 @@ async fn format_nested() {
"age": "4"
}
]
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -371,8 +359,7 @@ async fn displayedattr_2_smol() {
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_rankingScore": "[score]"
"id": 852
}
"###)
}
@@ -387,8 +374,7 @@ async fn displayedattr_2_smol() {
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_rankingScore": "[score]"
"id": 852
}
"###)
}
@@ -406,8 +392,7 @@ async fn displayedattr_2_smol() {
"id": 852,
"_formatted": {
"id": "852"
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -425,8 +410,7 @@ async fn displayedattr_2_smol() {
"id": 852,
"_formatted": {
"id": "852"
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -446,8 +430,7 @@ async fn displayedattr_2_smol() {
"id": 852,
"_formatted": {
"id": "852"
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -463,8 +446,7 @@ async fn displayedattr_2_smol() {
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_rankingScore": "[score]"
"id": 852
}
"###)
}
@@ -479,8 +461,7 @@ async fn displayedattr_2_smol() {
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_rankingScore": "[score]"
"id": 852
}
"###)
}
@@ -493,11 +474,7 @@ async fn displayedattr_2_smol() {
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_rankingScore": "[score]"
}
"###)
@"{}")
}
})
.await;
@@ -510,11 +487,7 @@ async fn displayedattr_2_smol() {
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_rankingScore": "[score]"
}
"###)
@"{}")
}
}
@@ -533,8 +506,7 @@ async fn displayedattr_2_smol() {
{
"_formatted": {
"id": "852"
},
"_rankingScore": "[score]"
}
}
"###)
}
@@ -554,8 +526,7 @@ async fn displayedattr_2_smol() {
{
"_formatted": {
"id": "852"
},
"_rankingScore": "[score]"
}
}
"###)
}

View File

@@ -72,8 +72,7 @@ async fn simple_search_single_index() {
"hits": [
{
"title": "Gläss",
"id": "450465",
"_rankingScore": "[score]"
"id": "450465"
}
],
"query": "glass",
@@ -87,8 +86,7 @@ async fn simple_search_single_index() {
"hits": [
{
"title": "Captain Marvel",
"id": "299537",
"_rankingScore": "[score]"
"id": "299537"
}
],
"query": "captain",
@@ -179,8 +177,7 @@ async fn simple_search_two_indexes() {
"hits": [
{
"title": "Gläss",
"id": "450465",
"_rankingScore": "[score]"
"id": "450465"
}
],
"query": "glass",
@@ -206,8 +203,7 @@ async fn simple_search_two_indexes() {
"age": 4
}
],
"cattos": "pésti",
"_rankingScore": "[score]"
"cattos": "pésti"
},
{
"id": 654,
@@ -222,8 +218,7 @@ async fn simple_search_two_indexes() {
"cattos": [
"simba",
"pestiféré"
],
"_rankingScore": "[score]"
]
}
],
"query": "pésti",

View File

@@ -1,5 +1,3 @@
use std::cmp::Ordering;
use serde::Serialize;
use crate::distance_between_two_points;
@@ -17,36 +15,6 @@ pub enum ScoreDetails {
GeoSort(GeoSort),
}
impl PartialOrd for ScoreDetails {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
use ScoreDetails::*;
match (self, other) {
// matching left- and right-hand variants => defer to the sub impl
(Words(left), Words(right)) => left.partial_cmp(right),
(Typo(left), Typo(right)) => left.partial_cmp(right),
(Proximity(left), Proximity(right)) => left.partial_cmp(right),
(Fid(left), Fid(right)) => left.partial_cmp(right),
(Position(left), Position(right)) => left.partial_cmp(right),
(ExactAttribute(left), ExactAttribute(right)) => left.partial_cmp(right),
(Exactness(left), Exactness(right)) => left.partial_cmp(right),
(Sort(left), Sort(right)) => left.partial_cmp(right),
(GeoSort(left), GeoSort(right)) => left.partial_cmp(right),
// non-matching left- and right-hand variants => None
// written this way rather than with a single `_` arm, so that adding a new variant
// still results in a compile error
(Words(_), _) => None,
(Typo(_), _) => None,
(Proximity(_), _) => None,
(Fid(_), _) => None,
(Position(_), _) => None,
(ExactAttribute(_), _) => None,
(Exactness(_), _) => None,
(Sort(_), _) => None,
(GeoSort(_), _) => None,
}
}
}
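The removed impl spells out every non-matching pair of variants instead of using a single `_` arm, so adding a ScoreDetails variant turns the match non-exhaustive and produces a compile error until the comparison is updated. A stripped-down sketch of that pattern, using a hypothetical two-variant enum rather than the real ScoreDetails:

use std::cmp::Ordering;

// Hypothetical stand-in for ScoreDetails.
enum Rule {
    Words(u32),
    Typo(u32),
}

fn compare(left: &Rule, right: &Rule) -> Option<Ordering> {
    use Rule::*;
    match (left, right) {
        // matching variants: defer to the payload comparison
        (Words(l), Words(r)) => Some(l.cmp(r)),
        (Typo(l), Typo(r)) => Some(l.cmp(r)),
        // non-matching variants are not comparable; one arm per variant (instead of a
        // blanket `_ => None`) means adding a variant to `Rule` makes this match
        // non-exhaustive, which the compiler reports immediately.
        (Words(_), _) => None,
        (Typo(_), _) => None,
    }
}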
impl ScoreDetails {
pub fn local_score(&self) -> Option<f64> {
self.rank().map(Rank::local_score)
@@ -201,57 +169,14 @@ impl ScoreDetails {
}
details_map
}
pub fn partial_cmp_iter<'a>(
mut left: impl Iterator<Item = &'a Self>,
mut right: impl Iterator<Item = &'a Self>,
) -> Result<Ordering, NotComparable> {
let mut index = 0;
let mut order = match (left.next(), right.next()) {
(Some(left), Some(right)) => left.partial_cmp(right).incomparable(index)?,
_ => return Ok(Ordering::Equal),
};
for (left, right) in left.zip(right) {
if order != Ordering::Equal {
return Ok(order);
};
index += 1;
order = left.partial_cmp(right).incomparable(index)?;
}
Ok(order)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NotComparable(pub usize);
trait OptionToNotComparable<T> {
fn incomparable(self, index: usize) -> Result<T, NotComparable>;
}
impl<T> OptionToNotComparable<T> for Option<T> {
fn incomparable(self, index: usize) -> Result<T, NotComparable> {
match self {
Some(t) => Ok(t),
None => Err(NotComparable(index)),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Words {
pub matching_words: u32,
pub max_matching_words: u32,
}
impl PartialOrd for Words {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
(self.max_matching_words == other.max_matching_words)
.then(|| self.matching_words.cmp(&other.matching_words))
}
}
impl Words {
pub fn rank(&self) -> Rank {
Rank { rank: self.matching_words, max_rank: self.max_matching_words }
@@ -262,21 +187,12 @@ impl Words {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Typo {
pub typo_count: u32,
pub max_typo_count: u32,
}
impl PartialOrd for Typo {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
(self.max_typo_count == other.max_typo_count).then(|| {
// the order is reversed, as having fewer typos gives a better score
self.typo_count.cmp(&other.typo_count).reverse()
})
}
}
impl Typo {
pub fn rank(&self) -> Rank {
Rank {
@@ -297,7 +213,7 @@ impl Typo {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rank {
/// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
///
@@ -310,12 +226,6 @@ pub struct Rank {
pub max_rank: u32,
}
impl PartialOrd for Rank {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
(self.max_rank == other.max_rank).then(|| self.rank.cmp(&other.rank))
}
}
impl Rank {
pub fn local_score(self) -> f64 {
self.rank as f64 / self.max_rank as f64
@@ -346,10 +256,9 @@ impl Rank {
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ExactAttribute {
// Do not reorder as the order is significant, from least relevant to most relevant
NoExactMatch,
MatchesStart,
MatchesFull,
MatchesStart,
NoExactMatch,
}
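The "Do not reorder" comment (removed here along with the least-to-most-relevant variant order) exists because ExactAttribute relies on the derived PartialOrd/Ord, and derived ordering on a fieldless enum follows declaration order: the earliest variant is the smallest. A short demonstration of that rule on a hypothetical enum:

// With a derived Ord, enum variants compare by declaration order: Low < Medium < High.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Relevance {
    Low,
    Medium,
    High,
}

fn main() {
    assert!(Relevance::Low < Relevance::Medium);
    assert!(Relevance::Medium < Relevance::High);
}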
impl ExactAttribute {
@@ -370,68 +279,13 @@ pub struct Sort {
pub value: serde_json::Value,
}
impl PartialOrd for Sort {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
if self.field_name != other.field_name {
return None;
}
if self.ascending != other.ascending {
return None;
}
match (&self.value, &other.value) {
(serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal),
(serde_json::Value::Null, _) => Some(Ordering::Less),
(_, serde_json::Value::Null) => Some(Ordering::Greater),
// numbers are always before strings
(serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater),
(serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less),
(serde_json::Value::Number(left), serde_json::Value::Number(right)) => {
// FIXME: unwrap permitted here?
let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?;
// always reversed, as bigger is better
Some(if self.ascending { order.reverse() } else { order })
}
(serde_json::Value::String(left), serde_json::Value::String(right)) => {
let order = left.cmp(right);
Some(if self.ascending { order.reverse() } else { order })
}
_ => None,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct GeoSort {
pub target_point: [f64; 2],
pub ascending: bool,
pub value: Option<[f64; 2]>,
}
impl PartialOrd for GeoSort {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
if self.target_point != other.target_point {
return None;
}
if self.ascending != other.ascending {
return None;
}
Some(match (self.distance(), other.distance()) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(left), Some(right)) => {
let order = left.partial_cmp(&right)?;
if self.ascending {
// when ascending, the one with the smallest distance has the best score
order.reverse()
} else {
order
}
}
})
}
}
impl GeoSort {
pub fn distance(&self) -> Option<f64> {
self.value.map(|value| distance_between_two_points(&self.target_point, &value))
@@ -439,106 +293,3 @@ impl GeoSort {
}
const LINEAR_SCALE_FACTOR: f64 = 1000.0;
#[cfg(test)]
mod test {
use super::*;
#[test]
fn compare() {
let left = [
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
ScoreDetails::Sort(Sort {
field_name: "doggo".into(),
ascending: true,
value: "Intel the Beagle".into(),
}),
];
let right = [
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
ScoreDetails::Sort(Sort {
field_name: "doggo".into(),
ascending: true,
value: "Max the Labrador".into(),
}),
];
assert_eq!(
Ok(Ordering::Greater),
ScoreDetails::partial_cmp_iter(left.iter(), right.iter())
);
// equal when all the common components are equal
assert_eq!(
Ok(Ordering::Equal),
ScoreDetails::partial_cmp_iter(left[0..1].iter(), right.iter())
);
let right = [
ScoreDetails::Words(Words { matching_words: 4, max_matching_words: 4 }),
ScoreDetails::Sort(Sort {
field_name: "doggo".into(),
ascending: true,
value: "Max the Labrador".into(),
}),
];
assert_eq!(Ok(Ordering::Less), ScoreDetails::partial_cmp_iter(left.iter(), right.iter()));
}
#[test]
fn sort_not_comparable() {
let left = [
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
ScoreDetails::Sort(Sort {
// not the same field name
field_name: "catto".into(),
ascending: true,
value: "Sylver the cat".into(),
}),
];
let right = [
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
ScoreDetails::Sort(Sort {
field_name: "doggo".into(),
ascending: true,
value: "Max the Labrador".into(),
}),
];
assert_eq!(
Err(NotComparable(1)),
ScoreDetails::partial_cmp_iter(left.iter(), right.iter())
);
let left = [
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
ScoreDetails::Sort(Sort {
field_name: "doggo".into(),
// Not the same order
ascending: false,
value: "Intel the Beagle".into(),
}),
];
let right = [
ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 4 }),
ScoreDetails::Sort(Sort {
field_name: "doggo".into(),
ascending: true,
value: "Max the Labrador".into(),
}),
];
assert_eq!(
Err(NotComparable(1)),
ScoreDetails::partial_cmp_iter(left.iter(), right.iter())
);
}
#[test]
fn sort_behavior() {
let left = Sort { field_name: "price".into(), ascending: true, value: "5400".into() };
let right = Sort { field_name: "price".into(), ascending: true, value: 53.into() };
// numbers always rank better than strings
assert_eq!(Some(Ordering::Less), left.partial_cmp(&right));
let left = Sort { field_name: "price".into(), ascending: false, value: "5400".into() };
let right = Sort { field_name: "price".into(), ascending: false, value: 53.into() };
// true regardless of the sort direction
assert_eq!(Some(Ordering::Less), left.partial_cmp(&right));
}
}
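One helper referenced above but not shown in this compare view is ScoreDetails::global_score_linear_scale, used by merge_by_normalized_score together with LINEAR_SCALE_FACTOR. Purely as an illustration of the idea, the sketch below folds per-rule local scores (each in [0, 1], as produced by Rank::local_score) into a single sortable integer by scaling every score to 0..=1000 and packing them most-significant-rule first. The function body, the packing scheme, and the assumption that every hit reports the same number of rules are all hypothetical, not taken from the repository.

// Purely illustrative; NOT the actual implementation behind this diff.
const LINEAR_SCALE_FACTOR: f64 = 1000.0;

// Fold local scores (one per ranking rule, in [0, 1], best rule first) into one integer
// whose natural order matches the lexicographic order of the scores, assuming both
// sides report the same number of rules.
fn global_score_linear_scale(local_scores: impl Iterator<Item = f64>) -> u64 {
    local_scores.fold(0, |acc, local| {
        let bucket = (local.clamp(0.0, 1.0) * LINEAR_SCALE_FACTOR).round() as u64;
        acc * (LINEAR_SCALE_FACTOR as u64 + 1) + bucket
    })
}

fn main() {
    // 3 of 4 words matched followed by a perfect typo score, versus the same words
    // score followed by a worse typo score.
    let better = global_score_linear_scale([0.75, 1.0].into_iter());
    let worse = global_score_linear_scale([0.75, 0.4].into_iter());
    assert!(better > worse);
}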