Compare commits

...

3 Commits

Author SHA1 Message Date
2495058a6e Fix the tests 2023-11-15 10:21:30 +01:00
4cfb48fbb6 Make the basic ranking rule boosting work 2023-11-08 14:47:35 +01:00
67dc0268c5 Rename Criterion/Criteria into RankingRules 2023-11-08 11:22:59 +01:00
33 changed files with 446 additions and 230 deletions

View File

@ -12,7 +12,7 @@ use milli::heed::EnvOpenOptions;
use milli::update::{
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
};
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use milli::{Filter, Index, Object, RankingRule, TermsMatchingStrategy};
use serde_json::Value;
pub struct Conf<'a> {
@ -78,11 +78,11 @@ pub fn base_setup(conf: &Conf) -> Index {
if let Some(criterion) = conf.criterion {
builder.reset_filterable_fields();
builder.reset_criteria();
builder.reset_ranking_rules();
builder.reset_stop_words();
let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect();
builder.set_criteria(criterion);
let criterion = criterion.iter().map(|s| RankingRule::from_str(s).unwrap()).collect();
builder.set_ranking_rules(criterion);
}
(conf.configure)(&mut builder);

View File

@ -9,7 +9,7 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use milli::{Index, RankingRule, RankingRuleError, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
use crate::deserr::DeserrJsonError;
@ -117,10 +117,10 @@ pub struct PaginationSettings {
pub max_total_hits: Setting<usize>,
}
impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRankingRules> {
impl MergeWithError<milli::RankingRuleError> for DeserrJsonError<InvalidSettingsRankingRules> {
fn merge(
_self_: Option<Self>,
other: milli::CriterionError,
other: milli::RankingRuleError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
@ -344,9 +344,9 @@ pub fn apply_settings_to_builder(
match settings.ranking_rules {
Setting::Set(ref criteria) => {
builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
builder.set_ranking_rules(criteria.iter().map(|c| c.clone().into()).collect())
}
Setting::Reset => builder.reset_criteria(),
Setting::Reset => builder.reset_ranking_rules(),
Setting::NotSet => (),
}
@ -578,11 +578,13 @@ pub fn settings(
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(try_from(&String) = FromStr::from_str -> CriterionError)]
#[deserr(try_from(&String) = FromStr::from_str -> RankingRuleError)]
pub enum RankingRuleView {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by documents matching the given filter and then documents not matching it.
Boost(String),
/// Sorted by increasing number of typos.
Typo,
/// Sorted by increasing distance between matched query terms.
@ -605,7 +607,7 @@ impl Serialize for RankingRuleView {
where
S: Serializer,
{
serializer.serialize_str(&format!("{}", Criterion::from(self.clone())))
serializer.serialize_str(&format!("{}", RankingRule::from(self.clone())))
}
}
impl<'de> Deserialize<'de> for RankingRuleView {
@ -623,7 +625,7 @@ impl<'de> Deserialize<'de> for RankingRuleView {
where
E: serde::de::Error,
{
let criterion = Criterion::from_str(v).map_err(|_| {
let criterion = RankingRule::from_str(v).map_err(|_| {
E::invalid_value(serde::de::Unexpected::Str(v), &"a valid ranking rule")
})?;
Ok(RankingRuleView::from(criterion))
@ -633,42 +635,44 @@ impl<'de> Deserialize<'de> for RankingRuleView {
}
}
impl FromStr for RankingRuleView {
type Err = <Criterion as FromStr>::Err;
type Err = <RankingRule as FromStr>::Err;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(RankingRuleView::from(Criterion::from_str(s)?))
Ok(RankingRuleView::from(RankingRule::from_str(s)?))
}
}
impl fmt::Display for RankingRuleView {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fmt::Display::fmt(&Criterion::from(self.clone()), f)
fmt::Display::fmt(&RankingRule::from(self.clone()), f)
}
}
impl From<Criterion> for RankingRuleView {
fn from(value: Criterion) -> Self {
impl From<RankingRule> for RankingRuleView {
fn from(value: RankingRule) -> Self {
match value {
Criterion::Words => RankingRuleView::Words,
Criterion::Typo => RankingRuleView::Typo,
Criterion::Proximity => RankingRuleView::Proximity,
Criterion::Attribute => RankingRuleView::Attribute,
Criterion::Sort => RankingRuleView::Sort,
Criterion::Exactness => RankingRuleView::Exactness,
Criterion::Asc(x) => RankingRuleView::Asc(x),
Criterion::Desc(x) => RankingRuleView::Desc(x),
RankingRule::Words => RankingRuleView::Words,
RankingRule::Boost(filter) => RankingRuleView::Boost(filter),
RankingRule::Typo => RankingRuleView::Typo,
RankingRule::Proximity => RankingRuleView::Proximity,
RankingRule::Attribute => RankingRuleView::Attribute,
RankingRule::Sort => RankingRuleView::Sort,
RankingRule::Exactness => RankingRuleView::Exactness,
RankingRule::Asc(x) => RankingRuleView::Asc(x),
RankingRule::Desc(x) => RankingRuleView::Desc(x),
}
}
}
impl From<RankingRuleView> for Criterion {
impl From<RankingRuleView> for RankingRule {
fn from(value: RankingRuleView) -> Self {
match value {
RankingRuleView::Words => Criterion::Words,
RankingRuleView::Typo => Criterion::Typo,
RankingRuleView::Proximity => Criterion::Proximity,
RankingRuleView::Attribute => Criterion::Attribute,
RankingRuleView::Sort => Criterion::Sort,
RankingRuleView::Exactness => Criterion::Exactness,
RankingRuleView::Asc(x) => Criterion::Asc(x),
RankingRuleView::Desc(x) => Criterion::Desc(x),
RankingRuleView::Words => RankingRule::Words,
RankingRuleView::Boost(filter) => RankingRule::Boost(filter),
RankingRuleView::Typo => RankingRule::Typo,
RankingRuleView::Proximity => RankingRule::Proximity,
RankingRuleView::Attribute => RankingRule::Attribute,
RankingRuleView::Sort => RankingRule::Sort,
RankingRuleView::Exactness => RankingRule::Exactness,
RankingRuleView::Asc(x) => RankingRule::Asc(x),
RankingRuleView::Desc(x) => RankingRule::Desc(x),
}
}
}

View File

@ -3,7 +3,7 @@ use heed::EnvOpenOptions;
// use maplit::hashset;
use milli::{
update::{IndexerConfig, Settings},
Criterion, Index,
Index, RankingRule,
};
fn main() {
@ -19,13 +19,13 @@ fn main() {
// builder.set_min_word_len_one_typo(5);
// builder.set_min_word_len_two_typos(7);
// builder.set_sortable_fields(hashset! { S("release_date") });
builder.set_criteria(vec![
Criterion::Words,
Criterion::Typo,
Criterion::Proximity,
Criterion::Attribute,
Criterion::Sort,
Criterion::Exactness,
builder.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Typo,
RankingRule::Proximity,
RankingRule::Attribute,
RankingRule::Sort,
RankingRule::Exactness,
]);
builder.execute(|_| (), || false).unwrap();

View File

@ -8,7 +8,7 @@ use thiserror::Error;
use crate::error::is_reserved_keyword;
use crate::search::facet::BadGeoError;
use crate::{CriterionError, Error, UserError};
use crate::{Error, RankingRuleError, UserError};
/// This error type is never supposed to be shown to the end user.
/// You must always cast it to a sort error or a criterion error.
@ -28,23 +28,23 @@ impl From<BadGeoError> for AscDescError {
}
}
impl From<AscDescError> for CriterionError {
impl From<AscDescError> for RankingRuleError {
fn from(error: AscDescError) -> Self {
match error {
AscDescError::GeoError(_) => {
CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
RankingRuleError::ReservedNameForSort { name: "_geoPoint".to_string() }
}
AscDescError::InvalidSyntax { name } => CriterionError::InvalidName { name },
AscDescError::InvalidSyntax { name } => RankingRuleError::InvalidName { name },
AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => {
CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
RankingRuleError::ReservedNameForSort { name: "_geoPoint".to_string() }
}
AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => {
CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() }
RankingRuleError::ReservedNameForFilter { name: "_geoRadius".to_string() }
}
AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => {
CriterionError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
RankingRuleError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
}
AscDescError::ReservedKeyword { name } => CriterionError::ReservedName { name },
AscDescError::ReservedKeyword { name } => RankingRuleError::ReservedName { name },
}
}
}

38
milli/src/boost.rs Normal file
View File

@ -0,0 +1,38 @@
//! This module provides the `Boost` type and defines all the errors related to this type.
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::RankingRuleError;
/// This error type is never supposed to be shown to the end user.
/// You must always cast it to a sort error or a criterion error.
#[derive(Error, Debug)]
pub enum BoostError {
#[error("Invalid syntax for the boost parameter: expected expression starting with `boost:`, found `{name}`.")]
InvalidSyntax { name: String },
}
impl From<BoostError> for RankingRuleError {
fn from(error: BoostError) -> Self {
match error {
BoostError::InvalidSyntax { name } => RankingRuleError::InvalidName { name },
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Boost(pub String);
impl FromStr for Boost {
type Err = BoostError;
fn from_str(text: &str) -> Result<Boost, Self::Err> {
match text.split_once(':') {
Some(("boost", right)) => Ok(Boost(right.to_string())), // TODO check filter validity
_ => Err(BoostError::InvalidSyntax { name: text.to_string() }),
}
}
}

View File

@ -9,7 +9,7 @@ use serde_json::Value;
use thiserror::Error;
use crate::documents::{self, DocumentsBatchCursorError};
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
use crate::{DocumentId, FieldId, Object, RankingRuleError, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword)
@ -94,7 +94,7 @@ pub enum UserError {
#[error("A document cannot contain more than 65,535 fields.")]
AttributeLimitReached,
#[error(transparent)]
CriterionError(#[from] CriterionError),
CriterionError(#[from] RankingRuleError),
#[error("Maximum number of documents reached.")]
DocumentLimitReached,
#[error(
@ -280,7 +280,7 @@ error_from_sub_error! {
ThreadPoolBuildError => InternalError,
SerializationError => InternalError,
GeoError => UserError,
CriterionError => UserError,
RankingRuleError => UserError,
}
impl<E> From<grenad::Error<E>> for Error

View File

@ -25,10 +25,9 @@ use crate::heed_codec::{
};
use crate::readable_slices::ReadableSlices;
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
BEU32,
default_criteria, CboRoaringBitmapCodec, DocumentId, ExternalDocumentsIds, FacetDistribution,
FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, OrderBy, RankingRule,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
};
/// The HNSW data-structure that we serialize, fill and search in.
@ -895,7 +894,7 @@ impl Index {
let distinct_field = self.distinct_field(rtxn)?;
let asc_desc_fields =
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
RankingRule::Asc(field) | RankingRule::Desc(field) => Some(field),
_otherwise => None,
});
@ -1023,17 +1022,17 @@ impl Index {
pub(crate) fn put_criteria(
&self,
wtxn: &mut RwTxn,
criteria: &[Criterion],
criteria: &[RankingRule],
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
self.main.put::<_, Str, SerdeJson<&[RankingRule]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
}
pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
}
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? {
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<RankingRule>> {
match self.main.get::<_, Str, SerdeJson<Vec<RankingRule>>>(rtxn, main_key::CRITERIA_KEY)? {
Some(criteria) => Ok(criteria),
None => Ok(default_criteria()),
}

View File

@ -9,7 +9,7 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
pub mod documents;
mod asc_desc;
mod criterion;
mod boost;
pub mod distance;
mod error;
mod external_documents_ids;
@ -18,6 +18,7 @@ mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod proximity;
mod ranking_rule;
mod readable_slices;
pub mod score_details;
mod search;
@ -44,7 +45,6 @@ use serde_json::Value;
pub use {charabia as tokenizer, heed};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::criterion::{default_criteria, Criterion, CriterionError};
pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
@ -57,6 +57,7 @@ pub use self::heed_codec::{
UncheckedU8StrStrCodec,
};
pub use self::index::Index;
pub use self::ranking_rule::{default_criteria, RankingRule, RankingRuleError};
pub use self::search::{
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,

View File

@ -4,10 +4,11 @@ use std::str::FromStr;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::{AscDesc, Member};
use crate::boost::{Boost, BoostError};
use crate::{AscDesc, AscDescError, Member};
#[derive(Error, Debug)]
pub enum CriterionError {
pub enum RankingRuleError {
#[error("`{name}` ranking rule is invalid. Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.")]
InvalidName { name: String },
#[error("`{name}` is a reserved keyword and thus can't be used as a ranking rule")]
@ -25,10 +26,12 @@ pub enum CriterionError {
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
pub enum RankingRule {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by documents matching the given filter and then documents not matching it.
Boost(String),
/// Sorted by increasing number of typos.
Typo,
/// Sorted by increasing distance between matched query terms.
@ -47,62 +50,76 @@ pub enum Criterion {
Desc(String),
}
impl Criterion {
impl RankingRule {
/// Returns the field name parameter of this criterion.
pub fn field_name(&self) -> Option<&str> {
match self {
Criterion::Asc(name) | Criterion::Desc(name) => Some(name),
RankingRule::Asc(name) | RankingRule::Desc(name) => Some(name),
_otherwise => None,
}
}
}
impl FromStr for Criterion {
type Err = CriterionError;
impl FromStr for RankingRule {
type Err = RankingRuleError;
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
fn from_str(text: &str) -> Result<RankingRule, Self::Err> {
match text {
"words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo),
"proximity" => Ok(Criterion::Proximity),
"attribute" => Ok(Criterion::Attribute),
"sort" => Ok(Criterion::Sort),
"exactness" => Ok(Criterion::Exactness),
text => match AscDesc::from_str(text)? {
AscDesc::Asc(Member::Field(field)) => Ok(Criterion::Asc(field)),
AscDesc::Desc(Member::Field(field)) => Ok(Criterion::Desc(field)),
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
Err(CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() })?
}
"words" => Ok(RankingRule::Words),
"typo" => Ok(RankingRule::Typo),
"proximity" => Ok(RankingRule::Proximity),
"attribute" => Ok(RankingRule::Attribute),
"sort" => Ok(RankingRule::Sort),
"exactness" => Ok(RankingRule::Exactness),
text => match (AscDesc::from_str(text), Boost::from_str(text)) {
(Ok(asc_desc), _) => match asc_desc {
AscDesc::Asc(Member::Field(field)) => Ok(RankingRule::Asc(field)),
AscDesc::Desc(Member::Field(field)) => Ok(RankingRule::Desc(field)),
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
Err(RankingRuleError::ReservedNameForSort {
name: "_geoPoint".to_string(),
})?
}
},
(_, Ok(Boost(filter))) => Ok(RankingRule::Boost(filter)),
(
Err(AscDescError::InvalidSyntax { name: asc_desc_name }),
Err(BoostError::InvalidSyntax { name: boost_name }),
) => Err(RankingRuleError::InvalidName {
// TODO improve the error message quality
name: format!("{asc_desc_name} {boost_name}"),
}),
(Err(asc_desc_error), _) => Err(asc_desc_error.into()),
},
}
}
}
pub fn default_criteria() -> Vec<Criterion> {
pub fn default_criteria() -> Vec<RankingRule> {
vec![
Criterion::Words,
Criterion::Typo,
Criterion::Proximity,
Criterion::Attribute,
Criterion::Sort,
Criterion::Exactness,
RankingRule::Words,
RankingRule::Typo,
RankingRule::Proximity,
RankingRule::Attribute,
RankingRule::Sort,
RankingRule::Exactness,
]
}
impl fmt::Display for Criterion {
impl fmt::Display for RankingRule {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use Criterion::*;
use RankingRule::*;
match self {
Words => f.write_str("words"),
Boost(filter) => write!(f, "boost:{filter}"),
Typo => f.write_str("typo"),
Proximity => f.write_str("proximity"),
Attribute => f.write_str("attribute"),
Sort => f.write_str("sort"),
Exactness => f.write_str("exactness"),
Asc(attr) => write!(f, "{}:asc", attr),
Desc(attr) => write!(f, "{}:desc", attr),
Asc(attr) => write!(f, "{attr}:asc"),
Desc(attr) => write!(f, "{attr}:desc"),
}
}
}
@ -110,29 +127,29 @@ impl fmt::Display for Criterion {
#[cfg(test)]
mod tests {
use big_s::S;
use CriterionError::*;
use RankingRuleError::*;
use super::*;
#[test]
fn parse_criterion() {
let valid_criteria = [
("words", Criterion::Words),
("typo", Criterion::Typo),
("proximity", Criterion::Proximity),
("attribute", Criterion::Attribute),
("sort", Criterion::Sort),
("exactness", Criterion::Exactness),
("price:asc", Criterion::Asc(S("price"))),
("price:desc", Criterion::Desc(S("price"))),
("price:asc:desc", Criterion::Desc(S("price:asc"))),
("truc:machin:desc", Criterion::Desc(S("truc:machin"))),
("hello-world!:desc", Criterion::Desc(S("hello-world!"))),
("it's spacy over there:asc", Criterion::Asc(S("it's spacy over there"))),
("words", RankingRule::Words),
("typo", RankingRule::Typo),
("proximity", RankingRule::Proximity),
("attribute", RankingRule::Attribute),
("sort", RankingRule::Sort),
("exactness", RankingRule::Exactness),
("price:asc", RankingRule::Asc(S("price"))),
("price:desc", RankingRule::Desc(S("price"))),
("price:asc:desc", RankingRule::Desc(S("price:asc"))),
("truc:machin:desc", RankingRule::Desc(S("truc:machin"))),
("hello-world!:desc", RankingRule::Desc(S("hello-world!"))),
("it's spacy over there:asc", RankingRule::Asc(S("it's spacy over there"))),
];
for (input, expected) in valid_criteria {
let res = input.parse::<Criterion>();
let res = input.parse::<RankingRule>();
assert!(
res.is_ok(),
"Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
@ -167,7 +184,7 @@ mod tests {
];
for (input, expected) in invalid_criteria {
let res = input.parse::<Criterion>();
let res = input.parse::<RankingRule>();
assert!(
res.is_err(),
"Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`",

View File

@ -5,6 +5,7 @@ use crate::distance_between_two_points;
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
Words(Words),
Boost(Boost),
Typo(Typo),
Proximity(Rank),
Fid(Rank),
@ -23,6 +24,7 @@ impl ScoreDetails {
pub fn rank(&self) -> Option<Rank> {
match self {
ScoreDetails::Words(details) => Some(details.rank()),
ScoreDetails::Boost(_) => None,
ScoreDetails::Typo(details) => Some(details.rank()),
ScoreDetails::Proximity(details) => Some(*details),
ScoreDetails::Fid(details) => Some(*details),
@ -60,6 +62,14 @@ impl ScoreDetails {
details_map.insert("words".into(), words_details);
order += 1;
}
ScoreDetails::Boost(Boost { filter, matching }) => {
let sort = format!("boost:{}", filter);
let sort_details = serde_json::json!({
"value": matching,
});
details_map.insert(sort, sort_details);
order += 1;
}
ScoreDetails::Typo(typo) => {
let typo_details = serde_json::json!({
"order": order,
@ -221,6 +231,12 @@ impl Words {
}
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Boost {
pub filter: String,
pub matching: bool,
}
/// Structure that is super similar to [`Words`], but whose semantics is a bit distinct.
///
/// In exactness, the number of matching words can actually be 0 with a non-zero score,

View File

@ -0,0 +1,88 @@
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::score_details::{self, ScoreDetails};
use crate::{Filter, Result};
pub struct Boost<Query> {
original_expression: String,
original_query: Option<Query>,
matching: Option<RankingRuleOutput<Query>>,
non_matching: Option<RankingRuleOutput<Query>>,
}
impl<Query> Boost<Query> {
pub fn new(expression: String) -> Result<Self> {
Ok(Self {
original_expression: expression,
original_query: None,
matching: None,
non_matching: None,
})
}
}
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost<Query> {
fn id(&self) -> String {
// TODO improve this
let Self { original_expression, .. } = self;
format!("boost:{original_expression}")
}
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
parent_candidates: &RoaringBitmap,
parent_query: &Query,
) -> Result<()> {
let universe_matching = match Filter::from_str(&self.original_expression)? {
Some(filter) => filter.evaluate(ctx.txn, ctx.index)?,
None => RoaringBitmap::default(),
};
let matching = parent_candidates & universe_matching;
let non_matching = parent_candidates - &matching;
self.original_query = Some(parent_query.clone());
self.matching = Some(RankingRuleOutput {
query: parent_query.clone(),
candidates: matching,
score: ScoreDetails::Boost(score_details::Boost {
filter: self.original_expression.clone(),
matching: true,
}),
});
self.non_matching = Some(RankingRuleOutput {
query: parent_query.clone(),
candidates: non_matching,
score: ScoreDetails::Boost(score_details::Boost {
filter: self.original_expression.clone(),
matching: false,
}),
});
Ok(())
}
fn next_bucket(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
_universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Query>>> {
Ok(self.matching.take().or_else(|| self.non_matching.take()))
}
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
) {
self.original_query = None;
self.matching = None;
self.non_matching = None;
}
}

View File

@ -14,6 +14,7 @@ mod ranking_rules;
mod resolve_query_graph;
mod small_bitmap;
mod boost;
mod exact_attribute;
mod sort;
@ -22,6 +23,7 @@ mod tests;
use std::collections::HashSet;
use boost::Boost;
use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::TokenizerBuilder;
use db_cache::DatabaseCache;
@ -203,12 +205,13 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
for rr in settings_ranking_rules {
match rr {
// These rules need a query to have an effect; ignore them in placeholder search
crate::Criterion::Words
| crate::Criterion::Typo
| crate::Criterion::Attribute
| crate::Criterion::Proximity
| crate::Criterion::Exactness => continue,
crate::Criterion::Sort => {
crate::RankingRule::Words
| crate::RankingRule::Typo
| crate::RankingRule::Attribute
| crate::RankingRule::Proximity
| crate::RankingRule::Exactness => continue,
crate::RankingRule::Boost(filter) => ranking_rules.push(Box::new(Boost::new(filter)?)),
crate::RankingRule::Sort => {
if sort {
continue;
}
@ -222,14 +225,14 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
)?;
sort = true;
}
crate::Criterion::Asc(field_name) => {
crate::RankingRule::Asc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
crate::Criterion::Desc(field_name) => {
crate::RankingRule::Desc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
@ -268,10 +271,10 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
for rr in settings_ranking_rules {
// Add Words before any of: typo, proximity, attribute
match rr {
crate::Criterion::Typo
| crate::Criterion::Attribute
| crate::Criterion::Proximity
| crate::Criterion::Exactness => {
crate::RankingRule::Typo
| crate::RankingRule::Attribute
| crate::RankingRule::Proximity
| crate::RankingRule::Exactness => {
if !words {
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
words = true;
@ -280,28 +283,31 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
_ => {}
}
match rr {
crate::Criterion::Words => {
crate::RankingRule::Words => {
if words {
continue;
}
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
words = true;
}
crate::Criterion::Typo => {
crate::RankingRule::Boost(filter) => {
ranking_rules.push(Box::new(Boost::new(filter)?));
}
crate::RankingRule::Typo => {
if typo {
continue;
}
typo = true;
ranking_rules.push(Box::new(Typo::new(None)));
}
crate::Criterion::Proximity => {
crate::RankingRule::Proximity => {
if proximity {
continue;
}
proximity = true;
ranking_rules.push(Box::new(Proximity::new(None)));
}
crate::Criterion::Attribute => {
crate::RankingRule::Attribute => {
if attribute {
continue;
}
@ -309,7 +315,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
ranking_rules.push(Box::new(Fid::new(None)));
ranking_rules.push(Box::new(Position::new(None)));
}
crate::Criterion::Sort => {
crate::RankingRule::Sort => {
if sort {
continue;
}
@ -323,7 +329,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
)?;
sort = true;
}
crate::Criterion::Exactness => {
crate::RankingRule::Exactness => {
if exactness {
continue;
}
@ -331,14 +337,15 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
ranking_rules.push(Box::new(Exactness::new()));
exactness = true;
}
crate::Criterion::Asc(field_name) => {
crate::RankingRule::Asc(field_name) => {
// TODO Question: Why would it be invalid to sort price:asc, typo, price:desc?
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
crate::Criterion::Desc(field_name) => {
crate::RankingRule::Desc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
@ -580,7 +587,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
// We check that the sort ranking rule exists and throw an
// error if we try to use it and that it doesn't.
let sort_ranking_rule_missing = !ctx.index.criteria(ctx.txn)?.contains(&crate::Criterion::Sort);
let sort_ranking_rule_missing =
!ctx.index.criteria(ctx.txn)?.contains(&crate::RankingRule::Sort);
if sort_ranking_rule_missing {
return Err(UserError::SortRankingRuleMissing.into());
}

View File

@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -12,7 +12,7 @@ fn create_index() -> TempIndex {
"description".to_owned(),
"plot".to_owned(),
]);
s.set_criteria(vec![Criterion::Attribute]);
s.set_ranking_rules(vec![RankingRule::Attribute]);
})
.unwrap();

View File

@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -12,7 +12,7 @@ fn create_index() -> TempIndex {
"text2".to_owned(),
"other".to_owned(),
]);
s.set_criteria(vec![Criterion::Attribute]);
s.set_ranking_rules(vec![RankingRule::Attribute]);
})
.unwrap();

View File

@ -19,7 +19,7 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use crate::{AscDesc, Index, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -30,7 +30,7 @@ fn create_index() -> TempIndex {
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_sortable_fields(hashset! { S("rank1"), S("letter") });
s.set_distinct_field("letter".to_owned());
s.set_criteria(vec![Criterion::Words]);
s.set_ranking_rules(vec![RankingRule::Words]);
})
.unwrap();
@ -252,7 +252,7 @@ fn test_distinct_placeholder_sort() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Sort]);
s.set_ranking_rules(vec![RankingRule::Sort]);
})
.unwrap();
@ -387,7 +387,7 @@ fn test_distinct_words() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words]);
s.set_ranking_rules(vec![RankingRule::Words]);
})
.unwrap();
@ -440,7 +440,11 @@ fn test_distinct_sort_words() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Sort, Criterion::Words, Criterion::Desc(S("rank1"))]);
s.set_ranking_rules(vec![
RankingRule::Sort,
RankingRule::Words,
RankingRule::Desc(S("rank1")),
]);
})
.unwrap();
@ -513,7 +517,7 @@ fn test_distinct_all_candidates() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Sort]);
s.set_ranking_rules(vec![RankingRule::Sort]);
})
.unwrap();
@ -536,7 +540,7 @@ fn test_distinct_typo() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Typo]);
})
.unwrap();

View File

@ -21,7 +21,7 @@ Then these rules will only work with
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index_simple_ordered() -> TempIndex {
let index = TempIndex::new();
@ -30,7 +30,7 @@ fn create_index_simple_ordered() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
})
.unwrap();
@ -89,7 +89,7 @@ fn create_index_simple_reversed() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
})
.unwrap();
@ -147,7 +147,7 @@ fn create_index_simple_random() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
})
.unwrap();
@ -201,7 +201,7 @@ fn create_index_attribute_starts_with() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
})
.unwrap();
@ -251,7 +251,7 @@ fn create_index_simple_ordered_with_typos() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
})
.unwrap();
@ -350,7 +350,11 @@ fn create_index_with_varying_proximities() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
})
.unwrap();
@ -404,7 +408,7 @@ fn create_index_with_typo_and_prefix() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
})
.unwrap();
@ -442,7 +446,11 @@ fn create_index_all_equal_except_proximity_between_ignored_terms() -> TempIndex
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
})
.unwrap();
@ -698,7 +706,7 @@ fn test_exactness_after_words() {
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Exactness]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Exactness]);
})
.unwrap();
@ -747,7 +755,7 @@ fn test_words_after_exactness() {
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Exactness, Criterion::Words]);
s.set_ranking_rules(vec![RankingRule::Exactness, RankingRule::Words]);
})
.unwrap();
@ -796,7 +804,11 @@ fn test_proximity_after_exactness() {
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
})
.unwrap();
@ -834,7 +846,11 @@ fn test_proximity_after_exactness() {
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
})
.unwrap();
@ -868,7 +884,11 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Typo]);
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Typo,
]);
})
.unwrap();
@ -904,7 +924,11 @@ fn test_typo_followed_by_exactness() {
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Exactness]);
s.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Typo,
RankingRule::Exactness,
]);
})
.unwrap();

View File

@ -9,7 +9,7 @@ use maplit::hashset;
use crate::index::tests::TempIndex;
use crate::score_details::ScoreDetails;
use crate::search::new::tests::collect_field_values;
use crate::{AscDesc, Criterion, GeoSortStrategy, Member, Search, SearchResult};
use crate::{AscDesc, GeoSortStrategy, Member, RankingRule, Search, SearchResult};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -18,7 +18,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_sortable_fields(hashset! { S("_geo") });
s.set_criteria(vec![Criterion::Words, Criterion::Sort]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Sort]);
})
.unwrap();
index

View File

@ -6,10 +6,10 @@ use maplit::{btreemap, hashset};
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::{db_snap, Criterion, Index, Object};
use crate::{db_snap, Index, Object, RankingRule};
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
pub fn setup_search_index_with_criteria(criteria: &[RankingRule]) -> Index {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
@ -20,7 +20,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.to_vec());
builder.set_ranking_rules(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
@ -70,6 +70,6 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
#[test]
fn snapshot_integration_dataset() {
let index = setup_search_index_with_criteria(&[Criterion::Attribute]);
let index = setup_search_index_with_criteria(&[RankingRule::Attribute]);
db_snap!(index, word_position_docids, @"3c9347a767bceef3beb31465f1e5f3ae");
}

View File

@ -19,7 +19,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -28,7 +28,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words]);
s.set_ranking_rules(vec![RankingRule::Words]);
})
.unwrap();

View File

@ -19,7 +19,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_simple_index() -> TempIndex {
let index = TempIndex::new();
@ -28,7 +28,7 @@ fn create_simple_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
})
.unwrap();
@ -94,7 +94,7 @@ fn create_edge_cases_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
})
.unwrap();

View File

@ -8,7 +8,7 @@ implemented.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -17,7 +17,11 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity, Criterion::Typo]);
s.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Proximity,
RankingRule::Typo,
]);
})
.unwrap();

View File

@ -17,7 +17,7 @@ use maplit::hashset;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{
score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy,
score_details, AscDesc, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy,
};
fn create_index() -> TempIndex {
@ -28,7 +28,7 @@ fn create_index() -> TempIndex {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
s.set_criteria(vec![Criterion::Sort]);
s.set_ranking_rules(vec![RankingRule::Sort]);
})
.unwrap();
@ -331,7 +331,7 @@ fn test_redacted() {
.update_settings(|s| {
s.set_displayed_fields(vec!["text".to_owned(), "vague".to_owned()]);
s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
s.set_criteria(vec![Criterion::Sort]);
s.set_ranking_rules(vec![RankingRule::Sort]);
})
.unwrap();

View File

@ -22,7 +22,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -31,7 +31,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words]);
s.set_ranking_rules(vec![RankingRule::Words]);
})
.unwrap();
@ -457,7 +457,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Typo]);
s.set_ranking_rules(vec![RankingRule::Typo]);
})
.unwrap();
@ -495,7 +495,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Typo]);
})
.unwrap();
@ -540,7 +540,7 @@ fn test_typo_bucketing() {
drop(txn);
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Typo]);
s.set_ranking_rules(vec![RankingRule::Typo]);
})
.unwrap();
let txn = index.read_txn().unwrap();
@ -589,7 +589,7 @@ fn test_typo_synonyms() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Typo]);
s.set_ranking_rules(vec![RankingRule::Typo]);
let mut synonyms = BTreeMap::new();
synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);

View File

@ -17,7 +17,7 @@ because the typo ranking rule before it only used the derivation `beautiful`.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -26,7 +26,11 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Proximity]);
s.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Typo,
RankingRule::Proximity,
]);
})
.unwrap();

View File

@ -14,7 +14,7 @@ account by the proximity ranking rule.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -23,7 +23,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_criteria(vec![Criterion::Words]);
s.set_ranking_rules(vec![RankingRule::Words]);
})
.unwrap();
@ -265,7 +265,7 @@ fn test_words_proximity_tms_last_simple() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
})
.unwrap();
@ -346,7 +346,7 @@ fn test_words_proximity_tms_last_phrase() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
})
.unwrap();
@ -416,7 +416,7 @@ fn test_words_tms_all() {
let index = create_index();
index
.update_settings(|s| {
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
})
.unwrap();

View File

@ -9,9 +9,9 @@ use time::OffsetDateTime;
use super::index_documents::{IndexDocumentsConfig, Transform};
use super::IndexerConfig;
use crate::criterion::Criterion;
use crate::error::UserError;
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::ranking_rule::RankingRule;
use crate::update::index_documents::IndexDocumentsMethod;
use crate::update::{IndexDocuments, UpdateIndexingStep};
use crate::{FieldsIdsMap, Index, OrderBy, Result};
@ -110,7 +110,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
displayed_fields: Setting<Vec<String>>,
filterable_fields: Setting<HashSet<String>>,
sortable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<Criterion>>,
ranking_rules: Setting<Vec<RankingRule>>,
stop_words: Setting<BTreeSet<String>>,
non_separator_tokens: Setting<BTreeSet<String>>,
separator_tokens: Setting<BTreeSet<String>>,
@ -142,7 +142,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
displayed_fields: Setting::NotSet,
filterable_fields: Setting::NotSet,
sortable_fields: Setting::NotSet,
criteria: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
@ -194,12 +194,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.sortable_fields = Setting::Reset;
}
pub fn reset_criteria(&mut self) {
self.criteria = Setting::Reset;
pub fn reset_ranking_rules(&mut self) {
self.ranking_rules = Setting::Reset;
}
pub fn set_criteria(&mut self, criteria: Vec<Criterion>) {
self.criteria = Setting::Set(criteria);
pub fn set_ranking_rules(&mut self, ranking_rules: Vec<RankingRule>) {
self.ranking_rules = Setting::Set(ranking_rules);
}
pub fn reset_stop_words(&mut self) {
@ -696,7 +696,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
fn update_criteria(&mut self) -> Result<()> {
match &self.criteria {
match &self.ranking_rules {
Setting::Set(criteria) => {
self.index.put_criteria(self.wtxn, criteria)?;
}
@ -924,7 +924,7 @@ mod tests {
use crate::error::Error;
use crate::index::tests::TempIndex;
use crate::update::{ClearDocuments, DeleteDocuments};
use crate::{Criterion, Filter, SearchResult};
use crate::{Filter, RankingRule, SearchResult};
#[test]
fn set_and_reset_searchable_fields() {
@ -1167,7 +1167,7 @@ mod tests {
index
.update_settings(|settings| {
settings.set_displayed_fields(vec![S("name")]);
settings.set_criteria(vec![Criterion::Asc("age".to_owned())]);
settings.set_ranking_rules(vec![RankingRule::Asc("age".to_owned())]);
})
.unwrap();
@ -1473,7 +1473,7 @@ mod tests {
.update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]);
settings.set_filterable_fields(hashset! { S("age"), S("toto") });
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
settings.set_ranking_rules(vec![RankingRule::Asc(S("toto"))]);
})
.unwrap();
@ -1482,7 +1482,7 @@ mod tests {
assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
// since no documents have been pushed the primary key is still unset
assert!(index.primary_key(&rtxn).unwrap().is_none());
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
assert_eq!(vec![RankingRule::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
drop(rtxn);
// We set toto and age as searchable to force reordering of the fields
@ -1495,7 +1495,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap());
assert!(index.primary_key(&rtxn).unwrap().is_none());
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
assert_eq!(vec![RankingRule::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
}
#[test]
@ -1507,7 +1507,7 @@ mod tests {
.update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]);
// It is only Asc(toto), there is a facet database but it is denied to filter with toto.
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
settings.set_ranking_rules(vec![RankingRule::Asc(S("toto"))]);
})
.unwrap();
@ -1715,7 +1715,7 @@ mod tests {
displayed_fields,
filterable_fields,
sortable_fields,
criteria,
ranking_rules: criteria,
stop_words,
non_separator_tokens,
separator_tokens,

View File

@ -2,8 +2,8 @@ use std::collections::HashSet;
use big_s::S;
use milli::update::Settings;
use milli::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use milli::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use RankingRule::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

View File

@ -1,6 +1,6 @@
use either::{Either, Left, Right};
use milli::{Criterion, Filter, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use milli::{Filter, RankingRule, Search, SearchResult, TermsMatchingStrategy};
use RankingRule::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

View File

@ -8,7 +8,7 @@ use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
use milli::{AscDesc, DocumentId, Index, Member, Object, RankingRule, TermsMatchingStrategy};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;
@ -27,7 +27,7 @@ pub const EXTERNAL_DOCUMENTS_IDS: &[&str; 17] =
pub const CONTENT: &str = include_str!("../assets/test_set.ndjson");
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
pub fn setup_search_index_with_criteria(criteria: &[RankingRule]) -> Index {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
@ -38,7 +38,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.to_vec());
builder.set_ranking_rules(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
@ -95,7 +95,7 @@ pub fn internal_to_external_ids(index: &Index, internal_ids: &[DocumentId]) -> V
}
pub fn expected_order(
criteria: &[Criterion],
criteria: &[RankingRule],
optional_words: TermsMatchingStrategy,
sort_by: &[AscDesc],
) -> Vec<TestDocument> {
@ -107,47 +107,56 @@ pub fn expected_order(
let mut new_groups = Vec::new();
for group in groups.iter_mut() {
match criterion {
Criterion::Attribute => {
RankingRule::Attribute => {
group.sort_by_key(|d| d.attribute_rank);
new_groups
.extend(group.linear_group_by_key(|d| d.attribute_rank).map(Vec::from));
}
Criterion::Exactness => {
RankingRule::Exactness => {
group.sort_by_key(|d| d.exact_rank);
new_groups.extend(group.linear_group_by_key(|d| d.exact_rank).map(Vec::from));
}
Criterion::Proximity => {
RankingRule::Proximity => {
group.sort_by_key(|d| d.proximity_rank);
new_groups
.extend(group.linear_group_by_key(|d| d.proximity_rank).map(Vec::from));
}
Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
RankingRule::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
group.sort_by_key(|d| d.sort_by_rank);
new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
}
Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
RankingRule::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
group.sort_by_key(|d| Reverse(d.sort_by_rank));
new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
}
Criterion::Typo => {
RankingRule::Typo => {
group.sort_by_key(|d| d.typo_rank);
new_groups.extend(group.linear_group_by_key(|d| d.typo_rank).map(Vec::from));
}
Criterion::Words => {
RankingRule::Words => {
group.sort_by_key(|d| d.word_rank);
new_groups.extend(group.linear_group_by_key(|d| d.word_rank).map(Vec::from));
}
Criterion::Asc(field_name) if field_name == "asc_desc_rank" => {
RankingRule::Asc(field_name) if field_name == "asc_desc_rank" => {
group.sort_by_key(|d| d.asc_desc_rank);
new_groups
.extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from));
}
Criterion::Desc(field_name) if field_name == "asc_desc_rank" => {
RankingRule::Desc(field_name) if field_name == "asc_desc_rank" => {
group.sort_by_key(|d| Reverse(d.asc_desc_rank));
new_groups
.extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from));
}
Criterion::Asc(_) | Criterion::Desc(_) | Criterion::Sort => {
RankingRule::Boost(filter) => {
// move the matching documents first, then the ones that don't match
group.sort_by_key(|d| if execute_filter(filter, d).is_some() { 0 } else { 1 });
new_groups.extend(
group
.linear_group_by_key(|d| execute_filter(filter, d).is_some())
.map(Vec::from),
);
}
RankingRule::Asc(_) | RankingRule::Desc(_) | RankingRule::Sort => {
new_groups.push(group.clone())
}
}

View File

@ -1,7 +1,7 @@
use milli::update::{IndexerConfig, Settings};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
use milli::{Index, RankingRule, Search, TermsMatchingStrategy};
use crate::search::Criterion::{Attribute, Exactness, Proximity};
use crate::search::RankingRule::{Attribute, Exactness, Proximity};
fn set_stop_words(index: &Index, stop_words: &[&str]) {
let mut wtxn = index.write_txn().unwrap();
@ -14,7 +14,7 @@ fn set_stop_words(index: &Index, stop_words: &[&str]) {
wtxn.commit().unwrap();
}
fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) {
fn test_phrase_search_with_stop_words_given_criteria(criteria: &[RankingRule]) {
let index = super::setup_search_index_with_criteria(criteria);
// Add stop_words

View File

@ -7,9 +7,9 @@ use itertools::Itertools;
use maplit::hashset;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use milli::{AscDesc, Index, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy};
use rand::Rng;
use Criterion::*;
use RankingRule::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
@ -88,7 +88,7 @@ test_criterion!(
#[test]
fn criteria_mixup() {
use Criterion::*;
use RankingRule::*;
let index = search::setup_search_index_with_criteria(&[
Words,
Attribute,
@ -233,7 +233,7 @@ fn criteria_mixup() {
//update criteria
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.clone());
builder.set_ranking_rules(criteria.clone());
builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap();
@ -324,7 +324,7 @@ fn criteria_ascdesc() {
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(vec![criterion.clone()]);
builder.set_ranking_rules(vec![criterion.clone()]);
builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap();

View File

@ -1,5 +1,5 @@
use big_s::S;
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
use milli::RankingRule::{Attribute, Exactness, Proximity, Typo, Words};
use milli::{AscDesc, Error, Member, Search, TermsMatchingStrategy, UserError};
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

View File

@ -2,10 +2,10 @@ use std::collections::BTreeSet;
use heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
use milli::{Index, RankingRule, Search, TermsMatchingStrategy};
use serde_json::json;
use tempfile::tempdir;
use Criterion::*;
use RankingRule::*;
#[test]
fn test_typo_tolerance_one_typo() {