4257: Change proximity precision settings r=dureuill a=ManyTheFish

- [x] Add proximity_precision value into the analytics
- [x] Change the naming of `attributeScale` and `wordScale` into `byAttribute` and `byWord`
- [x] Remove proximityPrecision from the experimental feature

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot]
2023-12-18 09:07:28 +00:00
committed by GitHub
14 changed files with 44 additions and 122 deletions

View File

@@ -1354,9 +1354,6 @@ impl IndexScheduler {
if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) { if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
self.features().check_vector("Passing `embedders` in settings")? self.features().check_vector("Passing `embedders` in settings")?
} }
if checked_settings.proximity_precision.set().is_some() {
self.features.features().check_proximity_precision()?;
}
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
apply_settings_to_builder(&checked_settings, &mut builder); apply_settings_to_builder(&checked_settings, &mut builder);

View File

@@ -81,19 +81,6 @@ impl RoFeatures {
.into()) .into())
} }
} }
pub fn check_proximity_precision(&self) -> Result<()> {
if self.runtime.proximity_precision {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `proximityPrecision` index setting",
feature: "proximity precision",
issue_link: "https://github.com/orgs/meilisearch/discussions/710",
}
.into())
}
}
} }
impl FeatureData { impl FeatureData {

View File

@@ -7,7 +7,6 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: bool, pub vector_store: bool,
pub metrics: bool, pub metrics: bool,
pub export_puffin_reports: bool, pub export_puffin_reports: bool,
pub proximity_precision: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]

View File

@@ -724,23 +724,23 @@ impl From<RankingRuleView> for Criterion {
#[serde(deny_unknown_fields, rename_all = "camelCase")] #[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)] #[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView { pub enum ProximityPrecisionView {
WordScale, ByWord,
AttributeScale, ByAttribute,
} }
impl From<ProximityPrecision> for ProximityPrecisionView { impl From<ProximityPrecision> for ProximityPrecisionView {
fn from(value: ProximityPrecision) -> Self { fn from(value: ProximityPrecision) -> Self {
match value { match value {
ProximityPrecision::WordScale => ProximityPrecisionView::WordScale, ProximityPrecision::ByWord => ProximityPrecisionView::ByWord,
ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale, ProximityPrecision::ByAttribute => ProximityPrecisionView::ByAttribute,
} }
} }
} }
impl From<ProximityPrecisionView> for ProximityPrecision { impl From<ProximityPrecisionView> for ProximityPrecision {
fn from(value: ProximityPrecisionView) -> Self { fn from(value: ProximityPrecisionView) -> Self {
match value { match value {
ProximityPrecisionView::WordScale => ProximityPrecision::WordScale, ProximityPrecisionView::ByWord => ProximityPrecision::ByWord,
ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale, ProximityPrecisionView::ByAttribute => ProximityPrecision::ByAttribute,
} }
} }
} }

View File

@@ -48,8 +48,6 @@ pub struct RuntimeTogglableFeatures {
pub metrics: Option<bool>, pub metrics: Option<bool>,
#[deserr(default)] #[deserr(default)]
pub export_puffin_reports: Option<bool>, pub export_puffin_reports: Option<bool>,
#[deserr(default)]
pub proximity_precision: Option<bool>,
} }
async fn patch_features( async fn patch_features(
@@ -72,10 +70,6 @@ async fn patch_features(
.0 .0
.export_puffin_reports .export_puffin_reports
.unwrap_or(old_features.export_puffin_reports), .unwrap_or(old_features.export_puffin_reports),
proximity_precision: new_features
.0
.proximity_precision
.unwrap_or(old_features.proximity_precision),
}; };
// explicitly destructure for analytics rather than using the `Serialize` implementation, because // explicitly destructure for analytics rather than using the `Serialize` implementation, because
@@ -86,7 +80,6 @@ async fn patch_features(
vector_store, vector_store,
metrics, metrics,
export_puffin_reports, export_puffin_reports,
proximity_precision,
} = new_features; } = new_features;
analytics.publish( analytics.publish(
@@ -96,7 +89,6 @@ async fn patch_features(
"vector_store": vector_store, "vector_store": vector_store,
"metrics": metrics, "metrics": metrics,
"export_puffin_reports": export_puffin_reports, "export_puffin_reports": export_puffin_reports,
"proximity_precision": proximity_precision,
}), }),
Some(&req), Some(&req),
); );

View File

@@ -453,6 +453,7 @@ make_setting_route!(
json!({ json!({
"proximity_precision": { "proximity_precision": {
"set": precision.is_some(), "set": precision.is_some(),
"value": precision,
} }
}), }),
Some(req), Some(req),

View File

@@ -1860,8 +1860,7 @@ async fn import_dump_v6_containing_experimental_features() {
"scoreDetails": false, "scoreDetails": false,
"vectorStore": false, "vectorStore": false,
"metrics": false, "metrics": false,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);
@@ -1890,7 +1889,7 @@ async fn import_dump_v6_containing_experimental_features() {
"dictionary": [], "dictionary": [],
"synonyms": {}, "synonyms": {},
"distinctAttribute": null, "distinctAttribute": null,
"proximityPrecision": "attributeScale", "proximityPrecision": "byAttribute",
"typoTolerance": { "typoTolerance": {
"enabled": true, "enabled": true,
"minWordSizeForTypos": { "minWordSizeForTypos": {

View File

@@ -21,8 +21,7 @@ async fn experimental_features() {
"scoreDetails": false, "scoreDetails": false,
"vectorStore": false, "vectorStore": false,
"metrics": false, "metrics": false,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);
@@ -34,8 +33,7 @@ async fn experimental_features() {
"scoreDetails": false, "scoreDetails": false,
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);
@@ -47,8 +45,7 @@ async fn experimental_features() {
"scoreDetails": false, "scoreDetails": false,
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);
@@ -61,8 +58,7 @@ async fn experimental_features() {
"scoreDetails": false, "scoreDetails": false,
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);
@@ -75,8 +71,7 @@ async fn experimental_features() {
"scoreDetails": false, "scoreDetails": false,
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);
} }
@@ -96,8 +91,7 @@ async fn experimental_feature_metrics() {
"scoreDetails": false, "scoreDetails": false,
"vectorStore": false, "vectorStore": false,
"metrics": true, "metrics": true,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);
@@ -152,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{ {
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`", "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request", "code": "bad_request",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request" "link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -16,8 +16,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
"scoreDetails": false, "scoreDetails": false,
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"exportPuffinReports": false, "exportPuffinReports": false
"proximityPrecision": false
} }
"###); "###);

View File

@@ -27,17 +27,6 @@ static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
#[actix_rt::test] #[actix_rt::test]
async fn attribute_scale_search() { async fn attribute_scale_search() {
let server = Server::new().await; let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test"); let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await; index.add_documents(DOCUMENTS.clone(), None).await;
@@ -45,7 +34,7 @@ async fn attribute_scale_search() {
let (response, code) = index let (response, code) = index
.update_settings(json!({ .update_settings(json!({
"proximityPrecision": "attributeScale", "proximityPrecision": "byAttribute",
"rankingRules": ["words", "typo", "proximity"], "rankingRules": ["words", "typo", "proximity"],
})) }))
.await; .await;
@@ -111,17 +100,6 @@ async fn attribute_scale_search() {
#[actix_rt::test] #[actix_rt::test]
async fn attribute_scale_phrase_search() { async fn attribute_scale_phrase_search() {
let server = Server::new().await; let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test"); let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await; index.add_documents(DOCUMENTS.clone(), None).await;
@@ -129,7 +107,7 @@ async fn attribute_scale_phrase_search() {
let (_response, _code) = index let (_response, _code) = index
.update_settings(json!({ .update_settings(json!({
"proximityPrecision": "attributeScale", "proximityPrecision": "byAttribute",
"rankingRules": ["words", "typo", "proximity"], "rankingRules": ["words", "typo", "proximity"],
})) }))
.await; .await;
@@ -190,17 +168,6 @@ async fn attribute_scale_phrase_search() {
#[actix_rt::test] #[actix_rt::test]
async fn word_scale_set_and_reset() { async fn word_scale_set_and_reset() {
let server = Server::new().await; let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test"); let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await; index.add_documents(DOCUMENTS.clone(), None).await;
@@ -209,7 +176,7 @@ async fn word_scale_set_and_reset() {
// Set and reset the setting ensuring the swap between the 2 settings is applied. // Set and reset the setting ensuring the swap between the 2 settings is applied.
let (_response, _code) = index let (_response, _code) = index
.update_settings(json!({ .update_settings(json!({
"proximityPrecision": "attributeScale", "proximityPrecision": "byAttribute",
"rankingRules": ["words", "typo", "proximity"], "rankingRules": ["words", "typo", "proximity"],
})) }))
.await; .await;
@@ -217,7 +184,7 @@ async fn word_scale_set_and_reset() {
let (_response, _code) = index let (_response, _code) = index
.update_settings(json!({ .update_settings(json!({
"proximityPrecision": "wordScale", "proximityPrecision": "byWord",
"rankingRules": ["words", "typo", "proximity"], "rankingRules": ["words", "typo", "proximity"],
})) }))
.await; .await;
@@ -316,17 +283,6 @@ async fn word_scale_set_and_reset() {
#[actix_rt::test] #[actix_rt::test]
async fn attribute_scale_default_ranking_rules() { async fn attribute_scale_default_ranking_rules() {
let server = Server::new().await; let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test"); let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await; index.add_documents(DOCUMENTS.clone(), None).await;
@@ -334,7 +290,7 @@ async fn attribute_scale_default_ranking_rules() {
let (response, code) = index let (response, code) = index
.update_settings(json!({ .update_settings(json!({
"proximityPrecision": "attributeScale" "proximityPrecision": "byAttribute"
})) }))
.await; .await;
assert_eq!("202", code.as_str(), "{:?}", response); assert_eq!("202", code.as_str(), "{:?}", response);

View File

@@ -32,6 +32,6 @@ pub fn path_proximity(path: &[Position]) -> u32 {
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub enum ProximityPrecision { pub enum ProximityPrecision {
#[default] #[default]
WordScale, ByWord,
AttributeScale, ByAttribute,
} }

View File

@@ -299,9 +299,9 @@ impl<'ctx> SearchContext<'ctx> {
proximity: u8, proximity: u8,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
match self.index.proximity_precision(self.txn)?.unwrap_or_default() { match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => { ProximityPrecision::ByAttribute => {
// Force proximity to 0 because: // Force proximity to 0 because:
// in AttributeScale, there are only 2 possible distances: // in ByAttribute, there are only 2 possible distances:
// 1. words in same attribute: in that the DB contains (0, word1, word2) // 1. words in same attribute: in that the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words. // 2. words in different attributes: no DB entry for these two words.
let proximity = 0; let proximity = 0;
@@ -345,8 +345,7 @@ impl<'ctx> SearchContext<'ctx> {
Ok(docids) Ok(docids)
} }
ProximityPrecision::WordScale => { ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn, self.txn,
(proximity, word1, word2), (proximity, word1, word2),
&( &(
@@ -356,8 +355,7 @@ impl<'ctx> SearchContext<'ctx> {
), ),
&mut self.db_cache.word_pair_proximity_docids, &mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(), self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
) ),
}
} }
} }
@@ -368,10 +366,10 @@ impl<'ctx> SearchContext<'ctx> {
proximity: u8, proximity: u8,
) -> Result<Option<u64>> { ) -> Result<Option<u64>> {
match self.index.proximity_precision(self.txn)?.unwrap_or_default() { match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => Ok(self ProximityPrecision::ByAttribute => Ok(self
.get_db_word_pair_proximity_docids(word1, word2, proximity)? .get_db_word_pair_proximity_docids(word1, word2, proximity)?
.map(|d| d.len())), .map(|d| d.len())),
ProximityPrecision::WordScale => { ProximityPrecision::ByWord => {
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>( DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
self.txn, self.txn,
(proximity, word1, word2), (proximity, word1, word2),
@@ -394,9 +392,9 @@ impl<'ctx> SearchContext<'ctx> {
mut proximity: u8, mut proximity: u8,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default(); let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::AttributeScale { if proximity_precision == ProximityPrecision::ByAttribute {
// Force proximity to 0 because: // Force proximity to 0 because:
// in AttributeScale, there are only 2 possible distances: // in ByAttribute, there are only 2 possible distances:
// 1. words in same attribute: in that the DB contains (0, word1, word2) // 1. words in same attribute: in that the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words. // 2. words in different attributes: no DB entry for these two words.
proximity = 0; proximity = 0;
@@ -408,7 +406,7 @@ impl<'ctx> SearchContext<'ctx> {
docids.clone() docids.clone()
} else { } else {
let prefix_docids = match proximity_precision { let prefix_docids = match proximity_precision {
ProximityPrecision::AttributeScale => { ProximityPrecision::ByAttribute => {
// Compute the distance at the attribute level and store it in the cache. // Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? { let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids fids
@@ -429,7 +427,7 @@ impl<'ctx> SearchContext<'ctx> {
} }
prefix_docids prefix_docids
} }
ProximityPrecision::WordScale => { ProximityPrecision::ByWord => {
// compute docids using prefix iter and store the result in the cache. // compute docids using prefix iter and store the result in the cache.
let key = U8StrStrCodec::bytes_encode(&( let key = U8StrStrCodec::bytes_encode(&(
proximity, proximity,

View File

@@ -157,7 +157,7 @@ pub(crate) fn data_from_obkv_documents(
}); });
} }
if proximity_precision == ProximityPrecision::WordScale { if proximity_precision == ProximityPrecision::ByWord {
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>( spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(), docid_word_positions_chunks.clone(),
indexer, indexer,