Merge pull request #5494 from meilisearch/deactivate-numbers-in-typos

Deactivate numbers in typos
Author: Many the fish, 2025-05-05 09:19:53 +00:00 (committed by GitHub)
Commit: 96bc519f9e
19 changed files with 296 additions and 35 deletions
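
The user-facing change is a new `disableOnNumbers` flag on the `typoTolerance` index settings object: when it is set to `true`, tokens made only of digits and punctuation are indexed as exact words, so typo tolerance no longer matches them. A minimal sketch of the payload, written with the same `json!` macro the tests further down use; the variable name and surrounding `main` are illustrative only:

    use serde_json::json;

    fn main() {
        // Illustrative payload only: keep typo tolerance enabled overall but opt
        // numeric terms out of typo matching, as the tests in this diff do.
        let typo_settings = json!({
            "typoTolerance": {
                "enabled": true,
                "disableOnNumbers": true
            }
        });
        println!("{typo_settings}");
    }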

View File

@@ -373,6 +373,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
},
disable_on_words: typo.disable_on_words.into(),
disable_on_attributes: typo.disable_on_attributes.into(),
+disable_on_numbers: v6::Setting::NotSet,
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,

View File

@@ -8,6 +8,7 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
+use milli::disabled_typos_terms::DisabledTyposTerms;
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
use milli::proximity::ProximityPrecision;
use milli::update::Setting;
@@ -104,6 +105,10 @@ pub struct TypoSettings {
#[deserr(default)]
#[schema(value_type = Option<BTreeSet<String>>, example = json!(["uuid", "url"]))]
pub disable_on_attributes: Setting<BTreeSet<String>>,
+#[serde(default, skip_serializing_if = "Setting::is_not_set")]
+#[deserr(default)]
+#[schema(value_type = Option<bool>, example = json!(true))]
+pub disable_on_numbers: Setting<bool>,
}

#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)]
@@ -701,6 +706,12 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_exact_attributes(),
Setting::NotSet => (),
}
+match value.disable_on_numbers {
+    Setting::Set(val) => builder.set_disable_on_numbers(val),
+    Setting::Reset => builder.reset_disable_on_numbers(),
+    Setting::NotSet => (),
+}
}
Setting::Reset => {
// all typo settings need to be reset here.
@@ -826,12 +837,14 @@ pub fn settings(
};
let disabled_attributes = index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
+let DisabledTyposTerms { disable_on_numbers } = index.disabled_typos_terms(rtxn)?;
let typo_tolerance = TypoSettings {
enabled: Setting::Set(index.authorize_typos(rtxn)?),
min_word_size_for_typos: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
disable_on_attributes: Setting::Set(disabled_attributes),
+disable_on_numbers: Setting::Set(disable_on_numbers),
};
let faceting = FacetingSettings {

View File

@@ -87,7 +87,8 @@ async fn import_dump_v1_movie_raw() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -260,7 +261,8 @@ async fn import_dump_v1_movie_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -432,7 +434,8 @@ async fn import_dump_v1_rubygems_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -590,7 +593,8 @@ async fn import_dump_v2_movie_raw() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -760,7 +764,8 @@ async fn import_dump_v2_movie_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -929,7 +934,8 @@ async fn import_dump_v2_rubygems_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -1087,7 +1093,8 @@ async fn import_dump_v3_movie_raw() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -1257,7 +1264,8 @@ async fn import_dump_v3_movie_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -1426,7 +1434,8 @@ async fn import_dump_v3_rubygems_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -1584,7 +1593,8 @@ async fn import_dump_v4_movie_raw() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -1754,7 +1764,8 @@ async fn import_dump_v4_movie_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -1923,7 +1934,8 @@ async fn import_dump_v4_rubygems_with_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -2212,7 +2224,8 @@ async fn import_dump_v6_containing_experimental_features() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -2444,7 +2457,8 @@ async fn generate_and_import_dump_containing_vectors() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

View File

@@ -1976,3 +1976,93 @@ async fn change_facet_casing() {
})
.await;
}

#[actix_rt::test]
async fn test_exact_typos_terms() {
let documents = json!([
{
"id": 0,
"title": "The zeroth document 1298484",
},
{
"id": 1,
"title": "The first document 234342",
"nested": {
"object": "field 22231",
"machin": "bidule 23443.32111",
},
},
{
"id": 2,
"title": "The second document 3398499",
"nested": [
"array",
{
"object": "field 23245121,23223",
},
{
"prout": "truc 123980612321",
"machin": "lol 12345645333447879",
},
],
},
{
"id": 3,
"title": "The third document 12333",
"nested": "I lied 98878",
},
]);
// Test prefix search
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnNumbers": true
}
}),
&json!({"q": "12345"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 2,
"title": "The second document 3398499",
"nested": [
"array",
{
"object": "field 23245121,23223"
},
{
"prout": "truc 123980612321",
"machin": "lol 12345645333447879"
}
]
}
]
"###);
},
)
.await;
// Test typo search
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnNumbers": true
}
}),
&json!({"q": "123457"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"[]"###);
},
)
.await;
}

View File

@@ -274,7 +274,7 @@ async fn settings_bad_typo_tolerance() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
-"message": "Unknown field `typoTolerance`: expected one of `enabled`, `minWordSizeForTypos`, `disableOnWords`, `disableOnAttributes`",
+"message": "Unknown field `typoTolerance`: expected one of `enabled`, `minWordSizeForTypos`, `disableOnWords`, `disableOnAttributes`, `disableOnNumbers`",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"

View File

@@ -179,7 +179,7 @@ test_setting_routes!(
{
setting: typo_tolerance,
update_verb: patch,
-default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": []}
+default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [], "disableOnNumbers": false}
},
);

@@ -276,7 +276,7 @@ async fn secrets_are_hidden_in_settings() {
let (response, code) = index.settings().await;
meili_snap::snapshot!(code, @"200 OK");
-meili_snap::snapshot!(meili_snap::json_string!(response), @r#"
+meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"displayedAttributes": [
"*"

@@ -308,7 +308,8 @@ async fn secrets_are_hidden_in_settings() {
"twoTypos": 9
},
"disableOnWords": [],
-"disableOnAttributes": []
+"disableOnAttributes": [],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 100,

@@ -337,7 +338,7 @@ async fn secrets_are_hidden_in_settings() {
"facetSearch": true,
"prefixSearch": "indexingTime"
}
-"#);
+"###);
let (response, code) = server.get_task(settings_update_uid).await;
meili_snap::snapshot!(code, @"200 OK");

View File

@@ -1,6 +1,5 @@
---
source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
-snapshot_kind: text
---
{
"displayedAttributes": [

@@ -49,7 +48,8 @@ snapshot_kind: text
],
"disableOnAttributes": [
"surname"
-]
+],
+"disableOnNumbers": false
},
"faceting": {
"maxValuesPerFacet": 99,

View File

@@ -0,0 +1,50 @@
use heed::{
types::{SerdeJson, Str},
RoTxn, RwTxn,
};
use serde::{Deserialize, Serialize};
use crate::{index::main_key, Index};
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct DisabledTyposTerms {
pub disable_on_numbers: bool,
}
impl Index {
pub fn disabled_typos_terms(&self, txn: &RoTxn<'_>) -> heed::Result<DisabledTyposTerms> {
self.main
.remap_types::<Str, SerdeJson<DisabledTyposTerms>>()
.get(txn, main_key::DISABLED_TYPOS_TERMS)
.map(|option| option.unwrap_or_default())
}
pub(crate) fn put_disabled_typos_terms(
&self,
txn: &mut RwTxn<'_>,
disabled_typos_terms: &DisabledTyposTerms,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<DisabledTyposTerms>>().put(
txn,
main_key::DISABLED_TYPOS_TERMS,
disabled_typos_terms,
)?;
Ok(())
}
pub(crate) fn delete_disabled_typos_terms(&self, txn: &mut RwTxn<'_>) -> heed::Result<()> {
self.main
.remap_types::<Str, SerdeJson<DisabledTyposTerms>>()
.delete(txn, main_key::DISABLED_TYPOS_TERMS)?;
Ok(())
}
}
impl DisabledTyposTerms {
pub fn is_exact(&self, word: &str) -> bool {
// If disable_on_numbers is true, we disable the word if it contains only numbers or punctuation
self.disable_on_numbers && word.chars().all(|c| c.is_numeric() || c.is_ascii_punctuation())
}
}
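
For reference, the predicate above only disables typo tolerance for tokens made entirely of digits and/or ASCII punctuation; anything containing a letter keeps normal typo matching. A few illustrative assertions that mirror this logic (not part of the diff):

    // Illustrative only; mirrors the `is_exact` logic defined above.
    let terms = DisabledTyposTerms { disable_on_numbers: true };
    assert!(terms.is_exact("1298484"));       // digits only: indexed as an exact word
    assert!(terms.is_exact("23443.32111"));   // digits plus ASCII punctuation: also exact
    assert!(!terms.is_exact("document42"));   // contains letters: typo tolerance still applies
    assert!(!DisabledTyposTerms::default().is_exact("1298484")); // flag is off by default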

View File

@@ -78,6 +78,7 @@ pub mod main_key {
pub const FACET_SEARCH: &str = "facet_search";
pub const PREFIX_SEARCH: &str = "prefix_search";
pub const DOCUMENTS_STATS: &str = "documents_stats";
+pub const DISABLED_TYPOS_TERMS: &str = "disabled_typos_terms";
}

pub mod db_name {

View File

@@ -12,6 +12,7 @@ mod asc_desc;
mod attribute_patterns;
mod criterion;
pub mod database_stats;
+pub mod disabled_typos_terms;
mod error;
mod external_documents_ids;
pub mod facet;

View File

@@ -127,7 +127,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
// merge all deletions
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = obkv.get(DelAdd::Deletion) {
-let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid);
+let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid)
+    || settings_diff.old.disabled_typos_terms.is_exact(w);
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
obkv.insert(DelAdd::Deletion, value)?;

@@ -139,7 +140,8 @@
}
// merge all additions
if let Some(value) = obkv.get(DelAdd::Addition) {
-let add_in_exact = settings_diff.new.exact_attributes.contains(&fid);
+let add_in_exact = settings_diff.new.exact_attributes.contains(&fid)
+    || settings_diff.new.disabled_typos_terms.is_exact(w);
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
obkv.insert(DelAdd::Addition, value)?;

View File

@@ -273,14 +273,11 @@ pub(crate) fn write_typed_chunk_into_index(
unreachable!();
};
let clonable_word_docids = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
-let clonable_exact_word_docids =
-    unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
word_docids_builder.push(word_docids_reader.into_cursor()?);
exact_word_docids_builder.push(exact_word_docids_reader.into_cursor()?);
word_fid_docids_builder.push(word_fid_docids_reader.into_cursor()?);
fst_merger_builder.push(clonable_word_docids.into_cursor()?);
-fst_merger_builder.push(clonable_exact_word_docids.into_cursor()?);
}

let word_docids_merger = word_docids_builder.build();

View File

@@ -319,8 +319,11 @@ impl WordDocidsExtractors {
let doc_alloc = &context.doc_alloc;
let exact_attributes = index.exact_attributes(rtxn)?;
-let is_exact_attribute =
-    |fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr));
+let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
+let is_exact = |fname: &str, word: &str| {
+    exact_attributes.iter().any(|attr| contained_in(fname, attr))
+        || disabled_typos_terms.is_exact(word)
+};
match document_change {
DocumentChange::Deletion(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {

@@ -328,7 +331,7 @@ impl WordDocidsExtractors {
fid,
pos,
word,
-is_exact_attribute(fname),
+is_exact(fname, word),
inner.docid(),
doc_alloc,
)

@@ -356,7 +359,7 @@ impl WordDocidsExtractors {
fid,
pos,
word,
-is_exact_attribute(fname),
+is_exact(fname, word),
inner.docid(),
doc_alloc,
)

@@ -372,7 +375,7 @@ impl WordDocidsExtractors {
fid,
pos,
word,
-is_exact_attribute(fname),
+is_exact(fname, word),
inner.docid(),
doc_alloc,
)

@@ -389,7 +392,7 @@ impl WordDocidsExtractors {
fid,
pos,
word,
-is_exact_attribute(fname),
+is_exact(fname, word),
inner.docid(),
doc_alloc,
)

View File

@@ -9,6 +9,7 @@ pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap;
use heed::RwTxn;
pub use partial_dump::PartialDump;
+pub use post_processing::recompute_word_fst_from_word_docids_database;
pub use update_by_function::UpdateByFunction;
pub use write::ChannelCongestion;
use write::{build_vectors, update_index, write_to_db};

View File

@@ -131,6 +131,20 @@ fn compute_word_fst(
}
}

pub fn recompute_word_fst_from_word_docids_database(index: &Index, wtxn: &mut RwTxn) -> Result<()> {
let fst = fst::Set::default().map_data(std::borrow::Cow::Owned)?;
let mut word_fst_builder = WordFstBuilder::new(&fst)?;
let words = index.word_docids.iter(wtxn)?.remap_data_type::<DecodeIgnore>();
for res in words {
let (word, _) = res?;
word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?;
}
let (word_fst_mmap, _) = word_fst_builder.build(index, wtxn)?;
index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?;
Ok(())
}

#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")]
fn compute_facet_search_database(
index: &Index,

View File

@@ -17,6 +17,7 @@ use super::IndexerConfig;
use crate::attribute_patterns::PatternMatch;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::criterion::Criterion;
+use crate::disabled_typos_terms::DisabledTyposTerms;
use crate::error::UserError;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::filterable_attributes_rules::match_faceted_field;

@@ -169,6 +170,7 @@ pub struct Settings<'a, 't, 'i> {
synonyms: Setting<BTreeMap<String, Vec<String>>>,
primary_key: Setting<String>,
authorize_typos: Setting<bool>,
+disable_on_numbers: Setting<bool>,
min_word_len_two_typos: Setting<u8>,
min_word_len_one_typo: Setting<u8>,
exact_words: Setting<BTreeSet<String>>,

@@ -207,6 +209,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
synonyms: Setting::NotSet,
primary_key: Setting::NotSet,
authorize_typos: Setting::NotSet,
+disable_on_numbers: Setting::NotSet,
exact_words: Setting::NotSet,
min_word_len_two_typos: Setting::NotSet,
min_word_len_one_typo: Setting::NotSet,

@@ -354,6 +357,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.min_word_len_one_typo = Setting::Reset;
}

pub fn set_disable_on_numbers(&mut self, disable_on_numbers: bool) {
self.disable_on_numbers = Setting::Set(disable_on_numbers);
}
pub fn reset_disable_on_numbers(&mut self) {
self.disable_on_numbers = Setting::Reset;
}

pub fn set_exact_words(&mut self, words: BTreeSet<String>) {
self.exact_words = Setting::Set(words);
}

@@ -866,6 +877,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(())
}

fn update_disabled_typos_terms(&mut self) -> Result<()> {
let mut disabled_typos_terms = self.index.disabled_typos_terms(self.wtxn)?;
match self.disable_on_numbers {
Setting::Set(disable_on_numbers) => {
disabled_typos_terms.disable_on_numbers = disable_on_numbers;
}
Setting::Reset => {
self.index.delete_disabled_typos_terms(self.wtxn)?;
disabled_typos_terms.disable_on_numbers =
DisabledTyposTerms::default().disable_on_numbers;
}
Setting::NotSet => (),
}
self.index.put_disabled_typos_terms(self.wtxn, &disabled_typos_terms)?;
Ok(())
}

fn update_exact_words(&mut self) -> Result<()> {
match self.exact_words {
Setting::Set(ref mut words) => {

@@ -1246,6 +1275,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.update_prefix_search()?;
self.update_facet_search()?;
self.update_localized_attributes_rules()?;
+self.update_disabled_typos_terms()?;

let embedding_config_updates = self.update_embedding_configs()?;

@@ -1327,6 +1357,7 @@ impl InnerIndexSettingsDiff {
|| old_settings.prefix_search != new_settings.prefix_search
|| old_settings.localized_attributes_rules
    != new_settings.localized_attributes_rules
+|| old_settings.disabled_typos_terms != new_settings.disabled_typos_terms
};

let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;

@@ -1526,6 +1557,7 @@ pub(crate) struct InnerIndexSettings {
pub user_defined_searchable_attributes: Option<Vec<String>>,
pub sortable_fields: HashSet<String>,
pub exact_attributes: HashSet<FieldId>,
+pub disabled_typos_terms: DisabledTyposTerms,
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub geo_fields_ids: Option<(FieldId, FieldId)>,

@@ -1574,7 +1606,7 @@ impl InnerIndexSettings {
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
let builder = MetadataBuilder::from_index(index, rtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
+let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
Ok(Self {
stop_words,
allowed_separators,

@@ -1592,6 +1624,7 @@
geo_fields_ids,
prefix_search,
facet_search,
+disabled_typos_terms,
})
}

View File

@@ -896,6 +896,7 @@ fn test_correct_settings_init() {
localized_attributes_rules,
prefix_search,
facet_search,
+disable_on_numbers,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));

@@ -923,6 +924,7 @@ fn test_correct_settings_init() {
assert!(matches!(localized_attributes_rules, Setting::NotSet));
assert!(matches!(prefix_search, Setting::NotSet));
assert!(matches!(facet_search, Setting::NotSet));
+assert!(matches!(disable_on_numbers, Setting::NotSet));
})
.unwrap();
}

View File

@@ -1,11 +1,12 @@
mod v1_12;
mod v1_13;
mod v1_14;
+mod v1_15;

use heed::RwTxn;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
+use v1_15::Latest_V1_14_To_Latest_V1_15;

use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};

@@ -36,6 +37,7 @@ pub fn upgrade(
&V1_13_0_To_V1_13_1 {},
&V1_13_1_To_Latest_V1_13 {},
&Latest_V1_13_To_Latest_V1_14 {},
+&Latest_V1_14_To_Latest_V1_15 {},
];

let start = match from {

@@ -43,8 +45,9 @@
(1, 12, 3..) => 1,
(1, 13, 0) => 2,
(1, 13, _) => 4,
+(1, 14, _) => 5,
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
-(1, 14, _) => 4,
+(1, 15, _) => 5,
(major, minor, patch) => {
return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into())
}

View File

@@ -0,0 +1,35 @@
use heed::RwTxn;
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::update::new::indexer::recompute_word_fst_from_word_docids_database;
use crate::{make_enum_progress, Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_14_To_Latest_V1_15();
impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
// Recompute the word FST from the word docids database.
make_enum_progress! {
enum TypoTolerance {
RecomputeWordFst,
}
};
progress.update_progress(TypoTolerance::RecomputeWordFst);
recompute_word_fst_from_word_docids_database(index, wtxn)?;
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 15, 0)
}
}