diff --git a/crates/dump/src/reader/compat/v5_to_v6.rs b/crates/dump/src/reader/compat/v5_to_v6.rs index 6b63e7c6b..b4a4fcb24 100644 --- a/crates/dump/src/reader/compat/v5_to_v6.rs +++ b/crates/dump/src/reader/compat/v5_to_v6.rs @@ -373,6 +373,7 @@ impl From> for v6::Settings { }, disable_on_words: typo.disable_on_words.into(), disable_on_attributes: typo.disable_on_attributes.into(), + disable_on_numbers: v6::Setting::NotSet, }), v5::Setting::Reset => v6::Setting::Reset, v5::Setting::NotSet => v6::Setting::NotSet, diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index 6ace0f4ee..ccf0d75ee 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -8,6 +8,7 @@ use std::str::FromStr; use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use fst::IntoStreamer; +use milli::disabled_typos_terms::DisabledTyposTerms; use milli::index::{IndexEmbeddingConfig, PrefixSearch}; use milli::proximity::ProximityPrecision; use milli::update::Setting; @@ -104,6 +105,10 @@ pub struct TypoSettings { #[deserr(default)] #[schema(value_type = Option>, example = json!(["uuid", "url"]))] pub disable_on_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option, example = json!(true))] + pub disable_on_numbers: Setting, } #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] @@ -701,6 +706,12 @@ pub fn apply_settings_to_builder( Setting::Reset => builder.reset_exact_attributes(), Setting::NotSet => (), } + + match value.disable_on_numbers { + Setting::Set(val) => builder.set_disable_on_numbers(val), + Setting::Reset => builder.reset_disable_on_numbers(), + Setting::NotSet => (), + } } Setting::Reset => { // all typo settings need to be reset here. 
@@ -826,12 +837,14 @@ pub fn settings( }; let disabled_attributes = index.exact_attributes(rtxn)?.into_iter().map(String::from).collect(); + let DisabledTyposTerms { disable_on_numbers } = index.disabled_typos_terms(rtxn)?; let typo_tolerance = TypoSettings { enabled: Setting::Set(index.authorize_typos(rtxn)?), min_word_size_for_typos: Setting::Set(min_typo_word_len), disable_on_words: Setting::Set(disabled_words), disable_on_attributes: Setting::Set(disabled_attributes), + disable_on_numbers: Setting::Set(disable_on_numbers), }; let faceting = FacetingSettings { diff --git a/crates/meilisearch/tests/dumps/mod.rs b/crates/meilisearch/tests/dumps/mod.rs index e5aa52dc6..3ba3c20eb 100644 --- a/crates/meilisearch/tests/dumps/mod.rs +++ b/crates/meilisearch/tests/dumps/mod.rs @@ -87,7 +87,8 @@ async fn import_dump_v1_movie_raw() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -260,7 +261,8 @@ async fn import_dump_v1_movie_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -432,7 +434,8 @@ async fn import_dump_v1_rubygems_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -590,7 +593,8 @@ async fn import_dump_v2_movie_raw() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -760,7 +764,8 @@ async fn import_dump_v2_movie_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -929,7 +934,8 @@ async fn 
import_dump_v2_rubygems_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -1087,7 +1093,8 @@ async fn import_dump_v3_movie_raw() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -1257,7 +1264,8 @@ async fn import_dump_v3_movie_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -1426,7 +1434,8 @@ async fn import_dump_v3_rubygems_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -1584,7 +1593,8 @@ async fn import_dump_v4_movie_raw() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -1754,7 +1764,8 @@ async fn import_dump_v4_movie_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -1923,7 +1934,8 @@ async fn import_dump_v4_rubygems_with_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -2212,7 +2224,8 @@ async fn import_dump_v6_containing_experimental_features() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -2444,7 +2457,8 @@ async fn generate_and_import_dump_containing_vectors() { "twoTypos": 9 }, "disableOnWords": [], - 
"disableOnAttributes": [] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index f6e79dbb9..6d98c0b2a 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -1976,3 +1976,93 @@ async fn change_facet_casing() { }) .await; } + +#[actix_rt::test] +async fn test_exact_typos_terms() { + let documents = json!([ + { + "id": 0, + "title": "The zeroth document 1298484", + }, + { + "id": 1, + "title": "The first document 234342", + "nested": { + "object": "field 22231", + "machin": "bidule 23443.32111", + }, + }, + { + "id": 2, + "title": "The second document 3398499", + "nested": [ + "array", + { + "object": "field 23245121,23223", + }, + { + "prout": "truc 123980612321", + "machin": "lol 12345645333447879", + }, + ], + }, + { + "id": 3, + "title": "The third document 12333", + "nested": "I lied 98878", + }, + ]); + + // Test prefix search + test_settings_documents_indexing_swapping_and_search( + &documents, + &json!({ + "searchableAttributes": ["title", "nested.object", "nested.machin"], + "typoTolerance": { + "enabled": true, + "disableOnNumbers": true + } + }), + &json!({"q": "12345"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "The second document 3398499", + "nested": [ + "array", + { + "object": "field 23245121,23223" + }, + { + "prout": "truc 123980612321", + "machin": "lol 12345645333447879" + } + ] + } + ] + "###); + }, + ) + .await; + + // Test typo search + test_settings_documents_indexing_swapping_and_search( + &documents, + &json!({ + "searchableAttributes": ["title", "nested.object", "nested.machin"], + "typoTolerance": { + "enabled": true, + "disableOnNumbers": true + } + }), + &json!({"q": "123457"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + 
snapshot!(json_string!(response["hits"]), @r###"[]"###); + }, + ) + .await; +} diff --git a/crates/meilisearch/tests/settings/errors.rs b/crates/meilisearch/tests/settings/errors.rs index ed1e0298f..4220cdbf8 100644 --- a/crates/meilisearch/tests/settings/errors.rs +++ b/crates/meilisearch/tests/settings/errors.rs @@ -274,7 +274,7 @@ async fn settings_bad_typo_tolerance() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown field `typoTolerance`: expected one of `enabled`, `minWordSizeForTypos`, `disableOnWords`, `disableOnAttributes`", + "message": "Unknown field `typoTolerance`: expected one of `enabled`, `minWordSizeForTypos`, `disableOnWords`, `disableOnAttributes`, `disableOnNumbers`", "code": "invalid_settings_typo_tolerance", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index fbb97f999..5c0f89ed3 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -179,7 +179,7 @@ test_setting_routes!( { setting: typo_tolerance, update_verb: patch, - default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": []} + default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [], "disableOnNumbers": false} }, ); @@ -276,7 +276,7 @@ async fn secrets_are_hidden_in_settings() { let (response, code) = index.settings().await; meili_snap::snapshot!(code, @"200 OK"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r#" + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { "displayedAttributes": [ "*" @@ -308,7 +308,8 @@ async fn secrets_are_hidden_in_settings() { "twoTypos": 9 }, "disableOnWords": [], - "disableOnAttributes": 
[] + "disableOnAttributes": [], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 100, @@ -337,7 +338,7 @@ async fn secrets_are_hidden_in_settings() { "facetSearch": true, "prefixSearch": "indexingTime" } - "#); + "###); let (response, code) = server.get_task(settings_update_uid).await; meili_snap::snapshot!(code, @"200 OK"); diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_features/kefir_settings.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_features/kefir_settings.snap index e836fa4b3..af7e82c8b 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_features/kefir_settings.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_features/kefir_settings.snap @@ -1,6 +1,5 @@ --- source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs -snapshot_kind: text --- { "displayedAttributes": [ @@ -49,7 +48,8 @@ snapshot_kind: text ], "disableOnAttributes": [ "surname" - ] + ], + "disableOnNumbers": false }, "faceting": { "maxValuesPerFacet": 99, diff --git a/crates/milli/src/disabled_typos_terms.rs b/crates/milli/src/disabled_typos_terms.rs new file mode 100644 index 000000000..3a0d0c0f5 --- /dev/null +++ b/crates/milli/src/disabled_typos_terms.rs @@ -0,0 +1,50 @@ +use heed::{ + types::{SerdeJson, Str}, + RoTxn, RwTxn, +}; +use serde::{Deserialize, Serialize}; + +use crate::{index::main_key, Index}; + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "camelCase")] +pub struct DisabledTyposTerms { + pub disable_on_numbers: bool, +} + +impl Index { + pub fn disabled_typos_terms(&self, txn: &RoTxn<'_>) -> heed::Result { + self.main + .remap_types::>() + .get(txn, main_key::DISABLED_TYPOS_TERMS) + .map(|option| option.unwrap_or_default()) + } + + pub(crate) fn put_disabled_typos_terms( + &self, + txn: &mut RwTxn<'_>, + disabled_typos_terms: &DisabledTyposTerms, + ) -> 
heed::Result<()> { + self.main.remap_types::>().put( + txn, + main_key::DISABLED_TYPOS_TERMS, + disabled_typos_terms, + )?; + + Ok(()) + } + + pub(crate) fn delete_disabled_typos_terms(&self, txn: &mut RwTxn<'_>) -> heed::Result<()> { + self.main + .remap_types::>() + .delete(txn, main_key::DISABLED_TYPOS_TERMS)?; + Ok(()) + } +} + +impl DisabledTyposTerms { + pub fn is_exact(&self, word: &str) -> bool { + // When `disable_on_numbers` is true, typo tolerance is disabled for words made only of numeric characters and ASCII punctuation + self.disable_on_numbers && word.chars().all(|c| c.is_numeric() || c.is_ascii_punctuation()) + } +} diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 1f006b316..d87f8715f 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -78,6 +78,7 @@ pub mod main_key { pub const FACET_SEARCH: &str = "facet_search"; pub const PREFIX_SEARCH: &str = "prefix_search"; pub const DOCUMENTS_STATS: &str = "documents_stats"; + pub const DISABLED_TYPOS_TERMS: &str = "disabled_typos_terms"; } pub mod db_name { diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 516e6d31b..47d3dc75c 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -12,6 +12,7 @@ mod asc_desc; mod attribute_patterns; mod criterion; pub mod database_stats; +pub mod disabled_typos_terms; mod error; mod external_documents_ids; pub mod facet; diff --git a/crates/milli/src/update/index_documents/extract/extract_word_docids.rs b/crates/milli/src/update/index_documents/extract/extract_word_docids.rs index 829da768c..a964c0bbe 100644 --- a/crates/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/crates/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -127,7 +127,8 @@ pub fn extract_word_docids( // merge all deletions let obkv = KvReaderDelAdd::from_slice(value); if let Some(value) = obkv.get(DelAdd::Deletion) { - let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid); + let delete_from_exact =
settings_diff.old.exact_attributes.contains(&fid) + || settings_diff.old.disabled_typos_terms.is_exact(w); buffer.clear(); let mut obkv = KvWriterDelAdd::new(&mut buffer); obkv.insert(DelAdd::Deletion, value)?; @@ -139,7 +140,8 @@ pub fn extract_word_docids( } // merge all additions if let Some(value) = obkv.get(DelAdd::Addition) { - let add_in_exact = settings_diff.new.exact_attributes.contains(&fid); + let add_in_exact = settings_diff.new.exact_attributes.contains(&fid) + || settings_diff.new.disabled_typos_terms.is_exact(w); buffer.clear(); let mut obkv = KvWriterDelAdd::new(&mut buffer); obkv.insert(DelAdd::Addition, value)?; diff --git a/crates/milli/src/update/index_documents/typed_chunk.rs b/crates/milli/src/update/index_documents/typed_chunk.rs index 87ea31942..6d575a98b 100644 --- a/crates/milli/src/update/index_documents/typed_chunk.rs +++ b/crates/milli/src/update/index_documents/typed_chunk.rs @@ -273,14 +273,11 @@ pub(crate) fn write_typed_chunk_into_index( unreachable!(); }; let clonable_word_docids = unsafe { as_cloneable_grenad(&word_docids_reader) }?; - let clonable_exact_word_docids = - unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?; word_docids_builder.push(word_docids_reader.into_cursor()?); exact_word_docids_builder.push(exact_word_docids_reader.into_cursor()?); word_fid_docids_builder.push(word_fid_docids_reader.into_cursor()?); fst_merger_builder.push(clonable_word_docids.into_cursor()?); - fst_merger_builder.push(clonable_exact_word_docids.into_cursor()?); } let word_docids_merger = word_docids_builder.build(); diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index a085a89ae..046116939 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -319,8 +319,11 @@ impl WordDocidsExtractors { let doc_alloc = &context.doc_alloc; 
let exact_attributes = index.exact_attributes(rtxn)?; - let is_exact_attribute = - |fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr)); + let disabled_typos_terms = index.disabled_typos_terms(rtxn)?; + let is_exact = |fname: &str, word: &str| { + exact_attributes.iter().any(|attr| contained_in(fname, attr)) + || disabled_typos_terms.is_exact(word) + }; match document_change { DocumentChange::Deletion(inner) => { let mut token_fn = |fname: &str, fid, pos, word: &str| { @@ -328,7 +331,7 @@ impl WordDocidsExtractors { fid, pos, word, - is_exact_attribute(fname), + is_exact(fname, word), inner.docid(), doc_alloc, ) @@ -356,7 +359,7 @@ impl WordDocidsExtractors { fid, pos, word, - is_exact_attribute(fname), + is_exact(fname, word), inner.docid(), doc_alloc, ) @@ -372,7 +375,7 @@ impl WordDocidsExtractors { fid, pos, word, - is_exact_attribute(fname), + is_exact(fname, word), inner.docid(), doc_alloc, ) @@ -389,7 +392,7 @@ impl WordDocidsExtractors { fid, pos, word, - is_exact_attribute(fname), + is_exact(fname, word), inner.docid(), doc_alloc, ) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index d2a88f4ff..2ea3c787e 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -9,6 +9,7 @@ pub use document_operation::{DocumentOperation, PayloadStats}; use hashbrown::HashMap; use heed::RwTxn; pub use partial_dump::PartialDump; +pub use post_processing::recompute_word_fst_from_word_docids_database; pub use update_by_function::UpdateByFunction; pub use write::ChannelCongestion; use write::{build_vectors, update_index, write_to_db}; diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs index aace70cff..b5c89d0d9 100644 --- a/crates/milli/src/update/new/indexer/post_processing.rs +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -131,6 +131,20 @@ fn compute_word_fst( 
} } +pub fn recompute_word_fst_from_word_docids_database(index: &Index, wtxn: &mut RwTxn) -> Result<()> { + let fst = fst::Set::default().map_data(std::borrow::Cow::Owned)?; + let mut word_fst_builder = WordFstBuilder::new(&fst)?; + let words = index.word_docids.iter(wtxn)?.remap_data_type::(); + for res in words { + let (word, _) = res?; + word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; + } + let (word_fst_mmap, _) = word_fst_builder.build(index, wtxn)?; + index.main.remap_types::().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?; + + Ok(()) +} + #[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")] fn compute_facet_search_database( index: &Index, diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 317be1968..51d9aed27 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -17,6 +17,7 @@ use super::IndexerConfig; use crate::attribute_patterns::PatternMatch; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::criterion::Criterion; +use crate::disabled_typos_terms::DisabledTyposTerms; use crate::error::UserError; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::filterable_attributes_rules::match_faceted_field; @@ -169,6 +170,7 @@ pub struct Settings<'a, 't, 'i> { synonyms: Setting>>, primary_key: Setting, authorize_typos: Setting, + disable_on_numbers: Setting, min_word_len_two_typos: Setting, min_word_len_one_typo: Setting, exact_words: Setting>, @@ -207,6 +209,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { synonyms: Setting::NotSet, primary_key: Setting::NotSet, authorize_typos: Setting::NotSet, + disable_on_numbers: Setting::NotSet, exact_words: Setting::NotSet, min_word_len_two_typos: Setting::NotSet, min_word_len_one_typo: Setting::NotSet, @@ -354,6 +357,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.min_word_len_one_typo = Setting::Reset; } + pub fn set_disable_on_numbers(&mut self, 
disable_on_numbers: bool) { + self.disable_on_numbers = Setting::Set(disable_on_numbers); + } + + pub fn reset_disable_on_numbers(&mut self) { + self.disable_on_numbers = Setting::Reset; + } + pub fn set_exact_words(&mut self, words: BTreeSet) { self.exact_words = Setting::Set(words); } @@ -866,6 +877,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(()) } + fn update_disabled_typos_terms(&mut self) -> Result<()> { + let mut disabled_typos_terms = self.index.disabled_typos_terms(self.wtxn)?; + match self.disable_on_numbers { + Setting::Set(disable_on_numbers) => { + disabled_typos_terms.disable_on_numbers = disable_on_numbers; + } + Setting::Reset => { + self.index.delete_disabled_typos_terms(self.wtxn)?; + disabled_typos_terms.disable_on_numbers = + DisabledTyposTerms::default().disable_on_numbers; + } + Setting::NotSet => (), + } + + self.index.put_disabled_typos_terms(self.wtxn, &disabled_typos_terms)?; + Ok(()) + } + fn update_exact_words(&mut self) -> Result<()> { match self.exact_words { Setting::Set(ref mut words) => { @@ -1246,6 +1275,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.update_prefix_search()?; self.update_facet_search()?; self.update_localized_attributes_rules()?; + self.update_disabled_typos_terms()?; let embedding_config_updates = self.update_embedding_configs()?; @@ -1327,6 +1357,7 @@ impl InnerIndexSettingsDiff { || old_settings.prefix_search != new_settings.prefix_search || old_settings.localized_attributes_rules != new_settings.localized_attributes_rules + || old_settings.disabled_typos_terms != new_settings.disabled_typos_terms }; let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes; @@ -1526,6 +1557,7 @@ pub(crate) struct InnerIndexSettings { pub user_defined_searchable_attributes: Option>, pub sortable_fields: HashSet, pub exact_attributes: HashSet, + pub disabled_typos_terms: DisabledTyposTerms, pub proximity_precision: ProximityPrecision, pub embedding_configs: EmbeddingConfigs, pub 
geo_fields_ids: Option<(FieldId, FieldId)>, @@ -1574,7 +1606,7 @@ impl InnerIndexSettings { .map(|fields| fields.into_iter().map(|f| f.to_string()).collect()); let builder = MetadataBuilder::from_index(index, rtxn)?; let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder); - + let disabled_typos_terms = index.disabled_typos_terms(rtxn)?; Ok(Self { stop_words, allowed_separators, @@ -1592,6 +1624,7 @@ impl InnerIndexSettings { geo_fields_ids, prefix_search, facet_search, + disabled_typos_terms, }) } diff --git a/crates/milli/src/update/test_settings.rs b/crates/milli/src/update/test_settings.rs index 00be0476a..2b9ee3a5e 100644 --- a/crates/milli/src/update/test_settings.rs +++ b/crates/milli/src/update/test_settings.rs @@ -896,6 +896,7 @@ fn test_correct_settings_init() { localized_attributes_rules, prefix_search, facet_search, + disable_on_numbers, } = settings; assert!(matches!(searchable_fields, Setting::NotSet)); assert!(matches!(displayed_fields, Setting::NotSet)); @@ -923,6 +924,7 @@ fn test_correct_settings_init() { assert!(matches!(localized_attributes_rules, Setting::NotSet)); assert!(matches!(prefix_search, Setting::NotSet)); assert!(matches!(facet_search, Setting::NotSet)); + assert!(matches!(disable_on_numbers, Setting::NotSet)); }) .unwrap(); } diff --git a/crates/milli/src/update/upgrade/mod.rs b/crates/milli/src/update/upgrade/mod.rs index 7c8dcf64a..d471107ec 100644 --- a/crates/milli/src/update/upgrade/mod.rs +++ b/crates/milli/src/update/upgrade/mod.rs @@ -1,11 +1,12 @@ mod v1_12; mod v1_13; mod v1_14; - +mod v1_15; use heed::RwTxn; use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3}; use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13}; use v1_14::Latest_V1_13_To_Latest_V1_14; +use v1_15::Latest_V1_14_To_Latest_V1_15; use crate::progress::{Progress, VariableNameStep}; use crate::{Index, InternalError, Result}; @@ -36,6 +37,7 @@ pub fn upgrade( &V1_13_0_To_V1_13_1 {}, &V1_13_1_To_Latest_V1_13 {}, &Latest_V1_13_To_Latest_V1_14 
{}, + &Latest_V1_14_To_Latest_V1_15 {}, ]; let start = match from { @@ -43,8 +45,9 @@ pub fn upgrade( (1, 12, 3..) => 1, (1, 13, 0) => 2, (1, 13, _) => 4, + (1, 14, _) => 5, // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other. - (1, 14, _) => 4, + (1, 15, _) => 5, (major, minor, patch) => { return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into()) } diff --git a/crates/milli/src/update/upgrade/v1_15.rs b/crates/milli/src/update/upgrade/v1_15.rs new file mode 100644 index 000000000..2c3cff355 --- /dev/null +++ b/crates/milli/src/update/upgrade/v1_15.rs @@ -0,0 +1,35 @@ +use heed::RwTxn; + +use super::UpgradeIndex; +use crate::progress::Progress; +use crate::update::new::indexer::recompute_word_fst_from_word_docids_database; +use crate::{make_enum_progress, Index, Result}; + +#[allow(non_camel_case_types)] +pub(super) struct Latest_V1_14_To_Latest_V1_15(); + +impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 { + fn upgrade( + &self, + wtxn: &mut RwTxn, + index: &Index, + _original: (u32, u32, u32), + progress: Progress, + ) -> Result { + // Recompute the word FST from the word docids database. + make_enum_progress! { + enum TypoTolerance { + RecomputeWordFst, + } + }; + + progress.update_progress(TypoTolerance::RecomputeWordFst); + recompute_word_fst_from_word_docids_database(index, wtxn)?; + + Ok(false) + } + + fn target_version(&self) -> (u32, u32, u32) { + (1, 15, 0) + } +}