mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Add disableOnNumber setting
This commit is contained in:
		| @@ -373,6 +373,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> { | |||||||
|                     }, |                     }, | ||||||
|                     disable_on_words: typo.disable_on_words.into(), |                     disable_on_words: typo.disable_on_words.into(), | ||||||
|                     disable_on_attributes: typo.disable_on_attributes.into(), |                     disable_on_attributes: typo.disable_on_attributes.into(), | ||||||
|  |                     disable_on_numbers: v6::Setting::NotSet, | ||||||
|                 }), |                 }), | ||||||
|                 v5::Setting::Reset => v6::Setting::Reset, |                 v5::Setting::Reset => v6::Setting::Reset, | ||||||
|                 v5::Setting::NotSet => v6::Setting::NotSet, |                 v5::Setting::NotSet => v6::Setting::NotSet, | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ use std::str::FromStr; | |||||||
|  |  | ||||||
| use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; | use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; | ||||||
| use fst::IntoStreamer; | use fst::IntoStreamer; | ||||||
|  | use milli::disabled_typos_terms::DisabledTyposTerms; | ||||||
| use milli::index::{IndexEmbeddingConfig, PrefixSearch}; | use milli::index::{IndexEmbeddingConfig, PrefixSearch}; | ||||||
| use milli::proximity::ProximityPrecision; | use milli::proximity::ProximityPrecision; | ||||||
| use milli::update::Setting; | use milli::update::Setting; | ||||||
| @@ -104,6 +105,10 @@ pub struct TypoSettings { | |||||||
|     #[deserr(default)] |     #[deserr(default)] | ||||||
|     #[schema(value_type = Option<BTreeSet<String>>, example = json!(["uuid", "url"]))] |     #[schema(value_type = Option<BTreeSet<String>>, example = json!(["uuid", "url"]))] | ||||||
|     pub disable_on_attributes: Setting<BTreeSet<String>>, |     pub disable_on_attributes: Setting<BTreeSet<String>>, | ||||||
|  |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|  |     #[deserr(default)] | ||||||
|  |     #[schema(value_type = Option<bool>, example = json!(true))] | ||||||
|  |     pub disable_on_numbers: Setting<bool>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] | #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] | ||||||
| @@ -826,12 +831,14 @@ pub fn settings( | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let disabled_attributes = index.exact_attributes(rtxn)?.into_iter().map(String::from).collect(); |     let disabled_attributes = index.exact_attributes(rtxn)?.into_iter().map(String::from).collect(); | ||||||
|  |     let DisabledTyposTerms { disable_on_numbers } = index.disabled_typos_terms(rtxn)?; | ||||||
|  |  | ||||||
|     let typo_tolerance = TypoSettings { |     let typo_tolerance = TypoSettings { | ||||||
|         enabled: Setting::Set(index.authorize_typos(rtxn)?), |         enabled: Setting::Set(index.authorize_typos(rtxn)?), | ||||||
|         min_word_size_for_typos: Setting::Set(min_typo_word_len), |         min_word_size_for_typos: Setting::Set(min_typo_word_len), | ||||||
|         disable_on_words: Setting::Set(disabled_words), |         disable_on_words: Setting::Set(disabled_words), | ||||||
|         disable_on_attributes: Setting::Set(disabled_attributes), |         disable_on_attributes: Setting::Set(disabled_attributes), | ||||||
|  |         disable_on_numbers: Setting::Set(disable_on_numbers), | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let faceting = FacetingSettings { |     let faceting = FacetingSettings { | ||||||
|   | |||||||
| @@ -87,7 +87,8 @@ async fn import_dump_v1_movie_raw() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -260,7 +261,8 @@ async fn import_dump_v1_movie_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -432,7 +434,8 @@ async fn import_dump_v1_rubygems_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -590,7 +593,8 @@ async fn import_dump_v2_movie_raw() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -760,7 +764,8 @@ async fn import_dump_v2_movie_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -929,7 +934,8 @@ async fn import_dump_v2_rubygems_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -1087,7 +1093,8 @@ async fn import_dump_v3_movie_raw() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -1257,7 +1264,8 @@ async fn import_dump_v3_movie_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -1426,7 +1434,8 @@ async fn import_dump_v3_rubygems_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -1584,7 +1593,8 @@ async fn import_dump_v4_movie_raw() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -1754,7 +1764,8 @@ async fn import_dump_v4_movie_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -1923,7 +1934,8 @@ async fn import_dump_v4_rubygems_with_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -2212,7 +2224,8 @@ async fn import_dump_v6_containing_experimental_features() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -2444,7 +2457,8 @@ async fn generate_and_import_dump_containing_vectors() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
|   | |||||||
| @@ -274,7 +274,7 @@ async fn settings_bad_typo_tolerance() { | |||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(json_string!(response), @r###" | ||||||
|     { |     { | ||||||
|       "message": "Unknown field `typoTolerance`: expected one of `enabled`, `minWordSizeForTypos`, `disableOnWords`, `disableOnAttributes`", |       "message": "Unknown field `typoTolerance`: expected one of `enabled`, `minWordSizeForTypos`, `disableOnWords`, `disableOnAttributes`, `disableOnNumbers`", | ||||||
|       "code": "invalid_settings_typo_tolerance", |       "code": "invalid_settings_typo_tolerance", | ||||||
|       "type": "invalid_request", |       "type": "invalid_request", | ||||||
|       "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" |       "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" | ||||||
|   | |||||||
| @@ -276,7 +276,7 @@ async fn secrets_are_hidden_in_settings() { | |||||||
|  |  | ||||||
|     let (response, code) = index.settings().await; |     let (response, code) = index.settings().await; | ||||||
|     meili_snap::snapshot!(code, @"200 OK"); |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r#" |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|     { |     { | ||||||
|       "displayedAttributes": [ |       "displayedAttributes": [ | ||||||
|         "*" |         "*" | ||||||
| @@ -308,7 +308,8 @@ async fn secrets_are_hidden_in_settings() { | |||||||
|           "twoTypos": 9 |           "twoTypos": 9 | ||||||
|         }, |         }, | ||||||
|         "disableOnWords": [], |         "disableOnWords": [], | ||||||
|         "disableOnAttributes": [] |         "disableOnAttributes": [], | ||||||
|  |         "disableOnNumbers": false | ||||||
|       }, |       }, | ||||||
|       "faceting": { |       "faceting": { | ||||||
|         "maxValuesPerFacet": 100, |         "maxValuesPerFacet": 100, | ||||||
| @@ -337,7 +338,7 @@ async fn secrets_are_hidden_in_settings() { | |||||||
|       "facetSearch": true, |       "facetSearch": true, | ||||||
|       "prefixSearch": "indexingTime" |       "prefixSearch": "indexingTime" | ||||||
|     } |     } | ||||||
|     "#); |     "###); | ||||||
|  |  | ||||||
|     let (response, code) = server.get_task(settings_update_uid).await; |     let (response, code) = server.get_task(settings_update_uid).await; | ||||||
|     meili_snap::snapshot!(code, @"200 OK"); |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs | source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "displayedAttributes": [ |   "displayedAttributes": [ | ||||||
| @@ -49,7 +48,8 @@ snapshot_kind: text | |||||||
|     ], |     ], | ||||||
|     "disableOnAttributes": [ |     "disableOnAttributes": [ | ||||||
|       "surname" |       "surname" | ||||||
|     ] |     ], | ||||||
|  |     "disableOnNumbers": false | ||||||
|   }, |   }, | ||||||
|   "faceting": { |   "faceting": { | ||||||
|     "maxValuesPerFacet": 99, |     "maxValuesPerFacet": 99, | ||||||
|   | |||||||
							
								
								
									
										50
									
								
								crates/milli/src/disabled_typos_terms.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								crates/milli/src/disabled_typos_terms.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | |||||||
|  | use heed::{ | ||||||
|  |     types::{SerdeJson, Str}, | ||||||
|  |     RoTxn, RwTxn, | ||||||
|  | }; | ||||||
|  | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
|  | use crate::{index::main_key, Index}; | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub struct DisabledTyposTerms { | ||||||
|  |     pub disable_on_numbers: bool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Index { | ||||||
|  |     pub fn disabled_typos_terms(&self, txn: &RoTxn<'_>) -> heed::Result<DisabledTyposTerms> { | ||||||
|  |         self.main | ||||||
|  |             .remap_types::<Str, SerdeJson<DisabledTyposTerms>>() | ||||||
|  |             .get(txn, main_key::DISABLED_TYPOS_TERMS) | ||||||
|  |             .map(|option| option.unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn put_disabled_typos_terms( | ||||||
|  |         &self, | ||||||
|  |         txn: &mut RwTxn<'_>, | ||||||
|  |         disabled_typos_terms: &DisabledTyposTerms, | ||||||
|  |     ) -> heed::Result<()> { | ||||||
|  |         self.main.remap_types::<Str, SerdeJson<DisabledTyposTerms>>().put( | ||||||
|  |             txn, | ||||||
|  |             main_key::DISABLED_TYPOS_TERMS, | ||||||
|  |             &disabled_typos_terms, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn delete_disabled_typos_terms(&self, txn: &mut RwTxn<'_>) -> heed::Result<()> { | ||||||
|  |         self.main | ||||||
|  |             .remap_types::<Str, SerdeJson<DisabledTyposTerms>>() | ||||||
|  |             .delete(txn, main_key::DISABLED_TYPOS_TERMS)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl DisabledTyposTerms { | ||||||
|  |     pub fn is_exact(&self, word: &str) -> bool { | ||||||
|  |         // If disable_on_numbers is true, we disable the word if it contains only numbers or punctuation | ||||||
|  |         self.disable_on_numbers && word.chars().all(|c| c.is_numeric() || c.is_ascii_punctuation()) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -78,6 +78,7 @@ pub mod main_key { | |||||||
|     pub const FACET_SEARCH: &str = "facet_search"; |     pub const FACET_SEARCH: &str = "facet_search"; | ||||||
|     pub const PREFIX_SEARCH: &str = "prefix_search"; |     pub const PREFIX_SEARCH: &str = "prefix_search"; | ||||||
|     pub const DOCUMENTS_STATS: &str = "documents_stats"; |     pub const DOCUMENTS_STATS: &str = "documents_stats"; | ||||||
|  |     pub const DISABLED_TYPOS_TERMS: &str = "disabled_typos_terms"; | ||||||
| } | } | ||||||
|  |  | ||||||
| pub mod db_name { | pub mod db_name { | ||||||
|   | |||||||
| @@ -12,6 +12,7 @@ mod asc_desc; | |||||||
| mod attribute_patterns; | mod attribute_patterns; | ||||||
| mod criterion; | mod criterion; | ||||||
| pub mod database_stats; | pub mod database_stats; | ||||||
|  | pub mod disabled_typos_terms; | ||||||
| mod error; | mod error; | ||||||
| mod external_documents_ids; | mod external_documents_ids; | ||||||
| pub mod facet; | pub mod facet; | ||||||
|   | |||||||
| @@ -127,7 +127,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>( | |||||||
|         // merge all deletions |         // merge all deletions | ||||||
|         let obkv = KvReaderDelAdd::from_slice(value); |         let obkv = KvReaderDelAdd::from_slice(value); | ||||||
|         if let Some(value) = obkv.get(DelAdd::Deletion) { |         if let Some(value) = obkv.get(DelAdd::Deletion) { | ||||||
|             let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid); |             let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid) | ||||||
|  |                 || settings_diff.old.disabled_typos_terms.is_exact(&w); | ||||||
|             buffer.clear(); |             buffer.clear(); | ||||||
|             let mut obkv = KvWriterDelAdd::new(&mut buffer); |             let mut obkv = KvWriterDelAdd::new(&mut buffer); | ||||||
|             obkv.insert(DelAdd::Deletion, value)?; |             obkv.insert(DelAdd::Deletion, value)?; | ||||||
| @@ -139,7 +140,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>( | |||||||
|         } |         } | ||||||
|         // merge all additions |         // merge all additions | ||||||
|         if let Some(value) = obkv.get(DelAdd::Addition) { |         if let Some(value) = obkv.get(DelAdd::Addition) { | ||||||
|             let add_in_exact = settings_diff.new.exact_attributes.contains(&fid); |             let add_in_exact = settings_diff.new.exact_attributes.contains(&fid) | ||||||
|  |                 || settings_diff.new.disabled_typos_terms.is_exact(&w); | ||||||
|             buffer.clear(); |             buffer.clear(); | ||||||
|             let mut obkv = KvWriterDelAdd::new(&mut buffer); |             let mut obkv = KvWriterDelAdd::new(&mut buffer); | ||||||
|             obkv.insert(DelAdd::Addition, value)?; |             obkv.insert(DelAdd::Addition, value)?; | ||||||
|   | |||||||
| @@ -273,14 +273,11 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                     unreachable!(); |                     unreachable!(); | ||||||
|                 }; |                 }; | ||||||
|                 let clonable_word_docids = unsafe { as_cloneable_grenad(&word_docids_reader) }?; |                 let clonable_word_docids = unsafe { as_cloneable_grenad(&word_docids_reader) }?; | ||||||
|                 let clonable_exact_word_docids = |  | ||||||
|                     unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?; |  | ||||||
|  |  | ||||||
|                 word_docids_builder.push(word_docids_reader.into_cursor()?); |                 word_docids_builder.push(word_docids_reader.into_cursor()?); | ||||||
|                 exact_word_docids_builder.push(exact_word_docids_reader.into_cursor()?); |                 exact_word_docids_builder.push(exact_word_docids_reader.into_cursor()?); | ||||||
|                 word_fid_docids_builder.push(word_fid_docids_reader.into_cursor()?); |                 word_fid_docids_builder.push(word_fid_docids_reader.into_cursor()?); | ||||||
|                 fst_merger_builder.push(clonable_word_docids.into_cursor()?); |                 fst_merger_builder.push(clonable_word_docids.into_cursor()?); | ||||||
|                 fst_merger_builder.push(clonable_exact_word_docids.into_cursor()?); |  | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             let word_docids_merger = word_docids_builder.build(); |             let word_docids_merger = word_docids_builder.build(); | ||||||
|   | |||||||
| @@ -319,8 +319,11 @@ impl WordDocidsExtractors { | |||||||
|         let doc_alloc = &context.doc_alloc; |         let doc_alloc = &context.doc_alloc; | ||||||
|  |  | ||||||
|         let exact_attributes = index.exact_attributes(rtxn)?; |         let exact_attributes = index.exact_attributes(rtxn)?; | ||||||
|         let is_exact_attribute = |         let disabled_typos_terms = index.disabled_typos_terms(rtxn)?; | ||||||
|             |fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr)); |         let is_exact = |fname: &str, word: &str| { | ||||||
|  |             exact_attributes.iter().any(|attr| contained_in(fname, attr)) | ||||||
|  |                 || disabled_typos_terms.is_exact(word) | ||||||
|  |         }; | ||||||
|         match document_change { |         match document_change { | ||||||
|             DocumentChange::Deletion(inner) => { |             DocumentChange::Deletion(inner) => { | ||||||
|                 let mut token_fn = |fname: &str, fid, pos, word: &str| { |                 let mut token_fn = |fname: &str, fid, pos, word: &str| { | ||||||
| @@ -328,7 +331,7 @@ impl WordDocidsExtractors { | |||||||
|                         fid, |                         fid, | ||||||
|                         pos, |                         pos, | ||||||
|                         word, |                         word, | ||||||
|                         is_exact_attribute(fname), |                         is_exact(fname, word), | ||||||
|                         inner.docid(), |                         inner.docid(), | ||||||
|                         doc_alloc, |                         doc_alloc, | ||||||
|                     ) |                     ) | ||||||
| @@ -356,7 +359,7 @@ impl WordDocidsExtractors { | |||||||
|                         fid, |                         fid, | ||||||
|                         pos, |                         pos, | ||||||
|                         word, |                         word, | ||||||
|                         is_exact_attribute(fname), |                         is_exact(fname, word), | ||||||
|                         inner.docid(), |                         inner.docid(), | ||||||
|                         doc_alloc, |                         doc_alloc, | ||||||
|                     ) |                     ) | ||||||
| @@ -372,7 +375,7 @@ impl WordDocidsExtractors { | |||||||
|                         fid, |                         fid, | ||||||
|                         pos, |                         pos, | ||||||
|                         word, |                         word, | ||||||
|                         is_exact_attribute(fname), |                         is_exact(fname, word), | ||||||
|                         inner.docid(), |                         inner.docid(), | ||||||
|                         doc_alloc, |                         doc_alloc, | ||||||
|                     ) |                     ) | ||||||
| @@ -389,7 +392,7 @@ impl WordDocidsExtractors { | |||||||
|                         fid, |                         fid, | ||||||
|                         pos, |                         pos, | ||||||
|                         word, |                         word, | ||||||
|                         is_exact_attribute(fname), |                         is_exact(fname, word), | ||||||
|                         inner.docid(), |                         inner.docid(), | ||||||
|                         doc_alloc, |                         doc_alloc, | ||||||
|                     ) |                     ) | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ use super::IndexerConfig; | |||||||
| use crate::attribute_patterns::PatternMatch; | use crate::attribute_patterns::PatternMatch; | ||||||
| use crate::constants::RESERVED_GEO_FIELD_NAME; | use crate::constants::RESERVED_GEO_FIELD_NAME; | ||||||
| use crate::criterion::Criterion; | use crate::criterion::Criterion; | ||||||
|  | use crate::disabled_typos_terms::DisabledTyposTerms; | ||||||
| use crate::error::UserError; | use crate::error::UserError; | ||||||
| use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||||
| use crate::filterable_attributes_rules::match_faceted_field; | use crate::filterable_attributes_rules::match_faceted_field; | ||||||
| @@ -169,6 +170,7 @@ pub struct Settings<'a, 't, 'i> { | |||||||
|     synonyms: Setting<BTreeMap<String, Vec<String>>>, |     synonyms: Setting<BTreeMap<String, Vec<String>>>, | ||||||
|     primary_key: Setting<String>, |     primary_key: Setting<String>, | ||||||
|     authorize_typos: Setting<bool>, |     authorize_typos: Setting<bool>, | ||||||
|  |     disabled_typos_terms: Setting<DisabledTyposTerms>, | ||||||
|     min_word_len_two_typos: Setting<u8>, |     min_word_len_two_typos: Setting<u8>, | ||||||
|     min_word_len_one_typo: Setting<u8>, |     min_word_len_one_typo: Setting<u8>, | ||||||
|     exact_words: Setting<BTreeSet<String>>, |     exact_words: Setting<BTreeSet<String>>, | ||||||
| @@ -207,6 +209,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|             synonyms: Setting::NotSet, |             synonyms: Setting::NotSet, | ||||||
|             primary_key: Setting::NotSet, |             primary_key: Setting::NotSet, | ||||||
|             authorize_typos: Setting::NotSet, |             authorize_typos: Setting::NotSet, | ||||||
|  |             disabled_typos_terms: Setting::NotSet, | ||||||
|             exact_words: Setting::NotSet, |             exact_words: Setting::NotSet, | ||||||
|             min_word_len_two_typos: Setting::NotSet, |             min_word_len_two_typos: Setting::NotSet, | ||||||
|             min_word_len_one_typo: Setting::NotSet, |             min_word_len_one_typo: Setting::NotSet, | ||||||
| @@ -354,6 +357,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         self.min_word_len_one_typo = Setting::Reset; |         self.min_word_len_one_typo = Setting::Reset; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn set_disabled_typos_terms(&mut self, disabled_typos_terms: DisabledTyposTerms) { | ||||||
|  |         self.disabled_typos_terms = Setting::Set(disabled_typos_terms); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn set_exact_words(&mut self, words: BTreeSet<String>) { |     pub fn set_exact_words(&mut self, words: BTreeSet<String>) { | ||||||
|         self.exact_words = Setting::Set(words); |         self.exact_words = Setting::Set(words); | ||||||
|     } |     } | ||||||
| @@ -866,6 +873,19 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn update_disabled_typos_terms(&mut self) -> Result<()> { | ||||||
|  |         match self.disabled_typos_terms { | ||||||
|  |             Setting::Set(disabled_typos_terms) => { | ||||||
|  |                 self.index.put_disabled_typos_terms(self.wtxn, &disabled_typos_terms)?; | ||||||
|  |             } | ||||||
|  |             Setting::Reset => { | ||||||
|  |                 self.index.delete_disabled_typos_terms(self.wtxn)?; | ||||||
|  |             } | ||||||
|  |             Setting::NotSet => (), | ||||||
|  |         } | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn update_exact_words(&mut self) -> Result<()> { |     fn update_exact_words(&mut self) -> Result<()> { | ||||||
|         match self.exact_words { |         match self.exact_words { | ||||||
|             Setting::Set(ref mut words) => { |             Setting::Set(ref mut words) => { | ||||||
| @@ -1246,6 +1266,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         self.update_prefix_search()?; |         self.update_prefix_search()?; | ||||||
|         self.update_facet_search()?; |         self.update_facet_search()?; | ||||||
|         self.update_localized_attributes_rules()?; |         self.update_localized_attributes_rules()?; | ||||||
|  |         self.update_disabled_typos_terms()?; | ||||||
|  |  | ||||||
|         let embedding_config_updates = self.update_embedding_configs()?; |         let embedding_config_updates = self.update_embedding_configs()?; | ||||||
|  |  | ||||||
| @@ -1327,6 +1348,7 @@ impl InnerIndexSettingsDiff { | |||||||
|                 || old_settings.prefix_search != new_settings.prefix_search |                 || old_settings.prefix_search != new_settings.prefix_search | ||||||
|                 || old_settings.localized_attributes_rules |                 || old_settings.localized_attributes_rules | ||||||
|                     != new_settings.localized_attributes_rules |                     != new_settings.localized_attributes_rules | ||||||
|  |                 || old_settings.disabled_typos_terms != new_settings.disabled_typos_terms | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes; |         let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes; | ||||||
| @@ -1526,6 +1548,7 @@ pub(crate) struct InnerIndexSettings { | |||||||
|     pub user_defined_searchable_attributes: Option<Vec<String>>, |     pub user_defined_searchable_attributes: Option<Vec<String>>, | ||||||
|     pub sortable_fields: HashSet<String>, |     pub sortable_fields: HashSet<String>, | ||||||
|     pub exact_attributes: HashSet<FieldId>, |     pub exact_attributes: HashSet<FieldId>, | ||||||
|  |     pub disabled_typos_terms: DisabledTyposTerms, | ||||||
|     pub proximity_precision: ProximityPrecision, |     pub proximity_precision: ProximityPrecision, | ||||||
|     pub embedding_configs: EmbeddingConfigs, |     pub embedding_configs: EmbeddingConfigs, | ||||||
|     pub geo_fields_ids: Option<(FieldId, FieldId)>, |     pub geo_fields_ids: Option<(FieldId, FieldId)>, | ||||||
| @@ -1574,7 +1597,7 @@ impl InnerIndexSettings { | |||||||
|             .map(|fields| fields.into_iter().map(|f| f.to_string()).collect()); |             .map(|fields| fields.into_iter().map(|f| f.to_string()).collect()); | ||||||
|         let builder = MetadataBuilder::from_index(index, rtxn)?; |         let builder = MetadataBuilder::from_index(index, rtxn)?; | ||||||
|         let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder); |         let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder); | ||||||
|  |         let disabled_typos_terms = index.disabled_typos_terms(rtxn)?; | ||||||
|         Ok(Self { |         Ok(Self { | ||||||
|             stop_words, |             stop_words, | ||||||
|             allowed_separators, |             allowed_separators, | ||||||
| @@ -1592,6 +1615,7 @@ impl InnerIndexSettings { | |||||||
|             geo_fields_ids, |             geo_fields_ids, | ||||||
|             prefix_search, |             prefix_search, | ||||||
|             facet_search, |             facet_search, | ||||||
|  |             disabled_typos_terms, | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -896,6 +896,7 @@ fn test_correct_settings_init() { | |||||||
|                 localized_attributes_rules, |                 localized_attributes_rules, | ||||||
|                 prefix_search, |                 prefix_search, | ||||||
|                 facet_search, |                 facet_search, | ||||||
|  |                 disabled_typos_terms, | ||||||
|             } = settings; |             } = settings; | ||||||
|             assert!(matches!(searchable_fields, Setting::NotSet)); |             assert!(matches!(searchable_fields, Setting::NotSet)); | ||||||
|             assert!(matches!(displayed_fields, Setting::NotSet)); |             assert!(matches!(displayed_fields, Setting::NotSet)); | ||||||
| @@ -923,6 +924,7 @@ fn test_correct_settings_init() { | |||||||
|             assert!(matches!(localized_attributes_rules, Setting::NotSet)); |             assert!(matches!(localized_attributes_rules, Setting::NotSet)); | ||||||
|             assert!(matches!(prefix_search, Setting::NotSet)); |             assert!(matches!(prefix_search, Setting::NotSet)); | ||||||
|             assert!(matches!(facet_search, Setting::NotSet)); |             assert!(matches!(facet_search, Setting::NotSet)); | ||||||
|  |             assert!(matches!(disabled_typos_terms, Setting::NotSet)); | ||||||
|         }) |         }) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user