mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	introduce word len for typo setting
This commit is contained in:
		| @@ -72,6 +72,7 @@ pub enum UserError { | ||||
|     SerdeJson(serde_json::Error), | ||||
|     SortError(SortError), | ||||
|     UnknownInternalDocumentId { document_id: DocumentId }, | ||||
|     InvalidMinTypoWordSetting(u8, u8), | ||||
| } | ||||
|  | ||||
| impl From<io::Error> for Error { | ||||
| @@ -291,6 +292,7 @@ ranking rules settings to use the sort parameter at search time.", | ||||
|             Self::UnknownInternalDocumentId { document_id } => { | ||||
|                 write!(f, "An unknown internal document id have been used: `{}`.", document_id) | ||||
|             } | ||||
|             Self::InvalidMinTypoWordSetting(one, two) => write!(f, "Invalid settings for MinWordLenForTypo, expected 0 < 1-typo < 2-typos < 255, but found 1-typo: {} and 2-typo: {}", one, two), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -23,6 +23,9 @@ use crate::{ | ||||
|     Search, StrBEU32Codec, StrStrU8Codec, BEU32, | ||||
| }; | ||||
|  | ||||
| pub const DEFAULT_MIN_WORD_LEN_1_TYPO: u8 = 5; | ||||
| pub const DEFAULT_MIN_WORD_LEN_2_TYPOS: u8 = 9; | ||||
|  | ||||
| pub mod main_key { | ||||
|     pub const CRITERIA_KEY: &str = "criteria"; | ||||
|     pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; | ||||
| @@ -47,6 +50,8 @@ pub mod main_key { | ||||
|     pub const CREATED_AT_KEY: &str = "created-at"; | ||||
|     pub const UPDATED_AT_KEY: &str = "updated-at"; | ||||
|     pub const AUTHORIZE_TYPOS: &str = "authorize-typos"; | ||||
|     pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len"; | ||||
|     pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len"; | ||||
| } | ||||
|  | ||||
| pub mod db_name { | ||||
| @@ -886,6 +891,42 @@ impl Index { | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Reads the `one-typo-word-len` setting from the main database. | ||||
|     /// | ||||
|     /// The value is stored as a plain `u8` under `main_key::ONE_TYPO_WORD_LEN`. When nothing | ||||
|     /// has been stored yet, `DEFAULT_MIN_WORD_LEN_1_TYPO` is returned instead. | ||||
|     /// | ||||
|     /// NOTE(review): presumably the minimum length a word must have before one typo is | ||||
|     /// accepted; confirm against the search code. | ||||
|     pub fn min_word_len_1_typo(&self, txn: &RoTxn) -> heed::Result<u8> { | ||||
|         let stored = self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?; | ||||
|         Ok(stored.unwrap_or(DEFAULT_MIN_WORD_LEN_1_TYPO)) | ||||
|     } | ||||
|  | ||||
|     /// Stores `val` as the `one-typo-word-len` setting in the main database. | ||||
|     /// | ||||
|     /// The value is written as a plain `u8` under `main_key::ONE_TYPO_WORD_LEN`. No validation | ||||
|     /// is performed here. | ||||
|     pub(crate) fn put_min_word_len_1_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { | ||||
|         let key = main_key::ONE_TYPO_WORD_LEN; | ||||
|         self.main.put::<_, Str, OwnedType<u8>>(txn, key, &val)?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Reads the `two-typos-word-len` setting from the main database. | ||||
|     /// | ||||
|     /// The value is stored as a plain `u8` under `main_key::TWO_TYPOS_WORD_LEN`. When nothing | ||||
|     /// has been stored yet, `DEFAULT_MIN_WORD_LEN_2_TYPOS` is returned instead. | ||||
|     /// | ||||
|     /// NOTE(review): presumably the minimum length a word must have before two typos are | ||||
|     /// accepted; confirm against the search code. | ||||
|     pub fn min_word_len_2_typo(&self, txn: &RoTxn) -> heed::Result<u8> { | ||||
|         let stored = self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?; | ||||
|         Ok(stored.unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS)) | ||||
|     } | ||||
|  | ||||
|     /// Stores `val` as the `two-typos-word-len` setting in the main database. | ||||
|     /// | ||||
|     /// The value is written as a plain `u8` under `main_key::TWO_TYPOS_WORD_LEN`. No validation | ||||
|     /// is performed here. | ||||
|     pub(crate) fn put_min_word_len_2_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { | ||||
|         let key = main_key::TWO_TYPOS_WORD_LEN; | ||||
|         self.main.put::<_, Str, OwnedType<u8>>(txn, key, &val)?; | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
|   | ||||
| @@ -90,6 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     synonyms: Setting<HashMap<String, Vec<String>>>, | ||||
|     primary_key: Setting<String>, | ||||
|     authorize_typos: Setting<bool>, | ||||
|     min_2_typos_word_len: Setting<u8>, | ||||
|     min_1_typo_word_len: Setting<u8>, | ||||
| } | ||||
|  | ||||
| impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
| @@ -112,6 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|             primary_key: Setting::NotSet, | ||||
|             authorize_typos: Setting::NotSet, | ||||
|             indexer_config, | ||||
|             min_2_typos_word_len: Setting::Reset, | ||||
|             min_1_typo_word_len: Setting::Reset, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -196,6 +200,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         self.authorize_typos = Setting::Reset; | ||||
|     } | ||||
|  | ||||
|     /// Requests that `val` be stored as the two-typos word length when the update executes. | ||||
|     pub fn set_min_2_typos_word_len(&mut self, val: u8) { | ||||
|         let requested = Setting::Set(val); | ||||
|         self.min_2_typos_word_len = requested; | ||||
|     } | ||||
|  | ||||
|     /// Marks the two-typos word length as `Setting::Reset` for this update. | ||||
|     pub fn reset_min_2_typos_word_len(&mut self) { | ||||
|         let cleared = Setting::Reset; | ||||
|         self.min_2_typos_word_len = cleared; | ||||
|     } | ||||
|  | ||||
|     /// Requests that `val` be stored as the one-typo word length when the update executes. | ||||
|     pub fn set_min_1_typo_word_len(&mut self, val: u8) { | ||||
|         let requested = Setting::Set(val); | ||||
|         self.min_1_typo_word_len = requested; | ||||
|     } | ||||
|  | ||||
|     /// Marks the one-typo word length as `Setting::Reset` for this update. | ||||
|     /// | ||||
|     /// NOTE(review): the method name says `typos` while the field and its setter say `typo`; | ||||
|     /// the name is kept as is because it is part of the public interface. | ||||
|     pub fn reset_min_1_typos_word_len(&mut self) { | ||||
|         let cleared = Setting::Reset; | ||||
|         self.min_1_typo_word_len = cleared; | ||||
|     } | ||||
|  | ||||
|     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> | ||||
|     where | ||||
|         F: Fn(UpdateIndexingStep) + Sync, | ||||
| @@ -474,6 +494,38 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Validates and stores the one-typo and two-typos word lengths. | ||||
|     /// | ||||
|     /// Only `Setting::Set` values are written. When just one of the two values is set, the | ||||
|     /// other one is read from the index so that the pair can still be compared. | ||||
|     /// | ||||
|     /// NOTE(review): `Setting::Reset` is handled exactly like `Setting::NotSet` here, so | ||||
|     /// nothing is removed from the index; confirm this is intended. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns `UserError::InvalidMinTypoWordSetting` unless the one-typo length is strictly | ||||
|     /// smaller than the two-typos length. Database errors are propagated. | ||||
|     fn update_min_typo_word_len(&mut self) -> Result<()> { | ||||
|         // Extract the explicitly requested values, if any. | ||||
|         let new_one = match &self.min_1_typo_word_len { | ||||
|             Setting::Set(len) => Some(*len), | ||||
|             _ => None, | ||||
|         }; | ||||
|         let new_two = match &self.min_2_typos_word_len { | ||||
|             Setting::Set(len) => Some(*len), | ||||
|             _ => None, | ||||
|         }; | ||||
|  | ||||
|         // Nothing was requested: leave the index untouched and skip any read. | ||||
|         if new_one.is_none() && new_two.is_none() { | ||||
|             return Ok(()); | ||||
|         } | ||||
|  | ||||
|         // Fall back to the value currently visible in the index for the side that is not set. | ||||
|         let one = match new_one { | ||||
|             Some(len) => len, | ||||
|             None => self.index.min_word_len_1_typo(&self.wtxn)?, | ||||
|         }; | ||||
|         let two = match new_two { | ||||
|             Some(len) => len, | ||||
|             None => self.index.min_word_len_2_typo(&self.wtxn)?, | ||||
|         }; | ||||
|  | ||||
|         if one >= two { | ||||
|             return Err(UserError::InvalidMinTypoWordSetting(one, two).into()); | ||||
|         } | ||||
|  | ||||
|         // Write only what was explicitly set, one-typo first. | ||||
|         if let Some(len) = new_one { | ||||
|             self.index.put_min_word_len_1_typo(&mut self.wtxn, len)?; | ||||
|         } | ||||
|         if let Some(len) = new_two { | ||||
|             self.index.put_min_word_len_2_typo(&mut self.wtxn, len)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn execute<F>(mut self, progress_callback: F) -> Result<()> | ||||
|     where | ||||
|         F: Fn(UpdateIndexingStep) + Sync, | ||||
| @@ -490,6 +542,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         self.update_criteria()?; | ||||
|         self.update_primary_key()?; | ||||
|         self.update_authorize_typos()?; | ||||
|         self.update_min_typo_word_len()?; | ||||
|  | ||||
|         // If there is new faceted fields we indicate that we must reindex as we must | ||||
|         // index new fields as facets. It means that the distinct attribute, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user