mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	introduce word len for typo setting
This commit is contained in:
		| @@ -72,6 +72,7 @@ pub enum UserError { | |||||||
|     SerdeJson(serde_json::Error), |     SerdeJson(serde_json::Error), | ||||||
|     SortError(SortError), |     SortError(SortError), | ||||||
|     UnknownInternalDocumentId { document_id: DocumentId }, |     UnknownInternalDocumentId { document_id: DocumentId }, | ||||||
|  |     InvalidMinTypoWordSetting(u8, u8), | ||||||
| } | } | ||||||
|  |  | ||||||
| impl From<io::Error> for Error { | impl From<io::Error> for Error { | ||||||
| @@ -291,6 +292,7 @@ ranking rules settings to use the sort parameter at search time.", | |||||||
|             Self::UnknownInternalDocumentId { document_id } => { |             Self::UnknownInternalDocumentId { document_id } => { | ||||||
|                 write!(f, "An unknown internal document id have been used: `{}`.", document_id) |                 write!(f, "An unknown internal document id have been used: `{}`.", document_id) | ||||||
|             } |             } | ||||||
|  |             Self::InvalidMinTypoWordSetting(one, two) => write!(f, "Invalid settings for MinWordLenForTypo, expected 0 < 1-typo < 2-typos < 255, but found 1-typo: {} and 2-typo: {}", one, two), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -23,6 +23,9 @@ use crate::{ | |||||||
|     Search, StrBEU32Codec, StrStrU8Codec, BEU32, |     Search, StrBEU32Codec, StrStrU8Codec, BEU32, | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | /// Fallback returned by `Index::min_word_len_1_typo` when no value is stored in the index. | ||||||
|  | pub const DEFAULT_MIN_WORD_LEN_1_TYPO: u8 = 5; | ||||||
|  | /// Fallback returned by `Index::min_word_len_2_typo` when no value is stored in the index. | ||||||
|  | pub const DEFAULT_MIN_WORD_LEN_2_TYPOS: u8 = 9; | ||||||
|  |  | ||||||
| pub mod main_key { | pub mod main_key { | ||||||
|     pub const CRITERIA_KEY: &str = "criteria"; |     pub const CRITERIA_KEY: &str = "criteria"; | ||||||
|     pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; |     pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; | ||||||
| @@ -47,6 +50,8 @@ pub mod main_key { | |||||||
|     pub const CREATED_AT_KEY: &str = "created-at"; |     pub const CREATED_AT_KEY: &str = "created-at"; | ||||||
|     pub const UPDATED_AT_KEY: &str = "updated-at"; |     pub const UPDATED_AT_KEY: &str = "updated-at"; | ||||||
|     pub const AUTHORIZE_TYPOS: &str = "authorize-typos"; |     pub const AUTHORIZE_TYPOS: &str = "authorize-typos"; | ||||||
|  |     pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len"; | ||||||
|  |     pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len"; | ||||||
| } | } | ||||||
|  |  | ||||||
| pub mod db_name { | pub mod db_name { | ||||||
| @@ -886,6 +891,42 @@ impl Index { | |||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn min_word_len_1_typo(&self, txn: &RoTxn) -> heed::Result<u8> { | ||||||
|  |         // Reads the `u8` stored under `main_key::ONE_TYPO_WORD_LEN` in `self.main`. When no | ||||||
|  |         // value has been stored yet, `DEFAULT_MIN_WORD_LEN_1_TYPO` is returned instead, so a | ||||||
|  |         // successful read always yields a length. | ||||||
|  |         Ok(self | ||||||
|  |             .main | ||||||
|  |             .get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)? | ||||||
|  |             .unwrap_or(DEFAULT_MIN_WORD_LEN_1_TYPO)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn put_min_word_len_1_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { | ||||||
|  |         // Stores `val` as a `u8` under `main_key::ONE_TYPO_WORD_LEN` in `self.main`. | ||||||
|  |         // No validation happens here: the caller visible in this diff, | ||||||
|  |         // `Settings::update_min_typo_word_len`, checks `one < two` before writing. | ||||||
|  |         self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn min_word_len_2_typo(&self, txn: &RoTxn) -> heed::Result<u8> { | ||||||
|  |         // Reads the `u8` stored under `main_key::TWO_TYPOS_WORD_LEN` in `self.main`. When no | ||||||
|  |         // value has been stored yet, `DEFAULT_MIN_WORD_LEN_2_TYPOS` is returned instead, so a | ||||||
|  |         // successful read always yields a length. | ||||||
|  |         Ok(self | ||||||
|  |             .main | ||||||
|  |             .get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)? | ||||||
|  |             .unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn put_min_word_len_2_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> { | ||||||
|  |         // Stores `val` as a `u8` under `main_key::TWO_TYPOS_WORD_LEN` in `self.main`. | ||||||
|  |         // No validation happens here: the caller visible in this diff, | ||||||
|  |         // `Settings::update_min_typo_word_len`, checks `one < two` before writing. | ||||||
|  |         self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
|   | |||||||
| @@ -90,6 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> { | |||||||
|     synonyms: Setting<HashMap<String, Vec<String>>>, |     synonyms: Setting<HashMap<String, Vec<String>>>, | ||||||
|     primary_key: Setting<String>, |     primary_key: Setting<String>, | ||||||
|     authorize_typos: Setting<bool>, |     authorize_typos: Setting<bool>, | ||||||
|  |     min_2_typos_word_len: Setting<u8>, | ||||||
|  |     min_1_typo_word_len: Setting<u8>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||||
| @@ -112,6 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|             primary_key: Setting::NotSet, |             primary_key: Setting::NotSet, | ||||||
|             authorize_typos: Setting::NotSet, |             authorize_typos: Setting::NotSet, | ||||||
|             indexer_config, |             indexer_config, | ||||||
|  |             min_2_typos_word_len: Setting::Reset, | ||||||
|  |             min_1_typo_word_len: Setting::Reset, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -196,6 +200,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         self.authorize_typos = Setting::Reset; |         self.authorize_typos = Setting::Reset; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Records `val` as the requested two-typos word length (`Setting::Set`). | ||||||
|  |     /// Nothing is written here; `update_min_typo_word_len` validates and stores it later. | ||||||
|  |     pub fn set_min_2_typos_word_len(&mut self, val: u8) { | ||||||
|  |         self.min_2_typos_word_len = Setting::Set(val); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Marks the two-typos word length as `Setting::Reset`. | ||||||
|  |     /// | ||||||
|  |     /// NOTE(review): `update_min_typo_word_len` only acts on `Setting::Set`, so this does not | ||||||
|  |     /// currently change the value stored in the index — confirm whether that is intended. | ||||||
|  |     pub fn reset_min_2_typos_word_len(&mut self) { | ||||||
|  |         self.min_2_typos_word_len = Setting::Reset; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Records `val` as the requested one-typo word length (`Setting::Set`). | ||||||
|  |     /// Nothing is written here; `update_min_typo_word_len` validates and stores it later. | ||||||
|  |     pub fn set_min_1_typo_word_len(&mut self, val: u8) { | ||||||
|  |         self.min_1_typo_word_len = Setting::Set(val); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Marks the one-typo word length as `Setting::Reset`. | ||||||
|  |     /// | ||||||
|  |     /// NOTE(review): `update_min_typo_word_len` only acts on `Setting::Set`, so this does not | ||||||
|  |     /// currently change the value stored in the index — confirm whether that is intended. | ||||||
|  |     /// NOTE(review): the name says `typos` while the matching setter is | ||||||
|  |     /// `set_min_1_typo_word_len`; kept as is because it is part of the public interface. | ||||||
|  |     pub fn reset_min_1_typos_word_len(&mut self) { | ||||||
|  |         self.min_1_typo_word_len = Setting::Reset; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> |     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> | ||||||
|     where |     where | ||||||
|         F: Fn(UpdateIndexingStep) + Sync, |         F: Fn(UpdateIndexingStep) + Sync, | ||||||
| @@ -474,6 +494,38 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Validates the requested one-typo / two-typos word lengths and writes them to the index. | ||||||
|  |     /// | ||||||
|  |     /// - Both settings are `Set`: both values are written when `one < two`. | ||||||
|  |     /// - Only one setting is `Set`: it is compared against the other value as currently read | ||||||
|  |     ///   from the index (stored value or default) and written when `one < two` still holds. | ||||||
|  |     /// - Neither setting is `Set`: nothing is written. | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// Returns `UserError::InvalidMinTypoWordSetting(one, two)` when `one < two` does not hold. | ||||||
|  |     /// Errors from the index reads and writes are propagated with `?`. | ||||||
|  |     /// | ||||||
|  |     /// NOTE(review): `Setting::Reset` is never matched explicitly, so a reset leaves the stored | ||||||
|  |     /// value untouched — confirm whether it should restore the default instead. | ||||||
|  |     /// NOTE(review): only `one < two` is checked; the `0 <` and `< 255` bounds mentioned in the | ||||||
|  |     /// error message are not enforced by this function. | ||||||
|  |     fn update_min_typo_word_len(&mut self) -> Result<()> { | ||||||
|  |         match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) { | ||||||
|  |             (Setting::Set(one), Setting::Set(two)) => { | ||||||
|  |                 if one < two { | ||||||
|  |                     self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; | ||||||
|  |                     self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; | ||||||
|  |                 } else { | ||||||
|  |                     return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             (Setting::Set(one), _) => { | ||||||
|  |                 // Only the one-typo length is being set: validate it against the two-typos | ||||||
|  |                 // length visible through the current write transaction. | ||||||
|  |                 let two = self.index.min_word_len_2_typo(&self.wtxn)?; | ||||||
|  |                 if *one < two { | ||||||
|  |                     self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?; | ||||||
|  |                 } else { | ||||||
|  |                     return Err(UserError::InvalidMinTypoWordSetting(*one, two).into()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             (_, Setting::Set(two)) => { | ||||||
|  |                 // Only the two-typos length is being set: validate it against the one-typo | ||||||
|  |                 // length visible through the current write transaction. | ||||||
|  |                 let one = self.index.min_word_len_1_typo(&self.wtxn)?; | ||||||
|  |                 if one < *two { | ||||||
|  |                     self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?; | ||||||
|  |                 } else { | ||||||
|  |                     return Err(UserError::InvalidMinTypoWordSetting(one, *two).into()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             _ => (), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute<F>(mut self, progress_callback: F) -> Result<()> |     pub fn execute<F>(mut self, progress_callback: F) -> Result<()> | ||||||
|     where |     where | ||||||
|         F: Fn(UpdateIndexingStep) + Sync, |         F: Fn(UpdateIndexingStep) + Sync, | ||||||
| @@ -490,6 +542,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         self.update_criteria()?; |         self.update_criteria()?; | ||||||
|         self.update_primary_key()?; |         self.update_primary_key()?; | ||||||
|         self.update_authorize_typos()?; |         self.update_authorize_typos()?; | ||||||
|  |         self.update_min_typo_word_len()?; | ||||||
|  |  | ||||||
|         // If there is new faceted fields we indicate that we must reindex as we must |         // If there is new faceted fields we indicate that we must reindex as we must | ||||||
|         // index new fields as facets. It means that the distinct attribute, |         // index new fields as facets. It means that the distinct attribute, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user