mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Merge #469
469: add authorize typo setting r=Kerollmops a=MarinPostma This PR adds support for an authorize typo settings. This makes is possible to disable typos for a whole index. Typos are enabled by default. Co-authored-by: ad hoc <postma.marin@protonmail.com>
This commit is contained in:
		| @@ -46,6 +46,7 @@ pub mod main_key { | ||||
|     pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; | ||||
|     pub const CREATED_AT_KEY: &str = "created-at"; | ||||
|     pub const UPDATED_AT_KEY: &str = "updated-at"; | ||||
|     pub const AUTHORIZE_TYPOS: &str = "authorize-typos"; | ||||
| } | ||||
|  | ||||
| pub mod db_name { | ||||
| @@ -866,6 +867,25 @@ impl Index { | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, &time) | ||||
|     } | ||||
|  | ||||
|     pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> { | ||||
|         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||
|         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||
|         // because by default, we authorize typos. | ||||
|         match self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS)? { | ||||
|             Some(0) => Ok(false), | ||||
|             _ => Ok(true), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn, flag: bool) -> heed::Result<()> { | ||||
|         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||
|         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||
|         // because by default, we authorize typos. | ||||
|         self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| @@ -989,4 +1009,18 @@ pub(crate) mod tests { | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn put_and_retrieve_disable_typo() { | ||||
|         let index = TempIndex::new(); | ||||
|         let mut txn = index.write_txn().unwrap(); | ||||
|         // default value is true | ||||
|         assert!(index.authorize_typos(&txn).unwrap()); | ||||
|         // set to false | ||||
|         index.put_authorize_typos(&mut txn, false).unwrap(); | ||||
|         txn.commit().unwrap(); | ||||
|  | ||||
|         let txn = index.read_txn().unwrap(); | ||||
|         assert!(!index.authorize_typos(&txn).unwrap()); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -105,6 +105,12 @@ impl<'a> Search<'a> { | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     fn is_typo_authorized(&self) -> Result<bool> { | ||||
|         let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; | ||||
|         // only authorize typos if both the index and the query allow it. | ||||
|         Ok(self.authorize_typos && index_authorizes_typos) | ||||
|     } | ||||
|  | ||||
|     pub fn execute(&self) -> Result<SearchResult> { | ||||
|         // We create the query tree by spliting the query into tokens. | ||||
|         let before = Instant::now(); | ||||
| @@ -112,7 +118,9 @@ impl<'a> Search<'a> { | ||||
|             Some(query) => { | ||||
|                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); | ||||
|                 builder.optional_words(self.optional_words); | ||||
|                 builder.authorize_typos(self.authorize_typos); | ||||
|  | ||||
|                 builder.authorize_typos(self.is_typo_authorized()?); | ||||
|  | ||||
|                 builder.words_limit(self.words_limit); | ||||
|                 // We make sure that the analyzer is aware of the stop words | ||||
|                 // this ensures that the query builder is able to properly remove them. | ||||
| @@ -360,3 +368,34 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { | ||||
|         lev.build_dfa(word) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|     use crate::index::tests::TempIndex; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_is_authorized_typos() { | ||||
|         let index = TempIndex::new(); | ||||
|         let mut txn = index.write_txn().unwrap(); | ||||
|  | ||||
|         let mut search = Search::new(&txn, &index); | ||||
|  | ||||
|         // default is authorized | ||||
|         assert!(search.is_typo_authorized().unwrap()); | ||||
|  | ||||
|         search.authorize_typos(false); | ||||
|         assert!(!search.is_typo_authorized().unwrap()); | ||||
|  | ||||
|         index.put_authorize_typos(&mut txn, false).unwrap(); | ||||
|         txn.commit().unwrap(); | ||||
|  | ||||
|         let txn = index.read_txn().unwrap(); | ||||
|         let mut search = Search::new(&txn, &index); | ||||
|  | ||||
|         assert!(!search.is_typo_authorized().unwrap()); | ||||
|  | ||||
|         search.authorize_typos(true); | ||||
|         assert!(!search.is_typo_authorized().unwrap()); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -191,7 +191,6 @@ impl<'a> QueryTreeBuilder<'a> { | ||||
|     /// generated forcing all query words to be present in each matching documents | ||||
|     /// (the criterion `words` will be ignored). | ||||
|     /// default value if not called: `true` | ||||
|     #[allow(unused)] | ||||
|     pub fn optional_words(&mut self, optional_words: bool) -> &mut Self { | ||||
|         self.optional_words = optional_words; | ||||
|         self | ||||
| @@ -201,7 +200,6 @@ impl<'a> QueryTreeBuilder<'a> { | ||||
|     /// forcing all query words to match documents without any typo | ||||
|     /// (the criterion `typo` will be ignored). | ||||
|     /// default value if not called: `true` | ||||
|     #[allow(unused)] | ||||
|     pub fn authorize_typos(&mut self, authorize_typos: bool) -> &mut Self { | ||||
|         self.authorize_typos = authorize_typos; | ||||
|         self | ||||
|   | ||||
| @@ -89,6 +89,7 @@ pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     distinct_field: Setting<String>, | ||||
|     synonyms: Setting<HashMap<String, Vec<String>>>, | ||||
|     primary_key: Setting<String>, | ||||
|     authorize_typos: Setting<bool>, | ||||
| } | ||||
|  | ||||
| impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
| @@ -109,6 +110,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|             distinct_field: Setting::NotSet, | ||||
|             synonyms: Setting::NotSet, | ||||
|             primary_key: Setting::NotSet, | ||||
|             authorize_typos: Setting::NotSet, | ||||
|             indexer_config, | ||||
|         } | ||||
|     } | ||||
| @@ -186,6 +188,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         self.primary_key = Setting::Set(primary_key); | ||||
|     } | ||||
|  | ||||
|     pub fn set_autorize_typos(&mut self, val: bool) { | ||||
|         self.authorize_typos = Setting::Set(val); | ||||
|     } | ||||
|  | ||||
|     pub fn reset_authorize_typos(&mut self) { | ||||
|         self.authorize_typos = Setting::Reset; | ||||
|     } | ||||
|  | ||||
|     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> | ||||
|     where | ||||
|         F: Fn(UpdateIndexingStep) + Sync, | ||||
| @@ -450,6 +460,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn update_authorize_typos(&mut self) -> Result<()> { | ||||
|         match self.authorize_typos { | ||||
|             Setting::Set(flag) => { | ||||
|                 self.index.put_authorize_typos(self.wtxn, flag)?; | ||||
|                 Ok(()) | ||||
|             } | ||||
|             Setting::Reset => { | ||||
|                 self.index.put_authorize_typos(self.wtxn, true)?; | ||||
|                 Ok(()) | ||||
|             } | ||||
|             Setting::NotSet => Ok(()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn execute<F>(mut self, progress_callback: F) -> Result<()> | ||||
|     where | ||||
|         F: Fn(UpdateIndexingStep) + Sync, | ||||
| @@ -465,6 +489,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         self.update_distinct_field()?; | ||||
|         self.update_criteria()?; | ||||
|         self.update_primary_key()?; | ||||
|         self.update_authorize_typos()?; | ||||
|  | ||||
|         // If there is new faceted fields we indicate that we must reindex as we must | ||||
|         // index new fields as facets. It means that the distinct attribute, | ||||
| @@ -493,6 +518,7 @@ mod tests { | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::error::Error; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::update::IndexDocuments; | ||||
|     use crate::{Criterion, Filter, SearchResult}; | ||||
|  | ||||
| @@ -1193,4 +1219,18 @@ mod tests { | ||||
|         let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); | ||||
|         assert_eq!(line, r#""Star Wars""#); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_disable_typo() { | ||||
|         let index = TempIndex::new(); | ||||
|  | ||||
|         let mut txn = index.write_txn().unwrap(); | ||||
|         let config = IndexerConfig::default(); | ||||
|  | ||||
|         assert!(index.authorize_typos(&txn).unwrap()); | ||||
|         let mut builder = Settings::new(&mut txn, &index, &config); | ||||
|         builder.set_autorize_typos(false); | ||||
|         builder.execute(|_| ()).unwrap(); | ||||
|         assert!(!index.authorize_typos(&txn).unwrap()); | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user