mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 16:06:31 +00:00 
			
		
		
		
	Merge #469
469: add authorize typo setting r=Kerollmops a=MarinPostma This PR adds support for an authorize typos setting. This makes it possible to disable typos for a whole index. Typos are enabled by default. Co-authored-by: ad hoc <postma.marin@protonmail.com>
This commit is contained in:
		| @@ -46,6 +46,7 @@ pub mod main_key { | |||||||
|     pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; |     pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; | ||||||
|     pub const CREATED_AT_KEY: &str = "created-at"; |     pub const CREATED_AT_KEY: &str = "created-at"; | ||||||
|     pub const UPDATED_AT_KEY: &str = "updated-at"; |     pub const UPDATED_AT_KEY: &str = "updated-at"; | ||||||
|  |     pub const AUTHORIZE_TYPOS: &str = "authorize-typos"; | ||||||
| } | } | ||||||
|  |  | ||||||
| pub mod db_name { | pub mod db_name { | ||||||
| @@ -866,6 +867,25 @@ impl Index { | |||||||
|     ) -> heed::Result<()> { |     ) -> heed::Result<()> { | ||||||
|         self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, &time) |         self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, &time) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> { | ||||||
|  |         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||||
|  |         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||||
|  |         // because by default, we authorize typos. | ||||||
|  |         match self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS)? { | ||||||
|  |             Some(0) => Ok(false), | ||||||
|  |             _ => Ok(true), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn, flag: bool) -> heed::Result<()> { | ||||||
|  |         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||||
|  |         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||||
|  |         // because by default, we authorize typos. | ||||||
|  |         self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| @@ -989,4 +1009,18 @@ pub(crate) mod tests { | |||||||
|             } |             } | ||||||
|         ); |         ); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn put_and_retrieve_disable_typo() { | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |         let mut txn = index.write_txn().unwrap(); | ||||||
|  |         // default value is true | ||||||
|  |         assert!(index.authorize_typos(&txn).unwrap()); | ||||||
|  |         // set to false | ||||||
|  |         index.put_authorize_typos(&mut txn, false).unwrap(); | ||||||
|  |         txn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         let txn = index.read_txn().unwrap(); | ||||||
|  |         assert!(!index.authorize_typos(&txn).unwrap()); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -105,6 +105,12 @@ impl<'a> Search<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn is_typo_authorized(&self) -> Result<bool> { | ||||||
|  |         let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?; | ||||||
|  |         // only authorize typos if both the index and the query allow it. | ||||||
|  |         Ok(self.authorize_typos && index_authorizes_typos) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute(&self) -> Result<SearchResult> { |     pub fn execute(&self) -> Result<SearchResult> { | ||||||
|         // We create the query tree by spliting the query into tokens. |         // We create the query tree by spliting the query into tokens. | ||||||
|         let before = Instant::now(); |         let before = Instant::now(); | ||||||
| @@ -112,7 +118,9 @@ impl<'a> Search<'a> { | |||||||
|             Some(query) => { |             Some(query) => { | ||||||
|                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); |                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); | ||||||
|                 builder.optional_words(self.optional_words); |                 builder.optional_words(self.optional_words); | ||||||
|                 builder.authorize_typos(self.authorize_typos); |  | ||||||
|  |                 builder.authorize_typos(self.is_typo_authorized()?); | ||||||
|  |  | ||||||
|                 builder.words_limit(self.words_limit); |                 builder.words_limit(self.words_limit); | ||||||
|                 // We make sure that the analyzer is aware of the stop words |                 // We make sure that the analyzer is aware of the stop words | ||||||
|                 // this ensures that the query builder is able to properly remove them. |                 // this ensures that the query builder is able to properly remove them. | ||||||
| @@ -360,3 +368,34 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { | |||||||
|         lev.build_dfa(word) |         lev.build_dfa(word) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[cfg(test)] | ||||||
|  | mod test { | ||||||
|  |     use super::*; | ||||||
|  |     use crate::index::tests::TempIndex; | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_is_authorized_typos() { | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |         let mut txn = index.write_txn().unwrap(); | ||||||
|  |  | ||||||
|  |         let mut search = Search::new(&txn, &index); | ||||||
|  |  | ||||||
|  |         // default is authorized | ||||||
|  |         assert!(search.is_typo_authorized().unwrap()); | ||||||
|  |  | ||||||
|  |         search.authorize_typos(false); | ||||||
|  |         assert!(!search.is_typo_authorized().unwrap()); | ||||||
|  |  | ||||||
|  |         index.put_authorize_typos(&mut txn, false).unwrap(); | ||||||
|  |         txn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         let txn = index.read_txn().unwrap(); | ||||||
|  |         let mut search = Search::new(&txn, &index); | ||||||
|  |  | ||||||
|  |         assert!(!search.is_typo_authorized().unwrap()); | ||||||
|  |  | ||||||
|  |         search.authorize_typos(true); | ||||||
|  |         assert!(!search.is_typo_authorized().unwrap()); | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -191,7 +191,6 @@ impl<'a> QueryTreeBuilder<'a> { | |||||||
|     /// generated forcing all query words to be present in each matching documents |     /// generated forcing all query words to be present in each matching documents | ||||||
|     /// (the criterion `words` will be ignored). |     /// (the criterion `words` will be ignored). | ||||||
|     /// default value if not called: `true` |     /// default value if not called: `true` | ||||||
|     #[allow(unused)] |  | ||||||
|     pub fn optional_words(&mut self, optional_words: bool) -> &mut Self { |     pub fn optional_words(&mut self, optional_words: bool) -> &mut Self { | ||||||
|         self.optional_words = optional_words; |         self.optional_words = optional_words; | ||||||
|         self |         self | ||||||
| @@ -201,7 +200,6 @@ impl<'a> QueryTreeBuilder<'a> { | |||||||
|     /// forcing all query words to match documents without any typo |     /// forcing all query words to match documents without any typo | ||||||
|     /// (the criterion `typo` will be ignored). |     /// (the criterion `typo` will be ignored). | ||||||
|     /// default value if not called: `true` |     /// default value if not called: `true` | ||||||
|     #[allow(unused)] |  | ||||||
|     pub fn authorize_typos(&mut self, authorize_typos: bool) -> &mut Self { |     pub fn authorize_typos(&mut self, authorize_typos: bool) -> &mut Self { | ||||||
|         self.authorize_typos = authorize_typos; |         self.authorize_typos = authorize_typos; | ||||||
|         self |         self | ||||||
|   | |||||||
| @@ -89,6 +89,7 @@ pub struct Settings<'a, 't, 'u, 'i> { | |||||||
|     distinct_field: Setting<String>, |     distinct_field: Setting<String>, | ||||||
|     synonyms: Setting<HashMap<String, Vec<String>>>, |     synonyms: Setting<HashMap<String, Vec<String>>>, | ||||||
|     primary_key: Setting<String>, |     primary_key: Setting<String>, | ||||||
|  |     authorize_typos: Setting<bool>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||||
| @@ -109,6 +110,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|             distinct_field: Setting::NotSet, |             distinct_field: Setting::NotSet, | ||||||
|             synonyms: Setting::NotSet, |             synonyms: Setting::NotSet, | ||||||
|             primary_key: Setting::NotSet, |             primary_key: Setting::NotSet, | ||||||
|  |             authorize_typos: Setting::NotSet, | ||||||
|             indexer_config, |             indexer_config, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -186,6 +188,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         self.primary_key = Setting::Set(primary_key); |         self.primary_key = Setting::Set(primary_key); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn set_autorize_typos(&mut self, val: bool) { | ||||||
|  |         self.authorize_typos = Setting::Set(val); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn reset_authorize_typos(&mut self) { | ||||||
|  |         self.authorize_typos = Setting::Reset; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> |     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> | ||||||
|     where |     where | ||||||
|         F: Fn(UpdateIndexingStep) + Sync, |         F: Fn(UpdateIndexingStep) + Sync, | ||||||
| @@ -450,6 +460,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn update_authorize_typos(&mut self) -> Result<()> { | ||||||
|  |         match self.authorize_typos { | ||||||
|  |             Setting::Set(flag) => { | ||||||
|  |                 self.index.put_authorize_typos(self.wtxn, flag)?; | ||||||
|  |                 Ok(()) | ||||||
|  |             } | ||||||
|  |             Setting::Reset => { | ||||||
|  |                 self.index.put_authorize_typos(self.wtxn, true)?; | ||||||
|  |                 Ok(()) | ||||||
|  |             } | ||||||
|  |             Setting::NotSet => Ok(()), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute<F>(mut self, progress_callback: F) -> Result<()> |     pub fn execute<F>(mut self, progress_callback: F) -> Result<()> | ||||||
|     where |     where | ||||||
|         F: Fn(UpdateIndexingStep) + Sync, |         F: Fn(UpdateIndexingStep) + Sync, | ||||||
| @@ -465,6 +489,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | |||||||
|         self.update_distinct_field()?; |         self.update_distinct_field()?; | ||||||
|         self.update_criteria()?; |         self.update_criteria()?; | ||||||
|         self.update_primary_key()?; |         self.update_primary_key()?; | ||||||
|  |         self.update_authorize_typos()?; | ||||||
|  |  | ||||||
|         // If there is new faceted fields we indicate that we must reindex as we must |         // If there is new faceted fields we indicate that we must reindex as we must | ||||||
|         // index new fields as facets. It means that the distinct attribute, |         // index new fields as facets. It means that the distinct attribute, | ||||||
| @@ -493,6 +518,7 @@ mod tests { | |||||||
|  |  | ||||||
|     use super::*; |     use super::*; | ||||||
|     use crate::error::Error; |     use crate::error::Error; | ||||||
|  |     use crate::index::tests::TempIndex; | ||||||
|     use crate::update::IndexDocuments; |     use crate::update::IndexDocuments; | ||||||
|     use crate::{Criterion, Filter, SearchResult}; |     use crate::{Criterion, Filter, SearchResult}; | ||||||
|  |  | ||||||
| @@ -1193,4 +1219,18 @@ mod tests { | |||||||
|         let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); |         let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); | ||||||
|         assert_eq!(line, r#""Star Wars""#); |         assert_eq!(line, r#""Star Wars""#); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn test_disable_typo() { | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |  | ||||||
|  |         let mut txn = index.write_txn().unwrap(); | ||||||
|  |         let config = IndexerConfig::default(); | ||||||
|  |  | ||||||
|  |         assert!(index.authorize_typos(&txn).unwrap()); | ||||||
|  |         let mut builder = Settings::new(&mut txn, &index, &config); | ||||||
|  |         builder.set_autorize_typos(false); | ||||||
|  |         builder.execute(|_| ()).unwrap(); | ||||||
|  |         assert!(!index.authorize_typos(&txn).unwrap()); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user