disable typos on exact words

This commit is contained in:
ad hoc
2022-03-21 16:25:15 +01:00
parent 9bbffb8fee
commit 774fa8f065

View File

@@ -1,4 +1,4 @@
use std::{cmp, fmt, mem}; use std::{borrow::Cow, cmp, fmt, mem};
use fst::Set; use fst::Set;
use meilisearch_tokenizer::token::SeparatorKind; use meilisearch_tokenizer::token::SeparatorKind;
@@ -157,6 +157,7 @@ trait Context {
} }
/// Returns the minimum word len for 1 and 2 typos. /// Returns the minimum word len for 1 and 2 typos.
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>;
/// Returns the exact-words set, as an `fst::Set` backed by `Cow<[u8]>` data.
/// NOTE(review): presumably these are the words on which typos are disabled — confirm against the index setting.
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>>;
} }
/// The query tree builder is the interface to build a query tree. /// The query tree builder is the interface to build a query tree.
@@ -186,6 +187,10 @@ impl<'a> Context for QueryTreeBuilder<'a> {
let two = self.index.min_word_len_two_typos(&self.rtxn)?; let two = self.index.min_word_len_two_typos(&self.rtxn)?;
Ok((one, two)) Ok((one, two))
} }
/// Returns the exact-words set by delegating to the index, passing this builder's `rtxn`.
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> {
self.index.exact_words(self.rtxn)
}
} }
impl<'a> QueryTreeBuilder<'a> { impl<'a> QueryTreeBuilder<'a> {
@@ -265,15 +270,16 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
} }
#[derive(Clone)] #[derive(Clone)]
pub struct TypoConfig { pub struct TypoConfig<'a> {
pub max_typos: u8, pub max_typos: u8,
pub word_len_one_typo: u8, pub word_len_one_typo: u8,
pub word_len_two_typo: u8, pub word_len_two_typo: u8,
pub exact_words: fst::Set<Cow<'a, [u8]>>,
} }
/// Return the `QueryKind` of a word depending on `authorize_typos` /// Return the `QueryKind` of a word depending on `authorize_typos`
/// and the provided word length. /// and the provided word length.
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind { fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind {
if authorize_typos { if authorize_typos {
let count = word.chars().count().min(u8::MAX as usize) as u8; let count = word.chars().count().min(u8::MAX as usize) as u8;
if count < config.word_len_one_typo { if count < config.word_len_one_typo {
@@ -333,7 +339,9 @@ fn create_query_tree(
children.push(child); children.push(child);
} }
let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo }; let exact_words = ctx.exact_words()?;
let config =
TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words };
children.push(Operation::Query(Query { children.push(Operation::Query(Query {
prefix, prefix,
kind: typos(word, authorize_typos, config), kind: typos(word, authorize_typos, config),
@@ -385,8 +393,13 @@ fn create_query_tree(
let concat = words.concat(); let concat = words.concat();
let (word_len_one_typo, word_len_two_typo) = let (word_len_one_typo, word_len_two_typo) =
ctx.min_word_len_for_typo()?; ctx.min_word_len_for_typo()?;
let config = let exact_words = ctx.exact_words()?;
TypoConfig { max_typos: 1, word_len_one_typo, word_len_two_typo }; let config = TypoConfig {
max_typos: 1,
word_len_one_typo,
word_len_two_typo,
exact_words,
};
let query = Query { let query = Query {
prefix: is_prefix, prefix: is_prefix,
kind: typos(concat, authorize_typos, config), kind: typos(concat, authorize_typos, config),
@@ -605,6 +618,12 @@ mod test {
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> { fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
} }
/// Test implementation: returns a set built from a builder that received no keys.
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> {
// Nothing is inserted into the builder, so the resulting set contains no words.
let builder = fst::SetBuilder::new(Vec::new()).unwrap();
let data = builder.into_inner().unwrap();
// Wrap the owned bytes in `Cow::Owned` to match the `Cow<[u8]>`-backed return type.
Ok(fst::Set::new(Cow::Owned(data)).unwrap())
}
} }
impl Default for TestContext { impl Default for TestContext {
@@ -1225,7 +1244,9 @@ mod test {
#[test] #[test]
fn test_min_word_len_typo() { fn test_min_word_len_typo() {
let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7 }; let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap();
let config =
TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words };
assert_eq!( assert_eq!(
typos("hello".to_string(), true, config.clone()), typos("hello".to_string(), true, config.clone()),