return optional exact words

This commit is contained in:
ad hoc
2022-05-24 09:15:49 +02:00
parent 19dac01c5c
commit 8993fec8a3
3 changed files with 11 additions and 11 deletions

View File

@@ -1041,10 +1041,10 @@ impl Index {
} }
/// List the words on which typos are not allowed /// List the words on which typos are not allowed
pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> { pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> {
match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? { match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)),
None => Ok(fst::Set::default().map_data(Cow::Owned)?), None => Ok(None),
} }
} }

View File

@@ -152,7 +152,7 @@ trait Context {
} }
/// Returns the minimum word len for 1 and 2 typos. /// Returns the minimum word len for 1 and 2 typos.
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>;
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>>; fn exact_words(&self) -> crate::Result<Option<fst::Set<Cow<[u8]>>>>;
} }
/// The query tree builder is the interface to build a query tree. /// The query tree builder is the interface to build a query tree.
@@ -183,7 +183,7 @@ impl<'a> Context for QueryTreeBuilder<'a> {
Ok((one, two)) Ok((one, two))
} }
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> { fn exact_words(&self) -> crate::Result<Option<fst::Set<Cow<[u8]>>>> {
self.index.exact_words(self.rtxn) self.index.exact_words(self.rtxn)
} }
} }
@@ -277,13 +277,13 @@ pub struct TypoConfig<'a> {
pub max_typos: u8, pub max_typos: u8,
pub word_len_one_typo: u8, pub word_len_one_typo: u8,
pub word_len_two_typo: u8, pub word_len_two_typo: u8,
pub exact_words: fst::Set<Cow<'a, [u8]>>, pub exact_words: Option<fst::Set<Cow<'a, [u8]>>>,
} }
/// Return the `QueryKind` of a word depending on `authorize_typos` /// Return the `QueryKind` of a word depending on `authorize_typos`
/// and the provided word length. /// and the provided word length.
fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind { fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind {
if authorize_typos && !config.exact_words.contains(&word) { if authorize_typos && !config.exact_words.map(|s| s.contains(&word)).unwrap_or(false) {
let count = word.chars().count().min(u8::MAX as usize) as u8; let count = word.chars().count().min(u8::MAX as usize) as u8;
if count < config.word_len_one_typo { if count < config.word_len_one_typo {
QueryKind::exact(word) QueryKind::exact(word)
@@ -779,8 +779,8 @@ mod test {
Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
} }
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> { fn exact_words(&self) -> crate::Result<Option<fst::Set<Cow<[u8]>>>> {
Ok(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap()) Ok(Some(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap()))
} }
} }
@@ -1405,7 +1405,7 @@ mod test {
#[test] #[test]
fn test_min_word_len_typo() { fn test_min_word_len_typo() {
let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap(); let exact_words = Some(fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap());
let config = let config =
TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words }; TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words };

View File

@@ -1495,7 +1495,7 @@ mod tests {
let words = btreeset! { S("Ab"), S("ac") }; let words = btreeset! { S("Ab"), S("ac") };
builder.set_exact_words(words); builder.set_exact_words(words);
assert!(builder.execute(|_| ()).is_ok()); assert!(builder.execute(|_| ()).is_ok());
let exact_words = index.exact_words(&txn).unwrap(); let exact_words = index.exact_words(&txn).unwrap().unwrap();
for word in exact_words.into_fst().stream().into_str_vec().unwrap() { for word in exact_words.into_fst().stream().into_str_vec().unwrap() {
assert!(word.0 == "ac" || word.0 == "ab"); assert!(word.0 == "ac" || word.0 == "ab");
} }