WIP on split words and synonyms support

This commit is contained in:
Loïc Lecrenier
2023-03-02 21:27:57 +01:00
parent c27ea2677f
commit 1db152046e
5 changed files with 233 additions and 142 deletions

View File

@ -31,22 +31,27 @@ pub fn make_query_graph<'transaction>(
query: &str,
) -> Result<QueryGraph> {
assert!(!query.is_empty());
let fst = index.words_fst(txn).unwrap();
let query = LocatedQueryTerm::from_query(query.tokenize(), None, |word, is_prefix| {
word_derivations(
index,
txn,
word,
if word.len() < 4 {
0
} else if word.len() < 100 {
1
} else {
2
},
is_prefix,
&fst,
)
let authorize_typos = index.authorize_typos(txn)?;
let min_len_one_typo = index.min_word_len_one_typo(txn)?;
let min_len_two_typos = index.min_word_len_two_typos(txn)?;
let exact_words = index.exact_words(txn)?;
let fst = index.words_fst(txn)?;
// TODO: get rid of this closure
// also, ngrams can have one typo?
let query = LocatedQueryTerm::from_query(query.tokenize(), None, move |word, is_prefix| {
let typos = if !authorize_typos
|| word.len() < min_len_one_typo as usize
|| exact_words.as_ref().map_or(false, |fst| fst.contains(word))
{
0
} else if word.len() < min_len_two_typos as usize {
1
} else {
2
};
word_derivations(index, txn, word, typos, is_prefix, &fst)
})
.unwrap();
let graph = QueryGraph::from_query(index, txn, db_cache, query)?;