Fix(Search): Fix phrase search candidates computation

This is an old bug that was hidden by the proximity criterion:
phrase searches were always returning an empty candidates list.

Before the fix, we were trying to find any words[n] near words[n]
instead of any words[n] near words[n+1]. For example:

For the phrase search '"Hello world"', we were first searching for "hello" near "hello" instead of "hello" near "world".
This commit is contained in:
ManyTheFish
2022-07-21 10:04:30 +02:00
parent 5704235521
commit cbb3b25459
2 changed files with 33 additions and 13 deletions

View File

@@ -335,7 +335,7 @@ pub fn resolve_query_tree<'t>(
// Get all the documents with the matching distance for each word pairs. // Get all the documents with the matching distance for each word pairs.
let mut bitmaps = Vec::with_capacity(winsize.pow(2)); let mut bitmaps = Vec::with_capacity(winsize.pow(2));
for (offset, s1) in win.iter().enumerate() { for (offset, s1) in win.iter().enumerate() {
for (dist, s2) in win.iter().skip(offset).enumerate() { for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? { match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
Some(m) => bitmaps.push(m), Some(m) => bitmaps.push(m),
// If there are no document for this distance, there will be no // If there are no document for this distance, there will be no

View File

@@ -192,22 +192,42 @@ fn resolve_candidates<'t>(
let most_right = words let most_right = words
.last() .last()
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) }); .map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
let mut candidates = None; let mut candidates = RoaringBitmap::new();
for slice in words.windows(2) { let mut first_iter = true;
let (left, right) = (&slice[0], &slice[1]); let winsize = words.len().min(7);
match ctx.word_pair_proximity_docids(left, right, 1)? {
Some(pair_docids) => match candidates.as_mut() { for win in words.windows(winsize) {
Some(candidates) => *candidates &= pair_docids, // Get all the documents with the matching distance for each word pairs.
None => candidates = Some(pair_docids), let mut bitmaps = Vec::with_capacity(winsize.pow(2));
}, for (offset, s1) in win.iter().enumerate() {
None => { for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
candidates = None; match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
Some(m) => bitmaps.push(m),
// If there are no document for this distance, there will be no
// results for the phrase query.
None => return Ok(Default::default()),
}
}
}
// We sort the bitmaps so that we perform the small intersections first, which is faster.
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
for bitmap in bitmaps {
if first_iter {
candidates = bitmap;
first_iter = false;
} else {
candidates &= bitmap;
}
// There will be no match, return early
if candidates.is_empty() {
break; break;
} }
} }
} }
match (most_left, most_right, candidates) { match (most_left, most_right) {
(Some(l), Some(r), Some(c)) => vec![(l, r, c)], (Some(l), Some(r)) => vec![(l, r, candidates)],
_otherwise => Default::default(), _otherwise => Default::default(),
} }
} else { } else {