feat: Introduce a WordArea struct

Useful to highlight matching areas in the original text.
This commit is contained in:
Clément Renault
2018-12-23 16:46:49 +01:00
parent 62521262e8
commit b32c96cdc9
14 changed files with 373 additions and 136 deletions

View File

@ -44,7 +44,7 @@ where D: Deref<Target=DB>
mod tests {
use super::*;
use crate::DocumentId;
use crate::{DocumentId, Attribute, WordArea};
// typing: "Geox CEO"
//
@ -54,8 +54,8 @@ mod tests {
fn one_typo_reference() {
let doc0 = {
let matches = vec![
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 2), is_exact: false, word_area: WordArea::new(0, 6) },
];
Document {
id: DocumentId(0),
@ -65,8 +65,8 @@ mod tests {
let doc1 = {
let matches = vec![
Match { query_index: 0, distance: 1, attribute: 0, attribute_index: 0, is_exact: false },
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
Match { query_index: 0, distance: 1, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 2), is_exact: false, word_area: WordArea::new(0, 6) },
];
Document {
id: DocumentId(1),
@ -87,8 +87,8 @@ mod tests {
fn no_typo() {
let doc0 = {
let matches = vec![
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false },
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
Match { query_index: 1, distance: 0, attribute: Attribute::new(0, 1), is_exact: false, word_area: WordArea::new(0, 6) },
];
Document {
id: DocumentId(0),
@ -98,7 +98,7 @@ mod tests {
let doc1 = {
let matches = vec![
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
];
Document {
id: DocumentId(1),
@ -119,8 +119,8 @@ mod tests {
fn one_typo() {
let doc0 = {
let matches = vec![
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false },
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
Match { query_index: 1, distance: 1, attribute: Attribute::new(0, 1), is_exact: false, word_area: WordArea::new(0, 6) },
];
Document {
id: DocumentId(0),
@ -130,7 +130,7 @@ mod tests {
let doc1 = {
let matches = vec![
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
Match { query_index: 0, distance: 0, attribute: Attribute::new(0, 0), is_exact: false, word_area: WordArea::new(0, 6) },
];
Document {
id: DocumentId(1),

View File

@ -10,11 +10,11 @@ use crate::rank::criterion::Criterion;
use crate::Match;
#[inline]
fn sum_matches_attributes(matches: &[Match]) -> u8 {
fn sum_matches_attributes(matches: &[Match]) -> u16 {
// note that GroupBy never returns an empty group,
// so reading the first element of each group unchecked is safe
GroupBy::new(matches, match_query_index).map(|group| unsafe {
group.get_unchecked(0).attribute
group.get_unchecked(0).attribute.attribute()
}).sum()
}

View File

@ -14,7 +14,7 @@ fn sum_matches_attribute_index(matches: &[Match]) -> u32 {
// note that GroupBy never returns an empty group,
// so reading the first element of each group unchecked is safe
GroupBy::new(matches, match_query_index).map(|group| unsafe {
group.get_unchecked(0).attribute_index
group.get_unchecked(0).attribute.word_index()
}).sum()
}

View File

@ -20,8 +20,8 @@ fn index_proximity(lhs: u32, rhs: u32) -> u32 {
}
fn attribute_proximity(lhs: &Match, rhs: &Match) -> u32 {
if lhs.attribute != rhs.attribute { return MAX_DISTANCE }
index_proximity(lhs.attribute_index, rhs.attribute_index)
if lhs.attribute.attribute() != rhs.attribute.attribute() { return MAX_DISTANCE }
index_proximity(lhs.attribute.word_index(), rhs.attribute.word_index())
}
fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
@ -67,6 +67,8 @@ where D: Deref<Target=DB>
mod tests {
use super::*;
use crate::Attribute;
#[test]
fn three_different_attributes() {
@ -79,11 +81,11 @@ mod tests {
// { id: 3, attr: 3, attr_index: 1 }
let matches = &[
Match { query_index: 0, attribute: 0, attribute_index: 0, ..Match::zero() },
Match { query_index: 1, attribute: 1, attribute_index: 0, ..Match::zero() },
Match { query_index: 2, attribute: 1, attribute_index: 1, ..Match::zero() },
Match { query_index: 2, attribute: 2, attribute_index: 0, ..Match::zero() },
Match { query_index: 3, attribute: 3, attribute_index: 1, ..Match::zero() },
Match { query_index: 0, attribute: Attribute::new(0, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new(1, 0), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new(2, 0), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new(3, 1), ..Match::zero() },
];
// soup -> of = 8
@ -105,12 +107,12 @@ mod tests {
// { id: 3, attr: 1, attr_index: 3 }
let matches = &[
Match { query_index: 0, attribute: 0, attribute_index: 0, ..Match::zero() },
Match { query_index: 0, attribute: 1, attribute_index: 0, ..Match::zero() },
Match { query_index: 1, attribute: 1, attribute_index: 1, ..Match::zero() },
Match { query_index: 2, attribute: 1, attribute_index: 2, ..Match::zero() },
Match { query_index: 3, attribute: 0, attribute_index: 1, ..Match::zero() },
Match { query_index: 3, attribute: 1, attribute_index: 3, ..Match::zero() },
Match { query_index: 0, attribute: Attribute::new(0, 0), ..Match::zero() },
Match { query_index: 0, attribute: Attribute::new(1, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new(1, 2), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new(0, 1), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new(1, 3), ..Match::zero() },
];
// soup -> of = 1

View File

@ -97,8 +97,8 @@ where D: Deref<Target=DB>
query_index: iv.index as u32,
distance: distance,
attribute: doc_index.attribute,
attribute_index: doc_index.attribute_index,
is_exact: is_exact,
word_area: doc_index.word_area,
};
matches.entry(doc_index.document_id).or_insert_with(Vec::new).push(match_);
}