Introduce a better higlighting system

This commit is contained in:
Clément Renault
2020-01-16 14:56:16 +01:00
parent 00336c5154
commit 74fa9ee4df
3 changed files with 37 additions and 12 deletions

View File

@ -31,9 +31,13 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
pub use query_words_mapper::QueryWordsMapper;
use std::convert::TryFrom;
use std::collections::HashMap;
use compact_arena::SmallArena;
use crate::bucket_sort::PostingsListView;
use crate::levenshtein::prefix_damerau_levenshtein;
use crate::query_tree::{QueryId, QueryKind};
use crate::reordered_attrs::ReorderedAttrs;
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
@ -47,6 +51,7 @@ pub struct Document {
fn highlights_from_raw_document<'a, 'tag, 'txn>(
raw_document: &RawDocument<'a, 'tag>,
queries_kinds: &HashMap<QueryId, &QueryKind>,
arena: &SmallArena<'tag, PostingsListView<'txn>>,
searchable_attrs: Option<&ReorderedAttrs>,
) -> Vec<Highlight>
@ -56,14 +61,20 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
for bm in raw_document.bare_matches.iter() {
let postings_list = &arena[bm.postings_list];
let input = postings_list.input();
// let query = &automatons[bm.query_index as usize].query;
let kind = &queries_kinds.get(&bm.query_index);
for di in postings_list.iter() {
// let covered_area = if query.len() > input.len() {
// input.len()
// } else {
// prefix_damerau_levenshtein(query.as_bytes(), input).1
// };
let covered_area = match kind {
Some(QueryKind::Exact(query)) | Some(QueryKind::Tolerant(query)) => {
let len = if query.len() > input.len() {
input.len()
} else {
prefix_damerau_levenshtein(query.as_bytes(), input).1
};
u16::try_from(len).unwrap_or(u16::max_value())
},
_ => di.char_length,
};
let attribute = searchable_attrs
.and_then(|sa| sa.reverse(di.attribute))
@ -72,7 +83,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
let highlight = Highlight {
attribute: attribute,
char_index: di.char_index,
char_length: di.char_length,
char_length: covered_area,
};
highlights.push(highlight);
@ -96,12 +107,14 @@ impl Document {
#[cfg(not(test))]
pub fn from_raw<'a, 'tag, 'txn>(
raw_document: RawDocument<'a, 'tag>,
queries_kinds: &HashMap<QueryId, &QueryKind>,
arena: &SmallArena<'tag, PostingsListView<'txn>>,
searchable_attrs: Option<&ReorderedAttrs>,
) -> Document
{
let highlights = highlights_from_raw_document(
&raw_document,
queries_kinds,
arena,
searchable_attrs,
);
@ -112,6 +125,7 @@ impl Document {
#[cfg(test)]
pub fn from_raw<'a, 'tag, 'txn>(
raw_document: RawDocument<'a, 'tag>,
queries_kinds: &HashMap<QueryId, &QueryKind>,
arena: &SmallArena<'tag, PostingsListView<'txn>>,
searchable_attrs: Option<&ReorderedAttrs>,
) -> Document
@ -120,6 +134,7 @@ impl Document {
let highlights = highlights_from_raw_document(
&raw_document,
queries_kinds,
arena,
searchable_attrs,
);