mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Introduce a better highlighting system
This commit is contained in:
		| @@ -1,4 +1,5 @@ | |||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
|  | use std::collections::HashMap; | ||||||
| use std::collections::HashSet; | use std::collections::HashSet; | ||||||
| use std::convert::TryFrom; | use std::convert::TryFrom; | ||||||
| use std::mem; | use std::mem; | ||||||
| @@ -28,7 +29,8 @@ use crate::distinct_map::{BufferedDistinctMap, DistinctMap}; | |||||||
| use crate::raw_document::RawDocument; | use crate::raw_document::RawDocument; | ||||||
| use crate::{database::MainT, reordered_attrs::ReorderedAttrs}; | use crate::{database::MainT, reordered_attrs::ReorderedAttrs}; | ||||||
| use crate::{store, Document, DocumentId, MResult}; | use crate::{store, Document, DocumentId, MResult}; | ||||||
| use crate::query_tree::{create_query_tree, traverse_query_tree, QueryResult, PostingsKey}; | use crate::query_tree::{create_query_tree, traverse_query_tree}; | ||||||
|  | use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey}; | ||||||
| use crate::query_tree::Context as QTContext; | use crate::query_tree::Context as QTContext; | ||||||
| use crate::store::Postings; | use crate::store::Postings; | ||||||
|  |  | ||||||
| @@ -88,6 +90,17 @@ where | |||||||
|     println!("{:?}", operation); |     println!("{:?}", operation); | ||||||
|     println!("{:?}", mapping); |     println!("{:?}", mapping); | ||||||
|  |  | ||||||
|  |     fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) { | ||||||
|  |         match operation { | ||||||
|  |             Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)), | ||||||
|  |             Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)), | ||||||
|  |             Operation::Query(query) => { map.insert(query.id, &query.kind); }, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let mut queries_kinds = HashMap::new(); | ||||||
|  |     recurs_operation(&mut queries_kinds, &operation); | ||||||
|  |  | ||||||
|     let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation).unwrap(); |     let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation).unwrap(); | ||||||
|     println!("found {} documents", docids.len()); |     println!("found {} documents", docids.len()); | ||||||
|     println!("number of postings {:?}", queries.len()); |     println!("number of postings {:?}", queries.len()); | ||||||
| @@ -99,7 +112,6 @@ where | |||||||
|     mk_arena!(arena); |     mk_arena!(arena); | ||||||
|  |  | ||||||
|     for (PostingsKey{ query, input, distance, is_exact }, matches) in queries { |     for (PostingsKey{ query, input, distance, is_exact }, matches) in queries { | ||||||
|  |  | ||||||
|         let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); |         let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); | ||||||
|         let pllen = postings_list_view.len() as f32; |         let pllen = postings_list_view.len() as f32; | ||||||
|  |  | ||||||
| @@ -126,7 +138,6 @@ where | |||||||
|             } |             } | ||||||
|  |  | ||||||
|         } else { |         } else { | ||||||
|  |  | ||||||
|             let mut offset = 0; |             let mut offset = 0; | ||||||
|             for id in docids.as_slice() { |             for id in docids.as_slice() { | ||||||
|                 let di = DocIndex { document_id: *id, ..DocIndex::default() }; |                 let di = DocIndex { document_id: *id, ..DocIndex::default() }; | ||||||
| @@ -234,7 +245,7 @@ where | |||||||
|     debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed)); |     debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed)); | ||||||
|  |  | ||||||
|     let iter = raw_documents.into_iter().skip(range.start).take(range.len()); |     let iter = raw_documents.into_iter().skip(range.start).take(range.len()); | ||||||
|     let iter = iter.map(|rd| Document::from_raw(rd, &arena, searchable_attrs.as_ref())); |     let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref())); | ||||||
|     let documents = iter.collect(); |     let documents = iter.collect(); | ||||||
|  |  | ||||||
|     debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed()); |     debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed()); | ||||||
|   | |||||||
| @@ -31,9 +31,13 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus | |||||||
| pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; | pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; | ||||||
| pub use query_words_mapper::QueryWordsMapper; | pub use query_words_mapper::QueryWordsMapper; | ||||||
|  |  | ||||||
|  | use std::convert::TryFrom; | ||||||
|  | use std::collections::HashMap; | ||||||
| use compact_arena::SmallArena; | use compact_arena::SmallArena; | ||||||
|  |  | ||||||
| use crate::bucket_sort::PostingsListView; | use crate::bucket_sort::PostingsListView; | ||||||
| use crate::levenshtein::prefix_damerau_levenshtein; | use crate::levenshtein::prefix_damerau_levenshtein; | ||||||
|  | use crate::query_tree::{QueryId, QueryKind}; | ||||||
| use crate::reordered_attrs::ReorderedAttrs; | use crate::reordered_attrs::ReorderedAttrs; | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] | ||||||
| @@ -47,6 +51,7 @@ pub struct Document { | |||||||
|  |  | ||||||
| fn highlights_from_raw_document<'a, 'tag, 'txn>( | fn highlights_from_raw_document<'a, 'tag, 'txn>( | ||||||
|     raw_document: &RawDocument<'a, 'tag>, |     raw_document: &RawDocument<'a, 'tag>, | ||||||
|  |     queries_kinds: &HashMap<QueryId, &QueryKind>, | ||||||
|     arena: &SmallArena<'tag, PostingsListView<'txn>>, |     arena: &SmallArena<'tag, PostingsListView<'txn>>, | ||||||
|     searchable_attrs: Option<&ReorderedAttrs>, |     searchable_attrs: Option<&ReorderedAttrs>, | ||||||
| ) -> Vec<Highlight> | ) -> Vec<Highlight> | ||||||
| @@ -56,14 +61,20 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>( | |||||||
|     for bm in raw_document.bare_matches.iter() { |     for bm in raw_document.bare_matches.iter() { | ||||||
|         let postings_list = &arena[bm.postings_list]; |         let postings_list = &arena[bm.postings_list]; | ||||||
|         let input = postings_list.input(); |         let input = postings_list.input(); | ||||||
|         // let query = &automatons[bm.query_index as usize].query; |         let kind = &queries_kinds.get(&bm.query_index); | ||||||
|  |  | ||||||
|         for di in postings_list.iter() { |         for di in postings_list.iter() { | ||||||
|             // let covered_area = if query.len() > input.len() { |             let covered_area = match kind { | ||||||
|             //     input.len() |                 Some(QueryKind::Exact(query)) | Some(QueryKind::Tolerant(query)) => { | ||||||
|             // } else { |                     let len = if query.len() > input.len() { | ||||||
|             //     prefix_damerau_levenshtein(query.as_bytes(), input).1 |                         input.len() | ||||||
|             // }; |                     } else { | ||||||
|  |                         prefix_damerau_levenshtein(query.as_bytes(), input).1 | ||||||
|  |                     }; | ||||||
|  |                     u16::try_from(len).unwrap_or(u16::max_value()) | ||||||
|  |                 }, | ||||||
|  |                 _ => di.char_length, | ||||||
|  |             }; | ||||||
|  |  | ||||||
|             let attribute = searchable_attrs |             let attribute = searchable_attrs | ||||||
|                 .and_then(|sa| sa.reverse(di.attribute)) |                 .and_then(|sa| sa.reverse(di.attribute)) | ||||||
| @@ -72,7 +83,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>( | |||||||
|             let highlight = Highlight { |             let highlight = Highlight { | ||||||
|                 attribute: attribute, |                 attribute: attribute, | ||||||
|                 char_index: di.char_index, |                 char_index: di.char_index, | ||||||
|                 char_length: di.char_length, |                 char_length: covered_area, | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
|             highlights.push(highlight); |             highlights.push(highlight); | ||||||
| @@ -96,12 +107,14 @@ impl Document { | |||||||
|     #[cfg(not(test))] |     #[cfg(not(test))] | ||||||
|     pub fn from_raw<'a, 'tag, 'txn>( |     pub fn from_raw<'a, 'tag, 'txn>( | ||||||
|         raw_document: RawDocument<'a, 'tag>, |         raw_document: RawDocument<'a, 'tag>, | ||||||
|  |         queries_kinds: &HashMap<QueryId, &QueryKind>, | ||||||
|         arena: &SmallArena<'tag, PostingsListView<'txn>>, |         arena: &SmallArena<'tag, PostingsListView<'txn>>, | ||||||
|         searchable_attrs: Option<&ReorderedAttrs>, |         searchable_attrs: Option<&ReorderedAttrs>, | ||||||
|     ) -> Document |     ) -> Document | ||||||
|     { |     { | ||||||
|         let highlights = highlights_from_raw_document( |         let highlights = highlights_from_raw_document( | ||||||
|             &raw_document, |             &raw_document, | ||||||
|  |             queries_kinds, | ||||||
|             arena, |             arena, | ||||||
|             searchable_attrs, |             searchable_attrs, | ||||||
|         ); |         ); | ||||||
| @@ -112,6 +125,7 @@ impl Document { | |||||||
|     #[cfg(test)] |     #[cfg(test)] | ||||||
|     pub fn from_raw<'a, 'tag, 'txn>( |     pub fn from_raw<'a, 'tag, 'txn>( | ||||||
|         raw_document: RawDocument<'a, 'tag>, |         raw_document: RawDocument<'a, 'tag>, | ||||||
|  |         queries_kinds: &HashMap<QueryId, &QueryKind>, | ||||||
|         arena: &SmallArena<'tag, PostingsListView<'txn>>, |         arena: &SmallArena<'tag, PostingsListView<'txn>>, | ||||||
|         searchable_attrs: Option<&ReorderedAttrs>, |         searchable_attrs: Option<&ReorderedAttrs>, | ||||||
|     ) -> Document |     ) -> Document | ||||||
| @@ -120,6 +134,7 @@ impl Document { | |||||||
|  |  | ||||||
|         let highlights = highlights_from_raw_document( |         let highlights = highlights_from_raw_document( | ||||||
|             &raw_document, |             &raw_document, | ||||||
|  |             queries_kinds, | ||||||
|             arena, |             arena, | ||||||
|             searchable_attrs, |             searchable_attrs, | ||||||
|         ); |         ); | ||||||
|   | |||||||
| @@ -285,7 +285,6 @@ pub struct PostingsKey<'o> { | |||||||
|     pub is_exact: bool, |     pub is_exact: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| pub type Distance = u8; |  | ||||||
| pub type Postings<'o, 'txn> = HashMap<PostingsKey<'o>, Cow<'txn, Set<DocIndex>>>; | pub type Postings<'o, 'txn> = HashMap<PostingsKey<'o>, Cow<'txn, Set<DocIndex>>>; | ||||||
| pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set<DocumentId>>>; | pub type Cache<'o, 'txn> = HashMap<&'o Operation, Cow<'txn, Set<DocumentId>>>; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user