mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Change the way we filter the documents
This commit is contained in:
		| @@ -96,18 +96,28 @@ where | |||||||
|  |  | ||||||
|     let mut bare_matches = Vec::new(); |     let mut bare_matches = Vec::new(); | ||||||
|     mk_arena!(arena); |     mk_arena!(arena); | ||||||
|  |  | ||||||
|     for ((query, input, distance), matches) in queries { |     for ((query, input, distance), matches) in queries { | ||||||
|  |  | ||||||
|         let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); |         let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); | ||||||
|         // TODO optimize the filter by skipping docids that have already been seen |  | ||||||
|         let mut offset = 0; |         let mut offset = 0; | ||||||
|         for matches in postings_list_view.linear_group_by_key(|m| m.document_id) { |         for id in docids.as_slice() { | ||||||
|             let document_id = matches[0].document_id; |             let di = DocIndex { document_id: *id, ..DocIndex::default() }; | ||||||
|             if docids.contains(&document_id) { |             let pos = postings_list_view[offset..].binary_search(&di).unwrap_or_else(|x| x); | ||||||
|                 let range = postings_list_view.range(offset, matches.len()); |  | ||||||
|  |             let group = postings_list_view[offset + pos..] | ||||||
|  |                 .linear_group_by_key(|m| m.document_id) | ||||||
|  |                 .next() | ||||||
|  |                 .filter(|matches| matches[0].document_id == *id); | ||||||
|  |  | ||||||
|  |             offset += pos; | ||||||
|  |  | ||||||
|  |             if let Some(matches) = group { | ||||||
|  |                 let range = postings_list_view.range(pos, matches.len()); | ||||||
|                 let posting_list_index = arena.add(range); |                 let posting_list_index = arena.add(range); | ||||||
|  |  | ||||||
|                 let bare_match = BareMatch { |                 let bare_match = BareMatch { | ||||||
|                     document_id, |                     document_id: *id, | ||||||
|                     query_index: query.id, |                     query_index: query.id, | ||||||
|                     distance: distance, |                     distance: distance, | ||||||
|                     is_exact: true, // TODO where can I find this info? |                     is_exact: true, // TODO where can I find this info? | ||||||
| @@ -116,8 +126,6 @@ where | |||||||
|  |  | ||||||
|                 bare_matches.push(bare_match); |                 bare_matches.push(bare_match); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             offset += matches.len(); |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -245,8 +245,7 @@ fn multiword_rewrite_matches( | |||||||
|                                 if !found { |                                 if !found { | ||||||
|                                     // if we find a corresponding padding for the |                                     // if we find a corresponding padding for the | ||||||
|                                     // first time we must push preceding paddings |                                     // first time we must push preceding paddings | ||||||
|                                     for (i, query_index) in replacement.clone().enumerate().take(i) |                                     for (i, query_index) in replacement.clone().enumerate().take(i) { | ||||||
|                                     { |  | ||||||
|                                         let word_index = match_.word_index + padding as u16 + (i + 1) as u16; |                                         let word_index = match_.word_index + padding as u16 + (i + 1) as u16; | ||||||
|                                         let match_ = SimpleMatch { query_index, word_index, ..*match_ }; |                                         let match_ = SimpleMatch { query_index, word_index, ..*match_ }; | ||||||
|                                         padded_matches.push(match_); |                                         padded_matches.push(match_); | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; | |||||||
| /// | /// | ||||||
| /// It is used to inform the database the document you want to deserialize. | /// It is used to inform the database the document you want to deserialize. | ||||||
| /// Helpful for custom ranking. | /// Helpful for custom ranking. | ||||||
| #[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] | #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] | ||||||
| #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] | #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] | ||||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||||
| #[repr(C)] | #[repr(C)] | ||||||
| @@ -19,7 +19,7 @@ pub struct DocumentId(pub u64); | |||||||
| /// | /// | ||||||
| /// This is stored in the map, generated at index time, | /// This is stored in the map, generated at index time, | ||||||
| /// extracted and interpreted at search time. | /// extracted and interpreted at search time. | ||||||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] | #[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
| #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] | #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] | ||||||
| #[repr(C)] | #[repr(C)] | ||||||
| pub struct DocIndex { | pub struct DocIndex { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user