mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	Fix several warnings in extractors and remove unreachable macros
This commit is contained in:
		| @@ -87,11 +87,11 @@ pub trait FacetedExtractor { | ||||
|     where | ||||
|         MF: MergeFunction, | ||||
|         MF::Error: Debug, | ||||
|         grenad::Error<MF::Error>: Into<crate::Error>, | ||||
|     { | ||||
|         buffer.clear(); | ||||
|         match Self::build_key(fid, value, buffer) { | ||||
|             // TODO manage errors | ||||
|             Some(key) => Ok(cache_fn(cached_sorter, &key, docid).unwrap()), | ||||
|             Some(key) => cache_fn(cached_sorter, &key, docid).map_err(Into::into), | ||||
|             None => Ok(()), | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use heed::RoTxn; | ||||
| @@ -25,12 +24,6 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|         Ok(vec![]) | ||||
|     } | ||||
|  | ||||
|     /// This case is unreachable because extract_document_change has been reimplemented to not call this function. | ||||
|     fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> { | ||||
|         /// TODO remove this | ||||
|         unreachable!() | ||||
|     } | ||||
|  | ||||
|     // This method is reimplemented to count the number of words in the document in each field | ||||
|     // and to store the docids of the documents whose number of words in a given field is less than or equal to MAX_COUNTED_WORDS. | ||||
|     fn extract_document_change( | ||||
| @@ -59,8 +52,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|                 for (fid, count) in fid_word_count.iter() { | ||||
|                     if *count <= MAX_COUNTED_WORDS { | ||||
|                         let key = build_key(*fid, *count as u8, &mut key_buffer); | ||||
|                         /// TODO manage the error | ||||
|                         cached_sorter.insert_del_u32(key, inner.docid()).unwrap(); | ||||
|                         cached_sorter.insert_del_u32(key, inner.docid())?; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
| @@ -93,13 +85,11 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|                     if *current_count != *new_count { | ||||
|                         if *current_count <= MAX_COUNTED_WORDS { | ||||
|                             let key = build_key(*fid, *current_count as u8, &mut key_buffer); | ||||
|                             /// TODO manage the error | ||||
|                             cached_sorter.insert_del_u32(key, inner.docid()).unwrap(); | ||||
|                             cached_sorter.insert_del_u32(key, inner.docid())?; | ||||
|                         } | ||||
|                         if *new_count <= MAX_COUNTED_WORDS { | ||||
|                             let key = build_key(*fid, *new_count as u8, &mut key_buffer); | ||||
|                             /// TODO manage the error | ||||
|                             cached_sorter.insert_add_u32(key, inner.docid()).unwrap(); | ||||
|                             cached_sorter.insert_add_u32(key, inner.docid())?; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
| @@ -116,8 +106,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|                 for (fid, count) in fid_word_count.iter() { | ||||
|                     if *count <= MAX_COUNTED_WORDS { | ||||
|                         let key = build_key(*fid, *count as u8, &mut key_buffer); | ||||
|                         /// TODO manage the error | ||||
|                         cached_sorter.insert_add_u32(key, inner.docid()).unwrap(); | ||||
|                         cached_sorter.insert_add_u32(key, inner.docid())?; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|   | ||||
| @@ -2,11 +2,93 @@ use std::borrow::Cow; | ||||
|  | ||||
| use heed::RoTxn; | ||||
|  | ||||
| use super::SearchableExtractor; | ||||
| use crate::{bucketed_position, FieldId, Index, Result}; | ||||
| use super::{tokenize_document::DocumentTokenizer, SearchableExtractor}; | ||||
| use crate::{ | ||||
|     bucketed_position, | ||||
|     update::{ | ||||
|         new::{extract::cache::CboCachedSorter, DocumentChange}, | ||||
|         MergeDeladdCboRoaringBitmaps, | ||||
|     }, | ||||
|     FieldId, GlobalFieldsIdsMap, Index, Result, | ||||
| }; | ||||
|  | ||||
| trait ProtoWordDocidsExtractor { | ||||
|     fn build_key(field_id: FieldId, position: u16, word: &str) -> Cow<'_, [u8]>; | ||||
|     fn attributes_to_extract<'a>( | ||||
|         _rtxn: &'a RoTxn, | ||||
|         _index: &'a Index, | ||||
|     ) -> Result<Option<Vec<&'a str>>>; | ||||
|  | ||||
|     fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>; | ||||
| } | ||||
|  | ||||
| impl<T> SearchableExtractor for T | ||||
| where | ||||
|     T: ProtoWordDocidsExtractor, | ||||
| { | ||||
|     fn extract_document_change( | ||||
|         rtxn: &RoTxn, | ||||
|         index: &Index, | ||||
|         document_tokenizer: &DocumentTokenizer, | ||||
|         fields_ids_map: &mut GlobalFieldsIdsMap, | ||||
|         cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>, | ||||
|         document_change: DocumentChange, | ||||
|     ) -> Result<()> { | ||||
|         match document_change { | ||||
|             DocumentChange::Deletion(inner) => { | ||||
|                 let mut token_fn = |fid, pos: u16, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document( | ||||
|                     inner.current(rtxn, index)?.unwrap(), | ||||
|                     fields_ids_map, | ||||
|                     &mut token_fn, | ||||
|                 )?; | ||||
|             } | ||||
|             DocumentChange::Update(inner) => { | ||||
|                 let mut token_fn = |fid, pos, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document( | ||||
|                     inner.current(rtxn, index)?.unwrap(), | ||||
|                     fields_ids_map, | ||||
|                     &mut token_fn, | ||||
|                 )?; | ||||
|  | ||||
|                 let mut token_fn = |fid, pos, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?; | ||||
|             } | ||||
|             DocumentChange::Insertion(inner) => { | ||||
|                 let mut token_fn = |fid, pos, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn attributes_to_extract<'a>( | ||||
|         rtxn: &'a RoTxn, | ||||
|         index: &'a Index, | ||||
|     ) -> Result<Option<Vec<&'a str>>> { | ||||
|         Self::attributes_to_extract(rtxn, index) | ||||
|     } | ||||
|  | ||||
|     fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> { | ||||
|         Self::attributes_to_skip(rtxn, index) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct WordDocidsExtractor; | ||||
| impl SearchableExtractor for WordDocidsExtractor { | ||||
| impl ProtoWordDocidsExtractor for WordDocidsExtractor { | ||||
|     fn attributes_to_extract<'a>( | ||||
|         rtxn: &'a RoTxn, | ||||
|         index: &'a Index, | ||||
| @@ -26,7 +108,7 @@ impl SearchableExtractor for WordDocidsExtractor { | ||||
| } | ||||
|  | ||||
| pub struct ExactWordDocidsExtractor; | ||||
| impl SearchableExtractor for ExactWordDocidsExtractor { | ||||
| impl ProtoWordDocidsExtractor for ExactWordDocidsExtractor { | ||||
|     fn attributes_to_extract<'a>( | ||||
|         rtxn: &'a RoTxn, | ||||
|         index: &'a Index, | ||||
| @@ -55,7 +137,7 @@ impl SearchableExtractor for ExactWordDocidsExtractor { | ||||
| } | ||||
|  | ||||
| pub struct WordFidDocidsExtractor; | ||||
| impl SearchableExtractor for WordFidDocidsExtractor { | ||||
| impl ProtoWordDocidsExtractor for WordFidDocidsExtractor { | ||||
|     fn attributes_to_extract<'a>( | ||||
|         rtxn: &'a RoTxn, | ||||
|         index: &'a Index, | ||||
| @@ -77,7 +159,7 @@ impl SearchableExtractor for WordFidDocidsExtractor { | ||||
| } | ||||
|  | ||||
| pub struct WordPositionDocidsExtractor; | ||||
| impl SearchableExtractor for WordPositionDocidsExtractor { | ||||
| impl ProtoWordDocidsExtractor for WordPositionDocidsExtractor { | ||||
|     fn attributes_to_extract<'a>( | ||||
|         rtxn: &'a RoTxn, | ||||
|         index: &'a Index, | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{BTreeMap, VecDeque}; | ||||
|  | ||||
| use heed::RoTxn; | ||||
| @@ -26,12 +25,6 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { | ||||
|         Ok(vec![]) | ||||
|     } | ||||
|  | ||||
|     /// This case is unreachable because extract_document_change has been reimplemented to not call this function. | ||||
|     fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> { | ||||
|         /// TODO remove this | ||||
|         unreachable!() | ||||
|     } | ||||
|  | ||||
|     // This method is reimplemented to work on word pair proximities instead of single tokens: | ||||
|     // for each (proximity, word1, word2) key it stores the docid as a deletion and/or an addition, skipping pairs whose proximity did not change. | ||||
|     fn extract_document_change( | ||||
| @@ -100,18 +93,18 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { | ||||
|             match eob { | ||||
|                 Left(((w1, w2), prox)) => { | ||||
|                     let key = build_key(*prox, w1, w2, &mut key_buffer); | ||||
|                     cached_sorter.insert_del_u32(key, docid).unwrap(); | ||||
|                     cached_sorter.insert_del_u32(key, docid)?; | ||||
|                 } | ||||
|                 Right(((w1, w2), prox)) => { | ||||
|                     let key = build_key(*prox, w1, w2, &mut key_buffer); | ||||
|                     cached_sorter.insert_add_u32(key, docid).unwrap(); | ||||
|                     cached_sorter.insert_add_u32(key, docid)?; | ||||
|                 } | ||||
|                 Both(((w1, w2), del_prox), (_, add_prox)) => { | ||||
|                     if del_prox != add_prox { | ||||
|                         let key = build_key(*del_prox, w1, w2, &mut key_buffer); | ||||
|                         cached_sorter.insert_del_u32(key, docid).unwrap(); | ||||
|                         cached_sorter.insert_del_u32(key, docid)?; | ||||
|                         let key = build_key(*add_prox, w1, w2, &mut key_buffer); | ||||
|                         cached_sorter.insert_add_u32(key, docid).unwrap(); | ||||
|                         cached_sorter.insert_add_u32(key, docid)?; | ||||
|                     } | ||||
|                 } | ||||
|             }; | ||||
|   | ||||
| @@ -3,7 +3,6 @@ mod extract_word_docids; | ||||
| mod extract_word_pair_proximity_docids; | ||||
| mod tokenize_document; | ||||
|  | ||||
| use std::borrow::Cow; | ||||
| use std::fs::File; | ||||
|  | ||||
| pub use extract_fid_word_count_docids::FidWordCountDocidsExtractor; | ||||
| @@ -20,7 +19,7 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer}; | ||||
| use super::cache::CboCachedSorter; | ||||
| use crate::update::new::{DocumentChange, ItemsPool}; | ||||
| use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; | ||||
| use crate::{FieldId, GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE}; | ||||
| use crate::{GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE}; | ||||
|  | ||||
| pub trait SearchableExtractor { | ||||
|     fn run_extraction( | ||||
| @@ -109,60 +108,10 @@ pub trait SearchableExtractor { | ||||
|         fields_ids_map: &mut GlobalFieldsIdsMap, | ||||
|         cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>, | ||||
|         document_change: DocumentChange, | ||||
|     ) -> Result<()> { | ||||
|         match document_change { | ||||
|             DocumentChange::Deletion(inner) => { | ||||
|                 let mut token_fn = |fid, pos: u16, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     /// TODO manage the error | ||||
|                     cached_sorter.insert_del_u32(&key, inner.docid()).unwrap(); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document( | ||||
|                     inner.current(rtxn, index)?.unwrap(), | ||||
|                     fields_ids_map, | ||||
|                     &mut token_fn, | ||||
|                 )?; | ||||
|             } | ||||
|             DocumentChange::Update(inner) => { | ||||
|                 let mut token_fn = |fid, pos, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     /// TODO manage the error | ||||
|                     cached_sorter.insert_del_u32(&key, inner.docid()).unwrap(); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document( | ||||
|                     inner.current(rtxn, index)?.unwrap(), | ||||
|                     fields_ids_map, | ||||
|                     &mut token_fn, | ||||
|                 )?; | ||||
|  | ||||
|                 let mut token_fn = |fid, pos, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     /// TODO manage the error | ||||
|                     cached_sorter.insert_add_u32(&key, inner.docid()).unwrap(); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?; | ||||
|             } | ||||
|             DocumentChange::Insertion(inner) => { | ||||
|                 let mut token_fn = |fid, pos, word: &str| { | ||||
|                     let key = Self::build_key(fid, pos, word); | ||||
|                     /// TODO manage the error | ||||
|                     cached_sorter.insert_add_u32(&key, inner.docid()).unwrap(); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
|                 document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|     ) -> Result<()>; | ||||
|  | ||||
|     fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) | ||||
|         -> Result<Option<Vec<&'a str>>>; | ||||
|  | ||||
|     fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>; | ||||
|  | ||||
|     fn build_key(field_id: FieldId, position: u16, word: &str) -> Cow<'_, [u8]>; | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user