mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	Use the word pair proximity and fid word count docids extractors
Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -112,23 +112,27 @@ pub struct WriterOperation { | ||||
| } | ||||
|  | ||||
| pub enum Database { | ||||
|     WordDocids, | ||||
|     ExactWordDocids, | ||||
|     WordFidDocids, | ||||
|     WordPositionDocids, | ||||
|     Documents, | ||||
|     ExactWordDocids, | ||||
|     FidWordCountDocids, | ||||
|     Main, | ||||
|     WordDocids, | ||||
|     WordFidDocids, | ||||
|     WordPairProximityDocids, | ||||
|     WordPositionDocids, | ||||
| } | ||||
|  | ||||
| impl WriterOperation { | ||||
|     pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> { | ||||
|         match self.database { | ||||
|             Database::Main => index.main.remap_types(), | ||||
|             Database::Documents => index.documents.remap_types(), | ||||
|             Database::WordDocids => index.word_docids.remap_types(), | ||||
|             Database::ExactWordDocids => index.exact_word_docids.remap_types(), | ||||
|             Database::Main => index.main.remap_types(), | ||||
|             Database::WordDocids => index.word_docids.remap_types(), | ||||
|             Database::WordFidDocids => index.word_fid_docids.remap_types(), | ||||
|             Database::WordPositionDocids => index.word_position_docids.remap_types(), | ||||
|             Database::FidWordCountDocids => index.field_id_word_count_docids.remap_types(), | ||||
|             Database::WordPairProximityDocids => index.word_pair_proximity_docids.remap_types(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -198,9 +202,11 @@ impl MainSender<'_> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub enum WordDocids {} | ||||
| pub enum ExactWordDocids {} | ||||
| pub enum FidWordCountDocids {} | ||||
| pub enum WordDocids {} | ||||
| pub enum WordFidDocids {} | ||||
| pub enum WordPairProximityDocids {} | ||||
| pub enum WordPositionDocids {} | ||||
|  | ||||
| pub trait DatabaseType { | ||||
| @@ -209,14 +215,6 @@ pub trait DatabaseType { | ||||
|     fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation; | ||||
| } | ||||
|  | ||||
| impl DatabaseType for WordDocids { | ||||
|     const DATABASE: Database = Database::WordDocids; | ||||
|  | ||||
|     fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { | ||||
|         MergerOperation::WordDocidsMerger(merger) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DatabaseType for ExactWordDocids { | ||||
|     const DATABASE: Database = Database::ExactWordDocids; | ||||
|  | ||||
| @@ -225,6 +223,22 @@ impl DatabaseType for ExactWordDocids { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DatabaseType for FidWordCountDocids { | ||||
|     const DATABASE: Database = Database::FidWordCountDocids; | ||||
|  | ||||
|     fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { | ||||
|         MergerOperation::FidWordCountDocidsMerger(merger) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DatabaseType for WordDocids { | ||||
|     const DATABASE: Database = Database::WordDocids; | ||||
|  | ||||
|     fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { | ||||
|         MergerOperation::WordDocidsMerger(merger) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DatabaseType for WordFidDocids { | ||||
|     const DATABASE: Database = Database::WordFidDocids; | ||||
|  | ||||
| @@ -233,6 +247,14 @@ impl DatabaseType for WordFidDocids { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DatabaseType for WordPairProximityDocids { | ||||
|     const DATABASE: Database = Database::WordPairProximityDocids; | ||||
|  | ||||
|     fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { | ||||
|         MergerOperation::WordPairProximityDocidsMerger(merger) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DatabaseType for WordPositionDocids { | ||||
|     const DATABASE: Database = Database::WordPositionDocids; | ||||
|  | ||||
| @@ -293,12 +315,14 @@ impl DocumentsSender<'_> { | ||||
| } | ||||
|  | ||||
| pub enum MergerOperation { | ||||
|     WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), | ||||
|     ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), | ||||
|     FidWordCountDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), | ||||
|     WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), | ||||
|     WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), | ||||
|     WordPairProximityDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), | ||||
|     WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), | ||||
|     InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> }, | ||||
|     DeleteDocument { docid: DocumentId }, | ||||
|     InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> }, | ||||
| } | ||||
|  | ||||
| pub struct MergerReceiver(Receiver<MergerOperation>); | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| use serde_json::Value; | ||||
|  | ||||
| use crate::update::new::extract::perm_json_p; | ||||
| use crate::update::new::KvReaderFieldId; | ||||
| use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError}; | ||||
|  | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::collections::HashSet; | ||||
| use std::fmt::Debug; | ||||
| use std::fs::File; | ||||
|  | ||||
| pub use extract_facets::*; | ||||
| use grenad::{MergeFunction, Merger}; | ||||
| use heed::RoTxn; | ||||
| use rayon::iter::{IntoParallelIterator, ParallelIterator}; | ||||
|   | ||||
| @@ -2,16 +2,8 @@ mod cache; | ||||
| mod faceted; | ||||
| mod searchable; | ||||
|  | ||||
| pub use faceted::modname::{ | ||||
|     FieldIdFacetExistsDocidsExtractor, FieldIdFacetIsEmptyDocidsExtractor, | ||||
|     FieldIdFacetIsNullDocidsExtractor, FieldIdFacetNumberDocidsExtractor, | ||||
|     FieldIdFacetStringDocidsExtractor, | ||||
| }; | ||||
| pub use faceted::FacetedExtractor; | ||||
| pub use searchable::{ | ||||
|     ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor, | ||||
|     WordPositionDocidsExtractor, | ||||
| }; | ||||
| pub use faceted::*; | ||||
| pub use searchable::*; | ||||
|  | ||||
| /// TODO move in permissive json pointer | ||||
| pub mod perm_json_p { | ||||
|   | ||||
| @@ -1,15 +1,14 @@ | ||||
| use std::{borrow::Cow, collections::HashMap}; | ||||
| use std::borrow::Cow; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use heed::RoTxn; | ||||
|  | ||||
| use super::{tokenize_document::DocumentTokenizer, SearchableExtractor}; | ||||
| use crate::{ | ||||
|     update::{ | ||||
|         new::{extract::cache::CboCachedSorter, DocumentChange}, | ||||
|         MergeDeladdCboRoaringBitmaps, | ||||
|     }, | ||||
|     FieldId, GlobalFieldsIdsMap, Index, Result, | ||||
| }; | ||||
| use super::tokenize_document::DocumentTokenizer; | ||||
| use super::SearchableExtractor; | ||||
| use crate::update::new::extract::cache::CboCachedSorter; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::MergeDeladdCboRoaringBitmaps; | ||||
| use crate::{FieldId, GlobalFieldsIdsMap, Index, Result}; | ||||
|  | ||||
| const MAX_COUNTED_WORDS: usize = 30; | ||||
|  | ||||
| @@ -22,12 +21,13 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|         index.user_defined_searchable_fields(rtxn).map_err(Into::into) | ||||
|     } | ||||
|  | ||||
|     fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> { | ||||
|     fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> { | ||||
|         Ok(vec![]) | ||||
|     } | ||||
|  | ||||
|     /// This case is unreachable because extract_document_change has been reimplemented to not call this function. | ||||
|     fn build_key<'a>(_field_id: FieldId, _position: u16, _word: &'a str) -> Cow<'a, [u8]> { | ||||
|     fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> { | ||||
|         /// TODO remove this | ||||
|         unreachable!() | ||||
|     } | ||||
|  | ||||
| @@ -45,7 +45,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|         match document_change { | ||||
|             DocumentChange::Deletion(inner) => { | ||||
|                 let mut fid_word_count = HashMap::new(); | ||||
|                 let mut token_fn = |fid: FieldId, pos: u16, word: &str| { | ||||
|                 let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| { | ||||
|                     fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
| @@ -66,10 +66,10 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|             } | ||||
|             DocumentChange::Update(inner) => { | ||||
|                 let mut fid_word_count = HashMap::new(); | ||||
|                 let mut token_fn = |fid: FieldId, pos: u16, word: &str| { | ||||
|                 let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| { | ||||
|                     fid_word_count | ||||
|                         .entry(fid) | ||||
|                         .and_modify(|(current_count, new_count)| *current_count += 1) | ||||
|                         .and_modify(|(current_count, _new_count)| *current_count += 1) | ||||
|                         .or_insert((1, 0)); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
| @@ -79,10 +79,10 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|                     &mut token_fn, | ||||
|                 )?; | ||||
|  | ||||
|                 let mut token_fn = |fid: FieldId, pos: u16, word: &str| { | ||||
|                 let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| { | ||||
|                     fid_word_count | ||||
|                         .entry(fid) | ||||
|                         .and_modify(|(current_count, new_count)| *new_count += 1) | ||||
|                         .and_modify(|(_current_count, new_count)| *new_count += 1) | ||||
|                         .or_insert((0, 1)); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
| @@ -106,7 +106,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor { | ||||
|             } | ||||
|             DocumentChange::Insertion(inner) => { | ||||
|                 let mut fid_word_count = HashMap::new(); | ||||
|                 let mut token_fn = |fid: FieldId, pos: u16, word: &str| { | ||||
|                 let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| { | ||||
|                     fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1); | ||||
|                     Ok(()) | ||||
|                 }; | ||||
|   | ||||
| @@ -20,7 +20,7 @@ impl SearchableExtractor for WordDocidsExtractor { | ||||
|     } | ||||
|  | ||||
|     /// TODO write in an external Vec buffer | ||||
|     fn build_key<'a>(_field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> { | ||||
|     fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> { | ||||
|         Cow::Borrowed(word.as_bytes()) | ||||
|     } | ||||
| } | ||||
| @@ -49,7 +49,7 @@ impl SearchableExtractor for ExactWordDocidsExtractor { | ||||
|         Ok(vec![]) | ||||
|     } | ||||
|  | ||||
|     fn build_key<'a>(_field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> { | ||||
|     fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> { | ||||
|         Cow::Borrowed(word.as_bytes()) | ||||
|     } | ||||
| } | ||||
| @@ -67,7 +67,7 @@ impl SearchableExtractor for WordFidDocidsExtractor { | ||||
|         Ok(vec![]) | ||||
|     } | ||||
|  | ||||
|     fn build_key<'a>(field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> { | ||||
|     fn build_key(field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> { | ||||
|         let mut key = Vec::new(); | ||||
|         key.extend_from_slice(word.as_bytes()); | ||||
|         key.push(0); | ||||
| @@ -89,7 +89,7 @@ impl SearchableExtractor for WordPositionDocidsExtractor { | ||||
|         Ok(vec![]) | ||||
|     } | ||||
|  | ||||
|     fn build_key<'a>(_field_id: FieldId, position: u16, word: &'a str) -> Cow<'a, [u8]> { | ||||
|     fn build_key(_field_id: FieldId, position: u16, word: &str) -> Cow<[u8]> { | ||||
|         // position must be bucketed to reduce the number of keys in the DB. | ||||
|         let position = bucketed_position(position); | ||||
|         let mut key = Vec::new(); | ||||
|   | ||||
| @@ -1,21 +1,17 @@ | ||||
| use std::{ | ||||
|     borrow::Cow, | ||||
|     collections::{BTreeMap, VecDeque}, | ||||
| }; | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{BTreeMap, VecDeque}; | ||||
|  | ||||
| use heed::RoTxn; | ||||
| use itertools::merge_join_by; | ||||
| use obkv::KvReader; | ||||
|  | ||||
| use super::{tokenize_document::DocumentTokenizer, SearchableExtractor}; | ||||
| use crate::{ | ||||
|     proximity::{index_proximity, MAX_DISTANCE}, | ||||
|     update::{ | ||||
|         new::{extract::cache::CboCachedSorter, DocumentChange}, | ||||
|         MergeDeladdCboRoaringBitmaps, | ||||
|     }, | ||||
|     FieldId, GlobalFieldsIdsMap, Index, Result, | ||||
| }; | ||||
| use super::tokenize_document::DocumentTokenizer; | ||||
| use super::SearchableExtractor; | ||||
| use crate::proximity::{index_proximity, MAX_DISTANCE}; | ||||
| use crate::update::new::extract::cache::CboCachedSorter; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::MergeDeladdCboRoaringBitmaps; | ||||
| use crate::{FieldId, GlobalFieldsIdsMap, Index, Result}; | ||||
|  | ||||
| pub struct WordPairProximityDocidsExtractor; | ||||
| impl SearchableExtractor for WordPairProximityDocidsExtractor { | ||||
| @@ -26,12 +22,13 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { | ||||
|         index.user_defined_searchable_fields(rtxn).map_err(Into::into) | ||||
|     } | ||||
|  | ||||
|     fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> { | ||||
|     fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> { | ||||
|         Ok(vec![]) | ||||
|     } | ||||
|  | ||||
|     /// This case is unreachable because extract_document_change has been reimplemented to not call this function. | ||||
|     fn build_key<'a>(_field_id: FieldId, _position: u16, _word: &'a str) -> Cow<'a, [u8]> { | ||||
|     fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> { | ||||
|         /// TODO remove this | ||||
|         unreachable!() | ||||
|     } | ||||
|  | ||||
| @@ -159,7 +156,7 @@ fn process_document_tokens( | ||||
|     word_positions: &mut VecDeque<(String, u16)>, | ||||
|     word_pair_proximity: &mut BTreeMap<(String, String), u8>, | ||||
| ) -> Result<()> { | ||||
|     let mut token_fn = |fid: FieldId, pos: u16, word: &str| { | ||||
|     let mut token_fn = |_fid: FieldId, pos: u16, word: &str| { | ||||
|         // drain the proximity window until the head word is considered close to the word we are inserting. | ||||
|         while word_positions | ||||
|             .front() | ||||
|   | ||||
| @@ -11,15 +11,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; | ||||
| use rayon::ThreadPool; | ||||
| pub use update_by_function::UpdateByFunction; | ||||
|  | ||||
| use super::channel::{ | ||||
|     extractors_merger_channels, merger_writer_channel, EntryOperation, ExactWordDocids, WordDocids, | ||||
|     WordFidDocids, WordPositionDocids, | ||||
| }; | ||||
| use super::channel::*; | ||||
| use super::document_change::DocumentChange; | ||||
| use super::extract::{ | ||||
|     ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor, | ||||
|     WordPositionDocidsExtractor, | ||||
| }; | ||||
| use super::extract::*; | ||||
| use super::merger::merge_grenad_entries; | ||||
| use super::StdResult; | ||||
| use crate::documents::{ | ||||
| @@ -71,79 +65,98 @@ where | ||||
|         // TODO manage the errors correctly | ||||
|         let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { | ||||
|             pool.in_place_scope(|_s| { | ||||
|                 let document_changes = document_changes.into_par_iter(); | ||||
|                     let document_changes = document_changes.into_par_iter(); | ||||
|  | ||||
|                 // document but we need to create a function that collects and compresses documents. | ||||
|                 document_changes.clone().into_par_iter().try_for_each(|result| { | ||||
|                     match result? { | ||||
|                         DocumentChange::Deletion(deletion) => { | ||||
|                             let docid = deletion.docid(); | ||||
|                             extractor_sender.document_delete(docid).unwrap(); | ||||
|                     // document but we need to create a function that collects and compresses documents. | ||||
|                     document_changes.clone().into_par_iter().try_for_each(|result| { | ||||
|                         match result? { | ||||
|                             DocumentChange::Deletion(deletion) => { | ||||
|                                 let docid = deletion.docid(); | ||||
|                                 extractor_sender.document_delete(docid).unwrap(); | ||||
|                             } | ||||
|                             DocumentChange::Update(update) => { | ||||
|                                 let docid = update.docid(); | ||||
|                                 let content = update.new(); | ||||
|                                 extractor_sender.document_insert(docid, content.boxed()).unwrap(); | ||||
|                             } | ||||
|                             DocumentChange::Insertion(insertion) => { | ||||
|                                 let docid = insertion.docid(); | ||||
|                                 let content = insertion.new(); | ||||
|                                 extractor_sender.document_insert(docid, content.boxed()).unwrap(); | ||||
|                                 // extracted_dictionary_sender.send(self, dictionary: &[u8]); | ||||
|                             } | ||||
|                         } | ||||
|                         DocumentChange::Update(update) => { | ||||
|                             let docid = update.docid(); | ||||
|                             let content = update.new(); | ||||
|                             extractor_sender.document_insert(docid, content.boxed()).unwrap(); | ||||
|                         } | ||||
|                         DocumentChange::Insertion(insertion) => { | ||||
|                             let docid = insertion.docid(); | ||||
|                             let content = insertion.new(); | ||||
|                             extractor_sender.document_insert(docid, content.boxed()).unwrap(); | ||||
|                             // extracted_dictionary_sender.send(self, dictionary: &[u8]); | ||||
|                         } | ||||
|                     } | ||||
|                         Ok(()) as Result<_> | ||||
|                     })?; | ||||
|  | ||||
|                     extract_and_send_docids::<WordDocidsExtractor, WordDocids>( | ||||
|                         index, | ||||
|                         &global_fields_ids_map, | ||||
|                         GrenadParameters::default(), | ||||
|                         document_changes.clone(), | ||||
|                         &extractor_sender, | ||||
|                     )?; | ||||
|  | ||||
|                     extract_and_send_docids::<WordFidDocidsExtractor, WordFidDocids>( | ||||
|                         index, | ||||
|                         &global_fields_ids_map, | ||||
|                         GrenadParameters::default(), | ||||
|                         document_changes.clone(), | ||||
|                         &extractor_sender, | ||||
|                     )?; | ||||
|  | ||||
|                     extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>( | ||||
|                         index, | ||||
|                         &global_fields_ids_map, | ||||
|                         GrenadParameters::default(), | ||||
|                         document_changes.clone(), | ||||
|                         &extractor_sender, | ||||
|                     )?; | ||||
|  | ||||
|                     extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>( | ||||
|                         index, | ||||
|                         &global_fields_ids_map, | ||||
|                         GrenadParameters::default(), | ||||
|                         document_changes.clone(), | ||||
|                         &extractor_sender, | ||||
|                     )?; | ||||
|  | ||||
|                     extract_and_send_docids::<FidWordCountDocidsExtractor, FidWordCountDocids>( | ||||
|                         index, | ||||
|                         &global_fields_ids_map, | ||||
|                         GrenadParameters::default(), | ||||
|                         document_changes.clone(), | ||||
|                         &extractor_sender, | ||||
|                     )?; | ||||
|  | ||||
|                     extract_and_send_docids::< | ||||
|                         WordPairProximityDocidsExtractor, | ||||
|                         WordPairProximityDocids, | ||||
|                     >( | ||||
|                         index, | ||||
|                         &global_fields_ids_map, | ||||
|                         GrenadParameters::default(), | ||||
|                         document_changes.clone(), | ||||
|                         &extractor_sender, | ||||
|                     )?; | ||||
|  | ||||
|                     // TODO THIS IS TOO MUCH | ||||
|                     // Extract fieldid docid facet number | ||||
|                     // Extract fieldid docid facet string | ||||
|                     // Extract facetid string fst | ||||
|                     // Extract facetid normalized string strings | ||||
|  | ||||
|                     // TODO Inverted Indexes again | ||||
|                     // Extract fieldid facet isempty docids | ||||
|                     // Extract fieldid facet isnull docids | ||||
|                     // Extract fieldid facet exists docids | ||||
|  | ||||
|                     // TODO This is the normal system | ||||
|                     // Extract fieldid facet number docids | ||||
|                     // Extract fieldid facet string docids | ||||
|  | ||||
|                     Ok(()) as Result<_> | ||||
|                 })?; | ||||
|  | ||||
|                 extract_and_send_docids::<WordDocidsExtractor, WordDocids>( | ||||
|                     index, | ||||
|                     &global_fields_ids_map, | ||||
|                     GrenadParameters::default(), | ||||
|                     document_changes.clone(), | ||||
|                     &extractor_sender, | ||||
|                 )?; | ||||
|  | ||||
|                 extract_and_send_docids::<WordFidDocidsExtractor, WordFidDocids>( | ||||
|                     index, | ||||
|                     &global_fields_ids_map, | ||||
|                     GrenadParameters::default(), | ||||
|                     document_changes.clone(), | ||||
|                     &extractor_sender, | ||||
|                 )?; | ||||
|  | ||||
|                 extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>( | ||||
|                     index, | ||||
|                     &global_fields_ids_map, | ||||
|                     GrenadParameters::default(), | ||||
|                     document_changes.clone(), | ||||
|                     &extractor_sender, | ||||
|                 )?; | ||||
|  | ||||
|                 extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>( | ||||
|                     index, | ||||
|                     &global_fields_ids_map, | ||||
|                     GrenadParameters::default(), | ||||
|                     document_changes.clone(), | ||||
|                     &extractor_sender, | ||||
|                 )?; | ||||
|  | ||||
|                 // TODO THIS IS TOO MUCH | ||||
|                 // Extract fieldid docid facet number | ||||
|                 // Extract fieldid docid facet string | ||||
|                 // Extract facetid string fst | ||||
|                 // Extract facetid normalized string strings | ||||
|  | ||||
|                 // TODO Inverted Indexes again | ||||
|                 // Extract fieldid facet isempty docids | ||||
|                 // Extract fieldid facet isnull docids | ||||
|                 // Extract fieldid facet exists docids | ||||
|  | ||||
|                 // TODO This is the normal system | ||||
|                 // Extract fieldid facet number docids | ||||
|                 // Extract fieldid facet string docids | ||||
|  | ||||
|                 Ok(()) as Result<_> | ||||
|             }) | ||||
|                 }) | ||||
|         })?; | ||||
|  | ||||
|         // TODO manage the errors correctly | ||||
|   | ||||
| @@ -8,10 +8,7 @@ use memmap2::Mmap; | ||||
| use roaring::RoaringBitmap; | ||||
| use tempfile::tempfile; | ||||
|  | ||||
| use super::channel::{ | ||||
|     DatabaseType, DocidsSender, ExactWordDocids, MergerReceiver, MergerSender, WordDocids, | ||||
|     WordFidDocids, WordPositionDocids, | ||||
| }; | ||||
| use super::channel::*; | ||||
| use super::KvReaderDelAdd; | ||||
| use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::channel::MergerOperation; | ||||
| @@ -30,6 +27,29 @@ pub fn merge_grenad_entries( | ||||
|  | ||||
|     for merger_operation in receiver { | ||||
|         match merger_operation { | ||||
|             MergerOperation::ExactWordDocidsMerger(merger) => { | ||||
|                 merge_and_send_docids( | ||||
|                     merger, | ||||
|                     /// TODO do a MergerOperation::database(&Index) -> Database<Bytes, Bytes>. | ||||
|                     index.exact_word_docids.remap_types(), | ||||
|                     rtxn, | ||||
|                     &mut buffer, | ||||
|                     sender.docids::<ExactWordDocids>(), | ||||
|                     |_key| Ok(()), | ||||
|                     |_key| Ok(()), | ||||
|                 )?; | ||||
|             } | ||||
|             MergerOperation::FidWordCountDocidsMerger(merger) => { | ||||
|                 merge_and_send_docids( | ||||
|                     merger, | ||||
|                     index.field_id_word_count_docids.remap_types(), | ||||
|                     rtxn, | ||||
|                     &mut buffer, | ||||
|                     sender.docids::<FidWordCountDocids>(), | ||||
|                     |_key| Ok(()), | ||||
|                     |_key| Ok(()), | ||||
|                 )?; | ||||
|             } | ||||
|             MergerOperation::WordDocidsMerger(merger) => { | ||||
|                 let mut add_words_fst = SetBuilder::new(tempfile()?)?; | ||||
|                 let mut del_words_fst = SetBuilder::new(tempfile()?)?; | ||||
| @@ -49,17 +69,6 @@ pub fn merge_grenad_entries( | ||||
|                 let mmap = compute_new_words_fst(add_words_fst, del_words_fst, words_fst)?; | ||||
|                 sender.main().write_words_fst(mmap).unwrap(); | ||||
|             } | ||||
|             MergerOperation::ExactWordDocidsMerger(merger) => { | ||||
|                 merge_and_send_docids( | ||||
|                     merger, | ||||
|                     index.exact_word_docids.remap_types(), | ||||
|                     rtxn, | ||||
|                     &mut buffer, | ||||
|                     sender.docids::<ExactWordDocids>(), | ||||
|                     |_key| Ok(()), | ||||
|                     |_key| Ok(()), | ||||
|                 )?; | ||||
|             } | ||||
|             MergerOperation::WordFidDocidsMerger(merger) => { | ||||
|                 merge_and_send_docids( | ||||
|                     merger, | ||||
| @@ -71,6 +80,17 @@ pub fn merge_grenad_entries( | ||||
|                     |_key| Ok(()), | ||||
|                 )?; | ||||
|             } | ||||
|             MergerOperation::WordPairProximityDocidsMerger(merger) => { | ||||
|                 merge_and_send_docids( | ||||
|                     merger, | ||||
|                     index.word_pair_proximity_docids.remap_types(), | ||||
|                     rtxn, | ||||
|                     &mut buffer, | ||||
|                     sender.docids::<WordPairProximityDocids>(), | ||||
|                     |_key| Ok(()), | ||||
|                     |_key| Ok(()), | ||||
|                 )?; | ||||
|             } | ||||
|             MergerOperation::WordPositionDocidsMerger(merger) => { | ||||
|                 merge_and_send_docids( | ||||
|                     merger, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user