mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Apply suggestions from code review
This commit is contained in:
		| @@ -528,7 +528,13 @@ where | ||||
|  | ||||
|         if let Some(word_pair_proximity_docids) = word_pair_proximity_docids { | ||||
|             // Run the word prefix pair proximity docids update operation. | ||||
|             PrefixWordPairsProximityDocids::new(self.wtxn, self.index).execute( | ||||
|             PrefixWordPairsProximityDocids::new( | ||||
|                 self.wtxn, | ||||
|                 self.index, | ||||
|                 self.indexer_config.chunk_compression_type, | ||||
|                 self.indexer_config.chunk_compression_level, | ||||
|             ) | ||||
|             .execute( | ||||
|                 word_pair_proximity_docids, | ||||
|                 &new_prefix_fst_words, | ||||
|                 &common_prefix_fst_words, | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::borrow::Cow; | ||||
| use std::collections::HashSet; | ||||
| use std::io::BufReader; | ||||
|  | ||||
| use grenad::CompressionType; | ||||
| use heed::types::ByteSlice; | ||||
|  | ||||
| use super::index_documents::{merge_cbo_roaring_bitmaps, CursorClonableMmap}; | ||||
| @@ -18,10 +19,24 @@ pub struct PrefixWordPairsProximityDocids<'t, 'u, 'i> { | ||||
|     index: &'i Index, | ||||
|     max_proximity: u8, | ||||
|     max_prefix_length: usize, | ||||
|     chunk_compression_type: CompressionType, | ||||
|     chunk_compression_level: Option<u32>, | ||||
| } | ||||
| impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> { | ||||
|     pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> Self { | ||||
|         Self { wtxn, index, max_proximity: 4, max_prefix_length: 2 } | ||||
|     pub fn new( | ||||
|         wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
|         index: &'i Index, | ||||
|         chunk_compression_type: CompressionType, | ||||
|         chunk_compression_level: Option<u32>, | ||||
|     ) -> Self { | ||||
|         Self { | ||||
|             wtxn, | ||||
|             index, | ||||
|             max_proximity: 4, | ||||
|             max_prefix_length: 2, | ||||
|             chunk_compression_type, | ||||
|             chunk_compression_level, | ||||
|         } | ||||
|     } | ||||
|     /// Set the maximum proximity required to make a prefix be part of the words prefixes | ||||
|     /// database. If two words are too far from the threshold the associated documents will | ||||
| @@ -42,6 +57,7 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> { | ||||
|         self.max_prefix_length = value; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     #[logging_timer::time("WordPrefixPairProximityDocids::{}")] | ||||
|     pub fn execute<'a>( | ||||
|         self, | ||||
| @@ -60,6 +76,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> { | ||||
|             new_prefix_fst_words, | ||||
|             common_prefix_fst_words, | ||||
|             del_prefix_fst_words, | ||||
|             self.chunk_compression_type, | ||||
|             self.chunk_compression_level, | ||||
|         )?; | ||||
|  | ||||
|         index_prefix_word_database( | ||||
| @@ -72,6 +90,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> { | ||||
|             new_prefix_fst_words, | ||||
|             common_prefix_fst_words, | ||||
|             del_prefix_fst_words, | ||||
|             self.chunk_compression_type, | ||||
|             self.chunk_compression_level, | ||||
|         )?; | ||||
|  | ||||
|         Ok(()) | ||||
|   | ||||
| @@ -23,6 +23,8 @@ pub fn index_prefix_word_database( | ||||
|     new_prefix_fst_words: &[String], | ||||
|     common_prefix_fst_words: &[&[String]], | ||||
|     del_prefix_fst_words: &HashSet<Vec<u8>>, | ||||
|     chunk_compression_type: CompressionType, | ||||
|     chunk_compression_level: Option<u32>, | ||||
| ) -> Result<()> { | ||||
|     let max_proximity = max_proximity - 1; | ||||
|     debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk..."); | ||||
| @@ -35,7 +37,7 @@ pub fn index_prefix_word_database( | ||||
|         .filter(|s| s.len() <= max_prefix_length) | ||||
|         .collect(); | ||||
|  | ||||
|     for proximity in 1..=max_proximity - 1 { | ||||
|     for proximity in 1..max_proximity { | ||||
|         for prefix in common_prefixes.iter() { | ||||
|             let mut prefix_key = vec![]; | ||||
|             prefix_key.push(proximity); | ||||
| @@ -78,7 +80,8 @@ pub fn index_prefix_word_database( | ||||
|  | ||||
|     // Since we read the DB, we can't write to it directly, so we add each new (word1, prefix, proximity) | ||||
|     // element in an intermediary grenad | ||||
|     let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?); | ||||
|     let mut writer = | ||||
|         create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?); | ||||
|  | ||||
|     for proximity in 1..=max_proximity - 1 { | ||||
|         for prefix in new_prefixes.iter() { | ||||
| @@ -144,7 +147,7 @@ fn execute_on_word_pairs_and_prefixes<I>( | ||||
|     mut next_word2_and_docids: impl for<'a> FnMut(&'a mut I) -> Result<Option<(&'a [u8], &'a [u8])>>, | ||||
|     mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>, | ||||
| ) -> Result<()> { | ||||
|     let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = <_>::default(); | ||||
|     let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = BTreeMap::default(); | ||||
|  | ||||
|     // Memory usage check: | ||||
|     // The content of the loop will be called for each `word2` that follows a word beginning | ||||
|   | ||||
| @@ -187,6 +187,8 @@ pub fn index_word_prefix_database( | ||||
|     new_prefix_fst_words: &[String], | ||||
|     common_prefix_fst_words: &[&[String]], | ||||
|     del_prefix_fst_words: &HashSet<Vec<u8>>, | ||||
|     chunk_compression_type: CompressionType, | ||||
|     chunk_compression_level: Option<u32>, | ||||
| ) -> Result<()> { | ||||
|     debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk..."); | ||||
|  | ||||
| @@ -249,7 +251,8 @@ pub fn index_word_prefix_database( | ||||
|  | ||||
|         // Since we read the DB, we can't write to it directly, so we add each new (proximity, word1, prefix) | ||||
|         // element in an intermediary grenad | ||||
|         let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?); | ||||
|         let mut writer = | ||||
|             create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?); | ||||
|  | ||||
|         execute_on_word_pairs_and_prefixes( | ||||
|             &mut db_iter, | ||||
| @@ -325,7 +328,7 @@ fn execute_on_word_pairs_and_prefixes<I>( | ||||
|         }; | ||||
|         let word2_start_different_than_prev = word2[0] != prev_word2_start; | ||||
|         // if there were no potential prefixes for the previous word2 based on its first letter, | ||||
|         // and if the current word2 starts with the s`ame letter, then there is also no potential | ||||
|         // and if the current word2 starts with the same letter, then there is also no potential | ||||
|         // prefixes for the current word2, and we can skip to the next iteration | ||||
|         if empty_prefixes && !word2_start_different_than_prev { | ||||
|             continue; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user