mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Write the words pairs proximities directly into LMDB to speedup indexing
This commit is contained in:
		| @@ -325,7 +325,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|         enum DatabaseType { | ||||
|             Main, | ||||
|             WordDocids, | ||||
|             WordsPairsProximitiesDocids, | ||||
|         } | ||||
|  | ||||
|         let searchable_fields: HashSet<_> = match self.index.searchable_fields(self.wtxn)? { | ||||
| @@ -352,7 +351,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|         let (receiver, docid_word_positions_readers, documents_readers) = pool.install(|| { | ||||
|         let readers = pool.install(|| { | ||||
|             let num_threads = rayon::current_num_threads(); | ||||
|             let max_memory_by_job = max_memory.map(|mm| mm / num_threads); | ||||
|  | ||||
| @@ -405,18 +404,13 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|  | ||||
|             // The enum and the channel which is used to transfert | ||||
|             // the readers merges potentially done on another thread. | ||||
|             let (sender, receiver) = sync_channel(3); | ||||
|             let (sender, receiver) = sync_channel(2); | ||||
|  | ||||
|             debug!("Merging the main, word docids and words pairs proximity docids in parallel..."); | ||||
|             rayon::spawn(move || { | ||||
|                 vec![ | ||||
|                     (DatabaseType::Main, main_readers, main_merge as MergeFn), | ||||
|                     (DatabaseType::WordDocids, word_docids_readers, word_docids_merge), | ||||
|                     ( | ||||
|                         DatabaseType::WordsPairsProximitiesDocids, | ||||
|                         words_pairs_proximities_docids_readers, | ||||
|                         words_pairs_proximities_docids_merge, | ||||
|                     ), | ||||
|                 ] | ||||
|                 .into_par_iter() | ||||
|                 .for_each(|(dbtype, readers, merge)| { | ||||
| @@ -427,9 +421,21 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|                 }); | ||||
|             }); | ||||
|  | ||||
|             Ok((receiver, docid_word_positions_readers, documents_readers)) as anyhow::Result<_> | ||||
|             Ok(( | ||||
|                 receiver, | ||||
|                 docid_word_positions_readers, | ||||
|                 documents_readers, | ||||
|                 words_pairs_proximities_docids_readers, | ||||
|             )) as anyhow::Result<_> | ||||
|         })?; | ||||
|  | ||||
|         let ( | ||||
|             receiver, | ||||
|             docid_word_positions_readers, | ||||
|             documents_readers, | ||||
|             words_pairs_proximities_docids_readers, | ||||
|         ) = readers; | ||||
|  | ||||
|         let mut documents_ids = self.index.documents_ids(self.wtxn)?; | ||||
|         let contains_documents = !documents_ids.is_empty(); | ||||
|         let write_method = if contains_documents { | ||||
| @@ -472,6 +478,15 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|             write_method | ||||
|         )?; | ||||
|  | ||||
|         debug!("Writing the words pairs proximities docids into LMDB on disk..."); | ||||
|         merge_into_lmdb_database( | ||||
|             self.wtxn, | ||||
|             *self.index.word_pair_proximity_docids.as_polymorph(), | ||||
|             words_pairs_proximities_docids_readers, | ||||
|             words_pairs_proximities_docids_merge, | ||||
|             write_method, | ||||
|         )?; | ||||
|  | ||||
|         for (db_type, result) in receiver { | ||||
|             let content = result?; | ||||
|             match db_type { | ||||
| @@ -496,17 +511,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|                         write_method, | ||||
|                     )?; | ||||
|                 }, | ||||
|                 DatabaseType::WordsPairsProximitiesDocids => { | ||||
|                     debug!("Writing the words pairs proximities docids into LMDB on disk..."); | ||||
|                     let db = *self.index.word_pair_proximity_docids.as_polymorph(); | ||||
|                     write_into_lmdb_database( | ||||
|                         self.wtxn, | ||||
|                         db, | ||||
|                         content, | ||||
|                         words_pairs_proximities_docids_merge, | ||||
|                         write_method, | ||||
|                     )?; | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user