mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Improve documents deletion by iterating over all the word pair positions
This commit is contained in:
		| @@ -2,7 +2,6 @@ use std::borrow::Cow; | |||||||
| use std::convert::TryFrom; | use std::convert::TryFrom; | ||||||
|  |  | ||||||
| use fst::{IntoStreamer, Streamer}; | use fst::{IntoStreamer, Streamer}; | ||||||
| use itertools::Itertools; |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::{Index, BEU32}; | use crate::{Index, BEU32}; | ||||||
| @@ -168,21 +167,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|         // We write the new words FST into the main database. |         // We write the new words FST into the main database. | ||||||
|         self.index.put_words_fst(self.wtxn, &new_words_fst)?; |         self.index.put_words_fst(self.wtxn, &new_words_fst)?; | ||||||
|  |  | ||||||
|         // We delete the documents ids that are under the pairs of words we found. |         // We delete the documents ids that are under the pairs of words, | ||||||
|         // TODO We can maybe improve this by using the `compute_words_pair_proximities` |         // it is faster and uses no memory to iterate over all the word pairs than | ||||||
|         //      function instead of iterating over all the possible word pairs. |         // to compute the cartesian product of every word of the deleted documents. | ||||||
|         for ((w1, _), (w2, _)) in words.iter().cartesian_product(&words) { |         let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?; | ||||||
|             let start = &(w1.as_str(), w2.as_str(), 0); |         while let Some(result) = iter.next() { | ||||||
|             let end = &(w1.as_str(), w2.as_str(), 7); |             let ((w1, w2, prox), mut docids) = result?; | ||||||
|             let mut iter = word_pair_proximity_docids.range_mut(self.wtxn, &(start..=end))?; |             docids.difference_with(&documents_ids); | ||||||
|             while let Some(result) = iter.next() { |             if docids.is_empty() { | ||||||
|                 let ((w1, w2, prox), mut docids) = result?; |                 iter.del_current()?; | ||||||
|                 docids.difference_with(&documents_ids); |             } else { | ||||||
|                 if docids.is_empty() { |                 iter.put_current(&(w1, w2, prox), &docids)?; | ||||||
|                     iter.del_current()?; |  | ||||||
|                 } else { |  | ||||||
|                     iter.put_current(&(w1, w2, prox), &docids)?; |  | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user