mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Clean up and remove the non-sorted merge_caches function
This commit is contained in:
		| @@ -466,110 +466,13 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>( | |||||||
|     Ok(bucket_caches) |     Ok(bucket_caches) | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Merges the caches that must be all associated to the same bucket. | /// Merges the caches that must be all associated to the same bucket | ||||||
|  | /// but makes sure to sort the different buckets before performing the merges. | ||||||
| /// | /// | ||||||
| /// # Panics | /// # Panics | ||||||
| /// | /// | ||||||
| /// - If the bucket IDs in these frozen caches are not exactly the same. | /// - If the bucket IDs in these frozen caches are not exactly the same. | ||||||
| pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()> | pub fn merge_caches_sorted<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()> | ||||||
| where |  | ||||||
|     F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, |  | ||||||
| { |  | ||||||
|     let mut maps = Vec::new(); |  | ||||||
|     let mut readers = Vec::new(); |  | ||||||
|     let mut current_bucket = None; |  | ||||||
|     for FrozenCache { bucket, cache, ref mut spilled } in frozen { |  | ||||||
|         assert_eq!(*current_bucket.get_or_insert(bucket), bucket); |  | ||||||
|         maps.push(cache); |  | ||||||
|         readers.append(spilled); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // First manage the spilled entries by looking into the HashMaps, |  | ||||||
|     // merge them and mark them as dummy. |  | ||||||
|     let mut heap = BinaryHeap::new(); |  | ||||||
|     for (source_index, source) in readers.into_iter().enumerate() { |  | ||||||
|         let mut cursor = source.into_cursor()?; |  | ||||||
|         if cursor.move_on_next()?.is_some() { |  | ||||||
|             heap.push(Entry { cursor, source_index }); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     loop { |  | ||||||
|         let mut first_entry = match heap.pop() { |  | ||||||
|             Some(entry) => entry, |  | ||||||
|             None => break, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let (first_key, first_value) = match first_entry.cursor.current() { |  | ||||||
|             Some((key, value)) => (key, value), |  | ||||||
|             None => break, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let mut output = DelAddRoaringBitmap::from_bytes(first_value)?; |  | ||||||
|         while let Some(mut entry) = heap.peek_mut() { |  | ||||||
|             if let Some((key, _value)) = entry.cursor.current() { |  | ||||||
|                 if first_key == key { |  | ||||||
|                     let new = DelAddRoaringBitmap::from_bytes(first_value)?; |  | ||||||
|                     output = output.merge(new); |  | ||||||
|                     // When we are done with the current value of this entry, make |  | ||||||
|                     // it move forward and let the heap reorganize itself (on drop) |  | ||||||
|                     if entry.cursor.move_on_next()?.is_none() { |  | ||||||
|                         PeekMut::pop(entry); |  | ||||||
|                     } |  | ||||||
|                 } else { |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Once we merged all of the spilled bitmaps we must also |  | ||||||
|         // fetch the entries from the non-spilled entries (the HashMaps). |  | ||||||
|         for (map_index, map) in maps.iter_mut().enumerate() { |  | ||||||
|             if first_entry.source_index != map_index { |  | ||||||
|                 if let Some(new) = map.get_mut(first_key) { |  | ||||||
|                     output.union_and_clear_bbbul(new); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // We send the merged entry outside. |  | ||||||
|         (f)(first_key, output)?; |  | ||||||
|  |  | ||||||
|         // Don't forget to put the first entry back into the heap. |  | ||||||
|         if first_entry.cursor.move_on_next()?.is_some() { |  | ||||||
|             heap.push(first_entry) |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Then manage the content on the HashMap entries that weren't taken (mem::take). |  | ||||||
|     while let Some(mut map) = maps.pop() { |  | ||||||
|         for (key, bbbul) in map.iter_mut() { |  | ||||||
|             // Make sure we don't try to work with entries already managed by the spilled entries merge |  | ||||||
|             if bbbul.is_empty() { |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             let mut output = DelAddRoaringBitmap::empty(); |  | ||||||
|             output.union_and_clear_bbbul(bbbul); |  | ||||||
|  |  | ||||||
|             for rhs in maps.iter_mut() { |  | ||||||
|                 if let Some(new) = rhs.get_mut(key) { |  | ||||||
|                     output.union_and_clear_bbbul(new); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             // We send the merged entry outside. |  | ||||||
|             (f)(key, output)?; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     Ok(()) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Merges the caches that must be all associated to the same bucket. |  | ||||||
| /// |  | ||||||
| /// It merges entries like the `merge_caches` function |  | ||||||
| pub fn merge_caches_alt<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()> |  | ||||||
| where | where | ||||||
|     F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, |     F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, | ||||||
| { | { | ||||||
|   | |||||||
| @@ -7,8 +7,7 @@ mod vectors; | |||||||
|  |  | ||||||
| use bumpalo::Bump; | use bumpalo::Bump; | ||||||
| pub use cache::{ | pub use cache::{ | ||||||
|     merge_caches, merge_caches_alt, transpose_and_freeze_caches, BalancedCaches, |     merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, | ||||||
|     DelAddRoaringBitmap, |  | ||||||
| }; | }; | ||||||
| pub use documents::*; | pub use documents::*; | ||||||
| pub use faceted::*; | pub use faceted::*; | ||||||
|   | |||||||
| @@ -9,8 +9,8 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| use super::channel::*; | use super::channel::*; | ||||||
| use super::extract::{ | use super::extract::{ | ||||||
|     merge_caches, merge_caches_alt, transpose_and_freeze_caches, BalancedCaches, |     merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, | ||||||
|     DelAddRoaringBitmap, FacetKind, GeoExtractorData, |     FacetKind, GeoExtractorData, | ||||||
| }; | }; | ||||||
| use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result}; | use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result}; | ||||||
|  |  | ||||||
| @@ -78,7 +78,7 @@ where | |||||||
|         if must_stop_processing() { |         if must_stop_processing() { | ||||||
|             return Err(InternalError::AbortedIndexation.into()); |             return Err(InternalError::AbortedIndexation.into()); | ||||||
|         } |         } | ||||||
|         merge_caches_alt(frozen, |key, DelAddRoaringBitmap { del, add }| { |         merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| { | ||||||
|             let current = database.get(&rtxn, key)?; |             let current = database.get(&rtxn, key)?; | ||||||
|             match merge_cbo_bitmaps(current, del, add)? { |             match merge_cbo_bitmaps(current, del, add)? { | ||||||
|                 Operation::Write(bitmap) => { |                 Operation::Write(bitmap) => { | ||||||
| @@ -107,7 +107,7 @@ pub fn merge_and_send_facet_docids<'extractor>( | |||||||
|         .map(|frozen| { |         .map(|frozen| { | ||||||
|             let mut facet_field_ids_delta = FacetFieldIdsDelta::default(); |             let mut facet_field_ids_delta = FacetFieldIdsDelta::default(); | ||||||
|             let rtxn = index.read_txn()?; |             let rtxn = index.read_txn()?; | ||||||
|             merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| { |             merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| { | ||||||
|                 let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?; |                 let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?; | ||||||
|                 match merge_cbo_bitmaps(current, del, add)? { |                 match merge_cbo_bitmaps(current, del, add)? { | ||||||
|                     Operation::Write(bitmap) => { |                     Operation::Write(bitmap) => { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user