mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Introduce the ConcurrentAvailableIds struct and rename the other to AvailableIds
This commit is contained in:
		| @@ -3,12 +3,12 @@ use std::ops::RangeInclusive; | |||||||
| 
 | 
 | ||||||
| use roaring::bitmap::{IntoIter, RoaringBitmap}; | use roaring::bitmap::{IntoIter, RoaringBitmap}; | ||||||
| 
 | 
 | ||||||
| pub struct AvailableDocumentsIds { | pub struct AvailableIds { | ||||||
|     iter: Chain<IntoIter, RangeInclusive<u32>>, |     iter: Chain<IntoIter, RangeInclusive<u32>>, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl AvailableDocumentsIds { | impl AvailableIds { | ||||||
|     pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds { |     pub fn new(docids: &RoaringBitmap) -> AvailableIds { | ||||||
|         match docids.max() { |         match docids.max() { | ||||||
|             Some(last_id) => { |             Some(last_id) => { | ||||||
|                 let mut available = RoaringBitmap::from_iter(0..last_id); |                 let mut available = RoaringBitmap::from_iter(0..last_id); | ||||||
| @@ -20,17 +20,17 @@ impl AvailableDocumentsIds { | |||||||
|                     None => 1..=0, // empty range iterator
 |                     None => 1..=0, // empty range iterator
 | ||||||
|                 }; |                 }; | ||||||
| 
 | 
 | ||||||
|                 AvailableDocumentsIds { iter: available.into_iter().chain(iter) } |                 AvailableIds { iter: available.into_iter().chain(iter) } | ||||||
|             } |             } | ||||||
|             None => { |             None => { | ||||||
|                 let empty = RoaringBitmap::new().into_iter(); |                 let empty = RoaringBitmap::new().into_iter(); | ||||||
|                 AvailableDocumentsIds { iter: empty.chain(0..=u32::MAX) } |                 AvailableIds { iter: empty.chain(0..=u32::MAX) } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl Iterator for AvailableDocumentsIds { | impl Iterator for AvailableIds { | ||||||
|     type Item = u32; |     type Item = u32; | ||||||
| 
 | 
 | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |     fn next(&mut self) -> Option<Self::Item> { | ||||||
| @@ -45,7 +45,7 @@ mod tests { | |||||||
|     #[test] |     #[test] | ||||||
|     fn empty() { |     fn empty() { | ||||||
|         let base = RoaringBitmap::new(); |         let base = RoaringBitmap::new(); | ||||||
|         let left = AvailableDocumentsIds::from_documents_ids(&base); |         let left = AvailableIds::new(&base); | ||||||
|         let right = 0..=u32::MAX; |         let right = 0..=u32::MAX; | ||||||
|         left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r)); |         left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r)); | ||||||
|     } |     } | ||||||
| @@ -58,7 +58,7 @@ mod tests { | |||||||
|         base.insert(100); |         base.insert(100); | ||||||
|         base.insert(405); |         base.insert(405); | ||||||
| 
 | 
 | ||||||
|         let left = AvailableDocumentsIds::from_documents_ids(&base); |         let left = AvailableIds::new(&base); | ||||||
|         let right = (0..=u32::MAX).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405); |         let right = (0..=u32::MAX).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405); | ||||||
|         left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r)); |         left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r)); | ||||||
|     } |     } | ||||||
							
								
								
									
										59
									
								
								milli/src/update/concurrent_available_ids.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								milli/src/update/concurrent_available_ids.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | |||||||
|  | use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; | ||||||
|  |  | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
|  | /// A concurrent ID generate that will never return the same ID twice. | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub struct ConcurrentAvailableIds { | ||||||
|  |     /// The current tree node ID we should use if there is no other IDs available. | ||||||
|  |     current: AtomicU32, | ||||||
|  |     /// The total number of tree node IDs used. | ||||||
|  |     used: AtomicU64, | ||||||
|  |  | ||||||
|  |     /// A list of IDs to exhaust before picking IDs from `current`. | ||||||
|  |     available: RoaringBitmap, | ||||||
|  |     /// The current Nth ID to select in the bitmap. | ||||||
|  |     select_in_bitmap: AtomicU32, | ||||||
|  |     /// Tells if you should look in the roaring bitmap or if all the IDs are already exhausted. | ||||||
|  |     look_into_bitmap: AtomicBool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl ConcurrentAvailableIds { | ||||||
|  |     /// Creates an ID generator returning unique IDs, avoiding the specified used IDs. | ||||||
|  |     pub fn new(used: RoaringBitmap) -> ConcurrentAvailableIds { | ||||||
|  |         let last_id = used.max().map_or(0, |id| id + 1); | ||||||
|  |         let used_ids = used.len(); | ||||||
|  |         let available = RoaringBitmap::from_sorted_iter(0..last_id).unwrap() - used; | ||||||
|  |  | ||||||
|  |         ConcurrentAvailableIds { | ||||||
|  |             current: AtomicU32::new(last_id), | ||||||
|  |             used: AtomicU64::new(used_ids), | ||||||
|  |             select_in_bitmap: AtomicU32::new(0), | ||||||
|  |             look_into_bitmap: AtomicBool::new(!available.is_empty()), | ||||||
|  |             available, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns a new unique ID and increase the count of IDs used. | ||||||
|  |     pub fn next(&self) -> Option<u32> { | ||||||
|  |         if self.used.fetch_add(1, Ordering::Relaxed) > u32::MAX as u64 { | ||||||
|  |             None | ||||||
|  |         } else if self.look_into_bitmap.load(Ordering::Relaxed) { | ||||||
|  |             let current = self.select_in_bitmap.fetch_add(1, Ordering::Relaxed); | ||||||
|  |             match self.available.select(current) { | ||||||
|  |                 Some(id) => Some(id), | ||||||
|  |                 None => { | ||||||
|  |                     self.look_into_bitmap.store(false, Ordering::Relaxed); | ||||||
|  |                     Some(self.current.fetch_add(1, Ordering::Relaxed)) | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             Some(self.current.fetch_add(1, Ordering::Relaxed)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the number of used ids in total. | ||||||
|  |     pub fn used(&self) -> u64 { | ||||||
|  |         self.used.load(Ordering::Relaxed) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -27,7 +27,7 @@ use crate::update::del_add::{ | |||||||
| }; | }; | ||||||
| use crate::update::index_documents::GrenadParameters; | use crate::update::index_documents::GrenadParameters; | ||||||
| use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; | use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; | ||||||
| use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | use crate::update::{AvailableIds, UpdateIndexingStep}; | ||||||
| use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | ||||||
| use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; | use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; | ||||||
| use crate::{ | use crate::{ | ||||||
| @@ -55,7 +55,7 @@ pub struct Transform<'a, 'i> { | |||||||
|  |  | ||||||
|     indexer_settings: &'a IndexerConfig, |     indexer_settings: &'a IndexerConfig, | ||||||
|     pub index_documents_method: IndexDocumentsMethod, |     pub index_documents_method: IndexDocumentsMethod, | ||||||
|     available_documents_ids: AvailableDocumentsIds, |     available_documents_ids: AvailableIds, | ||||||
|  |  | ||||||
|     // Both grenad follows the same format: |     // Both grenad follows the same format: | ||||||
|     // key | value |     // key | value | ||||||
| @@ -143,7 +143,7 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|             index, |             index, | ||||||
|             fields_ids_map: index.fields_ids_map(wtxn)?, |             fields_ids_map: index.fields_ids_map(wtxn)?, | ||||||
|             indexer_settings, |             indexer_settings, | ||||||
|             available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids), |             available_documents_ids: AvailableIds::new(&documents_ids), | ||||||
|             original_sorter, |             original_sorter, | ||||||
|             flattened_sorter, |             flattened_sorter, | ||||||
|             index_documents_method, |             index_documents_method, | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| pub use self::available_documents_ids::AvailableDocumentsIds; | pub use self::available_ids::AvailableIds; | ||||||
| pub use self::clear_documents::ClearDocuments; | pub use self::clear_documents::ClearDocuments; | ||||||
|  | pub use self::concurrent_available_ids::ConcurrentAvailableIds; | ||||||
| pub use self::facet::bulk::FacetsUpdateBulk; | pub use self::facet::bulk::FacetsUpdateBulk; | ||||||
| pub use self::facet::incremental::FacetsUpdateIncrementalInner; | pub use self::facet::incremental::FacetsUpdateIncrementalInner; | ||||||
| pub use self::index_documents::*; | pub use self::index_documents::*; | ||||||
| @@ -10,8 +11,9 @@ pub use self::word_prefix_docids::WordPrefixDocids; | |||||||
| pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids; | pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids; | ||||||
| pub use self::words_prefixes_fst::WordsPrefixesFst; | pub use self::words_prefixes_fst::WordsPrefixesFst; | ||||||
|  |  | ||||||
| mod available_documents_ids; | mod available_ids; | ||||||
| mod clear_documents; | mod clear_documents; | ||||||
|  | mod concurrent_available_ids; | ||||||
| pub(crate) mod del_add; | pub(crate) mod del_add; | ||||||
| pub(crate) mod facet; | pub(crate) mod facet; | ||||||
| mod index_documents; | mod index_documents; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user