mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 16:06:31 +00:00 
			
		
		
		
	Use smartstring to store the external id in our hashmap
We need to store all the external ids (primary keys) in a hashmap, associated with their internal ids, during indexing. The smartstring type reduces heap allocations and memory usage, and should improve cache locality.
This commit is contained in:
		| @@ -32,6 +32,7 @@ serde_json = { version = "1.0.79", features = ["preserve_order"] } | ||||
| slice-group-by = "0.3.0" | ||||
| smallstr =  { version = "0.3.0", features = ["serde"] } | ||||
| smallvec = "1.8.0" | ||||
| smartstring = "1.0.1" | ||||
| tempfile = "3.3.0" | ||||
| time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } | ||||
| uuid = { version = "0.8.2", features = ["v4"] } | ||||
|   | ||||
| @@ -1109,8 +1109,11 @@ mod tests { | ||||
|  | ||||
|         let mut big_object = HashMap::new(); | ||||
|         big_object.insert(S("id"), "wow"); | ||||
|         let content: String = | ||||
|             (0..=u16::MAX).into_iter().map(|p| p.to_string()).reduce(|a, b| a + " " + &b).unwrap(); | ||||
|         let content: String = (0..=u16::MAX) | ||||
|             .into_iter() | ||||
|             .map(|p| p.to_string()) | ||||
|             .reduce(|a, b| a + " " + b.as_ref()) | ||||
|             .unwrap(); | ||||
|         big_object.insert("content".to_string(), &content); | ||||
|  | ||||
|         let mut cursor = Cursor::new(Vec::new()); | ||||
|   | ||||
| @@ -11,6 +11,7 @@ use itertools::Itertools; | ||||
| use obkv::{KvReader, KvWriter}; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde_json::{Map, Value}; | ||||
| use smartstring::SmartString; | ||||
|  | ||||
| use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn}; | ||||
| use super::{IndexDocumentsMethod, IndexerConfig}; | ||||
| @@ -55,7 +56,8 @@ pub struct Transform<'a, 'i> { | ||||
|     flattened_sorter: grenad::Sorter<MergeFn>, | ||||
|     replaced_documents_ids: RoaringBitmap, | ||||
|     new_documents_ids: RoaringBitmap, | ||||
|     new_external_documents_ids_builder: FxHashMap<Vec<u8>, u64>, | ||||
|     // To increase the cache locality and reduce the heap usage we use smartstring. | ||||
|     new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>, | ||||
|     documents_count: usize, | ||||
| } | ||||
|  | ||||
| @@ -254,10 +256,7 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|                     None => { | ||||
|                         // if the document has already been inserted in this | ||||
|                         // batch we need to get its docid | ||||
|                         match self | ||||
|                             .new_external_documents_ids_builder | ||||
|                             .entry(external_id.as_bytes().to_vec()) | ||||
|                         { | ||||
|                         match self.new_external_documents_ids_builder.entry(external_id.into()) { | ||||
|                             Entry::Occupied(entry) => (*entry.get() as u32, false), | ||||
|                             // if the document has never been encountered we give it a new docid | ||||
|                             // and push this new docid to the external documents ids builder | ||||
|   | ||||
		Reference in New Issue
	
	Block a user