mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Use smartstring to store the external id in our hashmap
We need to store all the external ids (primary keys) in a hashmap, associated with their internal ids, during indexing. Using smartstring avoids heap allocations for short ids, reduces memory usage, and should improve cache locality.
This commit is contained in:
		@@ -1109,8 +1109,11 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        let mut big_object = HashMap::new();
 | 
			
		||||
        big_object.insert(S("id"), "wow");
 | 
			
		||||
        let content: String =
 | 
			
		||||
            (0..=u16::MAX).into_iter().map(|p| p.to_string()).reduce(|a, b| a + " " + &b).unwrap();
 | 
			
		||||
        let content: String = (0..=u16::MAX)
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|p| p.to_string())
 | 
			
		||||
            .reduce(|a, b| a + " " + b.as_ref())
 | 
			
		||||
            .unwrap();
 | 
			
		||||
        big_object.insert("content".to_string(), &content);
 | 
			
		||||
 | 
			
		||||
        let mut cursor = Cursor::new(Vec::new());
 | 
			
		||||
 
 | 
			
		||||
@@ -11,6 +11,7 @@ use itertools::Itertools;
 | 
			
		||||
use obkv::{KvReader, KvWriter};
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
use serde_json::{Map, Value};
 | 
			
		||||
use smartstring::SmartString;
 | 
			
		||||
 | 
			
		||||
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
 | 
			
		||||
use super::{IndexDocumentsMethod, IndexerConfig};
 | 
			
		||||
@@ -55,7 +56,8 @@ pub struct Transform<'a, 'i> {
 | 
			
		||||
    flattened_sorter: grenad::Sorter<MergeFn>,
 | 
			
		||||
    replaced_documents_ids: RoaringBitmap,
 | 
			
		||||
    new_documents_ids: RoaringBitmap,
 | 
			
		||||
    new_external_documents_ids_builder: FxHashMap<Vec<u8>, u64>,
 | 
			
		||||
    // To increase the cache locality and reduce the heap usage we use smartstring.
 | 
			
		||||
    new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>,
 | 
			
		||||
    documents_count: usize,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -254,10 +256,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
                    None => {
 | 
			
		||||
                        // if the document has already been inserted in this
 | 
			
		||||
                        // batch we need to get its docid
 | 
			
		||||
                        match self
 | 
			
		||||
                            .new_external_documents_ids_builder
 | 
			
		||||
                            .entry(external_id.as_bytes().to_vec())
 | 
			
		||||
                        {
 | 
			
		||||
                        match self.new_external_documents_ids_builder.entry(external_id.into()) {
 | 
			
		||||
                            Entry::Occupied(entry) => (*entry.get() as u32, false),
 | 
			
		||||
                            // if the document has never been encountered we give it a new docid
 | 
			
		||||
                            // and push this new docid to the external documents ids builder
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user