mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Merge #579
579: Stop reindexing already indexed documents r=ManyTheFish a=irevoire ``` % ./compare.sh indexing_stop-reindexing-unchanged-documents_cb5a1669.json indexing_main_eeba1960.json group indexing_main_eeba1960 indexing_stop-reindexing-unchanged-documents_cb5a1669 ----- ---------------------- ----------------------------------------------------- indexing/-geo-delete-facetedNumber-facetedGeo-searchable- 1.03 2.0±0.22ms ? ?/sec 1.00 1955.4±336.24µs ? ?/sec indexing/-movies-delete-facetedString-facetedNumber-searchable- 1.08 11.0±2.93ms ? ?/sec 1.00 10.2±4.04ms ? ?/sec indexing/-movies-delete-facetedString-facetedNumber-searchable-nested- 1.00 15.1±3.89ms ? ?/sec 1.14 17.1±5.18ms ? ?/sec indexing/-songs-delete-facetedString-facetedNumber-searchable- 1.26 59.2±12.01ms ? ?/sec 1.00 47.1±8.52ms ? ?/sec indexing/-wiki-delete-searchable- 1.08 316.6±31.53ms ? ?/sec 1.00 293.6±17.00ms ? ?/sec indexing/Indexing geo_point 1.01 60.9±0.31s ? ?/sec 1.00 60.6±0.36s ? ?/sec indexing/Indexing movies in three batches 1.04 20.0±0.30s ? ?/sec 1.00 19.2±0.25s ? ?/sec indexing/Indexing movies with default settings 1.02 19.1±0.18s ? ?/sec 1.00 18.7±0.24s ? ?/sec indexing/Indexing nested movies with default settings 1.02 26.2±0.29s ? ?/sec 1.00 25.9±0.22s ? ?/sec indexing/Indexing nested movies without any facets 1.02 25.3±0.32s ? ?/sec 1.00 24.7±0.26s ? ?/sec indexing/Indexing songs in three batches with default settings 1.00 66.7±0.41s ? ?/sec 1.01 67.1±0.86s ? ?/sec indexing/Indexing songs with default settings 1.00 58.3±0.90s ? ?/sec 1.01 58.8±1.32s ? ?/sec indexing/Indexing songs without any facets 1.00 54.5±1.43s ? ?/sec 1.01 55.2±1.29s ? ?/sec indexing/Indexing songs without faceted numbers 1.00 57.9±1.20s ? ?/sec 1.01 58.4±0.93s ? ?/sec indexing/Indexing wiki 1.00 1052.0±10.95s ? ?/sec 1.02 1069.4±20.38s ? ?/sec indexing/Indexing wiki in three batches 1.00 1193.1±8.83s ? ?/sec 1.00 1189.5±9.40s ? ?/sec indexing/Reindexing geo_point 3.22 67.5±0.73s ? ?/sec 1.00 21.0±0.16s ? ?/sec indexing/Reindexing movies with default settings 3.75 19.4±0.28s ? ?/sec 1.00 5.2±0.05s ? ?/sec indexing/Reindexing songs with default settings 8.90 61.4±0.91s ? ?/sec 1.00 6.9±0.07s ? ?/sec indexing/Reindexing wiki 1.00 1748.2±35.68s ? ?/sec 1.00 1750.5±18.53s ? ?/sec ``` tldr: We do not lose any performance on the normal indexing benchmark, but we get between 3 and 8 times faster on the reindexing benchmarks 👍 Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
		@@ -200,24 +200,26 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
 | 
			
		||||
            let mut original_docid = None;
 | 
			
		||||
 | 
			
		||||
            let docid = match self.new_external_documents_ids_builder.entry(external_id.into()) {
 | 
			
		||||
                Entry::Occupied(entry) => *entry.get() as u32,
 | 
			
		||||
                Entry::Vacant(entry) => {
 | 
			
		||||
                    // If the document was already in the db we mark it as a replaced document.
 | 
			
		||||
                    // It'll be deleted later. We keep its original docid to insert it in the grenad.
 | 
			
		||||
                    if let Some(docid) = external_documents_ids.get(entry.key()) {
 | 
			
		||||
                        self.replaced_documents_ids.insert(docid);
 | 
			
		||||
                        original_docid = Some(docid);
 | 
			
		||||
            let docid =
 | 
			
		||||
                match self.new_external_documents_ids_builder.entry(external_id.clone().into()) {
 | 
			
		||||
                    Entry::Occupied(entry) => *entry.get() as u32,
 | 
			
		||||
                    Entry::Vacant(entry) => {
 | 
			
		||||
                        // If the document was already in the db we mark it as a replaced document.
 | 
			
		||||
                        // It'll be deleted later. We keep its original docid to insert it in the grenad.
 | 
			
		||||
                        if let Some(docid) = external_documents_ids.get(entry.key()) {
 | 
			
		||||
                            self.replaced_documents_ids.insert(docid);
 | 
			
		||||
                            original_docid = Some(docid);
 | 
			
		||||
                        }
 | 
			
		||||
                        let docid = self
 | 
			
		||||
                            .available_documents_ids
 | 
			
		||||
                            .next()
 | 
			
		||||
                            .ok_or(UserError::DocumentLimitReached)?;
 | 
			
		||||
                        entry.insert(docid as u64);
 | 
			
		||||
                        docid
 | 
			
		||||
                    }
 | 
			
		||||
                    let docid = self
 | 
			
		||||
                        .available_documents_ids
 | 
			
		||||
                        .next()
 | 
			
		||||
                        .ok_or(UserError::DocumentLimitReached)?;
 | 
			
		||||
                    entry.insert(docid as u64);
 | 
			
		||||
                    docid
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
                };
 | 
			
		||||
 | 
			
		||||
            let mut skip_insertion = false;
 | 
			
		||||
            if let Some(original_docid) = original_docid {
 | 
			
		||||
                let original_key = BEU32::new(original_docid);
 | 
			
		||||
                let base_obkv = self
 | 
			
		||||
@@ -230,24 +232,39 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
                        key: None,
 | 
			
		||||
                    })?;
 | 
			
		||||
 | 
			
		||||
                // we associate the base document with the new key, everything will get merged later.
 | 
			
		||||
                self.original_sorter.insert(&docid.to_be_bytes(), base_obkv)?;
 | 
			
		||||
                match self.flatten_from_fields_ids_map(KvReader::new(&base_obkv))? {
 | 
			
		||||
                    Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
 | 
			
		||||
                    None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
 | 
			
		||||
                // we check if the two documents are exactly equal. If it's the case we can skip this document entirely
 | 
			
		||||
                if base_obkv == obkv_buffer {
 | 
			
		||||
                    // we're not replacing anything
 | 
			
		||||
                    self.replaced_documents_ids.remove(original_docid);
 | 
			
		||||
                    // and we need to put back the original id as it was before
 | 
			
		||||
                    self.new_external_documents_ids_builder.remove(&*external_id);
 | 
			
		||||
                    skip_insertion = true;
 | 
			
		||||
                } else {
 | 
			
		||||
                    // we associate the base document with the new key, everything will get merged later.
 | 
			
		||||
                    self.original_sorter.insert(&docid.to_be_bytes(), base_obkv)?;
 | 
			
		||||
                    match self.flatten_from_fields_ids_map(KvReader::new(&base_obkv))? {
 | 
			
		||||
                        Some(buffer) => {
 | 
			
		||||
                            self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?
 | 
			
		||||
                        }
 | 
			
		||||
                        None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                self.new_documents_ids.insert(docid);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // We use the extracted/generated user id as the key for this document.
 | 
			
		||||
            self.original_sorter.insert(&docid.to_be_bytes(), &obkv_buffer)?;
 | 
			
		||||
            documents_count += 1;
 | 
			
		||||
            if !skip_insertion {
 | 
			
		||||
                // We use the extracted/generated user id as the key for this document.
 | 
			
		||||
                self.original_sorter.insert(&docid.to_be_bytes(), obkv_buffer.clone())?;
 | 
			
		||||
 | 
			
		||||
            match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
 | 
			
		||||
                Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
 | 
			
		||||
                None => self.flattened_sorter.insert(docid.to_be_bytes(), &obkv_buffer)?,
 | 
			
		||||
                match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
 | 
			
		||||
                    Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
 | 
			
		||||
                    None => {
 | 
			
		||||
                        self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            documents_count += 1;
 | 
			
		||||
 | 
			
		||||
            progress_callback(UpdateIndexingStep::RemapDocumentAddition {
 | 
			
		||||
                documents_seen: documents_count,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user