mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Merge #5131
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 21s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
				
					
					
				
			
		
			
				
	
				Test suite / Tests almost all features (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Test suite / Test disabled tokenization (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Test suite / Run tests in debug (push) Failing after 10s
				
					
					
				
			
		
			
				
	
				Test suite / Run Rustfmt (push) Successful in 1m25s
				
					
					
				
			
		
			
				
	
				Test suite / Run Clippy (push) Successful in 5m54s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
				
					
					
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 21s
				Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
				Test suite / Tests almost all features (push) Has been skipped
				Test suite / Test disabled tokenization (push) Has been skipped
				Test suite / Run tests in debug (push) Failing after 10s
				Test suite / Run Rustfmt (push) Successful in 1m25s
				Test suite / Run Clippy (push) Successful in 5m54s
				Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
				5131: Ignore documents whose selected fields didn't change r=dureuill a=dureuill Attempts to improve the new indexer performance by ignoring documents whose selected fields didn't change: - Add `Update::has_changed_for_fields` function - Ignore documents whose searchable attributes didn't change for word docids and word pair proximity extraction - Ignore documents whose faceted attributes didn't change for facet extraction Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -1,7 +1,10 @@ | ||||
| use bumpalo::Bump; | ||||
| use heed::RoTxn; | ||||
|  | ||||
| use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions}; | ||||
| use super::document::{ | ||||
|     Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, | ||||
| }; | ||||
| use super::extract::perm_json_p; | ||||
| use super::vector_document::{ | ||||
|     MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions, | ||||
| }; | ||||
| @@ -164,6 +167,80 @@ impl<'doc> Update<'doc> { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns whether the updated version of the document is different from the current version for the passed subset of fields. | ||||
|     /// | ||||
|     /// `true` if at least one top-level field that is exactly a member of `fields`, or a parent of a member of `fields`, changed. | ||||
|     /// Otherwise `false`. | ||||
|     pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>( | ||||
|         &self, | ||||
|         fields: Option<&[&str]>, | ||||
|         rtxn: &'t RoTxn, | ||||
|         index: &'t Index, | ||||
|         mapper: &'t Mapper, | ||||
|     ) -> Result<bool> { | ||||
|         let mut changed = false; | ||||
|         let mut cached_current = None; | ||||
|         let mut updated_selected_field_count = 0; | ||||
|  | ||||
|         for entry in self.updated().iter_top_level_fields() { | ||||
|             let (key, updated_value) = entry?; | ||||
|  | ||||
|             if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             updated_selected_field_count += 1; | ||||
|             let current = match cached_current { | ||||
|                 Some(current) => current, | ||||
|                 None => self.current(rtxn, index, mapper)?, | ||||
|             }; | ||||
|             let current_value = current.top_level_field(key)?; | ||||
|             let Some(current_value) = current_value else { | ||||
|                 changed = true; | ||||
|                 break; | ||||
|             }; | ||||
|  | ||||
|             if current_value.get() != updated_value.get() { | ||||
|                 changed = true; | ||||
|                 break; | ||||
|             } | ||||
|             cached_current = Some(current); | ||||
|         } | ||||
|  | ||||
|         if !self.has_deletion { | ||||
|             // no field deletion, so fields that don't appear in `updated` cannot have changed | ||||
|             return Ok(changed); | ||||
|         } | ||||
|  | ||||
|         if changed { | ||||
|             return Ok(true); | ||||
|         } | ||||
|  | ||||
|         // We saw all updated fields, and set `changed` if any of them wasn't in `current` or had a different value. | ||||
|         // So if there are as many selected fields in `current` as in `updated`, then nothing changed. | ||||
|         // If there are more selected fields in `current`, then they are missing in `updated`. | ||||
|         let has_deleted_fields = { | ||||
|             let current = match cached_current { | ||||
|                 Some(current) => current, | ||||
|                 None => self.current(rtxn, index, mapper)?, | ||||
|             }; | ||||
|  | ||||
|             let mut current_selected_field_count = 0; | ||||
|             for entry in current.iter_top_level_fields() { | ||||
|                 let (key, _) = entry?; | ||||
|  | ||||
|                 if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { | ||||
|                     continue; | ||||
|                 } | ||||
|                 current_selected_field_count += 1; | ||||
|             } | ||||
|  | ||||
|             current_selected_field_count != updated_selected_field_count | ||||
|         }; | ||||
|  | ||||
|         Ok(has_deleted_fields) | ||||
|     } | ||||
|  | ||||
|     pub fn updated_vectors( | ||||
|         &self, | ||||
|         doc_alloc: &'doc Bump, | ||||
|   | ||||
| @@ -97,6 +97,15 @@ impl FacetedDocidsExtractor { | ||||
|                 }, | ||||
|             ), | ||||
|             DocumentChange::Update(inner) => { | ||||
|                 if !inner.has_changed_for_fields( | ||||
|                     Some(attributes_to_extract), | ||||
|                     rtxn, | ||||
|                     index, | ||||
|                     context.db_fields_ids_map, | ||||
|                 )? { | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|  | ||||
|                 extract_document_facets( | ||||
|                     attributes_to_extract, | ||||
|                     inner.current(rtxn, index, context.db_fields_ids_map)?, | ||||
|   | ||||
| @@ -351,6 +351,15 @@ impl WordDocidsExtractors { | ||||
|                 )?; | ||||
|             } | ||||
|             DocumentChange::Update(inner) => { | ||||
|                 if !inner.has_changed_for_fields( | ||||
|                     document_tokenizer.attribute_to_extract, | ||||
|                     &context.rtxn, | ||||
|                     context.index, | ||||
|                     context.db_fields_ids_map, | ||||
|                 )? { | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|  | ||||
|                 let mut token_fn = |fname: &str, fid, pos, word: &str| { | ||||
|                     cached_sorter.insert_del_u32( | ||||
|                         fid, | ||||
|   | ||||
| @@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { | ||||
|                 )?; | ||||
|             } | ||||
|             DocumentChange::Update(inner) => { | ||||
|                 if !inner.has_changed_for_fields( | ||||
|                     document_tokenizer.attribute_to_extract, | ||||
|                     rtxn, | ||||
|                     index, | ||||
|                     context.db_fields_ids_map, | ||||
|                 )? { | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|  | ||||
|                 let document = inner.current(rtxn, index, context.db_fields_ids_map)?; | ||||
|                 process_document_tokens( | ||||
|                     document, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user