mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 16:06:31 +00:00 
			
		
		
		
	Merge #5131
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 21s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
				
					
					
				
			
		
			
				
	
				Test suite / Tests almost all features (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Test suite / Test disabled tokenization (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Test suite / Run tests in debug (push) Failing after 10s
				
					
					
				
			
		
			
				
	
				Test suite / Run Rustfmt (push) Successful in 1m25s
				
					
					
				
			
		
			
				
	
				Test suite / Run Clippy (push) Successful in 5m54s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
				
					
					
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 21s
				Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
				Test suite / Tests almost all features (push) Has been skipped
				Test suite / Test disabled tokenization (push) Has been skipped
				Test suite / Run tests in debug (push) Failing after 10s
				Test suite / Run Rustfmt (push) Successful in 1m25s
				Test suite / Run Clippy (push) Successful in 5m54s
				Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
				5131: Ignore documents whose selected fields didn't change r=dureuill a=dureuill
				Attempts to improve the new indexer performance by ignoring documents whose selected fields didn't change:
				- Add `Update::has_changed_for_fields` function
				- Ignore documents whose searchable attributes didn't change for word docids and word pair proximity extraction
				- Ignore documents whose faceted attributes didn't change for facet extraction
				Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -1,7 +1,10 @@ | |||||||
| use bumpalo::Bump; | use bumpalo::Bump; | ||||||
| use heed::RoTxn; | use heed::RoTxn; | ||||||
|  |  | ||||||
| use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions}; | use super::document::{ | ||||||
|  |     Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, | ||||||
|  | }; | ||||||
|  | use super::extract::perm_json_p; | ||||||
| use super::vector_document::{ | use super::vector_document::{ | ||||||
|     MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions, |     MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions, | ||||||
| }; | }; | ||||||
| @@ -164,6 +167,80 @@ impl<'doc> Update<'doc> { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Returns whether the updated version of the document is different from the current version for the passed subset of fields. | ||||||
|  |     /// | ||||||
|  |     /// `true` if at least one top-level field that is exactly a member of `fields`, or a parent of a member of `fields`, changed. | ||||||
|  |     /// Otherwise `false`. | ||||||
|  |     pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>( | ||||||
|  |         &self, | ||||||
|  |         fields: Option<&[&str]>, | ||||||
|  |         rtxn: &'t RoTxn, | ||||||
|  |         index: &'t Index, | ||||||
|  |         mapper: &'t Mapper, | ||||||
|  |     ) -> Result<bool> { | ||||||
|  |         let mut changed = false; | ||||||
|  |         let mut cached_current = None; | ||||||
|  |         let mut updated_selected_field_count = 0; | ||||||
|  |  | ||||||
|  |         for entry in self.updated().iter_top_level_fields() { | ||||||
|  |             let (key, updated_value) = entry?; | ||||||
|  |  | ||||||
|  |             if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             updated_selected_field_count += 1; | ||||||
|  |             let current = match cached_current { | ||||||
|  |                 Some(current) => current, | ||||||
|  |                 None => self.current(rtxn, index, mapper)?, | ||||||
|  |             }; | ||||||
|  |             let current_value = current.top_level_field(key)?; | ||||||
|  |             let Some(current_value) = current_value else { | ||||||
|  |                 changed = true; | ||||||
|  |                 break; | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             if current_value.get() != updated_value.get() { | ||||||
|  |                 changed = true; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             cached_current = Some(current); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if !self.has_deletion { | ||||||
|  |             // no field deletion, so fields that don't appear in `updated` cannot have changed | ||||||
|  |             return Ok(changed); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if changed { | ||||||
|  |             return Ok(true); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // we saw all updated fields, and set `changed` if any field wasn't in `current`. | ||||||
|  |         // so if there are as many fields in `current` as in `updated`, then nothing changed. | ||||||
|  |         // If there are any more fields in `current`, then they are missing in `updated`. | ||||||
|  |         let has_deleted_fields = { | ||||||
|  |             let current = match cached_current { | ||||||
|  |                 Some(current) => current, | ||||||
|  |                 None => self.current(rtxn, index, mapper)?, | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             let mut current_selected_field_count = 0; | ||||||
|  |             for entry in current.iter_top_level_fields() { | ||||||
|  |                 let (key, _) = entry?; | ||||||
|  |  | ||||||
|  |                 if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { | ||||||
|  |                     continue; | ||||||
|  |                 } | ||||||
|  |                 current_selected_field_count += 1; | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             current_selected_field_count != updated_selected_field_count | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         Ok(has_deleted_fields) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn updated_vectors( |     pub fn updated_vectors( | ||||||
|         &self, |         &self, | ||||||
|         doc_alloc: &'doc Bump, |         doc_alloc: &'doc Bump, | ||||||
|   | |||||||
| @@ -97,6 +97,15 @@ impl FacetedDocidsExtractor { | |||||||
|                 }, |                 }, | ||||||
|             ), |             ), | ||||||
|             DocumentChange::Update(inner) => { |             DocumentChange::Update(inner) => { | ||||||
|  |                 if !inner.has_changed_for_fields( | ||||||
|  |                     Some(attributes_to_extract), | ||||||
|  |                     rtxn, | ||||||
|  |                     index, | ||||||
|  |                     context.db_fields_ids_map, | ||||||
|  |                 )? { | ||||||
|  |                     return Ok(()); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 extract_document_facets( |                 extract_document_facets( | ||||||
|                     attributes_to_extract, |                     attributes_to_extract, | ||||||
|                     inner.current(rtxn, index, context.db_fields_ids_map)?, |                     inner.current(rtxn, index, context.db_fields_ids_map)?, | ||||||
|   | |||||||
| @@ -351,6 +351,15 @@ impl WordDocidsExtractors { | |||||||
|                 )?; |                 )?; | ||||||
|             } |             } | ||||||
|             DocumentChange::Update(inner) => { |             DocumentChange::Update(inner) => { | ||||||
|  |                 if !inner.has_changed_for_fields( | ||||||
|  |                     document_tokenizer.attribute_to_extract, | ||||||
|  |                     &context.rtxn, | ||||||
|  |                     context.index, | ||||||
|  |                     context.db_fields_ids_map, | ||||||
|  |                 )? { | ||||||
|  |                     return Ok(()); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 let mut token_fn = |fname: &str, fid, pos, word: &str| { |                 let mut token_fn = |fname: &str, fid, pos, word: &str| { | ||||||
|                     cached_sorter.insert_del_u32( |                     cached_sorter.insert_del_u32( | ||||||
|                         fid, |                         fid, | ||||||
|   | |||||||
| @@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { | |||||||
|                 )?; |                 )?; | ||||||
|             } |             } | ||||||
|             DocumentChange::Update(inner) => { |             DocumentChange::Update(inner) => { | ||||||
|  |                 if !inner.has_changed_for_fields( | ||||||
|  |                     document_tokenizer.attribute_to_extract, | ||||||
|  |                     rtxn, | ||||||
|  |                     index, | ||||||
|  |                     context.db_fields_ids_map, | ||||||
|  |                 )? { | ||||||
|  |                     return Ok(()); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 let document = inner.current(rtxn, index, context.db_fields_ids_map)?; |                 let document = inner.current(rtxn, index, context.db_fields_ids_map)?; | ||||||
|                 process_document_tokens( |                 process_document_tokens( | ||||||
|                     document, |                     document, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user