mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge #4597
4597: Fix embeddings settings update r=ManyTheFish a=ManyTheFish

# Pull Request

- add some conditions reducing the work done when changing the settings
- add some benchmarks on embedders

## Related issue

Fixes #4585

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -198,11 +198,16 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|  |  | ||||||
|                 if document_is_kept { |                 if document_is_kept { | ||||||
|                     // Don't give up if the old prompt was failing |                     // Don't give up if the old prompt was failing | ||||||
|                     let old_prompt = prompt |                     let old_prompt = Some(prompt) | ||||||
|                         .render(obkv, DelAdd::Deletion, old_fields_ids_map) |                         // TODO: this filter works because we erase the vec database when an embedding setting changes. | ||||||
|                         .unwrap_or_default(); |                         // Once the vector pipeline is optimized, this should be removed. | ||||||
|  |                         .filter(|_| !settings_diff.reindex_vectors()) | ||||||
|  |                         .map(|p| { | ||||||
|  |                             p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default() | ||||||
|  |                         }); | ||||||
|                     let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; |                     let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; | ||||||
|                     if old_prompt != new_prompt { |                     if old_prompt.as_ref() != Some(&new_prompt) { | ||||||
|  |                         let old_prompt = old_prompt.unwrap_or_default(); | ||||||
|                         tracing::trace!( |                         tracing::trace!( | ||||||
|                             "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" |                             "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" | ||||||
|                         ); |                         ); | ||||||
| @@ -224,6 +229,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|             &mut manual_vectors_writer, |             &mut manual_vectors_writer, | ||||||
|             &mut key_buffer, |             &mut key_buffer, | ||||||
|             delta, |             delta, | ||||||
|  |             settings_diff, | ||||||
|         )?; |         )?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -264,10 +270,15 @@ fn push_vectors_diff( | |||||||
|     manual_vectors_writer: &mut Writer<BufWriter<File>>, |     manual_vectors_writer: &mut Writer<BufWriter<File>>, | ||||||
|     key_buffer: &mut Vec<u8>, |     key_buffer: &mut Vec<u8>, | ||||||
|     delta: VectorStateDelta, |     delta: VectorStateDelta, | ||||||
|  |     settings_diff: &InnerIndexSettingsDiff, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|     puffin::profile_function!(); |     puffin::profile_function!(); | ||||||
|     let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values(); |     let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values(); | ||||||
|     if must_remove { |     if must_remove | ||||||
|  |     // TODO: the condition below works because we erase the vec database when an embedding setting changes. | ||||||
|  |     // Once the vector pipeline is optimized, this should be removed. | ||||||
|  |     && !settings_diff.reindex_vectors() | ||||||
|  |     { | ||||||
|         key_buffer.truncate(TRUNCATE_SIZE); |         key_buffer.truncate(TRUNCATE_SIZE); | ||||||
|         remove_vectors_writer.insert(&key_buffer, [])?; |         remove_vectors_writer.insert(&key_buffer, [])?; | ||||||
|     } |     } | ||||||
| @@ -295,6 +306,9 @@ fn push_vectors_diff( | |||||||
|         match eob { |         match eob { | ||||||
|             EitherOrBoth::Both(_, _) => (), // no need to touch anything |             EitherOrBoth::Both(_, _) => (), // no need to touch anything | ||||||
|             EitherOrBoth::Left(vector) => { |             EitherOrBoth::Left(vector) => { | ||||||
|  |                 // TODO: the condition below works because we erase the vec database when an embedding setting changes. | ||||||
|  |                 // Once the vector pipeline is optimized, this should be removed. | ||||||
|  |                 if !settings_diff.reindex_vectors() { | ||||||
|                     // We insert only the Del part of the Obkv to inform |                     // We insert only the Del part of the Obkv to inform | ||||||
|                     // that we only want to remove all those vectors. |                     // that we only want to remove all those vectors. | ||||||
|                     let mut obkv = KvWriterDelAdd::memory(); |                     let mut obkv = KvWriterDelAdd::memory(); | ||||||
| @@ -302,6 +316,7 @@ fn push_vectors_diff( | |||||||
|                     let bytes = obkv.into_inner()?; |                     let bytes = obkv.into_inner()?; | ||||||
|                     manual_vectors_writer.insert(&key_buffer, bytes)?; |                     manual_vectors_writer.insert(&key_buffer, bytes)?; | ||||||
|                 } |                 } | ||||||
|  |             } | ||||||
|             EitherOrBoth::Right(vector) => { |             EitherOrBoth::Right(vector) => { | ||||||
|                 // We insert only the Add part of the Obkv to inform |                 // We insert only the Add part of the Obkv to inform | ||||||
|                 // that we only want to add all those vectors. |                 // that we only want to add all those vectors. | ||||||
|   | |||||||
							
								
								
									
										68
									
								
								workloads/movies-subset-hf-embeddings.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								workloads/movies-subset-hf-embeddings.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,68 @@ | |||||||
|  | { | ||||||
|  |   "name": "movies-subset-hf-embeddings", | ||||||
|  |   "run_count": 5, | ||||||
|  |   "extra_cli_args": [ | ||||||
|  |     "--max-indexing-threads=4" | ||||||
|  |   ], | ||||||
|  |   "assets": { | ||||||
|  |     "movies-100.json": { | ||||||
|  |       "local_location": null, | ||||||
|  |       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json", | ||||||
|  |       "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6" | ||||||
|  |     } | ||||||
|  |   }, | ||||||
|  |   "commands": [ | ||||||
|  |     { | ||||||
|  |       "route": "experimental-features", | ||||||
|  |       "method": "PATCH", | ||||||
|  |       "body": { | ||||||
|  |         "inline": { | ||||||
|  |           "vectorStore": true | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "synchronous": "DontWait" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "route": "indexes/movies/settings", | ||||||
|  |       "method": "PATCH", | ||||||
|  |       "body": { | ||||||
|  |         "inline": { | ||||||
|  |           "searchableAttributes": [ | ||||||
|  |             "title", | ||||||
|  |             "overview" | ||||||
|  |           ], | ||||||
|  |           "filterableAttributes": [ | ||||||
|  |             "genres", | ||||||
|  |             "release_date" | ||||||
|  |           ], | ||||||
|  |           "sortableAttributes": [ | ||||||
|  |             "release_date" | ||||||
|  |           ] | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "synchronous": "WaitForTask" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "route": "indexes/movies/settings", | ||||||
|  |       "method": "PATCH", | ||||||
|  |       "body": { | ||||||
|  |         "inline": { | ||||||
|  |           "embedders": { | ||||||
|  |             "default": { | ||||||
|  |               "source": "huggingFace" | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "synchronous": "WaitForTask" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "route": "indexes/movies/documents", | ||||||
|  |       "method": "POST", | ||||||
|  |       "body": { | ||||||
|  |         "asset": "movies-100.json" | ||||||
|  |       }, | ||||||
|  |       "synchronous": "WaitForTask" | ||||||
|  |     } | ||||||
|  |   ] | ||||||
|  | } | ||||||
							
								
								
									
										72
									
								
								workloads/settings-add-embeddings.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								workloads/settings-add-embeddings.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | |||||||
|  | { | ||||||
|  |   "name": "settings-add-embeddings-hf", | ||||||
|  |   "run_count": 5, | ||||||
|  |   "extra_cli_args": [ | ||||||
|  |     "--max-indexing-threads=4" | ||||||
|  |   ], | ||||||
|  |   "assets": { | ||||||
|  |     "movies-100.json": { | ||||||
|  |       "local_location": null, | ||||||
|  |       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json", | ||||||
|  |       "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6" | ||||||
|  |     } | ||||||
|  |   }, | ||||||
|  |   "commands": [ | ||||||
|  |     { | ||||||
|  |       "route": "experimental-features", | ||||||
|  |       "method": "PATCH", | ||||||
|  |       "body": { | ||||||
|  |         "inline": { | ||||||
|  |           "vectorStore": true | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "synchronous": "DontWait" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "route": "indexes/movies/settings", | ||||||
|  |       "method": "PATCH", | ||||||
|  |       "body": { | ||||||
|  |         "inline": { | ||||||
|  |           "searchableAttributes": [ | ||||||
|  |             "title", | ||||||
|  |             "overview" | ||||||
|  |           ], | ||||||
|  |           "filterableAttributes": [ | ||||||
|  |             "genres", | ||||||
|  |             "release_date" | ||||||
|  |           ], | ||||||
|  |           "sortableAttributes": [ | ||||||
|  |             "release_date" | ||||||
|  |           ] | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "synchronous": "DontWait" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "route": "indexes/movies/documents", | ||||||
|  |       "method": "POST", | ||||||
|  |       "body": { | ||||||
|  |         "asset": "movies-100.json" | ||||||
|  |       }, | ||||||
|  |       "synchronous": "WaitForTask" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "route": "indexes/movies/settings", | ||||||
|  |       "method": "PATCH", | ||||||
|  |       "body": { | ||||||
|  |         "inline": { | ||||||
|  |           "embedders": { | ||||||
|  |             "default": { | ||||||
|  |               "source": "huggingFace", | ||||||
|  |               "model": null, | ||||||
|  |               "revision": null, | ||||||
|  |               "documentTemplate": null, | ||||||
|  |               "distribution": null | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "synchronous": "WaitForTask" | ||||||
|  |     } | ||||||
|  |   ] | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user