mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge #5355
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Run the indexing fuzzer / Setup the action (push) Successful in 1h5m46s
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Look for flaky tests / flaky (push) Failing after 1s
				
					
					
				
			
		
			
				
	
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
				
					
					
				
			
		
			
				
	
				Test suite / Tests almost all features (push) Failing after 13s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
				
					
					
				
			
		
			
				
	
				Test suite / Test with Ollama (push) Failing after 7s
				
					
					
				
			
		
			
				
	
				Test suite / Test disabled tokenization (push) Failing after 10s
				
					
					
				
			
		
			
				
	
				Test suite / Run tests in debug (push) Failing after 15s
				
					
					
				
			
		
			
				
	
				Test suite / Run Rustfmt (push) Failing after 16s
				
					
					
				
			
		
			
				
	
				Test suite / Run Clippy (push) Successful in 9m39s
				
					
					
				
			
		
			
				
	
				SDKs tests / define-docker-image (push) Failing after 5s
				
					
					
				
			
		
			
				
	
				SDKs tests / .NET SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Dart SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Go SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Java SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / JS SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / PHP SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Python SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Ruby SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Rust SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Swift SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Test suite / Tests on macos-13 (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Test suite / Tests on windows-2022 (push) Has been cancelled
				
					
					
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Run the indexing fuzzer / Setup the action (push) Successful in 1h5m46s
				Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
				Look for flaky tests / flaky (push) Failing after 1s
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
				Test suite / Tests almost all features (push) Failing after 13s
				Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
				Test suite / Test with Ollama (push) Failing after 7s
				Test suite / Test disabled tokenization (push) Failing after 10s
				Test suite / Run tests in debug (push) Failing after 15s
				Test suite / Run Rustfmt (push) Failing after 16s
				Test suite / Run Clippy (push) Successful in 9m39s
				SDKs tests / define-docker-image (push) Failing after 5s
				SDKs tests / .NET SDK tests (push) Has been skipped
				SDKs tests / Dart SDK tests (push) Has been skipped
				SDKs tests / Go SDK tests (push) Has been skipped
				SDKs tests / Java SDK tests (push) Has been skipped
				SDKs tests / JS SDK tests (push) Has been skipped
				SDKs tests / PHP SDK tests (push) Has been skipped
				SDKs tests / Python SDK tests (push) Has been skipped
				SDKs tests / Ruby SDK tests (push) Has been skipped
				SDKs tests / Rust SDK tests (push) Has been skipped
				SDKs tests / Swift SDK tests (push) Has been skipped
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				Test suite / Tests on macos-13 (push) Has been cancelled
				Test suite / Tests on windows-2022 (push) Has been cancelled
				5355: Support fetching the pooling method from the model configuration r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #5354 ## What does this PR do? - Fetches the pooling configuration from the model repository - Use a pooling method that depends on the pooling configuration of that model. - Allow overriding the pooling method with a new huggingFace embedder parameter `pooling` - for backward-compatibility with Meilisearch v1.13 - for compatibility with embedders that exhibit the same behavior as Meilisearch v1.13 - Handle the default value of that new parameter - for compatibility, when importing a db/a dump, it should be set to `forceMean` - when (re)set from the settings for an embedder, it should be set to `useModel` Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -1,5 +1,5 @@ | ||||
| --- | ||||
| source: dump/src/reader/mod.rs | ||||
| source: crates/dump/src/reader/mod.rs | ||||
| expression: vector_index.settings().unwrap() | ||||
| --- | ||||
| { | ||||
| @@ -49,6 +49,7 @@ expression: vector_index.settings().unwrap() | ||||
|       "source": "huggingFace", | ||||
|       "model": "BAAI/bge-base-en-v1.5", | ||||
|       "revision": "617ca489d9e86b49b8167676d8220688b99db36e", | ||||
|       "pooling": "forceMean", | ||||
|       "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" | ||||
|     } | ||||
|   }, | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind}; | ||||
| use std::path::Path; | ||||
|  | ||||
| pub use meilisearch_types::milli; | ||||
| use meilisearch_types::milli::vector::hf::OverridePooling; | ||||
| use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
| use tracing::debug; | ||||
| @@ -252,7 +253,29 @@ impl V6IndexReader { | ||||
|     } | ||||
|  | ||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||
|         let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||
|         let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||
|         patch_embedders(&mut settings); | ||||
|         Ok(settings.check()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn patch_embedders(settings: &mut Settings<Unchecked>) { | ||||
|     if let Setting::Set(embedders) = &mut settings.embedders { | ||||
|         for settings in embedders.values_mut() { | ||||
|             let Setting::Set(settings) = &mut settings.inner else { | ||||
|                 continue; | ||||
|             }; | ||||
|             if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace) | ||||
|             { | ||||
|                 continue; | ||||
|             } | ||||
|             settings.pooling = match settings.pooling { | ||||
|                 Setting::Set(pooling) => Setting::Set(pooling), | ||||
|                 // Backward compatibility with v1.13: when a Hugging Face embedder has no | ||||
|                 // explicit pooling (`Reset` or `NotSet`), pin it to `forceMean`. | ||||
|                 // Dumps created in v1.14 and up already carry the setting for these embedders. | ||||
|                 Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean), | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user