mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge #5355
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Run the indexing fuzzer / Setup the action (push) Successful in 1h5m46s
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Look for flaky tests / flaky (push) Failing after 1s
				
					
					
				
			
		
			
				
	
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
				
					
					
				
			
		
			
				
	
				Test suite / Tests almost all features (push) Failing after 13s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
				
					
					
				
			
		
			
				
	
				Test suite / Test with Ollama (push) Failing after 7s
				
					
					
				
			
		
			
				
	
				Test suite / Test disabled tokenization (push) Failing after 10s
				
					
					
				
			
		
			
				
	
				Test suite / Run tests in debug (push) Failing after 15s
				
					
					
				
			
		
			
				
	
				Test suite / Run Rustfmt (push) Failing after 16s
				
					
					
				
			
		
			
				
	
				Test suite / Run Clippy (push) Successful in 9m39s
				
					
					
				
			
		
			
				
	
				SDKs tests / define-docker-image (push) Failing after 5s
				
					
					
				
			
		
			
				
	
				SDKs tests / .NET SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Dart SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Go SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Java SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / JS SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / PHP SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Python SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Ruby SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Rust SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Swift SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Test suite / Tests on macos-13 (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Test suite / Tests on windows-2022 (push) Has been cancelled
				
					
					
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Run the indexing fuzzer / Setup the action (push) Successful in 1h5m46s
				Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
				Look for flaky tests / flaky (push) Failing after 1s
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
				Test suite / Tests almost all features (push) Failing after 13s
				Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
				Test suite / Test with Ollama (push) Failing after 7s
				Test suite / Test disabled tokenization (push) Failing after 10s
				Test suite / Run tests in debug (push) Failing after 15s
				Test suite / Run Rustfmt (push) Failing after 16s
				Test suite / Run Clippy (push) Successful in 9m39s
				SDKs tests / define-docker-image (push) Failing after 5s
				SDKs tests / .NET SDK tests (push) Has been skipped
				SDKs tests / Dart SDK tests (push) Has been skipped
				SDKs tests / Go SDK tests (push) Has been skipped
				SDKs tests / Java SDK tests (push) Has been skipped
				SDKs tests / JS SDK tests (push) Has been skipped
				SDKs tests / PHP SDK tests (push) Has been skipped
				SDKs tests / Python SDK tests (push) Has been skipped
				SDKs tests / Ruby SDK tests (push) Has been skipped
				SDKs tests / Rust SDK tests (push) Has been skipped
				SDKs tests / Swift SDK tests (push) Has been skipped
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				Test suite / Tests on macos-13 (push) Has been cancelled
				Test suite / Tests on windows-2022 (push) Has been cancelled
				5355: Support fetching the pooling method from the model configuration r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #5354 ## What does this PR do? - Fetches the pooling configuration from the model repository - Use a pooling method that depends on the pooling configuration of that model. - Allow overriding the pooling method with a new huggingFace embedder parameter `pooling` - for backward-compatibility with Meilisearch v1.13 - for compatibility with embedders that exhibit the same behavior as Meilisearch v1.13 - Handle the default value of that new parameter - for compatibility, when importing a db/a dump, it should be set to `forceMean` - when (re)set from the settings for an embedder, it should be set to `useModel` Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -1,5 +1,5 @@ | |||||||
| --- | --- | ||||||
| source: dump/src/reader/mod.rs | source: crates/dump/src/reader/mod.rs | ||||||
| expression: vector_index.settings().unwrap() | expression: vector_index.settings().unwrap() | ||||||
| --- | --- | ||||||
| { | { | ||||||
| @@ -49,6 +49,7 @@ expression: vector_index.settings().unwrap() | |||||||
|       "source": "huggingFace", |       "source": "huggingFace", | ||||||
|       "model": "BAAI/bge-base-en-v1.5", |       "model": "BAAI/bge-base-en-v1.5", | ||||||
|       "revision": "617ca489d9e86b49b8167676d8220688b99db36e", |       "revision": "617ca489d9e86b49b8167676d8220688b99db36e", | ||||||
|  |       "pooling": "forceMean", | ||||||
|       "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" |       "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" | ||||||
|     } |     } | ||||||
|   }, |   }, | ||||||
|   | |||||||
| @@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind}; | |||||||
| use std::path::Path; | use std::path::Path; | ||||||
|  |  | ||||||
| pub use meilisearch_types::milli; | pub use meilisearch_types::milli; | ||||||
|  | use meilisearch_types::milli::vector::hf::OverridePooling; | ||||||
| use tempfile::TempDir; | use tempfile::TempDir; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
| use tracing::debug; | use tracing::debug; | ||||||
| @@ -252,7 +253,29 @@ impl V6IndexReader { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { |     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||||
|         let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; |         let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||||
|  |         patch_embedders(&mut settings); | ||||||
|         Ok(settings.check()) |         Ok(settings.check()) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | fn patch_embedders(settings: &mut Settings<Unchecked>) { | ||||||
|  |     if let Setting::Set(embedders) = &mut settings.embedders { | ||||||
|  |         for settings in embedders.values_mut() { | ||||||
|  |             let Setting::Set(settings) = &mut settings.inner else { | ||||||
|  |                 continue; | ||||||
|  |             }; | ||||||
|  |             if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace) | ||||||
|  |             { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             settings.pooling = match settings.pooling { | ||||||
|  |                 Setting::Set(pooling) => Setting::Set(pooling), | ||||||
|  |                 // if the pooling for a hugging face embedder is not set, force it to `forceMean` | ||||||
|  |                 // for backward compatibility with v1.13 | ||||||
|  |                 // dumps created in v1.14 and up will have the setting set for hugging face embedders | ||||||
|  |                 Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean), | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,12 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| expression: simple_hf_config.embedder_options | expression: simple_hf_config.embedder_options | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| { | { | ||||||
|   "HuggingFace": { |   "HuggingFace": { | ||||||
|     "model": "sentence-transformers/all-MiniLM-L6-v2", |     "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|     "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", |     "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|     "distribution": null |     "distribution": null, | ||||||
|  |     "pooling": "useModel" | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, 
prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [0,] | enqueued [0,] | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test.rs | source: crates/index-scheduler/src/scheduler/test.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, 
facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [] | enqueued [] | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: 
Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: 
NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: 
Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: 
NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: 
Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: 
NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: 
Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: 
NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": 
Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: 
Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: 
NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [0,] | enqueued [0,] | ||||||
|   | |||||||
| @@ -1,13 +1,12 @@ | |||||||
| --- | --- | ||||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||||
| snapshot_kind: text |  | ||||||
| --- | --- | ||||||
| ### Autobatching Enabled = true | ### Autobatching Enabled = true | ||||||
| ### Processing batch None: | ### Processing batch None: | ||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: 
Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: 
NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [] | enqueued [] | ||||||
|   | |||||||
| @@ -404,31 +404,32 @@ fn import_vectors_first_and_embedder_later() { | |||||||
|     // even though we specified the vector for the ID 3, it shouldn't be marked |     // even though we specified the vector for the ID 3, it shouldn't be marked | ||||||
|     // as user provided since we explicitly marked it as NOT user provided. |     // as user provided since we explicitly marked it as NOT user provided. | ||||||
|     snapshot!(format!("{conf:#?}"), @r###" |     snapshot!(format!("{conf:#?}"), @r###" | ||||||
|         [ |     [ | ||||||
|             IndexEmbeddingConfig { |         IndexEmbeddingConfig { | ||||||
|                 name: "my_doggo_embedder", |             name: "my_doggo_embedder", | ||||||
|                 config: EmbeddingConfig { |             config: EmbeddingConfig { | ||||||
|                     embedder_options: HuggingFace( |                 embedder_options: HuggingFace( | ||||||
|                         EmbedderOptions { |                     EmbedderOptions { | ||||||
|                             model: "sentence-transformers/all-MiniLM-L6-v2", |                         model: "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|                             revision: Some( |                         revision: Some( | ||||||
|                                 "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", |                             "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|                             ), |  | ||||||
|                             distribution: None, |  | ||||||
|                         }, |  | ||||||
|                     ), |  | ||||||
|                     prompt: PromptData { |  | ||||||
|                         template: "{{doc.doggo}}", |  | ||||||
|                         max_bytes: Some( |  | ||||||
|                             400, |  | ||||||
|                         ), |                         ), | ||||||
|  |                         distribution: None, | ||||||
|  |                         pooling: UseModel, | ||||||
|                     }, |                     }, | ||||||
|                     quantized: None, |                 ), | ||||||
|  |                 prompt: PromptData { | ||||||
|  |                     template: "{{doc.doggo}}", | ||||||
|  |                     max_bytes: Some( | ||||||
|  |                         400, | ||||||
|  |                     ), | ||||||
|                 }, |                 }, | ||||||
|                 user_provided: RoaringBitmap<[1, 2]>, |                 quantized: None, | ||||||
|             }, |             }, | ||||||
|         ] |             user_provided: RoaringBitmap<[1, 2]>, | ||||||
|         "###); |         }, | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|     let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); |     let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); | ||||||
|     let embeddings = index.embeddings(&rtxn, docid).unwrap(); |     let embeddings = index.embeddings(&rtxn, docid).unwrap(); | ||||||
|     let embedding = &embeddings["my_doggo_embedder"]; |     let embedding = &embeddings["my_doggo_embedder"]; | ||||||
|   | |||||||
| @@ -2414,6 +2414,7 @@ async fn generate_and_import_dump_containing_vectors() { | |||||||
|           "source": "huggingFace", |           "source": "huggingFace", | ||||||
|           "model": "sentence-transformers/all-MiniLM-L6-v2", |           "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|           "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", |           "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|  |           "pooling": "useModel", | ||||||
|           "documentTemplate": "{{doc.doggo}}", |           "documentTemplate": "{{doc.doggo}}", | ||||||
|           "documentTemplateMaxBytes": 400 |           "documentTemplateMaxBytes": 400 | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -2768,6 +2768,7 @@ mod tests { | |||||||
|                         source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided), |                         source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided), | ||||||
|                         model: Setting::NotSet, |                         model: Setting::NotSet, | ||||||
|                         revision: Setting::NotSet, |                         revision: Setting::NotSet, | ||||||
|  |                         pooling: Setting::NotSet, | ||||||
|                         api_key: Setting::NotSet, |                         api_key: Setting::NotSet, | ||||||
|                         dimensions: Setting::Set(3), |                         dimensions: Setting::Set(3), | ||||||
|                         document_template: Setting::NotSet, |                         document_template: Setting::NotSet, | ||||||
|   | |||||||
| @@ -1676,6 +1676,7 @@ fn validate_prompt( | |||||||
|             source, |             source, | ||||||
|             model, |             model, | ||||||
|             revision, |             revision, | ||||||
|  |             pooling, | ||||||
|             api_key, |             api_key, | ||||||
|             dimensions, |             dimensions, | ||||||
|             document_template: Setting::Set(template), |             document_template: Setting::Set(template), | ||||||
| @@ -1709,6 +1710,7 @@ fn validate_prompt( | |||||||
|                 source, |                 source, | ||||||
|                 model, |                 model, | ||||||
|                 revision, |                 revision, | ||||||
|  |                 pooling, | ||||||
|                 api_key, |                 api_key, | ||||||
|                 dimensions, |                 dimensions, | ||||||
|                 document_template: Setting::Set(template), |                 document_template: Setting::Set(template), | ||||||
| @@ -1735,6 +1737,7 @@ pub fn validate_embedding_settings( | |||||||
|         source, |         source, | ||||||
|         model, |         model, | ||||||
|         revision, |         revision, | ||||||
|  |         pooling, | ||||||
|         api_key, |         api_key, | ||||||
|         dimensions, |         dimensions, | ||||||
|         document_template, |         document_template, | ||||||
| @@ -1776,6 +1779,7 @@ pub fn validate_embedding_settings( | |||||||
|             source, |             source, | ||||||
|             model, |             model, | ||||||
|             revision, |             revision, | ||||||
|  |             pooling, | ||||||
|             api_key, |             api_key, | ||||||
|             dimensions, |             dimensions, | ||||||
|             document_template, |             document_template, | ||||||
| @@ -1791,6 +1795,7 @@ pub fn validate_embedding_settings( | |||||||
|     match inferred_source { |     match inferred_source { | ||||||
|         EmbedderSource::OpenAi => { |         EmbedderSource::OpenAi => { | ||||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; |             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||||
|  |             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||||
|  |  | ||||||
|             check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; |             check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; | ||||||
|             check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; |             check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; | ||||||
| @@ -1829,6 +1834,7 @@ pub fn validate_embedding_settings( | |||||||
|         EmbedderSource::Ollama => { |         EmbedderSource::Ollama => { | ||||||
|             check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?; |             check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?; | ||||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; |             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||||
|  |             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||||
|  |  | ||||||
|             check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; |             check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; | ||||||
|             check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; |             check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; | ||||||
| @@ -1846,6 +1852,7 @@ pub fn validate_embedding_settings( | |||||||
|         EmbedderSource::UserProvided => { |         EmbedderSource::UserProvided => { | ||||||
|             check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; |             check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; | ||||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; |             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||||
|  |             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||||
|             check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; |             check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; | ||||||
|             check_unset( |             check_unset( | ||||||
|                 &document_template, |                 &document_template, | ||||||
| @@ -1869,6 +1876,7 @@ pub fn validate_embedding_settings( | |||||||
|         EmbedderSource::Rest => { |         EmbedderSource::Rest => { | ||||||
|             check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; |             check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; | ||||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; |             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||||
|  |             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||||
|             check_set(&url, EmbeddingSettings::URL, inferred_source, name)?; |             check_set(&url, EmbeddingSettings::URL, inferred_source, name)?; | ||||||
|             check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; |             check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; | ||||||
|             check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; |             check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; | ||||||
| @@ -1878,6 +1886,7 @@ pub fn validate_embedding_settings( | |||||||
|         source, |         source, | ||||||
|         model, |         model, | ||||||
|         revision, |         revision, | ||||||
|  |         pooling, | ||||||
|         api_key, |         api_key, | ||||||
|         dimensions, |         dimensions, | ||||||
|         document_template, |         document_template, | ||||||
|   | |||||||
| @@ -262,6 +262,31 @@ impl NewEmbedderError { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn open_pooling_config( | ||||||
|  |         pooling_config_filename: PathBuf, | ||||||
|  |         inner: std::io::Error, | ||||||
|  |     ) -> NewEmbedderError { | ||||||
|  |         let open_config = OpenPoolingConfig { filename: pooling_config_filename, inner }; | ||||||
|  |  | ||||||
|  |         Self { | ||||||
|  |             kind: NewEmbedderErrorKind::OpenPoolingConfig(open_config), | ||||||
|  |             fault: FaultSource::Runtime, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn deserialize_pooling_config( | ||||||
|  |         model_name: String, | ||||||
|  |         pooling_config_filename: PathBuf, | ||||||
|  |         inner: serde_json::Error, | ||||||
|  |     ) -> NewEmbedderError { | ||||||
|  |         let deserialize_pooling_config = | ||||||
|  |             DeserializePoolingConfig { model_name, filename: pooling_config_filename, inner }; | ||||||
|  |         Self { | ||||||
|  |             kind: NewEmbedderErrorKind::DeserializePoolingConfig(deserialize_pooling_config), | ||||||
|  |             fault: FaultSource::Runtime, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn open_tokenizer( |     pub fn open_tokenizer( | ||||||
|         tokenizer_filename: PathBuf, |         tokenizer_filename: PathBuf, | ||||||
|         inner: Box<dyn std::error::Error + Send + Sync>, |         inner: Box<dyn std::error::Error + Send + Sync>, | ||||||
| @@ -319,6 +344,13 @@ pub struct OpenConfig { | |||||||
|     pub inner: std::io::Error, |     pub inner: std::io::Error, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, thiserror::Error)] | ||||||
|  | #[error("could not open pooling config at {filename}: {inner}")] | ||||||
|  | pub struct OpenPoolingConfig { | ||||||
|  |     pub filename: PathBuf, | ||||||
|  |     pub inner: std::io::Error, | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Debug, thiserror::Error)] | #[derive(Debug, thiserror::Error)] | ||||||
| #[error("for model '{model_name}', could not deserialize config at {filename} as JSON: {inner}")] | #[error("for model '{model_name}', could not deserialize config at {filename} as JSON: {inner}")] | ||||||
| pub struct DeserializeConfig { | pub struct DeserializeConfig { | ||||||
| @@ -327,6 +359,14 @@ pub struct DeserializeConfig { | |||||||
|     pub inner: serde_json::Error, |     pub inner: serde_json::Error, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, thiserror::Error)] | ||||||
|  | #[error("for model '{model_name}', could not deserialize file at `{filename}` as a pooling config: {inner}")] | ||||||
|  | pub struct DeserializePoolingConfig { | ||||||
|  |     pub model_name: String, | ||||||
|  |     pub filename: PathBuf, | ||||||
|  |     pub inner: serde_json::Error, | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Debug, thiserror::Error)] | #[derive(Debug, thiserror::Error)] | ||||||
| #[error("model `{model_name}` appears to be unsupported{}\n  - inner error: {inner}", | #[error("model `{model_name}` appears to be unsupported{}\n  - inner error: {inner}", | ||||||
| if architectures.is_empty() { | if architectures.is_empty() { | ||||||
| @@ -354,8 +394,12 @@ pub enum NewEmbedderErrorKind { | |||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|     OpenConfig(OpenConfig), |     OpenConfig(OpenConfig), | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|  |     OpenPoolingConfig(OpenPoolingConfig), | ||||||
|  |     #[error(transparent)] | ||||||
|     DeserializeConfig(DeserializeConfig), |     DeserializeConfig(DeserializeConfig), | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|  |     DeserializePoolingConfig(DeserializePoolingConfig), | ||||||
|  |     #[error(transparent)] | ||||||
|     UnsupportedModel(UnsupportedModel), |     UnsupportedModel(UnsupportedModel), | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|     OpenTokenizer(OpenTokenizer), |     OpenTokenizer(OpenTokenizer), | ||||||
|   | |||||||
| @@ -34,6 +34,30 @@ pub struct EmbedderOptions { | |||||||
|     pub model: String, |     pub model: String, | ||||||
|     pub revision: Option<String>, |     pub revision: Option<String>, | ||||||
|     pub distribution: Option<DistributionShift>, |     pub distribution: Option<DistributionShift>, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub pooling: OverridePooling, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive( | ||||||
|  |     Debug, | ||||||
|  |     Clone, | ||||||
|  |     Copy, | ||||||
|  |     Default, | ||||||
|  |     Hash, | ||||||
|  |     PartialEq, | ||||||
|  |     Eq, | ||||||
|  |     serde::Deserialize, | ||||||
|  |     serde::Serialize, | ||||||
|  |     utoipa::ToSchema, | ||||||
|  |     deserr::Deserr, | ||||||
|  | )] | ||||||
|  | #[deserr(rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub enum OverridePooling { | ||||||
|  |     UseModel, | ||||||
|  |     ForceCls, | ||||||
|  |     #[default] | ||||||
|  |     ForceMean, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl EmbedderOptions { | impl EmbedderOptions { | ||||||
| @@ -42,6 +66,7 @@ impl EmbedderOptions { | |||||||
|             model: "BAAI/bge-base-en-v1.5".to_string(), |             model: "BAAI/bge-base-en-v1.5".to_string(), | ||||||
|             revision: Some("617ca489d9e86b49b8167676d8220688b99db36e".into()), |             revision: Some("617ca489d9e86b49b8167676d8220688b99db36e".into()), | ||||||
|             distribution: None, |             distribution: None, | ||||||
|  |             pooling: OverridePooling::UseModel, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -58,6 +83,7 @@ pub struct Embedder { | |||||||
|     tokenizer: Tokenizer, |     tokenizer: Tokenizer, | ||||||
|     options: EmbedderOptions, |     options: EmbedderOptions, | ||||||
|     dimensions: usize, |     dimensions: usize, | ||||||
|  |     pooling: Pooling, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl std::fmt::Debug for Embedder { | impl std::fmt::Debug for Embedder { | ||||||
| @@ -66,10 +92,62 @@ impl std::fmt::Debug for Embedder { | |||||||
|             .field("model", &self.options.model) |             .field("model", &self.options.model) | ||||||
|             .field("tokenizer", &self.tokenizer) |             .field("tokenizer", &self.tokenizer) | ||||||
|             .field("options", &self.options) |             .field("options", &self.options) | ||||||
|  |             .field("pooling", &self.pooling) | ||||||
|             .finish() |             .finish() | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Clone, Copy, serde::Deserialize)] | ||||||
|  | struct PoolingConfig { | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub pooling_mode_cls_token: bool, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub pooling_mode_mean_tokens: bool, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub pooling_mode_max_tokens: bool, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub pooling_mode_mean_sqrt_len_tokens: bool, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub pooling_mode_lasttoken: bool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Copy, Default)] | ||||||
|  | pub enum Pooling { | ||||||
|  |     #[default] | ||||||
|  |     Mean, | ||||||
|  |     Cls, | ||||||
|  |     Max, | ||||||
|  |     MeanSqrtLen, | ||||||
|  |     LastToken, | ||||||
|  | } | ||||||
|  | impl Pooling { | ||||||
|  |     fn override_with(&mut self, pooling: OverridePooling) { | ||||||
|  |         match pooling { | ||||||
|  |             OverridePooling::UseModel => {} | ||||||
|  |             OverridePooling::ForceCls => *self = Pooling::Cls, | ||||||
|  |             OverridePooling::ForceMean => *self = Pooling::Mean, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl From<PoolingConfig> for Pooling { | ||||||
|  |     fn from(value: PoolingConfig) -> Self { | ||||||
|  |         if value.pooling_mode_cls_token { | ||||||
|  |             Self::Cls | ||||||
|  |         } else if value.pooling_mode_mean_tokens { | ||||||
|  |             Self::Mean | ||||||
|  |         } else if value.pooling_mode_lasttoken { | ||||||
|  |             Self::LastToken | ||||||
|  |         } else if value.pooling_mode_mean_sqrt_len_tokens { | ||||||
|  |             Self::MeanSqrtLen | ||||||
|  |         } else if value.pooling_mode_max_tokens { | ||||||
|  |             Self::Max | ||||||
|  |         } else { | ||||||
|  |             Self::default() | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl Embedder { | impl Embedder { | ||||||
|     pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> { |     pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> { | ||||||
|         let device = match candle_core::Device::cuda_if_available(0) { |         let device = match candle_core::Device::cuda_if_available(0) { | ||||||
| @@ -83,7 +161,7 @@ impl Embedder { | |||||||
|             Some(revision) => Repo::with_revision(options.model.clone(), RepoType::Model, revision), |             Some(revision) => Repo::with_revision(options.model.clone(), RepoType::Model, revision), | ||||||
|             None => Repo::model(options.model.clone()), |             None => Repo::model(options.model.clone()), | ||||||
|         }; |         }; | ||||||
|         let (config_filename, tokenizer_filename, weights_filename, weight_source) = { |         let (config_filename, tokenizer_filename, weights_filename, weight_source, pooling) = { | ||||||
|             let api = Api::new().map_err(NewEmbedderError::new_api_fail)?; |             let api = Api::new().map_err(NewEmbedderError::new_api_fail)?; | ||||||
|             let api = api.repo(repo); |             let api = api.repo(repo); | ||||||
|             let config = api.get("config.json").map_err(NewEmbedderError::api_get)?; |             let config = api.get("config.json").map_err(NewEmbedderError::api_get)?; | ||||||
| @@ -97,7 +175,38 @@ impl Embedder { | |||||||
|                     }) |                     }) | ||||||
|                     .map_err(NewEmbedderError::api_get)? |                     .map_err(NewEmbedderError::api_get)? | ||||||
|             }; |             }; | ||||||
|             (config, tokenizer, weights, source) |             let pooling = match api.get("1_Pooling/config.json") { | ||||||
|  |                 Ok(pooling) => Some(pooling), | ||||||
|  |                 Err(hf_hub::api::sync::ApiError::RequestError(error)) | ||||||
|  |                     if matches!(*error, ureq::Error::Status(404, _,)) => | ||||||
|  |                 { | ||||||
|  |                     // ignore the error if the file simply doesn't exist | ||||||
|  |                     None | ||||||
|  |                 } | ||||||
|  |                 Err(error) => return Err(NewEmbedderError::api_get(error)), | ||||||
|  |             }; | ||||||
|  |             let mut pooling: Pooling = match pooling { | ||||||
|  |                 Some(pooling_filename) => { | ||||||
|  |                     let pooling = std::fs::read_to_string(&pooling_filename).map_err(|inner| { | ||||||
|  |                         NewEmbedderError::open_pooling_config(pooling_filename.clone(), inner) | ||||||
|  |                     })?; | ||||||
|  |  | ||||||
|  |                     let pooling: PoolingConfig = | ||||||
|  |                         serde_json::from_str(&pooling).map_err(|inner| { | ||||||
|  |                             NewEmbedderError::deserialize_pooling_config( | ||||||
|  |                                 options.model.clone(), | ||||||
|  |                                 pooling_filename, | ||||||
|  |                                 inner, | ||||||
|  |                             ) | ||||||
|  |                         })?; | ||||||
|  |                     pooling.into() | ||||||
|  |                 } | ||||||
|  |                 None => Pooling::default(), | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             pooling.override_with(options.pooling); | ||||||
|  |  | ||||||
|  |             (config, tokenizer, weights, source, pooling) | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let config = std::fs::read_to_string(&config_filename) |         let config = std::fs::read_to_string(&config_filename) | ||||||
| @@ -122,6 +231,8 @@ impl Embedder { | |||||||
|             }, |             }, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|  |         tracing::debug!(model = options.model, weight=?weight_source, pooling=?pooling, "model config"); | ||||||
|  |  | ||||||
|         let model = BertModel::load(vb, &config).map_err(NewEmbedderError::load_model)?; |         let model = BertModel::load(vb, &config).map_err(NewEmbedderError::load_model)?; | ||||||
|  |  | ||||||
|         if let Some(pp) = tokenizer.get_padding_mut() { |         if let Some(pp) = tokenizer.get_padding_mut() { | ||||||
| @@ -134,7 +245,7 @@ impl Embedder { | |||||||
|             tokenizer.with_padding(Some(pp)); |             tokenizer.with_padding(Some(pp)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let mut this = Self { model, tokenizer, options, dimensions: 0 }; |         let mut this = Self { model, tokenizer, options, dimensions: 0, pooling }; | ||||||
|  |  | ||||||
|         let embeddings = this |         let embeddings = this | ||||||
|             .embed(vec!["test".into()]) |             .embed(vec!["test".into()]) | ||||||
| @@ -168,17 +279,53 @@ impl Embedder { | |||||||
|             .forward(&token_ids, &token_type_ids, None) |             .forward(&token_ids, &token_type_ids, None) | ||||||
|             .map_err(EmbedError::model_forward)?; |             .map_err(EmbedError::model_forward)?; | ||||||
|  |  | ||||||
|         // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) |         let embeddings = Self::pooling(embeddings, self.pooling)?; | ||||||
|         let (_n_sentence, n_tokens, _hidden_size) = |  | ||||||
|             embeddings.dims3().map_err(EmbedError::tensor_shape)?; |  | ||||||
|  |  | ||||||
|         let embeddings = (embeddings.sum(1).map_err(EmbedError::tensor_value)? / (n_tokens as f64)) |  | ||||||
|             .map_err(EmbedError::tensor_shape)?; |  | ||||||
|  |  | ||||||
|         let embeddings: Vec<Embedding> = embeddings.to_vec2().map_err(EmbedError::tensor_shape)?; |         let embeddings: Vec<Embedding> = embeddings.to_vec2().map_err(EmbedError::tensor_shape)?; | ||||||
|         Ok(embeddings) |         Ok(embeddings) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn pooling(embeddings: Tensor, pooling: Pooling) -> Result<Tensor, EmbedError> { | ||||||
|  |         match pooling { | ||||||
|  |             Pooling::Mean => Self::mean_pooling(embeddings), | ||||||
|  |             Pooling::Cls => Self::cls_pooling(embeddings), | ||||||
|  |             Pooling::Max => Self::max_pooling(embeddings), | ||||||
|  |             Pooling::MeanSqrtLen => Self::mean_sqrt_pooling(embeddings), | ||||||
|  |             Pooling::LastToken => Self::last_token_pooling(embeddings), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn cls_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||||
|  |         embeddings.get_on_dim(1, 0).map_err(EmbedError::tensor_value) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn mean_sqrt_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||||
|  |         let (_n_sentence, n_tokens, _hidden_size) = | ||||||
|  |             embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||||
|  |  | ||||||
|  |         (embeddings.sum(1).map_err(EmbedError::tensor_value)? / (n_tokens as f64).sqrt()) | ||||||
|  |             .map_err(EmbedError::tensor_shape) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn mean_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||||
|  |         let (_n_sentence, n_tokens, _hidden_size) = | ||||||
|  |             embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||||
|  |  | ||||||
|  |         (embeddings.sum(1).map_err(EmbedError::tensor_value)? / (n_tokens as f64)) | ||||||
|  |             .map_err(EmbedError::tensor_shape) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn max_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||||
|  |         embeddings.max(1).map_err(EmbedError::tensor_shape) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn last_token_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||||
|  |         let (_n_sentence, n_tokens, _hidden_size) = | ||||||
|  |             embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||||
|  |  | ||||||
|  |         embeddings.get_on_dim(1, n_tokens - 1).map_err(EmbedError::tensor_value) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn embed_one(&self, text: &str) -> std::result::Result<Embedding, EmbedError> { |     pub fn embed_one(&self, text: &str) -> std::result::Result<Embedding, EmbedError> { | ||||||
|         let tokens = self.tokenizer.encode(text, true).map_err(EmbedError::tokenize)?; |         let tokens = self.tokenizer.encode(text, true).map_err(EmbedError::tokenize)?; | ||||||
|         let token_ids = tokens.get_ids(); |         let token_ids = tokens.get_ids(); | ||||||
| @@ -192,11 +339,8 @@ impl Embedder { | |||||||
|             .forward(&token_ids, &token_type_ids, None) |             .forward(&token_ids, &token_type_ids, None) | ||||||
|             .map_err(EmbedError::model_forward)?; |             .map_err(EmbedError::model_forward)?; | ||||||
|  |  | ||||||
|         // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) |         let embedding = Self::pooling(embeddings, self.pooling)?; | ||||||
|         let (_n_sentence, n_tokens, _hidden_size) = |  | ||||||
|             embeddings.dims3().map_err(EmbedError::tensor_shape)?; |  | ||||||
|         let embedding = (embeddings.sum(1).map_err(EmbedError::tensor_value)? / (n_tokens as f64)) |  | ||||||
|             .map_err(EmbedError::tensor_shape)?; |  | ||||||
|         let embedding = embedding.squeeze(0).map_err(EmbedError::tensor_shape)?; |         let embedding = embedding.squeeze(0).map_err(EmbedError::tensor_shape)?; | ||||||
|         let embedding: Embedding = embedding.to_vec1().map_err(EmbedError::tensor_shape)?; |         let embedding: Embedding = embedding.to_vec1().map_err(EmbedError::tensor_shape)?; | ||||||
|         Ok(embedding) |         Ok(embedding) | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ use roaring::RoaringBitmap; | |||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
| use utoipa::ToSchema; | use utoipa::ToSchema; | ||||||
|  |  | ||||||
|  | use super::hf::OverridePooling; | ||||||
| use super::{ollama, openai, DistributionShift}; | use super::{ollama, openai, DistributionShift}; | ||||||
| use crate::prompt::{default_max_bytes, PromptData}; | use crate::prompt::{default_max_bytes, PromptData}; | ||||||
| use crate::update::Setting; | use crate::update::Setting; | ||||||
| @@ -30,6 +31,10 @@ pub struct EmbeddingSettings { | |||||||
|     pub revision: Setting<String>, |     pub revision: Setting<String>, | ||||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|     #[deserr(default)] |     #[deserr(default)] | ||||||
|  |     #[schema(value_type = Option<OverridePooling>)] | ||||||
|  |     pub pooling: Setting<OverridePooling>, | ||||||
|  |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|  |     #[deserr(default)] | ||||||
|     #[schema(value_type = Option<String>)] |     #[schema(value_type = Option<String>)] | ||||||
|     pub api_key: Setting<String>, |     pub api_key: Setting<String>, | ||||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
| @@ -164,6 +169,7 @@ impl SettingsDiff { | |||||||
|                     mut source, |                     mut source, | ||||||
|                     mut model, |                     mut model, | ||||||
|                     mut revision, |                     mut revision, | ||||||
|  |                     mut pooling, | ||||||
|                     mut api_key, |                     mut api_key, | ||||||
|                     mut dimensions, |                     mut dimensions, | ||||||
|                     mut document_template, |                     mut document_template, | ||||||
| @@ -180,6 +186,7 @@ impl SettingsDiff { | |||||||
|                     source: new_source, |                     source: new_source, | ||||||
|                     model: new_model, |                     model: new_model, | ||||||
|                     revision: new_revision, |                     revision: new_revision, | ||||||
|  |                     pooling: new_pooling, | ||||||
|                     api_key: new_api_key, |                     api_key: new_api_key, | ||||||
|                     dimensions: new_dimensions, |                     dimensions: new_dimensions, | ||||||
|                     document_template: new_document_template, |                     document_template: new_document_template, | ||||||
| @@ -210,6 +217,7 @@ impl SettingsDiff { | |||||||
|                         &source, |                         &source, | ||||||
|                         &mut model, |                         &mut model, | ||||||
|                         &mut revision, |                         &mut revision, | ||||||
|  |                         &mut pooling, | ||||||
|                         &mut dimensions, |                         &mut dimensions, | ||||||
|                         &mut url, |                         &mut url, | ||||||
|                         &mut request, |                         &mut request, | ||||||
| @@ -225,6 +233,9 @@ impl SettingsDiff { | |||||||
|                 if revision.apply(new_revision) { |                 if revision.apply(new_revision) { | ||||||
|                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|                 } |                 } | ||||||
|  |                 if pooling.apply(new_pooling) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|                 if dimensions.apply(new_dimensions) { |                 if dimensions.apply(new_dimensions) { | ||||||
|                     match source { |                     match source { | ||||||
|                         // regenerate on dimensions change in OpenAI since truncation is supported |                         // regenerate on dimensions change in OpenAI since truncation is supported | ||||||
| @@ -290,6 +301,7 @@ impl SettingsDiff { | |||||||
|                     source, |                     source, | ||||||
|                     model, |                     model, | ||||||
|                     revision, |                     revision, | ||||||
|  |                     pooling, | ||||||
|                     api_key, |                     api_key, | ||||||
|                     dimensions, |                     dimensions, | ||||||
|                     document_template, |                     document_template, | ||||||
| @@ -338,6 +350,7 @@ fn apply_default_for_source( | |||||||
|     source: &Setting<EmbedderSource>, |     source: &Setting<EmbedderSource>, | ||||||
|     model: &mut Setting<String>, |     model: &mut Setting<String>, | ||||||
|     revision: &mut Setting<String>, |     revision: &mut Setting<String>, | ||||||
|  |     pooling: &mut Setting<OverridePooling>, | ||||||
|     dimensions: &mut Setting<usize>, |     dimensions: &mut Setting<usize>, | ||||||
|     url: &mut Setting<String>, |     url: &mut Setting<String>, | ||||||
|     request: &mut Setting<serde_json::Value>, |     request: &mut Setting<serde_json::Value>, | ||||||
| @@ -350,6 +363,7 @@ fn apply_default_for_source( | |||||||
|         Setting::Set(EmbedderSource::HuggingFace) => { |         Setting::Set(EmbedderSource::HuggingFace) => { | ||||||
|             *model = Setting::Reset; |             *model = Setting::Reset; | ||||||
|             *revision = Setting::Reset; |             *revision = Setting::Reset; | ||||||
|  |             *pooling = Setting::Reset; | ||||||
|             *dimensions = Setting::NotSet; |             *dimensions = Setting::NotSet; | ||||||
|             *url = Setting::NotSet; |             *url = Setting::NotSet; | ||||||
|             *request = Setting::NotSet; |             *request = Setting::NotSet; | ||||||
| @@ -359,6 +373,7 @@ fn apply_default_for_source( | |||||||
|         Setting::Set(EmbedderSource::Ollama) => { |         Setting::Set(EmbedderSource::Ollama) => { | ||||||
|             *model = Setting::Reset; |             *model = Setting::Reset; | ||||||
|             *revision = Setting::NotSet; |             *revision = Setting::NotSet; | ||||||
|  |             *pooling = Setting::NotSet; | ||||||
|             *dimensions = Setting::Reset; |             *dimensions = Setting::Reset; | ||||||
|             *url = Setting::NotSet; |             *url = Setting::NotSet; | ||||||
|             *request = Setting::NotSet; |             *request = Setting::NotSet; | ||||||
| @@ -368,6 +383,7 @@ fn apply_default_for_source( | |||||||
|         Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { |         Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { | ||||||
|             *model = Setting::Reset; |             *model = Setting::Reset; | ||||||
|             *revision = Setting::NotSet; |             *revision = Setting::NotSet; | ||||||
|  |             *pooling = Setting::NotSet; | ||||||
|             *dimensions = Setting::NotSet; |             *dimensions = Setting::NotSet; | ||||||
|             *url = Setting::Reset; |             *url = Setting::Reset; | ||||||
|             *request = Setting::NotSet; |             *request = Setting::NotSet; | ||||||
| @@ -377,6 +393,7 @@ fn apply_default_for_source( | |||||||
|         Setting::Set(EmbedderSource::Rest) => { |         Setting::Set(EmbedderSource::Rest) => { | ||||||
|             *model = Setting::NotSet; |             *model = Setting::NotSet; | ||||||
|             *revision = Setting::NotSet; |             *revision = Setting::NotSet; | ||||||
|  |             *pooling = Setting::NotSet; | ||||||
|             *dimensions = Setting::Reset; |             *dimensions = Setting::Reset; | ||||||
|             *url = Setting::Reset; |             *url = Setting::Reset; | ||||||
|             *request = Setting::Reset; |             *request = Setting::Reset; | ||||||
| @@ -386,6 +403,7 @@ fn apply_default_for_source( | |||||||
|         Setting::Set(EmbedderSource::UserProvided) => { |         Setting::Set(EmbedderSource::UserProvided) => { | ||||||
|             *model = Setting::NotSet; |             *model = Setting::NotSet; | ||||||
|             *revision = Setting::NotSet; |             *revision = Setting::NotSet; | ||||||
|  |             *pooling = Setting::NotSet; | ||||||
|             *dimensions = Setting::Reset; |             *dimensions = Setting::Reset; | ||||||
|             *url = Setting::NotSet; |             *url = Setting::NotSet; | ||||||
|             *request = Setting::NotSet; |             *request = Setting::NotSet; | ||||||
| @@ -419,6 +437,7 @@ impl EmbeddingSettings { | |||||||
|     pub const SOURCE: &'static str = "source"; |     pub const SOURCE: &'static str = "source"; | ||||||
|     pub const MODEL: &'static str = "model"; |     pub const MODEL: &'static str = "model"; | ||||||
|     pub const REVISION: &'static str = "revision"; |     pub const REVISION: &'static str = "revision"; | ||||||
|  |     pub const POOLING: &'static str = "pooling"; | ||||||
|     pub const API_KEY: &'static str = "apiKey"; |     pub const API_KEY: &'static str = "apiKey"; | ||||||
|     pub const DIMENSIONS: &'static str = "dimensions"; |     pub const DIMENSIONS: &'static str = "dimensions"; | ||||||
|     pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; |     pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; | ||||||
| @@ -446,6 +465,7 @@ impl EmbeddingSettings { | |||||||
|                 &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] |                 &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] | ||||||
|             } |             } | ||||||
|             Self::REVISION => &[EmbedderSource::HuggingFace], |             Self::REVISION => &[EmbedderSource::HuggingFace], | ||||||
|  |             Self::POOLING => &[EmbedderSource::HuggingFace], | ||||||
|             Self::API_KEY => { |             Self::API_KEY => { | ||||||
|                 &[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest] |                 &[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest] | ||||||
|             } |             } | ||||||
| @@ -500,6 +520,7 @@ impl EmbeddingSettings { | |||||||
|                 Self::SOURCE, |                 Self::SOURCE, | ||||||
|                 Self::MODEL, |                 Self::MODEL, | ||||||
|                 Self::REVISION, |                 Self::REVISION, | ||||||
|  |                 Self::POOLING, | ||||||
|                 Self::DOCUMENT_TEMPLATE, |                 Self::DOCUMENT_TEMPLATE, | ||||||
|                 Self::DOCUMENT_TEMPLATE_MAX_BYTES, |                 Self::DOCUMENT_TEMPLATE_MAX_BYTES, | ||||||
|                 Self::DISTRIBUTION, |                 Self::DISTRIBUTION, | ||||||
| @@ -592,10 +613,12 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 model, |                 model, | ||||||
|                 revision, |                 revision, | ||||||
|                 distribution, |                 distribution, | ||||||
|  |                 pooling, | ||||||
|             }) => Self { |             }) => Self { | ||||||
|                 source: Setting::Set(EmbedderSource::HuggingFace), |                 source: Setting::Set(EmbedderSource::HuggingFace), | ||||||
|                 model: Setting::Set(model), |                 model: Setting::Set(model), | ||||||
|                 revision: Setting::some_or_not_set(revision), |                 revision: Setting::some_or_not_set(revision), | ||||||
|  |                 pooling: Setting::Set(pooling), | ||||||
|                 api_key: Setting::NotSet, |                 api_key: Setting::NotSet, | ||||||
|                 dimensions: Setting::NotSet, |                 dimensions: Setting::NotSet, | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
| @@ -617,6 +640,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 source: Setting::Set(EmbedderSource::OpenAi), |                 source: Setting::Set(EmbedderSource::OpenAi), | ||||||
|                 model: Setting::Set(embedding_model.name().to_owned()), |                 model: Setting::Set(embedding_model.name().to_owned()), | ||||||
|                 revision: Setting::NotSet, |                 revision: Setting::NotSet, | ||||||
|  |                 pooling: Setting::NotSet, | ||||||
|                 api_key: Setting::some_or_not_set(api_key), |                 api_key: Setting::some_or_not_set(api_key), | ||||||
|                 dimensions: Setting::some_or_not_set(dimensions), |                 dimensions: Setting::some_or_not_set(dimensions), | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
| @@ -638,6 +662,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 source: Setting::Set(EmbedderSource::Ollama), |                 source: Setting::Set(EmbedderSource::Ollama), | ||||||
|                 model: Setting::Set(embedding_model), |                 model: Setting::Set(embedding_model), | ||||||
|                 revision: Setting::NotSet, |                 revision: Setting::NotSet, | ||||||
|  |                 pooling: Setting::NotSet, | ||||||
|                 api_key: Setting::some_or_not_set(api_key), |                 api_key: Setting::some_or_not_set(api_key), | ||||||
|                 dimensions: Setting::some_or_not_set(dimensions), |                 dimensions: Setting::some_or_not_set(dimensions), | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
| @@ -656,6 +681,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 source: Setting::Set(EmbedderSource::UserProvided), |                 source: Setting::Set(EmbedderSource::UserProvided), | ||||||
|                 model: Setting::NotSet, |                 model: Setting::NotSet, | ||||||
|                 revision: Setting::NotSet, |                 revision: Setting::NotSet, | ||||||
|  |                 pooling: Setting::NotSet, | ||||||
|                 api_key: Setting::NotSet, |                 api_key: Setting::NotSet, | ||||||
|                 dimensions: Setting::Set(dimensions), |                 dimensions: Setting::Set(dimensions), | ||||||
|                 document_template: Setting::NotSet, |                 document_template: Setting::NotSet, | ||||||
| @@ -679,6 +705,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 source: Setting::Set(EmbedderSource::Rest), |                 source: Setting::Set(EmbedderSource::Rest), | ||||||
|                 model: Setting::NotSet, |                 model: Setting::NotSet, | ||||||
|                 revision: Setting::NotSet, |                 revision: Setting::NotSet, | ||||||
|  |                 pooling: Setting::NotSet, | ||||||
|                 api_key: Setting::some_or_not_set(api_key), |                 api_key: Setting::some_or_not_set(api_key), | ||||||
|                 dimensions: Setting::some_or_not_set(dimensions), |                 dimensions: Setting::some_or_not_set(dimensions), | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
| @@ -701,6 +728,7 @@ impl From<EmbeddingSettings> for EmbeddingConfig { | |||||||
|             source, |             source, | ||||||
|             model, |             model, | ||||||
|             revision, |             revision, | ||||||
|  |             pooling, | ||||||
|             api_key, |             api_key, | ||||||
|             dimensions, |             dimensions, | ||||||
|             document_template, |             document_template, | ||||||
| @@ -764,6 +792,9 @@ impl From<EmbeddingSettings> for EmbeddingConfig { | |||||||
|                     if let Some(revision) = revision.set() { |                     if let Some(revision) = revision.set() { | ||||||
|                         options.revision = Some(revision); |                         options.revision = Some(revision); | ||||||
|                     } |                     } | ||||||
|  |                     if let Some(pooling) = pooling.set() { | ||||||
|  |                         options.pooling = pooling; | ||||||
|  |                     } | ||||||
|                     options.distribution = distribution.set(); |                     options.distribution = distribution.set(); | ||||||
|                     this.embedder_options = super::EmbedderOptions::HuggingFace(options); |                     this.embedder_options = super::EmbedderOptions::HuggingFace(options); | ||||||
|                 } |                 } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user