mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge #5355
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Run the indexing fuzzer / Setup the action (push) Successful in 1h5m46s
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Look for flaky tests / flaky (push) Failing after 1s
				
					
					
				
			
		
			
				
	
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
				
					
					
				
			
		
			
				
	
				Test suite / Tests almost all features (push) Failing after 13s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
				
					
					
				
			
		
			
				
	
				Test suite / Test with Ollama (push) Failing after 7s
				
					
					
				
			
		
			
				
	
				Test suite / Test disabled tokenization (push) Failing after 10s
				
					
					
				
			
		
			
				
	
				Test suite / Run tests in debug (push) Failing after 15s
				
					
					
				
			
		
			
				
	
				Test suite / Run Rustfmt (push) Failing after 16s
				
					
					
				
			
		
			
				
	
				Test suite / Run Clippy (push) Successful in 9m39s
				
					
					
				
			
		
			
				
	
				SDKs tests / define-docker-image (push) Failing after 5s
				
					
					
				
			
		
			
				
	
				SDKs tests / .NET SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Dart SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Go SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Java SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / JS SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / PHP SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Python SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Ruby SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Rust SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Swift SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Test suite / Tests on macos-13 (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Test suite / Tests on windows-2022 (push) Has been cancelled
				
					
					
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Run the indexing fuzzer / Setup the action (push) Successful in 1h5m46s
				Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
				Look for flaky tests / flaky (push) Failing after 1s
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
				Test suite / Tests almost all features (push) Failing after 13s
				Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
				Test suite / Test with Ollama (push) Failing after 7s
				Test suite / Test disabled tokenization (push) Failing after 10s
				Test suite / Run tests in debug (push) Failing after 15s
				Test suite / Run Rustfmt (push) Failing after 16s
				Test suite / Run Clippy (push) Successful in 9m39s
				SDKs tests / define-docker-image (push) Failing after 5s
				SDKs tests / .NET SDK tests (push) Has been skipped
				SDKs tests / Dart SDK tests (push) Has been skipped
				SDKs tests / Go SDK tests (push) Has been skipped
				SDKs tests / Java SDK tests (push) Has been skipped
				SDKs tests / JS SDK tests (push) Has been skipped
				SDKs tests / PHP SDK tests (push) Has been skipped
				SDKs tests / Python SDK tests (push) Has been skipped
				SDKs tests / Ruby SDK tests (push) Has been skipped
				SDKs tests / Rust SDK tests (push) Has been skipped
				SDKs tests / Swift SDK tests (push) Has been skipped
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				Test suite / Tests on macos-13 (push) Has been cancelled
				Test suite / Tests on windows-2022 (push) Has been cancelled
				5355: Support fetching the pooling method from the model configuration r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #5354 ## What does this PR do? - Fetches the pooling configuration from the model repository - Use a pooling method that depends on the pooling configuration of that model. - Allow overriding the pooling method with a new huggingFace embedder parameter `pooling` - for backward-compatibility with Meilisearch v1.13 - for compatibility with embedders that exhibit the same behavior as Meilisearch v1.13 - Handle the default value of that new parameter - for compatibility, when importing a db/a dump, it should be set to `forceMean` - when (re)set from the settings for an embedder, it should be set to `useModel` Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -1,5 +1,5 @@ | ||||
| --- | ||||
| source: dump/src/reader/mod.rs | ||||
| source: crates/dump/src/reader/mod.rs | ||||
| expression: vector_index.settings().unwrap() | ||||
| --- | ||||
| { | ||||
| @@ -49,6 +49,7 @@ expression: vector_index.settings().unwrap() | ||||
|       "source": "huggingFace", | ||||
|       "model": "BAAI/bge-base-en-v1.5", | ||||
|       "revision": "617ca489d9e86b49b8167676d8220688b99db36e", | ||||
|       "pooling": "forceMean", | ||||
|       "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" | ||||
|     } | ||||
|   }, | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind}; | ||||
| use std::path::Path; | ||||
|  | ||||
| pub use meilisearch_types::milli; | ||||
| use meilisearch_types::milli::vector::hf::OverridePooling; | ||||
| use tempfile::TempDir; | ||||
| use time::OffsetDateTime; | ||||
| use tracing::debug; | ||||
| @@ -252,7 +253,29 @@ impl V6IndexReader { | ||||
|     } | ||||
|  | ||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||
|         let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||
|         let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||
|         patch_embedders(&mut settings); | ||||
|         Ok(settings.check()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn patch_embedders(settings: &mut Settings<Unchecked>) { | ||||
|     if let Setting::Set(embedders) = &mut settings.embedders { | ||||
|         for settings in embedders.values_mut() { | ||||
|             let Setting::Set(settings) = &mut settings.inner else { | ||||
|                 continue; | ||||
|             }; | ||||
|             if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace) | ||||
|             { | ||||
|                 continue; | ||||
|             } | ||||
|             settings.pooling = match settings.pooling { | ||||
|                 Setting::Set(pooling) => Setting::Set(pooling), | ||||
|                 // if the pooling for a hugging face embedder is not set, force it to `forceMean` | ||||
|                 // for backward compatibility with v1.13 | ||||
|                 // dumps created in v1.14 and up will have the setting set for hugging face embedders | ||||
|                 Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean), | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,12 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||
| expression: simple_hf_config.embedder_options | ||||
| snapshot_kind: text | ||||
| --- | ||||
| { | ||||
|   "HuggingFace": { | ||||
|     "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||
|     "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||
|     "distribution": null | ||||
|     "distribution": null, | ||||
|     "pooling": "useModel" | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, 
prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: 
NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [0,] | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, 
facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, 
localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [] | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, 
pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||
| 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, 
pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||
| 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, 
pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, 
pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": 
Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, 
embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [0,] | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| --- | ||||
| source: crates/index-scheduler/src/scheduler/test_embedders.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing batch None: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, 
pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [] | ||||
|   | ||||
| @@ -404,31 +404,32 @@ fn import_vectors_first_and_embedder_later() { | ||||
|     // even though we specified the vector for the ID 3, it shouldn't be marked | ||||
|     // as user provided since we explicitely marked it as NOT user provided. | ||||
|     snapshot!(format!("{conf:#?}"), @r###" | ||||
|         [ | ||||
|             IndexEmbeddingConfig { | ||||
|                 name: "my_doggo_embedder", | ||||
|                 config: EmbeddingConfig { | ||||
|                     embedder_options: HuggingFace( | ||||
|                         EmbedderOptions { | ||||
|                             model: "sentence-transformers/all-MiniLM-L6-v2", | ||||
|                             revision: Some( | ||||
|                                 "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||
|                             ), | ||||
|                             distribution: None, | ||||
|                         }, | ||||
|                     ), | ||||
|                     prompt: PromptData { | ||||
|                         template: "{{doc.doggo}}", | ||||
|                         max_bytes: Some( | ||||
|                             400, | ||||
|     [ | ||||
|         IndexEmbeddingConfig { | ||||
|             name: "my_doggo_embedder", | ||||
|             config: EmbeddingConfig { | ||||
|                 embedder_options: HuggingFace( | ||||
|                     EmbedderOptions { | ||||
|                         model: "sentence-transformers/all-MiniLM-L6-v2", | ||||
|                         revision: Some( | ||||
|                             "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||
|                         ), | ||||
|                         distribution: None, | ||||
|                         pooling: UseModel, | ||||
|                     }, | ||||
|                     quantized: None, | ||||
|                 ), | ||||
|                 prompt: PromptData { | ||||
|                     template: "{{doc.doggo}}", | ||||
|                     max_bytes: Some( | ||||
|                         400, | ||||
|                     ), | ||||
|                 }, | ||||
|                 user_provided: RoaringBitmap<[1, 2]>, | ||||
|                 quantized: None, | ||||
|             }, | ||||
|         ] | ||||
|         "###); | ||||
|             user_provided: RoaringBitmap<[1, 2]>, | ||||
|         }, | ||||
|     ] | ||||
|     "###); | ||||
|     let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); | ||||
|     let embeddings = index.embeddings(&rtxn, docid).unwrap(); | ||||
|     let embedding = &embeddings["my_doggo_embedder"]; | ||||
|   | ||||
| @@ -2414,6 +2414,7 @@ async fn generate_and_import_dump_containing_vectors() { | ||||
|           "source": "huggingFace", | ||||
|           "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||
|           "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||
|           "pooling": "useModel", | ||||
|           "documentTemplate": "{{doc.doggo}}", | ||||
|           "documentTemplateMaxBytes": 400 | ||||
|         } | ||||
|   | ||||
| @@ -2768,6 +2768,7 @@ mod tests { | ||||
|                         source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided), | ||||
|                         model: Setting::NotSet, | ||||
|                         revision: Setting::NotSet, | ||||
|                         pooling: Setting::NotSet, | ||||
|                         api_key: Setting::NotSet, | ||||
|                         dimensions: Setting::Set(3), | ||||
|                         document_template: Setting::NotSet, | ||||
|   | ||||
| @@ -1676,6 +1676,7 @@ fn validate_prompt( | ||||
|             source, | ||||
|             model, | ||||
|             revision, | ||||
|             pooling, | ||||
|             api_key, | ||||
|             dimensions, | ||||
|             document_template: Setting::Set(template), | ||||
| @@ -1709,6 +1710,7 @@ fn validate_prompt( | ||||
|                 source, | ||||
|                 model, | ||||
|                 revision, | ||||
|                 pooling, | ||||
|                 api_key, | ||||
|                 dimensions, | ||||
|                 document_template: Setting::Set(template), | ||||
| @@ -1735,6 +1737,7 @@ pub fn validate_embedding_settings( | ||||
|         source, | ||||
|         model, | ||||
|         revision, | ||||
|         pooling, | ||||
|         api_key, | ||||
|         dimensions, | ||||
|         document_template, | ||||
| @@ -1776,6 +1779,7 @@ pub fn validate_embedding_settings( | ||||
|             source, | ||||
|             model, | ||||
|             revision, | ||||
|             pooling, | ||||
|             api_key, | ||||
|             dimensions, | ||||
|             document_template, | ||||
| @@ -1791,6 +1795,7 @@ pub fn validate_embedding_settings( | ||||
|     match inferred_source { | ||||
|         EmbedderSource::OpenAi => { | ||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||
|             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||
|  | ||||
|             check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; | ||||
|             check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; | ||||
| @@ -1829,6 +1834,7 @@ pub fn validate_embedding_settings( | ||||
|         EmbedderSource::Ollama => { | ||||
|             check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?; | ||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||
|             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||
|  | ||||
|             check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; | ||||
|             check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; | ||||
| @@ -1846,6 +1852,7 @@ pub fn validate_embedding_settings( | ||||
|         EmbedderSource::UserProvided => { | ||||
|             check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; | ||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||
|             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||
|             check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; | ||||
|             check_unset( | ||||
|                 &document_template, | ||||
| @@ -1869,6 +1876,7 @@ pub fn validate_embedding_settings( | ||||
|         EmbedderSource::Rest => { | ||||
|             check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; | ||||
|             check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; | ||||
|             check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; | ||||
|             check_set(&url, EmbeddingSettings::URL, inferred_source, name)?; | ||||
|             check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; | ||||
|             check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; | ||||
| @@ -1878,6 +1886,7 @@ pub fn validate_embedding_settings( | ||||
|         source, | ||||
|         model, | ||||
|         revision, | ||||
|         pooling, | ||||
|         api_key, | ||||
|         dimensions, | ||||
|         document_template, | ||||
|   | ||||
| @@ -262,6 +262,31 @@ impl NewEmbedderError { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn open_pooling_config( | ||||
|         pooling_config_filename: PathBuf, | ||||
|         inner: std::io::Error, | ||||
|     ) -> NewEmbedderError { | ||||
|         let open_config = OpenPoolingConfig { filename: pooling_config_filename, inner }; | ||||
|  | ||||
|         Self { | ||||
|             kind: NewEmbedderErrorKind::OpenPoolingConfig(open_config), | ||||
|             fault: FaultSource::Runtime, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn deserialize_pooling_config( | ||||
|         model_name: String, | ||||
|         pooling_config_filename: PathBuf, | ||||
|         inner: serde_json::Error, | ||||
|     ) -> NewEmbedderError { | ||||
|         let deserialize_pooling_config = | ||||
|             DeserializePoolingConfig { model_name, filename: pooling_config_filename, inner }; | ||||
|         Self { | ||||
|             kind: NewEmbedderErrorKind::DeserializePoolingConfig(deserialize_pooling_config), | ||||
|             fault: FaultSource::Runtime, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn open_tokenizer( | ||||
|         tokenizer_filename: PathBuf, | ||||
|         inner: Box<dyn std::error::Error + Send + Sync>, | ||||
| @@ -319,6 +344,13 @@ pub struct OpenConfig { | ||||
|     pub inner: std::io::Error, | ||||
| } | ||||
|  | ||||
| /// Error payload for a pooling configuration file that could not be opened. | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| #[error("could not open pooling config at {filename}: {inner}")] | ||||
| pub struct OpenPoolingConfig { | ||||
|     /// Path of the pooling configuration file. | ||||
|     pub filename: PathBuf, | ||||
|     /// Underlying I/O error. | ||||
|     pub inner: std::io::Error, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| #[error("for model '{model_name}', could not deserialize config at {filename} as JSON: {inner}")] | ||||
| pub struct DeserializeConfig { | ||||
| @@ -327,6 +359,14 @@ pub struct DeserializeConfig { | ||||
|     pub inner: serde_json::Error, | ||||
| } | ||||
|  | ||||
| /// Error payload for a pooling configuration file that could not be deserialized. | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| #[error("for model '{model_name}', could not deserialize file at `{filename}` as a pooling config: {inner}")] | ||||
| pub struct DeserializePoolingConfig { | ||||
|     /// Name of the model the pooling configuration belongs to. | ||||
|     pub model_name: String, | ||||
|     /// Path of the pooling configuration file. | ||||
|     pub filename: PathBuf, | ||||
|     /// Underlying JSON deserialization error. | ||||
|     pub inner: serde_json::Error, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, thiserror::Error)] | ||||
| #[error("model `{model_name}` appears to be unsupported{}\n  - inner error: {inner}", | ||||
| if architectures.is_empty() { | ||||
| @@ -354,8 +394,12 @@ pub enum NewEmbedderErrorKind { | ||||
|     #[error(transparent)] | ||||
|     OpenConfig(OpenConfig), | ||||
|     #[error(transparent)] | ||||
|     OpenPoolingConfig(OpenPoolingConfig), | ||||
|     #[error(transparent)] | ||||
|     DeserializeConfig(DeserializeConfig), | ||||
|     #[error(transparent)] | ||||
|     DeserializePoolingConfig(DeserializePoolingConfig), | ||||
|     #[error(transparent)] | ||||
|     UnsupportedModel(UnsupportedModel), | ||||
|     #[error(transparent)] | ||||
|     OpenTokenizer(OpenTokenizer), | ||||
|   | ||||
| @@ -34,6 +34,30 @@ pub struct EmbedderOptions { | ||||
|     pub model: String, | ||||
|     pub revision: Option<String>, | ||||
|     pub distribution: Option<DistributionShift>, | ||||
|     #[serde(default)] | ||||
|     pub pooling: OverridePooling, | ||||
| } | ||||
|  | ||||
| /// User-facing setting that optionally overrides the pooling strategy of a model. | ||||
| /// | ||||
| /// Variants are (de)serialized in camelCase (e.g. `useModel`). | ||||
| // NOTE(review): the default is `ForceMean`, presumably so that options stored before | ||||
| // this field existed keep the mean pooling that was previously hard-coded; confirm. | ||||
| #[derive( | ||||
|     Debug, | ||||
|     Clone, | ||||
|     Copy, | ||||
|     Default, | ||||
|     Hash, | ||||
|     PartialEq, | ||||
|     Eq, | ||||
|     serde::Deserialize, | ||||
|     serde::Serialize, | ||||
|     utoipa::ToSchema, | ||||
|     deserr::Deserr, | ||||
| )] | ||||
| #[deserr(rename_all = camelCase, deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub enum OverridePooling { | ||||
|     /// Keep the pooling strategy that was selected for the model. | ||||
|     UseModel, | ||||
|     /// Use CLS pooling regardless of the model's own configuration. | ||||
|     ForceCls, | ||||
|     /// Use mean pooling regardless of the model's own configuration. | ||||
|     #[default] | ||||
|     ForceMean, | ||||
| } | ||||
|  | ||||
| impl EmbedderOptions { | ||||
| @@ -42,6 +66,7 @@ impl EmbedderOptions { | ||||
|             model: "BAAI/bge-base-en-v1.5".to_string(), | ||||
|             revision: Some("617ca489d9e86b49b8167676d8220688b99db36e".into()), | ||||
|             distribution: None, | ||||
|             pooling: OverridePooling::UseModel, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -58,6 +83,7 @@ pub struct Embedder { | ||||
|     tokenizer: Tokenizer, | ||||
|     options: EmbedderOptions, | ||||
|     dimensions: usize, | ||||
|     pooling: Pooling, | ||||
| } | ||||
|  | ||||
| impl std::fmt::Debug for Embedder { | ||||
| @@ -66,10 +92,62 @@ impl std::fmt::Debug for Embedder { | ||||
|             .field("model", &self.options.model) | ||||
|             .field("tokenizer", &self.tokenizer) | ||||
|             .field("options", &self.options) | ||||
|             .field("pooling", &self.pooling) | ||||
|             .finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Flags read from a model's `1_Pooling/config.json` file. | ||||
| /// | ||||
| /// Every flag is optional in the file; a missing flag deserializes to `false`. | ||||
| #[derive(Clone, Copy, serde::Deserialize)] | ||||
| struct PoolingConfig { | ||||
|     /// Enables [`Pooling::Cls`]. | ||||
|     #[serde(default)] | ||||
|     pub pooling_mode_cls_token: bool, | ||||
|     /// Enables [`Pooling::Mean`]. | ||||
|     #[serde(default)] | ||||
|     pub pooling_mode_mean_tokens: bool, | ||||
|     /// Enables [`Pooling::Max`]. | ||||
|     #[serde(default)] | ||||
|     pub pooling_mode_max_tokens: bool, | ||||
|     /// Enables [`Pooling::MeanSqrtLen`]. | ||||
|     #[serde(default)] | ||||
|     pub pooling_mode_mean_sqrt_len_tokens: bool, | ||||
|     /// Enables [`Pooling::LastToken`]. | ||||
|     #[serde(default)] | ||||
|     pub pooling_mode_lasttoken: bool, | ||||
| } | ||||
|  | ||||
| /// Strategy used to reduce the model output along its token dimension. | ||||
| /// | ||||
| /// Defaults to [`Pooling::Mean`]. | ||||
| #[derive(Debug, Clone, Copy, Default)] | ||||
| pub enum Pooling { | ||||
|     /// Sum over the tokens divided by the number of tokens. | ||||
|     #[default] | ||||
|     Mean, | ||||
|     /// Value at the first token position. | ||||
|     Cls, | ||||
|     /// Maximum over the tokens. | ||||
|     Max, | ||||
|     /// Sum over the tokens divided by the square root of the number of tokens. | ||||
|     MeanSqrtLen, | ||||
|     /// Value at the last token position. | ||||
|     LastToken, | ||||
| } | ||||
|  | ||||
| impl Pooling { | ||||
|     /// Applies the user-requested override on top of the current strategy. | ||||
|     /// | ||||
|     /// `UseModel` leaves `self` untouched; the `Force*` variants replace it. | ||||
|     fn override_with(&mut self, pooling: OverridePooling) { | ||||
|         let forced = match pooling { | ||||
|             // nothing to override: keep the strategy already selected | ||||
|             OverridePooling::UseModel => return, | ||||
|             OverridePooling::ForceCls => Pooling::Cls, | ||||
|             OverridePooling::ForceMean => Pooling::Mean, | ||||
|         }; | ||||
|  | ||||
|         *self = forced; | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<PoolingConfig> for Pooling { | ||||
|     /// Picks the pooling strategy enabled by `value`. | ||||
|     /// | ||||
|     /// When several flags are set, the first one in this order wins: CLS token, | ||||
|     /// mean, last token, mean-sqrt-len, max. When no flag is set the result is | ||||
|     /// [`Pooling::default`]. | ||||
|     fn from(value: PoolingConfig) -> Self { | ||||
|         let PoolingConfig { | ||||
|             pooling_mode_cls_token, | ||||
|             pooling_mode_mean_tokens, | ||||
|             pooling_mode_max_tokens, | ||||
|             pooling_mode_mean_sqrt_len_tokens, | ||||
|             pooling_mode_lasttoken, | ||||
|         } = value; | ||||
|  | ||||
|         // Ordered by decreasing priority: the first enabled flag is selected. | ||||
|         let candidates = [ | ||||
|             (pooling_mode_cls_token, Self::Cls), | ||||
|             (pooling_mode_mean_tokens, Self::Mean), | ||||
|             (pooling_mode_lasttoken, Self::LastToken), | ||||
|             (pooling_mode_mean_sqrt_len_tokens, Self::MeanSqrtLen), | ||||
|             (pooling_mode_max_tokens, Self::Max), | ||||
|         ]; | ||||
|  | ||||
|         candidates | ||||
|             .into_iter() | ||||
|             .find_map(|(enabled, mode)| enabled.then_some(mode)) | ||||
|             .unwrap_or_default() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Embedder { | ||||
|     pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> { | ||||
|         let device = match candle_core::Device::cuda_if_available(0) { | ||||
| @@ -83,7 +161,7 @@ impl Embedder { | ||||
|             Some(revision) => Repo::with_revision(options.model.clone(), RepoType::Model, revision), | ||||
|             None => Repo::model(options.model.clone()), | ||||
|         }; | ||||
|         let (config_filename, tokenizer_filename, weights_filename, weight_source) = { | ||||
|         let (config_filename, tokenizer_filename, weights_filename, weight_source, pooling) = { | ||||
|             let api = Api::new().map_err(NewEmbedderError::new_api_fail)?; | ||||
|             let api = api.repo(repo); | ||||
|             let config = api.get("config.json").map_err(NewEmbedderError::api_get)?; | ||||
| @@ -97,7 +175,38 @@ impl Embedder { | ||||
|                     }) | ||||
|                     .map_err(NewEmbedderError::api_get)? | ||||
|             }; | ||||
|             (config, tokenizer, weights, source) | ||||
|             let pooling = match api.get("1_Pooling/config.json") { | ||||
|                 Ok(pooling) => Some(pooling), | ||||
|                 Err(hf_hub::api::sync::ApiError::RequestError(error)) | ||||
|                     if matches!(*error, ureq::Error::Status(404, _,)) => | ||||
|                 { | ||||
|                     // ignore the error if the file simply doesn't exist | ||||
|                     None | ||||
|                 } | ||||
|                 Err(error) => return Err(NewEmbedderError::api_get(error)), | ||||
|             }; | ||||
|             let mut pooling: Pooling = match pooling { | ||||
|                 Some(pooling_filename) => { | ||||
|                     let pooling = std::fs::read_to_string(&pooling_filename).map_err(|inner| { | ||||
|                         NewEmbedderError::open_pooling_config(pooling_filename.clone(), inner) | ||||
|                     })?; | ||||
|  | ||||
|                     let pooling: PoolingConfig = | ||||
|                         serde_json::from_str(&pooling).map_err(|inner| { | ||||
|                             NewEmbedderError::deserialize_pooling_config( | ||||
|                                 options.model.clone(), | ||||
|                                 pooling_filename, | ||||
|                                 inner, | ||||
|                             ) | ||||
|                         })?; | ||||
|                     pooling.into() | ||||
|                 } | ||||
|                 None => Pooling::default(), | ||||
|             }; | ||||
|  | ||||
|             pooling.override_with(options.pooling); | ||||
|  | ||||
|             (config, tokenizer, weights, source, pooling) | ||||
|         }; | ||||
|  | ||||
|         let config = std::fs::read_to_string(&config_filename) | ||||
| @@ -122,6 +231,8 @@ impl Embedder { | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|         tracing::debug!(model = options.model, weight=?weight_source, pooling=?pooling, "model config"); | ||||
|  | ||||
|         let model = BertModel::load(vb, &config).map_err(NewEmbedderError::load_model)?; | ||||
|  | ||||
|         if let Some(pp) = tokenizer.get_padding_mut() { | ||||
| @@ -134,7 +245,7 @@ impl Embedder { | ||||
|             tokenizer.with_padding(Some(pp)); | ||||
|         } | ||||
|  | ||||
|         let mut this = Self { model, tokenizer, options, dimensions: 0 }; | ||||
|         let mut this = Self { model, tokenizer, options, dimensions: 0, pooling }; | ||||
|  | ||||
|         let embeddings = this | ||||
|             .embed(vec!["test".into()]) | ||||
| @@ -168,17 +279,53 @@ impl Embedder { | ||||
|             .forward(&token_ids, &token_type_ids, None) | ||||
|             .map_err(EmbedError::model_forward)?; | ||||
|  | ||||
|         // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) | ||||
|         let (_n_sentence, n_tokens, _hidden_size) = | ||||
|             embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||
|  | ||||
|         let embeddings = (embeddings.sum(1).map_err(EmbedError::tensor_value)? / (n_tokens as f64)) | ||||
|             .map_err(EmbedError::tensor_shape)?; | ||||
|         let embeddings = Self::pooling(embeddings, self.pooling)?; | ||||
|  | ||||
|         let embeddings: Vec<Embedding> = embeddings.to_vec2().map_err(EmbedError::tensor_shape)?; | ||||
|         Ok(embeddings) | ||||
|     } | ||||
|  | ||||
|     /// Reduces `embeddings` with the helper matching the requested `pooling` strategy. | ||||
|     fn pooling(embeddings: Tensor, pooling: Pooling) -> Result<Tensor, EmbedError> { | ||||
|         // Select the helper first, then run it on the tensor. | ||||
|         let strategy: fn(Tensor) -> Result<Tensor, EmbedError> = match pooling { | ||||
|             Pooling::Mean => Self::mean_pooling, | ||||
|             Pooling::Cls => Self::cls_pooling, | ||||
|             Pooling::Max => Self::max_pooling, | ||||
|             Pooling::MeanSqrtLen => Self::mean_sqrt_pooling, | ||||
|             Pooling::LastToken => Self::last_token_pooling, | ||||
|         }; | ||||
|  | ||||
|         strategy(embeddings) | ||||
|     } | ||||
|  | ||||
|     /// CLS pooling: keeps the entry at index 0 along dimension 1. | ||||
|     fn cls_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||
|         let first_token = embeddings.get_on_dim(1, 0); | ||||
|         first_token.map_err(EmbedError::tensor_value) | ||||
|     } | ||||
|  | ||||
|     /// Sums over dimension 1 and divides by the square root of the token count. | ||||
|     /// | ||||
|     /// Fails with a shape error when `embeddings` does not have three dimensions. | ||||
|     fn mean_sqrt_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||
|         let (_, token_count, _) = embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||
|  | ||||
|         let summed = embeddings.sum(1).map_err(EmbedError::tensor_value)?; | ||||
|         let divisor = (token_count as f64).sqrt(); | ||||
|  | ||||
|         (summed / divisor).map_err(EmbedError::tensor_shape) | ||||
|     } | ||||
|  | ||||
|     /// Mean pooling: sums over dimension 1 and divides by the token count. | ||||
|     /// | ||||
|     /// Every token position contributes to the mean, padding included. | ||||
|     /// Fails with a shape error when `embeddings` does not have three dimensions. | ||||
|     fn mean_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||
|         let (_, token_count, _) = embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||
|  | ||||
|         let summed = embeddings.sum(1).map_err(EmbedError::tensor_value)?; | ||||
|         let divisor = token_count as f64; | ||||
|  | ||||
|         (summed / divisor).map_err(EmbedError::tensor_shape) | ||||
|     } | ||||
|  | ||||
|     /// Max pooling: takes the maximum over dimension 1. | ||||
|     fn max_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||
|         let maxima = embeddings.max(1); | ||||
|         maxima.map_err(EmbedError::tensor_shape) | ||||
|     } | ||||
|  | ||||
|     /// Last-token pooling: keeps the entry at the last index along dimension 1. | ||||
|     /// | ||||
|     /// Fails with a shape error when `embeddings` does not have three dimensions. | ||||
|     fn last_token_pooling(embeddings: Tensor) -> Result<Tensor, EmbedError> { | ||||
|         let (_, token_count, _) = embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||
|  | ||||
|         let last_index = token_count - 1; | ||||
|         embeddings.get_on_dim(1, last_index).map_err(EmbedError::tensor_value) | ||||
|     } | ||||
|  | ||||
|     pub fn embed_one(&self, text: &str) -> std::result::Result<Embedding, EmbedError> { | ||||
|         let tokens = self.tokenizer.encode(text, true).map_err(EmbedError::tokenize)?; | ||||
|         let token_ids = tokens.get_ids(); | ||||
| @@ -192,11 +339,8 @@ impl Embedder { | ||||
|             .forward(&token_ids, &token_type_ids, None) | ||||
|             .map_err(EmbedError::model_forward)?; | ||||
|  | ||||
|         // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding) | ||||
|         let (_n_sentence, n_tokens, _hidden_size) = | ||||
|             embeddings.dims3().map_err(EmbedError::tensor_shape)?; | ||||
|         let embedding = (embeddings.sum(1).map_err(EmbedError::tensor_value)? / (n_tokens as f64)) | ||||
|             .map_err(EmbedError::tensor_shape)?; | ||||
|         let embedding = Self::pooling(embeddings, self.pooling)?; | ||||
|  | ||||
|         let embedding = embedding.squeeze(0).map_err(EmbedError::tensor_shape)?; | ||||
|         let embedding: Embedding = embedding.to_vec1().map_err(EmbedError::tensor_shape)?; | ||||
|         Ok(embedding) | ||||
|   | ||||
| @@ -6,6 +6,7 @@ use roaring::RoaringBitmap; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use utoipa::ToSchema; | ||||
|  | ||||
| use super::hf::OverridePooling; | ||||
| use super::{ollama, openai, DistributionShift}; | ||||
| use crate::prompt::{default_max_bytes, PromptData}; | ||||
| use crate::update::Setting; | ||||
| @@ -30,6 +31,10 @@ pub struct EmbeddingSettings { | ||||
|     pub revision: Setting<String>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<OverridePooling>)] | ||||
|     pub pooling: Setting<OverridePooling>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<String>)] | ||||
|     pub api_key: Setting<String>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
| @@ -164,6 +169,7 @@ impl SettingsDiff { | ||||
|                     mut source, | ||||
|                     mut model, | ||||
|                     mut revision, | ||||
|                     mut pooling, | ||||
|                     mut api_key, | ||||
|                     mut dimensions, | ||||
|                     mut document_template, | ||||
| @@ -180,6 +186,7 @@ impl SettingsDiff { | ||||
|                     source: new_source, | ||||
|                     model: new_model, | ||||
|                     revision: new_revision, | ||||
|                     pooling: new_pooling, | ||||
|                     api_key: new_api_key, | ||||
|                     dimensions: new_dimensions, | ||||
|                     document_template: new_document_template, | ||||
| @@ -210,6 +217,7 @@ impl SettingsDiff { | ||||
|                         &source, | ||||
|                         &mut model, | ||||
|                         &mut revision, | ||||
|                         &mut pooling, | ||||
|                         &mut dimensions, | ||||
|                         &mut url, | ||||
|                         &mut request, | ||||
| @@ -225,6 +233,9 @@ impl SettingsDiff { | ||||
|                 if revision.apply(new_revision) { | ||||
|                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||
|                 } | ||||
|                 if pooling.apply(new_pooling) { | ||||
|                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||
|                 } | ||||
|                 if dimensions.apply(new_dimensions) { | ||||
|                     match source { | ||||
|                         // regenerate on dimensions change in OpenAI since truncation is supported | ||||
| @@ -290,6 +301,7 @@ impl SettingsDiff { | ||||
|                     source, | ||||
|                     model, | ||||
|                     revision, | ||||
|                     pooling, | ||||
|                     api_key, | ||||
|                     dimensions, | ||||
|                     document_template, | ||||
| @@ -338,6 +350,7 @@ fn apply_default_for_source( | ||||
|     source: &Setting<EmbedderSource>, | ||||
|     model: &mut Setting<String>, | ||||
|     revision: &mut Setting<String>, | ||||
|     pooling: &mut Setting<OverridePooling>, | ||||
|     dimensions: &mut Setting<usize>, | ||||
|     url: &mut Setting<String>, | ||||
|     request: &mut Setting<serde_json::Value>, | ||||
| @@ -350,6 +363,7 @@ fn apply_default_for_source( | ||||
|         Setting::Set(EmbedderSource::HuggingFace) => { | ||||
|             *model = Setting::Reset; | ||||
|             *revision = Setting::Reset; | ||||
|             *pooling = Setting::Reset; | ||||
|             *dimensions = Setting::NotSet; | ||||
|             *url = Setting::NotSet; | ||||
|             *request = Setting::NotSet; | ||||
| @@ -359,6 +373,7 @@ fn apply_default_for_source( | ||||
|         Setting::Set(EmbedderSource::Ollama) => { | ||||
|             *model = Setting::Reset; | ||||
|             *revision = Setting::NotSet; | ||||
|             *pooling = Setting::NotSet; | ||||
|             *dimensions = Setting::Reset; | ||||
|             *url = Setting::NotSet; | ||||
|             *request = Setting::NotSet; | ||||
| @@ -368,6 +383,7 @@ fn apply_default_for_source( | ||||
|         Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { | ||||
|             *model = Setting::Reset; | ||||
|             *revision = Setting::NotSet; | ||||
|             *pooling = Setting::NotSet; | ||||
|             *dimensions = Setting::NotSet; | ||||
|             *url = Setting::Reset; | ||||
|             *request = Setting::NotSet; | ||||
| @@ -377,6 +393,7 @@ fn apply_default_for_source( | ||||
|         Setting::Set(EmbedderSource::Rest) => { | ||||
|             *model = Setting::NotSet; | ||||
|             *revision = Setting::NotSet; | ||||
|             *pooling = Setting::NotSet; | ||||
|             *dimensions = Setting::Reset; | ||||
|             *url = Setting::Reset; | ||||
|             *request = Setting::Reset; | ||||
| @@ -386,6 +403,7 @@ fn apply_default_for_source( | ||||
|         Setting::Set(EmbedderSource::UserProvided) => { | ||||
|             *model = Setting::NotSet; | ||||
|             *revision = Setting::NotSet; | ||||
|             *pooling = Setting::NotSet; | ||||
|             *dimensions = Setting::Reset; | ||||
|             *url = Setting::NotSet; | ||||
|             *request = Setting::NotSet; | ||||
| @@ -419,6 +437,7 @@ impl EmbeddingSettings { | ||||
|     pub const SOURCE: &'static str = "source"; | ||||
|     pub const MODEL: &'static str = "model"; | ||||
|     pub const REVISION: &'static str = "revision"; | ||||
|     pub const POOLING: &'static str = "pooling"; | ||||
|     pub const API_KEY: &'static str = "apiKey"; | ||||
|     pub const DIMENSIONS: &'static str = "dimensions"; | ||||
|     pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; | ||||
| @@ -446,6 +465,7 @@ impl EmbeddingSettings { | ||||
|                 &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] | ||||
|             } | ||||
|             Self::REVISION => &[EmbedderSource::HuggingFace], | ||||
|             Self::POOLING => &[EmbedderSource::HuggingFace], | ||||
|             Self::API_KEY => { | ||||
|                 &[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest] | ||||
|             } | ||||
| @@ -500,6 +520,7 @@ impl EmbeddingSettings { | ||||
|                 Self::SOURCE, | ||||
|                 Self::MODEL, | ||||
|                 Self::REVISION, | ||||
|                 Self::POOLING, | ||||
|                 Self::DOCUMENT_TEMPLATE, | ||||
|                 Self::DOCUMENT_TEMPLATE_MAX_BYTES, | ||||
|                 Self::DISTRIBUTION, | ||||
| @@ -592,10 +613,12 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | ||||
|                 model, | ||||
|                 revision, | ||||
|                 distribution, | ||||
|                 pooling, | ||||
|             }) => Self { | ||||
|                 source: Setting::Set(EmbedderSource::HuggingFace), | ||||
|                 model: Setting::Set(model), | ||||
|                 revision: Setting::some_or_not_set(revision), | ||||
|                 pooling: Setting::Set(pooling), | ||||
|                 api_key: Setting::NotSet, | ||||
|                 dimensions: Setting::NotSet, | ||||
|                 document_template: Setting::Set(prompt.template), | ||||
| @@ -617,6 +640,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | ||||
|                 source: Setting::Set(EmbedderSource::OpenAi), | ||||
|                 model: Setting::Set(embedding_model.name().to_owned()), | ||||
|                 revision: Setting::NotSet, | ||||
|                 pooling: Setting::NotSet, | ||||
|                 api_key: Setting::some_or_not_set(api_key), | ||||
|                 dimensions: Setting::some_or_not_set(dimensions), | ||||
|                 document_template: Setting::Set(prompt.template), | ||||
| @@ -638,6 +662,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | ||||
|                 source: Setting::Set(EmbedderSource::Ollama), | ||||
|                 model: Setting::Set(embedding_model), | ||||
|                 revision: Setting::NotSet, | ||||
|                 pooling: Setting::NotSet, | ||||
|                 api_key: Setting::some_or_not_set(api_key), | ||||
|                 dimensions: Setting::some_or_not_set(dimensions), | ||||
|                 document_template: Setting::Set(prompt.template), | ||||
| @@ -656,6 +681,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | ||||
|                 source: Setting::Set(EmbedderSource::UserProvided), | ||||
|                 model: Setting::NotSet, | ||||
|                 revision: Setting::NotSet, | ||||
|                 pooling: Setting::NotSet, | ||||
|                 api_key: Setting::NotSet, | ||||
|                 dimensions: Setting::Set(dimensions), | ||||
|                 document_template: Setting::NotSet, | ||||
| @@ -679,6 +705,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | ||||
|                 source: Setting::Set(EmbedderSource::Rest), | ||||
|                 model: Setting::NotSet, | ||||
|                 revision: Setting::NotSet, | ||||
|                 pooling: Setting::NotSet, | ||||
|                 api_key: Setting::some_or_not_set(api_key), | ||||
|                 dimensions: Setting::some_or_not_set(dimensions), | ||||
|                 document_template: Setting::Set(prompt.template), | ||||
| @@ -701,6 +728,7 @@ impl From<EmbeddingSettings> for EmbeddingConfig { | ||||
|             source, | ||||
|             model, | ||||
|             revision, | ||||
|             pooling, | ||||
|             api_key, | ||||
|             dimensions, | ||||
|             document_template, | ||||
| @@ -764,6 +792,9 @@ impl From<EmbeddingSettings> for EmbeddingConfig { | ||||
|                     if let Some(revision) = revision.set() { | ||||
|                         options.revision = Some(revision); | ||||
|                     } | ||||
|                     if let Some(pooling) = pooling.set() { | ||||
|                         options.pooling = pooling; | ||||
|                     } | ||||
|                     options.distribution = distribution.set(); | ||||
|                     this.embedder_options = super::EmbedderOptions::HuggingFace(options); | ||||
|                 } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user