mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	Merge #4892
4892: Add a documentTemplateMaxBytes parameter to limit the max length of document templates r=ManyTheFish a=dureuill # Pull Request ## Related issue Fixes #4885 See [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#a3d63628129e40adba943ae7b8ec06c2) Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -5403,6 +5403,9 @@ mod tests { | |||||||
|                     ), |                     ), | ||||||
|                     prompt: PromptData { |                     prompt: PromptData { | ||||||
|                         template: "{{doc.doggo}}", |                         template: "{{doc.doggo}}", | ||||||
|  |                         max_bytes: Some( | ||||||
|  |                             400, | ||||||
|  |                         ), | ||||||
|                     }, |                     }, | ||||||
|                 }, |                 }, | ||||||
|                 user_provided: RoaringBitmap<[1, 2]>, |                 user_provided: RoaringBitmap<[1, 2]>, | ||||||
| @@ -5618,6 +5621,9 @@ mod tests { | |||||||
|                     ), |                     ), | ||||||
|                     prompt: PromptData { |                     prompt: PromptData { | ||||||
|                         template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", |                         template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", | ||||||
|  |                         max_bytes: Some( | ||||||
|  |                             400, | ||||||
|  |                         ), | ||||||
|                     }, |                     }, | ||||||
|                 }, |                 }, | ||||||
|                 user_provided: RoaringBitmap<[0]>, |                 user_provided: RoaringBitmap<[0]>, | ||||||
| @@ -5658,6 +5664,9 @@ mod tests { | |||||||
|                     ), |                     ), | ||||||
|                     prompt: PromptData { |                     prompt: PromptData { | ||||||
|                         template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", |                         template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", | ||||||
|  |                         max_bytes: Some( | ||||||
|  |                             400, | ||||||
|  |                         ), | ||||||
|                     }, |                     }, | ||||||
|                 }, |                 }, | ||||||
|                 user_provided: RoaringBitmap<[]>, |                 user_provided: RoaringBitmap<[]>, | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [0,] | enqueued [0,] | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [] | enqueued [] | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [0,] | enqueued [0,] | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [] | enqueued [] | ||||||
|   | |||||||
| @@ -388,6 +388,7 @@ impl ErrorCode for milli::Error { | |||||||
|                     | UserError::InvalidOpenAiModelDimensionsMax { .. } |                     | UserError::InvalidOpenAiModelDimensionsMax { .. } | ||||||
|                     | UserError::InvalidSettingsDimensions { .. } |                     | UserError::InvalidSettingsDimensions { .. } | ||||||
|                     | UserError::InvalidUrl { .. } |                     | UserError::InvalidUrl { .. } | ||||||
|  |                     | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } | ||||||
|                     | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, |                     | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, | ||||||
|                     UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, |                     UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, | ||||||
|                     UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, |                     UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, | ||||||
|   | |||||||
| @@ -636,11 +636,19 @@ fn embedder_analytics( | |||||||
|             .any(|config| config.document_template.set().is_some()) |             .any(|config| config.document_template.set().is_some()) | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|  |     let document_template_max_bytes = setting.as_ref().and_then(|map| { | ||||||
|  |         map.values() | ||||||
|  |             .filter_map(|config| config.clone().set()) | ||||||
|  |             .filter_map(|config| config.document_template_max_bytes.set()) | ||||||
|  |             .max() | ||||||
|  |     }); | ||||||
|  |  | ||||||
|     json!( |     json!( | ||||||
|         { |         { | ||||||
|             "total": setting.as_ref().map(|s| s.len()), |             "total": setting.as_ref().map(|s| s.len()), | ||||||
|             "sources": sources, |             "sources": sources, | ||||||
|             "document_template_used": document_template_used, |             "document_template_used": document_template_used, | ||||||
|  |             "document_template_max_bytes": document_template_max_bytes | ||||||
|         } |         } | ||||||
|     ) |     ) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -2097,7 +2097,8 @@ async fn generate_and_import_dump_containing_vectors() { | |||||||
|           "source": "huggingFace", |           "source": "huggingFace", | ||||||
|           "model": "sentence-transformers/all-MiniLM-L6-v2", |           "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|           "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", |           "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|           "documentTemplate": "{{doc.doggo}}" |           "documentTemplate": "{{doc.doggo}}", | ||||||
|  |           "documentTemplateMaxBytes": 400 | ||||||
|         } |         } | ||||||
|       }, |       }, | ||||||
|       "searchCutoffMs": null, |       "searchCutoffMs": null, | ||||||
|   | |||||||
| @@ -191,6 +191,7 @@ async fn secrets_are_hidden_in_settings() { | |||||||
|           "apiKey": "My suXXXXXX...", |           "apiKey": "My suXXXXXX...", | ||||||
|           "dimensions": 4, |           "dimensions": 4, | ||||||
|           "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", |           "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", | ||||||
|  |           "documentTemplateMaxBytes": 400, | ||||||
|           "url": "https://localhost:7777", |           "url": "https://localhost:7777", | ||||||
|           "request": "{{text}}", |           "request": "{{text}}", | ||||||
|           "response": "{{embedding}}", |           "response": "{{embedding}}", | ||||||
|   | |||||||
| @@ -302,7 +302,8 @@ async fn create_mock_with_template( | |||||||
|         "source": "openAi", |         "source": "openAi", | ||||||
|         "url": url, |         "url": url, | ||||||
|         "apiKey": API_KEY, |         "apiKey": API_KEY, | ||||||
|         "documentTemplate": document_template |         "documentTemplate": document_template, | ||||||
|  |         "documentTemplateMaxBytes": 8000000, | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|     model_dimensions.add_to_settings(&mut embedder_settings); |     model_dimensions.add_to_settings(&mut embedder_settings); | ||||||
| @@ -693,6 +694,7 @@ async fn bad_api_key() { | |||||||
|             "model": "text-embedding-3-large", |             "model": "text-embedding-3-large", | ||||||
|             "apiKey": "XXX...", |             "apiKey": "XXX...", | ||||||
|             "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n        {%- else -%}\n        Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n        {%- endif %}, de race {{doc.breed}}.", |             "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n        {%- else -%}\n        Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n        {%- endif %}, de race {{doc.breed}}.", | ||||||
|  |             "documentTemplateMaxBytes": 8000000, | ||||||
|             "url": "[url]" |             "url": "[url]" | ||||||
|           } |           } | ||||||
|         } |         } | ||||||
| @@ -735,6 +737,7 @@ async fn bad_api_key() { | |||||||
|             "source": "openAi", |             "source": "openAi", | ||||||
|             "model": "text-embedding-3-large", |             "model": "text-embedding-3-large", | ||||||
|             "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n        {%- else -%}\n        Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n        {%- endif %}, de race {{doc.breed}}.", |             "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n        {%- else -%}\n        Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n        {%- endif %}, de race {{doc.breed}}.", | ||||||
|  |             "documentTemplateMaxBytes": 8000000, | ||||||
|             "url": "[url]" |             "url": "[url]" | ||||||
|           } |           } | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -258,6 +258,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco | |||||||
|     }, |     }, | ||||||
|     #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] |     #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] | ||||||
|     InvalidSettingsDimensions { embedder_name: String }, |     InvalidSettingsDimensions { embedder_name: String }, | ||||||
|  |     #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] | ||||||
|  |     InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String }, | ||||||
|     #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] |     #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] | ||||||
|     InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String }, |     InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String }, | ||||||
|     #[error("Document editions cannot modify a document's primary key")] |     #[error("Document editions cannot modify a document's primary key")] | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ mod template_checker; | |||||||
|  |  | ||||||
| use std::collections::BTreeMap; | use std::collections::BTreeMap; | ||||||
| use std::convert::TryFrom; | use std::convert::TryFrom; | ||||||
|  | use std::num::NonZeroUsize; | ||||||
| use std::ops::Deref; | use std::ops::Deref; | ||||||
|  |  | ||||||
| use error::{NewPromptError, RenderPromptError}; | use error::{NewPromptError, RenderPromptError}; | ||||||
| @@ -18,16 +19,18 @@ use crate::{FieldId, FieldsIdsMap}; | |||||||
| pub struct Prompt { | pub struct Prompt { | ||||||
|     template: liquid::Template, |     template: liquid::Template, | ||||||
|     template_text: String, |     template_text: String, | ||||||
|  |     max_bytes: Option<NonZeroUsize>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] | #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] | ||||||
| pub struct PromptData { | pub struct PromptData { | ||||||
|     pub template: String, |     pub template: String, | ||||||
|  |     pub max_bytes: Option<NonZeroUsize>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl From<Prompt> for PromptData { | impl From<Prompt> for PromptData { | ||||||
|     fn from(value: Prompt) -> Self { |     fn from(value: Prompt) -> Self { | ||||||
|         Self { template: value.template_text } |         Self { template: value.template_text, max_bytes: value.max_bytes } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -35,14 +38,18 @@ impl TryFrom<PromptData> for Prompt { | |||||||
|     type Error = NewPromptError; |     type Error = NewPromptError; | ||||||
|  |  | ||||||
|     fn try_from(value: PromptData) -> Result<Self, Self::Error> { |     fn try_from(value: PromptData) -> Result<Self, Self::Error> { | ||||||
|         Prompt::new(value.template) |         Prompt::new(value.template, value.max_bytes) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Clone for Prompt { | impl Clone for Prompt { | ||||||
|     fn clone(&self) -> Self { |     fn clone(&self) -> Self { | ||||||
|         let template_text = self.template_text.clone(); |         let template_text = self.template_text.clone(); | ||||||
|         Self { template: new_template(&template_text).unwrap(), template_text } |         Self { | ||||||
|  |             template: new_template(&template_text).unwrap(), | ||||||
|  |             template_text, | ||||||
|  |             max_bytes: self.max_bytes, | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -62,20 +69,28 @@ fn default_template_text() -> &'static str { | |||||||
|     {% endfor %}" |     {% endfor %}" | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub fn default_max_bytes() -> NonZeroUsize { | ||||||
|  |     NonZeroUsize::new(400).unwrap() | ||||||
|  | } | ||||||
|  |  | ||||||
| impl Default for Prompt { | impl Default for Prompt { | ||||||
|     fn default() -> Self { |     fn default() -> Self { | ||||||
|         Self { template: default_template(), template_text: default_template_text().into() } |         Self { | ||||||
|  |             template: default_template(), | ||||||
|  |             template_text: default_template_text().into(), | ||||||
|  |             max_bytes: Some(default_max_bytes()), | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Default for PromptData { | impl Default for PromptData { | ||||||
|     fn default() -> Self { |     fn default() -> Self { | ||||||
|         Self { template: default_template_text().into() } |         Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Prompt { | impl Prompt { | ||||||
|     pub fn new(template: String) -> Result<Self, NewPromptError> { |     pub fn new(template: String, max_bytes: Option<NonZeroUsize>) -> Result<Self, NewPromptError> { | ||||||
|         let this = Self { |         let this = Self { | ||||||
|             template: liquid::ParserBuilder::with_stdlib() |             template: liquid::ParserBuilder::with_stdlib() | ||||||
|                 .build() |                 .build() | ||||||
| @@ -83,6 +98,7 @@ impl Prompt { | |||||||
|                 .parse(&template) |                 .parse(&template) | ||||||
|                 .map_err(NewPromptError::cannot_parse_template)?, |                 .map_err(NewPromptError::cannot_parse_template)?, | ||||||
|             template_text: template, |             template_text: template, | ||||||
|  |             max_bytes, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         // render template with special object that's OK with `doc.*` and `fields.*` |         // render template with special object that's OK with `doc.*` and `fields.*` | ||||||
| @@ -102,7 +118,24 @@ impl Prompt { | |||||||
|         let document = Document::new(document, side, field_id_map); |         let document = Document::new(document, side, field_id_map); | ||||||
|         let context = Context::new(&document, field_id_map); |         let context = Context::new(&document, field_id_map); | ||||||
|  |  | ||||||
|         self.template.render(&context).map_err(RenderPromptError::missing_context) |         let mut rendered = | ||||||
|  |             self.template.render(&context).map_err(RenderPromptError::missing_context)?; | ||||||
|  |         if let Some(max_bytes) = self.max_bytes { | ||||||
|  |             truncate(&mut rendered, max_bytes.get()); | ||||||
|  |         } | ||||||
|  |         Ok(rendered) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn truncate(s: &mut String, max_bytes: usize) { | ||||||
|  |     if max_bytes >= s.len() { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     for i in (0..=max_bytes).rev() { | ||||||
|  |         if s.is_char_boundary(i) { | ||||||
|  |             s.truncate(i); | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -145,6 +178,7 @@ mod test { | |||||||
|     use super::Prompt; |     use super::Prompt; | ||||||
|     use crate::error::FaultSource; |     use crate::error::FaultSource; | ||||||
|     use crate::prompt::error::{NewPromptError, NewPromptErrorKind}; |     use crate::prompt::error::{NewPromptError, NewPromptErrorKind}; | ||||||
|  |     use crate::prompt::truncate; | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn default_template() { |     fn default_template() { | ||||||
| @@ -154,18 +188,18 @@ mod test { | |||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn empty_template() { |     fn empty_template() { | ||||||
|         Prompt::new("".into()).unwrap(); |         Prompt::new("".into(), None).unwrap(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn template_ok() { |     fn template_ok() { | ||||||
|         Prompt::new("{{doc.title}}: {{doc.overview}}".into()).unwrap(); |         Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None).unwrap(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn template_syntax() { |     fn template_syntax() { | ||||||
|         assert!(matches!( |         assert!(matches!( | ||||||
|             Prompt::new("{{doc.title: {{doc.overview}}".into()), |             Prompt::new("{{doc.title: {{doc.overview}}".into(), None), | ||||||
|             Err(NewPromptError { |             Err(NewPromptError { | ||||||
|                 kind: NewPromptErrorKind::CannotParseTemplate(_), |                 kind: NewPromptErrorKind::CannotParseTemplate(_), | ||||||
|                 fault: FaultSource::User |                 fault: FaultSource::User | ||||||
| @@ -176,7 +210,7 @@ mod test { | |||||||
|     #[test] |     #[test] | ||||||
|     fn template_missing_doc() { |     fn template_missing_doc() { | ||||||
|         assert!(matches!( |         assert!(matches!( | ||||||
|             Prompt::new("{{title}}: {{overview}}".into()), |             Prompt::new("{{title}}: {{overview}}".into(), None), | ||||||
|             Err(NewPromptError { |             Err(NewPromptError { | ||||||
|                 kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), |                 kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), | ||||||
|                 fault: FaultSource::User |                 fault: FaultSource::User | ||||||
| @@ -186,17 +220,20 @@ mod test { | |||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn template_nested_doc() { |     fn template_nested_doc() { | ||||||
|         Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into()).unwrap(); |         Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None).unwrap(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn template_fields() { |     fn template_fields() { | ||||||
|         Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into()).unwrap(); |         Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None).unwrap(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn template_fields_ok() { |     fn template_fields_ok() { | ||||||
|         Prompt::new("{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into()) |         Prompt::new( | ||||||
|  |             "{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into(), | ||||||
|  |             None, | ||||||
|  |         ) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -204,11 +241,41 @@ mod test { | |||||||
|     fn template_fields_invalid() { |     fn template_fields_invalid() { | ||||||
|         assert!(matches!( |         assert!(matches!( | ||||||
|             // intentionally garbled field |             // intentionally garbled field | ||||||
|             Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into()), |             Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None), | ||||||
|             Err(NewPromptError { |             Err(NewPromptError { | ||||||
|                 kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), |                 kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), | ||||||
|                 fault: FaultSource::User |                 fault: FaultSource::User | ||||||
|             }) |             }) | ||||||
|         )); |         )); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // todo: test truncation | ||||||
|  |     #[test] | ||||||
|  |     fn template_truncation() { | ||||||
|  |         let mut s = "インテル ザー ビーグル".to_string(); | ||||||
|  |  | ||||||
|  |         truncate(&mut s, 42); | ||||||
|  |         assert_eq!(s, "インテル ザー ビーグル"); | ||||||
|  |  | ||||||
|  |         assert_eq!(s.len(), 32); | ||||||
|  |         truncate(&mut s, 32); | ||||||
|  |         assert_eq!(s, "インテル ザー ビーグル"); | ||||||
|  |  | ||||||
|  |         truncate(&mut s, 31); | ||||||
|  |         assert_eq!(s, "インテル ザー ビーグ"); | ||||||
|  |         truncate(&mut s, 30); | ||||||
|  |         assert_eq!(s, "インテル ザー ビーグ"); | ||||||
|  |         truncate(&mut s, 28); | ||||||
|  |         assert_eq!(s, "インテル ザー ビー"); | ||||||
|  |         truncate(&mut s, 26); | ||||||
|  |         assert_eq!(s, "インテル ザー ビー"); | ||||||
|  |         truncate(&mut s, 25); | ||||||
|  |         assert_eq!(s, "インテル ザー ビ"); | ||||||
|  |  | ||||||
|  |         assert_eq!("イ".len(), 3); | ||||||
|  |         truncate(&mut s, 3); | ||||||
|  |         assert_eq!(s, "イ"); | ||||||
|  |         truncate(&mut s, 2); | ||||||
|  |         assert_eq!(s, ""); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -2740,6 +2740,7 @@ mod tests { | |||||||
|                         api_key: Setting::NotSet, |                         api_key: Setting::NotSet, | ||||||
|                         dimensions: Setting::Set(3), |                         dimensions: Setting::Set(3), | ||||||
|                         document_template: Setting::NotSet, |                         document_template: Setting::NotSet, | ||||||
|  |                         document_template_max_bytes: Setting::NotSet, | ||||||
|                         url: Setting::NotSet, |                         url: Setting::NotSet, | ||||||
|                         request: Setting::NotSet, |                         request: Setting::NotSet, | ||||||
|                         response: Setting::NotSet, |                         response: Setting::NotSet, | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | ||||||
| use std::convert::TryInto; | use std::convert::TryInto; | ||||||
|  | use std::num::NonZeroUsize; | ||||||
| use std::result::Result as StdResult; | use std::result::Result as StdResult; | ||||||
| use std::sync::Arc; | use std::sync::Arc; | ||||||
|  |  | ||||||
| @@ -19,6 +20,7 @@ use crate::index::{ | |||||||
|     IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, |     IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, | ||||||
| }; | }; | ||||||
| use crate::order_by_map::OrderByMap; | use crate::order_by_map::OrderByMap; | ||||||
|  | use crate::prompt::default_max_bytes; | ||||||
| use crate::proximity::ProximityPrecision; | use crate::proximity::ProximityPrecision; | ||||||
| use crate::update::index_documents::IndexDocumentsMethod; | use crate::update::index_documents::IndexDocumentsMethod; | ||||||
| use crate::update::{IndexDocuments, UpdateIndexingStep}; | use crate::update::{IndexDocuments, UpdateIndexingStep}; | ||||||
| @@ -1573,14 +1575,28 @@ fn validate_prompt( | |||||||
|             api_key, |             api_key, | ||||||
|             dimensions, |             dimensions, | ||||||
|             document_template: Setting::Set(template), |             document_template: Setting::Set(template), | ||||||
|  |             document_template_max_bytes, | ||||||
|             url, |             url, | ||||||
|             request, |             request, | ||||||
|             response, |             response, | ||||||
|             distribution, |             distribution, | ||||||
|             headers, |             headers, | ||||||
|         }) => { |         }) => { | ||||||
|  |             let max_bytes = match document_template_max_bytes.set() { | ||||||
|  |                 Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { | ||||||
|  |                     crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes { | ||||||
|  |                         embedder_name: name.to_owned(), | ||||||
|  |                     } | ||||||
|  |                 })?, | ||||||
|  |                 None => default_max_bytes(), | ||||||
|  |             }; | ||||||
|  |  | ||||||
|             // validate |             // validate | ||||||
|             let template = crate::prompt::Prompt::new(template) |             let template = crate::prompt::Prompt::new( | ||||||
|  |                 template, | ||||||
|  |                 // always specify a max_bytes | ||||||
|  |                 Some(max_bytes), | ||||||
|  |             ) | ||||||
|             .map(|prompt| crate::prompt::PromptData::from(prompt).template) |             .map(|prompt| crate::prompt::PromptData::from(prompt).template) | ||||||
|             .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; |             .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; | ||||||
|  |  | ||||||
| @@ -1591,6 +1607,7 @@ fn validate_prompt( | |||||||
|                 api_key, |                 api_key, | ||||||
|                 dimensions, |                 dimensions, | ||||||
|                 document_template: Setting::Set(template), |                 document_template: Setting::Set(template), | ||||||
|  |                 document_template_max_bytes, | ||||||
|                 url, |                 url, | ||||||
|                 request, |                 request, | ||||||
|                 response, |                 response, | ||||||
| @@ -1615,6 +1632,7 @@ pub fn validate_embedding_settings( | |||||||
|         api_key, |         api_key, | ||||||
|         dimensions, |         dimensions, | ||||||
|         document_template, |         document_template, | ||||||
|  |         document_template_max_bytes, | ||||||
|         url, |         url, | ||||||
|         request, |         request, | ||||||
|         response, |         response, | ||||||
| @@ -1654,6 +1672,7 @@ pub fn validate_embedding_settings( | |||||||
|             api_key, |             api_key, | ||||||
|             dimensions, |             dimensions, | ||||||
|             document_template, |             document_template, | ||||||
|  |             document_template_max_bytes, | ||||||
|             url, |             url, | ||||||
|             request, |             request, | ||||||
|             response, |             response, | ||||||
| @@ -1726,6 +1745,12 @@ pub fn validate_embedding_settings( | |||||||
|                 inferred_source, |                 inferred_source, | ||||||
|                 name, |                 name, | ||||||
|             )?; |             )?; | ||||||
|  |             check_unset( | ||||||
|  |                 &document_template_max_bytes, | ||||||
|  |                 EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES, | ||||||
|  |                 inferred_source, | ||||||
|  |                 name, | ||||||
|  |             )?; | ||||||
|             check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; |             check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; | ||||||
|  |  | ||||||
|             check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; |             check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; | ||||||
| @@ -1748,6 +1773,7 @@ pub fn validate_embedding_settings( | |||||||
|         api_key, |         api_key, | ||||||
|         dimensions, |         dimensions, | ||||||
|         document_template, |         document_template, | ||||||
|  |         document_template_max_bytes, | ||||||
|         url, |         url, | ||||||
|         request, |         request, | ||||||
|         response, |         response, | ||||||
|   | |||||||
| @@ -1,11 +1,12 @@ | |||||||
| use std::collections::BTreeMap; | use std::collections::BTreeMap; | ||||||
|  | use std::num::NonZeroUsize; | ||||||
|  |  | ||||||
| use deserr::Deserr; | use deserr::Deserr; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| use super::{ollama, openai, DistributionShift}; | use super::{ollama, openai, DistributionShift}; | ||||||
| use crate::prompt::PromptData; | use crate::prompt::{default_max_bytes, PromptData}; | ||||||
| use crate::update::Setting; | use crate::update::Setting; | ||||||
| use crate::vector::EmbeddingConfig; | use crate::vector::EmbeddingConfig; | ||||||
| use crate::UserError; | use crate::UserError; | ||||||
| @@ -34,6 +35,9 @@ pub struct EmbeddingSettings { | |||||||
|     pub document_template: Setting<String>, |     pub document_template: Setting<String>, | ||||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|     #[deserr(default)] |     #[deserr(default)] | ||||||
|  |     pub document_template_max_bytes: Setting<usize>, | ||||||
|  |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|  |     #[deserr(default)] | ||||||
|     pub url: Setting<String>, |     pub url: Setting<String>, | ||||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|     #[deserr(default)] |     #[deserr(default)] | ||||||
| @@ -111,6 +115,7 @@ impl SettingsDiff { | |||||||
|                     mut response, |                     mut response, | ||||||
|                     mut distribution, |                     mut distribution, | ||||||
|                     mut headers, |                     mut headers, | ||||||
|  |                     mut document_template_max_bytes, | ||||||
|                 } = old; |                 } = old; | ||||||
|  |  | ||||||
|                 let EmbeddingSettings { |                 let EmbeddingSettings { | ||||||
| @@ -125,6 +130,7 @@ impl SettingsDiff { | |||||||
|                     response: new_response, |                     response: new_response, | ||||||
|                     distribution: new_distribution, |                     distribution: new_distribution, | ||||||
|                     headers: new_headers, |                     headers: new_headers, | ||||||
|  |                     document_template_max_bytes: new_document_template_max_bytes, | ||||||
|                 } = new; |                 } = new; | ||||||
|  |  | ||||||
|                 let mut reindex_action = None; |                 let mut reindex_action = None; | ||||||
| @@ -142,6 +148,7 @@ impl SettingsDiff { | |||||||
|                         &mut request, |                         &mut request, | ||||||
|                         &mut response, |                         &mut response, | ||||||
|                         &mut document_template, |                         &mut document_template, | ||||||
|  |                         &mut document_template_max_bytes, | ||||||
|                         &mut headers, |                         &mut headers, | ||||||
|                     ) |                     ) | ||||||
|                 } |                 } | ||||||
| @@ -190,6 +197,23 @@ impl SettingsDiff { | |||||||
|                     ); |                     ); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|  |                 if document_template_max_bytes.apply(new_document_template_max_bytes) { | ||||||
|  |                     let previous_document_template_max_bytes = | ||||||
|  |                         document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); | ||||||
|  |                     let new_document_template_max_bytes = | ||||||
|  |                         new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); | ||||||
|  |  | ||||||
|  |                     // only reindex if the size increased. Reasoning: | ||||||
|  |                     // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors | ||||||
|  |                     // - size increase is an accuracy optimization, so we want to reindex | ||||||
|  |                     if new_document_template_max_bytes > previous_document_template_max_bytes { | ||||||
|  |                         ReindexAction::push_action( | ||||||
|  |                             &mut reindex_action, | ||||||
|  |                             ReindexAction::RegeneratePrompts, | ||||||
|  |                         ) | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 distribution.apply(new_distribution); |                 distribution.apply(new_distribution); | ||||||
|                 api_key.apply(new_api_key); |                 api_key.apply(new_api_key); | ||||||
|                 headers.apply(new_headers); |                 headers.apply(new_headers); | ||||||
| @@ -206,6 +230,7 @@ impl SettingsDiff { | |||||||
|                     response, |                     response, | ||||||
|                     distribution, |                     distribution, | ||||||
|                     headers, |                     headers, | ||||||
|  |                     document_template_max_bytes, | ||||||
|                 }; |                 }; | ||||||
|  |  | ||||||
|                 match reindex_action { |                 match reindex_action { | ||||||
| @@ -239,6 +264,7 @@ fn apply_default_for_source( | |||||||
|     request: &mut Setting<serde_json::Value>, |     request: &mut Setting<serde_json::Value>, | ||||||
|     response: &mut Setting<serde_json::Value>, |     response: &mut Setting<serde_json::Value>, | ||||||
|     document_template: &mut Setting<String>, |     document_template: &mut Setting<String>, | ||||||
|  |     document_template_max_bytes: &mut Setting<usize>, | ||||||
|     headers: &mut Setting<BTreeMap<String, String>>, |     headers: &mut Setting<BTreeMap<String, String>>, | ||||||
| ) { | ) { | ||||||
|     match source { |     match source { | ||||||
| @@ -286,6 +312,7 @@ fn apply_default_for_source( | |||||||
|             *request = Setting::NotSet; |             *request = Setting::NotSet; | ||||||
|             *response = Setting::NotSet; |             *response = Setting::NotSet; | ||||||
|             *document_template = Setting::NotSet; |             *document_template = Setting::NotSet; | ||||||
|  |             *document_template_max_bytes = Setting::NotSet; | ||||||
|             *headers = Setting::NotSet; |             *headers = Setting::NotSet; | ||||||
|         } |         } | ||||||
|         Setting::NotSet => {} |         Setting::NotSet => {} | ||||||
| @@ -316,6 +343,7 @@ impl EmbeddingSettings { | |||||||
|     pub const API_KEY: &'static str = "apiKey"; |     pub const API_KEY: &'static str = "apiKey"; | ||||||
|     pub const DIMENSIONS: &'static str = "dimensions"; |     pub const DIMENSIONS: &'static str = "dimensions"; | ||||||
|     pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; |     pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; | ||||||
|  |     pub const DOCUMENT_TEMPLATE_MAX_BYTES: &'static str = "documentTemplateMaxBytes"; | ||||||
|  |  | ||||||
|     pub const URL: &'static str = "url"; |     pub const URL: &'static str = "url"; | ||||||
|     pub const REQUEST: &'static str = "request"; |     pub const REQUEST: &'static str = "request"; | ||||||
| @@ -459,6 +487,8 @@ impl std::fmt::Display for EmbedderSource { | |||||||
| impl From<EmbeddingConfig> for EmbeddingSettings { | impl From<EmbeddingConfig> for EmbeddingSettings { | ||||||
|     fn from(value: EmbeddingConfig) -> Self { |     fn from(value: EmbeddingConfig) -> Self { | ||||||
|         let EmbeddingConfig { embedder_options, prompt } = value; |         let EmbeddingConfig { embedder_options, prompt } = value; | ||||||
|  |         let document_template_max_bytes = | ||||||
|  |             Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); | ||||||
|         match embedder_options { |         match embedder_options { | ||||||
|             super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions { |             super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions { | ||||||
|                 model, |                 model, | ||||||
| @@ -471,6 +501,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 api_key: Setting::NotSet, |                 api_key: Setting::NotSet, | ||||||
|                 dimensions: Setting::NotSet, |                 dimensions: Setting::NotSet, | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
|  |                 document_template_max_bytes, | ||||||
|                 url: Setting::NotSet, |                 url: Setting::NotSet, | ||||||
|                 request: Setting::NotSet, |                 request: Setting::NotSet, | ||||||
|                 response: Setting::NotSet, |                 response: Setting::NotSet, | ||||||
| @@ -490,6 +521,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 api_key: Setting::some_or_not_set(api_key), |                 api_key: Setting::some_or_not_set(api_key), | ||||||
|                 dimensions: Setting::some_or_not_set(dimensions), |                 dimensions: Setting::some_or_not_set(dimensions), | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
|  |                 document_template_max_bytes, | ||||||
|                 url: Setting::some_or_not_set(url), |                 url: Setting::some_or_not_set(url), | ||||||
|                 request: Setting::NotSet, |                 request: Setting::NotSet, | ||||||
|                 response: Setting::NotSet, |                 response: Setting::NotSet, | ||||||
| @@ -509,6 +541,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 api_key: Setting::some_or_not_set(api_key), |                 api_key: Setting::some_or_not_set(api_key), | ||||||
|                 dimensions: Setting::some_or_not_set(dimensions), |                 dimensions: Setting::some_or_not_set(dimensions), | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
|  |                 document_template_max_bytes, | ||||||
|                 url: Setting::some_or_not_set(url), |                 url: Setting::some_or_not_set(url), | ||||||
|                 request: Setting::NotSet, |                 request: Setting::NotSet, | ||||||
|                 response: Setting::NotSet, |                 response: Setting::NotSet, | ||||||
| @@ -525,6 +558,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 api_key: Setting::NotSet, |                 api_key: Setting::NotSet, | ||||||
|                 dimensions: Setting::Set(dimensions), |                 dimensions: Setting::Set(dimensions), | ||||||
|                 document_template: Setting::NotSet, |                 document_template: Setting::NotSet, | ||||||
|  |                 document_template_max_bytes: Setting::NotSet, | ||||||
|                 url: Setting::NotSet, |                 url: Setting::NotSet, | ||||||
|                 request: Setting::NotSet, |                 request: Setting::NotSet, | ||||||
|                 response: Setting::NotSet, |                 response: Setting::NotSet, | ||||||
| @@ -546,6 +580,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 api_key: Setting::some_or_not_set(api_key), |                 api_key: Setting::some_or_not_set(api_key), | ||||||
|                 dimensions: Setting::some_or_not_set(dimensions), |                 dimensions: Setting::some_or_not_set(dimensions), | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
|  |                 document_template_max_bytes, | ||||||
|                 url: Setting::Set(url), |                 url: Setting::Set(url), | ||||||
|                 request: Setting::Set(request), |                 request: Setting::Set(request), | ||||||
|                 response: Setting::Set(response), |                 response: Setting::Set(response), | ||||||
| @@ -566,6 +601,7 @@ impl From<EmbeddingSettings> for EmbeddingConfig { | |||||||
|             api_key, |             api_key, | ||||||
|             dimensions, |             dimensions, | ||||||
|             document_template, |             document_template, | ||||||
|  |             document_template_max_bytes, | ||||||
|             url, |             url, | ||||||
|             request, |             request, | ||||||
|             response, |             response, | ||||||
| @@ -648,7 +684,12 @@ impl From<EmbeddingSettings> for EmbeddingConfig { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         if let Setting::Set(template) = document_template { |         if let Setting::Set(template) = document_template { | ||||||
|             this.prompt = PromptData { template } |             let max_bytes = document_template_max_bytes | ||||||
|  |                 .set() | ||||||
|  |                 .and_then(NonZeroUsize::new) | ||||||
|  |                 .unwrap_or(default_max_bytes()); | ||||||
|  |  | ||||||
|  |             this.prompt = PromptData { template, max_bytes: Some(max_bytes) } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         this |         this | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user