mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Document settings
This commit is contained in:
		| @@ -20,58 +20,263 @@ pub struct EmbeddingSettings { | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<EmbedderSource>)] | ||||
|     /// The source used to provide the embeddings. | ||||
|     /// | ||||
|     /// Which embedder parameters are available and mandatory is determined by the value of this setting. | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ Changing the value of this parameter always regenerates embeddings. | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - Defaults to `openAi` | ||||
|     pub source: Setting<EmbedderSource>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<String>)] | ||||
|     /// The name of the model to use. | ||||
|     /// | ||||
|     /// # Mandatory | ||||
|     /// | ||||
|     /// - This parameter is mandatory for source `ollama` | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for sources `openAi`, `huggingFace`, `ollama` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ Changing the value of this parameter always regenerates embeddings. | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - For source `openAi`, defaults to `text-embedding-3-small` | ||||
|     /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5` | ||||
|     pub model: Setting<String>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<String>)] | ||||
|     /// The revision (commit SHA1) of the model to use. | ||||
|     /// | ||||
|     /// If unspecified, Meilisearch picks the latest revision of the model. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for source `huggingFace` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ Changing the value of this parameter always regenerates embeddings | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e` | ||||
|     /// - Otherwise, defaults to `null` | ||||
|     pub revision: Setting<String>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<OverridePooling>)] | ||||
|     /// The pooling method to use. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for source `huggingFace` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ Changing the value of this parameter always regenerates embeddings | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - Defaults to `useModel` | ||||
|     /// | ||||
|     /// # Compatibility Note | ||||
|     /// | ||||
|     /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior. | ||||
|     pub pooling: Setting<OverridePooling>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<String>)] | ||||
|     /// The API key to pass to the remote embedder while making requests. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for sources `openAi`, `ollama`, `rest` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🌱 Changing the value of this parameter never regenerates embeddings | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - For source `openAi`, the key is read from `OPENAI_API_KEY`, then `MEILI_OPENAI_API_KEY`. | ||||
|     /// - For other sources, no bearer token is sent if this parameter is not set. | ||||
|     /// | ||||
|     /// # Note | ||||
|     /// | ||||
|     /// - This setting is partially hidden when returned by the settings | ||||
|     pub api_key: Setting<String>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<String>)] | ||||
|     /// The expected dimensions of the embeddings produced by this embedder. | ||||
|     /// | ||||
|     /// # Mandatory | ||||
|     /// | ||||
|     /// - This parameter is mandatory for source `userProvided` | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for sources `openAi`, `ollama`, `rest`, `userProvided` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ When the source is `openAi`, changing the value of this parameter always regenerates embeddings | ||||
|     /// - 🌱 For other sources, changing the value of this parameter never regenerates embeddings | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - For source `openAi`, the dimensions are the maximum allowed by the model. | ||||
|     /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text. | ||||
|     pub dimensions: Setting<usize>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<bool>)] | ||||
|     /// Whether to binary quantize the embeddings of this embedder. | ||||
|     /// | ||||
|     /// Binary quantized embeddings are smaller than regular embeddings, which improves | ||||
|     /// disk usage and retrieval speed, at the cost of relevancy. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for all embedders | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ When set to `true`, embeddings are not regenerated, but they are binary quantized, which takes time. | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - Defaults to `false` | ||||
|     /// | ||||
|     /// # Note | ||||
|     /// | ||||
|     /// As binary quantization is a destructive operation, it is not possible to disable this setting again after | ||||
|     /// first enabling it. If you are unsure whether the performance-relevancy tradeoff is right for you, | ||||
|     /// we recommend using this parameter on a test index first. | ||||
|     pub binary_quantized: Setting<bool>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<bool>)] | ||||
|     /// A liquid template used to render documents to a text that can be embedded. | ||||
|     /// | ||||
|     /// Meilisearch interpolates the template for each document and sends the resulting text to the embedder. | ||||
|     /// The embedder then generates document vectors based on this text. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for sources `openAi`, `huggingFace`, `ollama` and `rest` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text. | ||||
|     pub document_template: Setting<String>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<usize>)] | ||||
|     /// Rendered texts are truncated to this size. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text. | ||||
|     /// - 🌱 When decreased, embeddings are never regenerated | ||||
|     /// | ||||
|     /// # Defaults | ||||
|     /// | ||||
|     /// - Defaults to 400 | ||||
|     pub document_template_max_bytes: Setting<usize>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<String>)] | ||||
|     /// URL to reach the remote embedder. | ||||
|     /// | ||||
|     /// # Mandatory | ||||
|     /// | ||||
|     /// - This parameter is mandatory for source `rest` | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for source `openAi`, `ollama` and `rest` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🌱 When modified for source `openAi`, embeddings are never regenerated | ||||
|     /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated | ||||
|     pub url: Setting<String>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<serde_json::Value>)] | ||||
|     /// Template request to send to the remote embedder. | ||||
|     /// | ||||
|     /// # Mandatory | ||||
|     /// | ||||
|     /// - This parameter is mandatory for source `rest` | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for source `rest` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ Changing the value of this parameter always regenerates embeddings | ||||
|     pub request: Setting<serde_json::Value>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<serde_json::Value>)] | ||||
|     /// Template response indicating how to find the embeddings in the response from the remote embedder. | ||||
|     /// | ||||
|     /// # Mandatory | ||||
|     /// | ||||
|     /// - This parameter is mandatory for source `rest` | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for source `rest` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🏗️ Changing the value of this parameter always regenerates embeddings | ||||
|     pub response: Setting<serde_json::Value>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<BTreeMap<String, String>>)] | ||||
|     /// Additional headers to send to the remote embedder. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for source `rest` | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🌱 Changing the value of this parameter never regenerates embeddings | ||||
|     pub headers: Setting<BTreeMap<String, String>>, | ||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||
|     #[deserr(default)] | ||||
|     #[schema(value_type = Option<DistributionShift>)] | ||||
|     /// Affine transformation applied to the semantic score to make it more comparable to the ranking score. | ||||
|     /// | ||||
|     /// # Availability | ||||
|     /// | ||||
|     /// - This parameter is available for all embedders | ||||
|     /// | ||||
|     /// # 🔄 Reindexing | ||||
|     /// | ||||
|     /// - 🌱 Changing the value of this parameter never regenerates embeddings | ||||
|     pub distribution: Setting<DistributionShift>, | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user