mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	Document settings
This commit is contained in:
		@@ -20,58 +20,263 @@ pub struct EmbeddingSettings {
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<EmbedderSource>)]
 | 
			
		||||
    /// The source used to provide the embeddings.
 | 
			
		||||
    ///
 | 
			
		||||
    /// Which embedder parameters are available and mandatory is determined by the value of this setting.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ Changing the value of this parameter always regenerates embeddings.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - Defaults to `openAi`
 | 
			
		||||
    pub source: Setting<EmbedderSource>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<String>)]
 | 
			
		||||
    /// The name of the model to use.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Mandatory
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is mandatory for source `ollama`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for sources `openAi`, `huggingFace`, `ollama`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ Changing the value of this parameter always regenerates embeddings.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - For source `openAi`, defaults to `text-embedding-3-small`
 | 
			
		||||
    /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5`
 | 
			
		||||
    pub model: Setting<String>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<String>)]
 | 
			
		||||
    /// The revision (commit SHA1) of the model to use.
 | 
			
		||||
    ///
 | 
			
		||||
    /// If unspecified, Meilisearch picks the latest revision of the model.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `huggingFace`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ Changing the value of this parameter always regenerates embeddings
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e`
 | 
			
		||||
    /// - Otherwise, defaults to `null`
 | 
			
		||||
    pub revision: Setting<String>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<OverridePooling>)]
 | 
			
		||||
    /// The pooling method to use.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `huggingFace`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ Changing the value of this parameter always regenerates embeddings
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - Defaults to `useModel`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Compatibility Note
 | 
			
		||||
    ///
 | 
			
		||||
    /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior.
 | 
			
		||||
    pub pooling: Setting<OverridePooling>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<String>)]
 | 
			
		||||
    /// The API key to pass to the remote embedder while making requests.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `openAi`, `ollama`, `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🌱 Changing the value of this parameter never regenerates embeddings
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - For source `openAi`, the key is read from `OPENAI_API_KEY`, then `MEILI_OPENAI_API_KEY`.
 | 
			
		||||
    /// - For other sources, no bearer token is sent if this parameter is not set.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Note
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This setting is partially hidden when returned by the settings
 | 
			
		||||
    pub api_key: Setting<String>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<String>)]
 | 
			
		||||
    /// The expected dimensions of the embeddings produced by this embedder.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Mandatory
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is mandatory for source `userProvided`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `openAi`, `ollama`, `rest`, `userProvided`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # π Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - ποΈ When the source is `openAi`, changing the value of this parameter always regenerates embeddings
 | 
			
		||||
    /// - π± For other sources, changing the value of this parameter never regenerates embeddings
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - For source `openAi`, the dimensions is the maximum allowed by the model.
 | 
			
		||||
    /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text.
 | 
			
		||||
    pub dimensions: Setting<usize>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<bool>)]
 | 
			
		||||
    /// Whether to binary quantize the embeddings of this embedder.
 | 
			
		||||
    ///
 | 
			
		||||
    /// Binary quantized embeddings are smaller than regular embeddings, which improves
 | 
			
		||||
    /// disk usage and retrieval speed, at the cost of relevancy.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for all embedders
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ When set to `true`, embeddings are not regenerated, but they are binary quantized, which takes time.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - Defaults to `false`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Note
 | 
			
		||||
    ///
 | 
			
		||||
    /// As binary quantization is a destructive operation, it is not possible to disable this setting again after
 | 
			
		||||
    /// first enabling it. If you are unsure of whether the performance-relevancy tradeoff is right for you,
 | 
			
		||||
    /// we recommend using this parameter on a test index first.
 | 
			
		||||
    pub binary_quantized: Setting<bool>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<bool>)]
 | 
			
		||||
    /// A liquid template used to render documents to a text that can be embedded.
 | 
			
		||||
    ///
 | 
			
		||||
    /// Meillisearch interpolates the template for each document and sends the resulting text to the embedder.
 | 
			
		||||
    /// The embedder then generates document vectors based on this text.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest
 | 
			
		||||
    ///
 | 
			
		||||
    /// # π Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - ποΈ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text.
 | 
			
		||||
    pub document_template: Setting<String>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<usize>)]
 | 
			
		||||
    /// Rendered texts are truncated to this size.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text.
 | 
			
		||||
    /// - 🌱 When decreased, embeddings are never regenerated
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Defaults
 | 
			
		||||
    ///
 | 
			
		||||
    /// - Defaults to 400
 | 
			
		||||
    pub document_template_max_bytes: Setting<usize>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<String>)]
 | 
			
		||||
    /// URL to reach the remote embedder.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Mandatory
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is mandatory for source `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `openAi`, `ollama` and `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🌱 When modified for source `openAi`, embeddings are never regenerated
 | 
			
		||||
    /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated
 | 
			
		||||
    pub url: Setting<String>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<serde_json::Value>)]
 | 
			
		||||
    /// Template request to send to the remote embedder.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Mandatory
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is mandatory for source `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ Changing the value of this parameter always regenerates embeddings
 | 
			
		||||
    pub request: Setting<serde_json::Value>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<serde_json::Value>)]
 | 
			
		||||
    /// Template response indicating how to find the embeddings in the response from the remote embedder.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Mandatory
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is mandatory for source `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🏗️ Changing the value of this parameter always regenerates embeddings
 | 
			
		||||
    pub response: Setting<serde_json::Value>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<BTreeMap<String, String>>)]
 | 
			
		||||
    /// Additional headers to send to the remote embedder.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for source `rest`
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🌱 Changing the value of this parameter never regenerates embeddings
 | 
			
		||||
    pub headers: Setting<BTreeMap<String, String>>,
 | 
			
		||||
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
 | 
			
		||||
    #[deserr(default)]
 | 
			
		||||
    #[schema(value_type = Option<DistributionShift>)]
 | 
			
		||||
    /// Affine transformation applied to the semantic score to make it more comparable to the ranking score.
 | 
			
		||||
    ///
 | 
			
		||||
    /// # Availability
 | 
			
		||||
    ///
 | 
			
		||||
    /// - This parameter is available for all embedders
 | 
			
		||||
    ///
 | 
			
		||||
    /// # 🔄 Reindexing
 | 
			
		||||
    ///
 | 
			
		||||
    /// - 🌱 Changing the value of this parameter never regenerates embeddings
 | 
			
		||||
    pub distribution: Setting<DistributionShift>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user