mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Refactor settings validation and introduce SubEmbedderSettings
This commit is contained in:
		@@ -27,8 +27,8 @@ use crate::proximity::ProximityPrecision;
 | 
			
		||||
use crate::update::index_documents::IndexDocumentsMethod;
 | 
			
		||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
 | 
			
		||||
use crate::vector::settings::{
 | 
			
		||||
    check_set, check_unset, EmbedderAction, EmbedderSource, EmbeddingSettings, ReindexAction,
 | 
			
		||||
    WriteBackToDocuments,
 | 
			
		||||
    EmbedderAction, EmbedderSource, EmbeddingSettings, NestingContext, ReindexAction,
 | 
			
		||||
    SubEmbeddingSettings, WriteBackToDocuments,
 | 
			
		||||
};
 | 
			
		||||
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
 | 
			
		||||
use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};
 | 
			
		||||
@@ -1669,26 +1669,12 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
 | 
			
		||||
 | 
			
		||||
fn validate_prompt(
 | 
			
		||||
    name: &str,
 | 
			
		||||
    new: Setting<EmbeddingSettings>,
 | 
			
		||||
) -> Result<Setting<EmbeddingSettings>> {
 | 
			
		||||
    match new {
 | 
			
		||||
        Setting::Set(EmbeddingSettings {
 | 
			
		||||
            source,
 | 
			
		||||
            model,
 | 
			
		||||
            revision,
 | 
			
		||||
            pooling,
 | 
			
		||||
            api_key,
 | 
			
		||||
            dimensions,
 | 
			
		||||
            document_template: Setting::Set(template),
 | 
			
		||||
            document_template_max_bytes,
 | 
			
		||||
            url,
 | 
			
		||||
            request,
 | 
			
		||||
            response,
 | 
			
		||||
            distribution,
 | 
			
		||||
            headers,
 | 
			
		||||
            binary_quantized: binary_quantize,
 | 
			
		||||
        }) => {
 | 
			
		||||
            let max_bytes = match document_template_max_bytes.set() {
 | 
			
		||||
    new_prompt: Setting<String>,
 | 
			
		||||
    max_bytes: Setting<usize>,
 | 
			
		||||
) -> Result<Setting<String>> {
 | 
			
		||||
    match new_prompt {
 | 
			
		||||
        Setting::Set(template) => {
 | 
			
		||||
            let max_bytes = match max_bytes.set() {
 | 
			
		||||
                Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
 | 
			
		||||
                    crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes {
 | 
			
		||||
                        embedder_name: name.to_owned(),
 | 
			
		||||
@@ -1706,22 +1692,7 @@ fn validate_prompt(
 | 
			
		||||
            .map(|prompt| crate::prompt::PromptData::from(prompt).template)
 | 
			
		||||
            .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
 | 
			
		||||
 | 
			
		||||
            Ok(Setting::Set(EmbeddingSettings {
 | 
			
		||||
                source,
 | 
			
		||||
                model,
 | 
			
		||||
                revision,
 | 
			
		||||
                pooling,
 | 
			
		||||
                api_key,
 | 
			
		||||
                dimensions,
 | 
			
		||||
                document_template: Setting::Set(template),
 | 
			
		||||
                document_template_max_bytes,
 | 
			
		||||
                url,
 | 
			
		||||
                request,
 | 
			
		||||
                response,
 | 
			
		||||
                distribution,
 | 
			
		||||
                headers,
 | 
			
		||||
                binary_quantized: binary_quantize,
 | 
			
		||||
            }))
 | 
			
		||||
            Ok(Setting::Set(template))
 | 
			
		||||
        }
 | 
			
		||||
        new => Ok(new),
 | 
			
		||||
    }
 | 
			
		||||
@@ -1731,7 +1702,6 @@ pub fn validate_embedding_settings(
 | 
			
		||||
    settings: Setting<EmbeddingSettings>,
 | 
			
		||||
    name: &str,
 | 
			
		||||
) -> Result<Setting<EmbeddingSettings>> {
 | 
			
		||||
    let settings = validate_prompt(name, settings)?;
 | 
			
		||||
    let Setting::Set(settings) = settings else { return Ok(settings) };
 | 
			
		||||
    let EmbeddingSettings {
 | 
			
		||||
        source,
 | 
			
		||||
@@ -1745,11 +1715,15 @@ pub fn validate_embedding_settings(
 | 
			
		||||
        url,
 | 
			
		||||
        request,
 | 
			
		||||
        response,
 | 
			
		||||
        search_embedder,
 | 
			
		||||
        mut indexing_embedder,
 | 
			
		||||
        distribution,
 | 
			
		||||
        headers,
 | 
			
		||||
        binary_quantized: binary_quantize,
 | 
			
		||||
    } = settings;
 | 
			
		||||
 | 
			
		||||
    let document_template = validate_prompt(name, document_template, document_template_max_bytes)?;
 | 
			
		||||
 | 
			
		||||
    if let Some(0) = dimensions.set() {
 | 
			
		||||
        return Err(crate::error::UserError::InvalidSettingsDimensions {
 | 
			
		||||
            embedder_name: name.to_owned(),
 | 
			
		||||
@@ -1775,6 +1749,7 @@ pub fn validate_embedding_settings(
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let Some(inferred_source) = source.set() else {
 | 
			
		||||
        // we are validating the fused settings, so we always have a source
 | 
			
		||||
        return Ok(Setting::Set(EmbeddingSettings {
 | 
			
		||||
            source,
 | 
			
		||||
            model,
 | 
			
		||||
@@ -1787,20 +1762,35 @@ pub fn validate_embedding_settings(
 | 
			
		||||
            url,
 | 
			
		||||
            request,
 | 
			
		||||
            response,
 | 
			
		||||
            search_embedder,
 | 
			
		||||
            indexing_embedder,
 | 
			
		||||
            distribution,
 | 
			
		||||
            headers,
 | 
			
		||||
            binary_quantized: binary_quantize,
 | 
			
		||||
        }));
 | 
			
		||||
    };
 | 
			
		||||
    EmbeddingSettings::check_settings(
 | 
			
		||||
        name,
 | 
			
		||||
        inferred_source,
 | 
			
		||||
        NestingContext::NotNested,
 | 
			
		||||
        &model,
 | 
			
		||||
        &revision,
 | 
			
		||||
        &pooling,
 | 
			
		||||
        &dimensions,
 | 
			
		||||
        &api_key,
 | 
			
		||||
        &url,
 | 
			
		||||
        &request,
 | 
			
		||||
        &response,
 | 
			
		||||
        &document_template,
 | 
			
		||||
        &document_template_max_bytes,
 | 
			
		||||
        &headers,
 | 
			
		||||
        &search_embedder,
 | 
			
		||||
        &indexing_embedder,
 | 
			
		||||
        &binary_quantize,
 | 
			
		||||
        &distribution,
 | 
			
		||||
    )?;
 | 
			
		||||
    match inferred_source {
 | 
			
		||||
        EmbedderSource::OpenAi => {
 | 
			
		||||
            check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
 | 
			
		||||
            check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
 | 
			
		||||
 | 
			
		||||
            check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
 | 
			
		||||
            check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
 | 
			
		||||
            check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
 | 
			
		||||
 | 
			
		||||
            if let Setting::Set(model) = &model {
 | 
			
		||||
                let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
 | 
			
		||||
                    .ok_or(crate::error::UserError::InvalidOpenAiModel {
 | 
			
		||||
@@ -1831,55 +1821,117 @@ pub fn validate_embedding_settings(
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        EmbedderSource::Ollama => {
 | 
			
		||||
            check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
 | 
			
		||||
            check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
 | 
			
		||||
            check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
 | 
			
		||||
        EmbedderSource::Ollama
 | 
			
		||||
        | EmbedderSource::HuggingFace
 | 
			
		||||
        | EmbedderSource::UserProvided
 | 
			
		||||
        | EmbedderSource::Rest => {}
 | 
			
		||||
        EmbedderSource::Composite => {
 | 
			
		||||
            if let Setting::Set(embedder) = &search_embedder {
 | 
			
		||||
                if let Some(source) = embedder.source.set() {
 | 
			
		||||
                    let search_embedder = match embedder.search_embedder.clone() {
 | 
			
		||||
                        Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder(
 | 
			
		||||
                            search_embedder,
 | 
			
		||||
                            name,
 | 
			
		||||
                            NestingContext::Search,
 | 
			
		||||
                        )?),
 | 
			
		||||
                        Setting::Reset => Setting::Reset,
 | 
			
		||||
                        Setting::NotSet => Setting::NotSet,
 | 
			
		||||
                    };
 | 
			
		||||
                    let indexing_embedder = match embedder.indexing_embedder.clone() {
 | 
			
		||||
                        Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder(
 | 
			
		||||
                            indexing_embedder,
 | 
			
		||||
                            name,
 | 
			
		||||
                            NestingContext::Search,
 | 
			
		||||
                        )?),
 | 
			
		||||
                        Setting::Reset => Setting::Reset,
 | 
			
		||||
                        Setting::NotSet => Setting::NotSet,
 | 
			
		||||
                    };
 | 
			
		||||
                    EmbeddingSettings::check_nested_source(name, source, NestingContext::Search)?;
 | 
			
		||||
                    EmbeddingSettings::check_settings(
 | 
			
		||||
                        name,
 | 
			
		||||
                        source,
 | 
			
		||||
                        NestingContext::Search,
 | 
			
		||||
                        &embedder.model,
 | 
			
		||||
                        &embedder.revision,
 | 
			
		||||
                        &embedder.pooling,
 | 
			
		||||
                        &embedder.dimensions,
 | 
			
		||||
                        &embedder.api_key,
 | 
			
		||||
                        &embedder.url,
 | 
			
		||||
                        &embedder.request,
 | 
			
		||||
                        &embedder.response,
 | 
			
		||||
                        &embedder.document_template,
 | 
			
		||||
                        &embedder.document_template_max_bytes,
 | 
			
		||||
                        &embedder.headers,
 | 
			
		||||
                        &search_embedder,
 | 
			
		||||
                        &indexing_embedder,
 | 
			
		||||
                        &embedder.binary_quantized,
 | 
			
		||||
                        &embedder.distribution,
 | 
			
		||||
                    )?;
 | 
			
		||||
                } else {
 | 
			
		||||
                    return Err(UserError::MissingSourceForNested {
 | 
			
		||||
                        embedder_name: NestingContext::Search.embedder_name_with_context(name),
 | 
			
		||||
                    }
 | 
			
		||||
                    .into());
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
 | 
			
		||||
            check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
 | 
			
		||||
            check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
 | 
			
		||||
        }
 | 
			
		||||
        EmbedderSource::HuggingFace => {
 | 
			
		||||
            check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
 | 
			
		||||
            check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
 | 
			
		||||
            indexing_embedder = if let Setting::Set(mut embedder) = indexing_embedder {
 | 
			
		||||
                embedder.document_template = validate_prompt(
 | 
			
		||||
                    name,
 | 
			
		||||
                    embedder.document_template,
 | 
			
		||||
                    embedder.document_template_max_bytes,
 | 
			
		||||
                )?;
 | 
			
		||||
 | 
			
		||||
            check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
 | 
			
		||||
            check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
 | 
			
		||||
            check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
 | 
			
		||||
            check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
 | 
			
		||||
        }
 | 
			
		||||
        EmbedderSource::UserProvided => {
 | 
			
		||||
            check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
 | 
			
		||||
            check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
 | 
			
		||||
            check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
 | 
			
		||||
            check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
 | 
			
		||||
            check_unset(
 | 
			
		||||
                &document_template,
 | 
			
		||||
                EmbeddingSettings::DOCUMENT_TEMPLATE,
 | 
			
		||||
                inferred_source,
 | 
			
		||||
                name,
 | 
			
		||||
            )?;
 | 
			
		||||
            check_unset(
 | 
			
		||||
                &document_template_max_bytes,
 | 
			
		||||
                EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES,
 | 
			
		||||
                inferred_source,
 | 
			
		||||
                name,
 | 
			
		||||
            )?;
 | 
			
		||||
            check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
 | 
			
		||||
 | 
			
		||||
            check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
 | 
			
		||||
            check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
 | 
			
		||||
            check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
 | 
			
		||||
            check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
 | 
			
		||||
        }
 | 
			
		||||
        EmbedderSource::Rest => {
 | 
			
		||||
            check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
 | 
			
		||||
            check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
 | 
			
		||||
            check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?;
 | 
			
		||||
            check_set(&url, EmbeddingSettings::URL, inferred_source, name)?;
 | 
			
		||||
            check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
 | 
			
		||||
            check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
 | 
			
		||||
                if let Some(source) = embedder.source.set() {
 | 
			
		||||
                    let search_embedder = match embedder.search_embedder.clone() {
 | 
			
		||||
                        Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder(
 | 
			
		||||
                            search_embedder,
 | 
			
		||||
                            name,
 | 
			
		||||
                            NestingContext::Indexing,
 | 
			
		||||
                        )?),
 | 
			
		||||
                        Setting::Reset => Setting::Reset,
 | 
			
		||||
                        Setting::NotSet => Setting::NotSet,
 | 
			
		||||
                    };
 | 
			
		||||
                    let indexing_embedder = match embedder.indexing_embedder.clone() {
 | 
			
		||||
                        Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder(
 | 
			
		||||
                            indexing_embedder,
 | 
			
		||||
                            name,
 | 
			
		||||
                            NestingContext::Indexing,
 | 
			
		||||
                        )?),
 | 
			
		||||
                        Setting::Reset => Setting::Reset,
 | 
			
		||||
                        Setting::NotSet => Setting::NotSet,
 | 
			
		||||
                    };
 | 
			
		||||
                    EmbeddingSettings::check_nested_source(name, source, NestingContext::Indexing)?;
 | 
			
		||||
                    EmbeddingSettings::check_settings(
 | 
			
		||||
                        name,
 | 
			
		||||
                        source,
 | 
			
		||||
                        NestingContext::Indexing,
 | 
			
		||||
                        &embedder.model,
 | 
			
		||||
                        &embedder.revision,
 | 
			
		||||
                        &embedder.pooling,
 | 
			
		||||
                        &embedder.dimensions,
 | 
			
		||||
                        &embedder.api_key,
 | 
			
		||||
                        &embedder.url,
 | 
			
		||||
                        &embedder.request,
 | 
			
		||||
                        &embedder.response,
 | 
			
		||||
                        &embedder.document_template,
 | 
			
		||||
                        &embedder.document_template_max_bytes,
 | 
			
		||||
                        &embedder.headers,
 | 
			
		||||
                        &search_embedder,
 | 
			
		||||
                        &indexing_embedder,
 | 
			
		||||
                        &embedder.binary_quantized,
 | 
			
		||||
                        &embedder.distribution,
 | 
			
		||||
                    )?;
 | 
			
		||||
                } else {
 | 
			
		||||
                    return Err(UserError::MissingSourceForNested {
 | 
			
		||||
                        embedder_name: NestingContext::Indexing.embedder_name_with_context(name),
 | 
			
		||||
                    }
 | 
			
		||||
                    .into());
 | 
			
		||||
                }
 | 
			
		||||
                Setting::Set(embedder)
 | 
			
		||||
            } else {
 | 
			
		||||
                indexing_embedder
 | 
			
		||||
            };
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    Ok(Setting::Set(EmbeddingSettings {
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user