mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	Add EmbedderAction to settings
This commit is contained in:
		| @@ -1,4 +1,5 @@ | |||||||
| use deserr::Deserr; | use deserr::Deserr; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| use super::rest::InputType; | use super::rest::InputType; | ||||||
| @@ -72,6 +73,245 @@ pub fn check_unset<T>( | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
/// Indicates what action should take place during a reindexing operation for an embedder.
///
/// When several changes are accumulated through `ReindexAction::push_action`,
/// `FullReindex` takes precedence over `RegeneratePrompts`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ReindexAction {
    /// An indexing operation should take place for this embedder, keeping existing vectors
    /// and checking whether the document template changed or not
    RegeneratePrompts,
    /// An indexing operation should take place for all documents for this embedder, removing existing vectors
    /// (except userProvided ones)
    FullReindex,
}
|  |  | ||||||
/// Outcome of comparing the current settings of an embedder with a requested update.
///
/// Values are built by `SettingsDiff::from_settings`.
pub enum SettingsDiff {
    /// The requested update was `Setting::Reset`; no updated settings are carried.
    // NOTE(review): presumably this means the embedder itself is deleted — confirm with callers.
    Remove,
    /// At least one parameter changed in a way that requires `action`;
    /// `updated_settings` holds the merged settings.
    Reindex { action: ReindexAction, updated_settings: EmbeddingSettings },
    /// No change requires a reindexing operation; `updated_settings` holds the merged
    /// settings (or the old settings untouched when the update was `Setting::NotSet`).
    UpdateWithoutReindex { updated_settings: EmbeddingSettings },
}
|  |  | ||||||
/// Action to carry out for an embedder.
// NOTE(review): not used in the visible part of this file; presumably consumed by the
// settings update / indexing pipeline — confirm against callers.
pub enum EmbedderAction {
    /// Carries a [`WriteBackToDocuments`] payload.
    // NOTE(review): presumably writes stored vectors back into the documents — confirm.
    WriteBackToDocuments(WriteBackToDocuments),
    /// Run the given [`ReindexAction`] for the embedder.
    Reindex(ReindexAction),
}
|  |  | ||||||
/// Payload of [`EmbedderAction::WriteBackToDocuments`].
pub struct WriteBackToDocuments {
    /// Numeric identifier of the embedder.
    // NOTE(review): presumably the internal index-level embedder id — confirm.
    pub embedder_id: u8,
    /// Bitmap named after "user provided" vectors.
    // NOTE(review): presumably the ids of documents whose vectors were user provided — confirm.
    pub user_provided: RoaringBitmap,
}
|  |  | ||||||
|  | impl SettingsDiff { | ||||||
|  |     pub fn should_reindex(&self) -> bool { | ||||||
|  |         match self { | ||||||
|  |             SettingsDiff::Remove { .. } | SettingsDiff::Reindex { .. } => true, | ||||||
|  |             SettingsDiff::UpdateWithoutReindex { .. } => false, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self { | ||||||
|  |         match new { | ||||||
|  |             Setting::Set(new) => { | ||||||
|  |                 let EmbeddingSettings { | ||||||
|  |                     mut source, | ||||||
|  |                     mut model, | ||||||
|  |                     mut revision, | ||||||
|  |                     mut api_key, | ||||||
|  |                     mut dimensions, | ||||||
|  |                     mut document_template, | ||||||
|  |                     mut url, | ||||||
|  |                     mut query, | ||||||
|  |                     mut input_field, | ||||||
|  |                     mut path_to_embeddings, | ||||||
|  |                     mut embedding_object, | ||||||
|  |                     mut input_type, | ||||||
|  |                     mut distribution, | ||||||
|  |                 } = old; | ||||||
|  |  | ||||||
|  |                 let EmbeddingSettings { | ||||||
|  |                     source: new_source, | ||||||
|  |                     model: new_model, | ||||||
|  |                     revision: new_revision, | ||||||
|  |                     api_key: new_api_key, | ||||||
|  |                     dimensions: new_dimensions, | ||||||
|  |                     document_template: new_document_template, | ||||||
|  |                     url: new_url, | ||||||
|  |                     query: new_query, | ||||||
|  |                     input_field: new_input_field, | ||||||
|  |                     path_to_embeddings: new_path_to_embeddings, | ||||||
|  |                     embedding_object: new_embedding_object, | ||||||
|  |                     input_type: new_input_type, | ||||||
|  |                     distribution: new_distribution, | ||||||
|  |                 } = new; | ||||||
|  |  | ||||||
|  |                 let mut reindex_action = None; | ||||||
|  |  | ||||||
|  |                 // **Warning**: do not use short-circuiting || here, we want all these operations applied | ||||||
|  |                 if source.apply(new_source) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                     // when the source changes, we need to reapply the default settings for the new source | ||||||
|  |                     apply_default_for_source( | ||||||
|  |                         &source, | ||||||
|  |                         &mut model, | ||||||
|  |                         &mut revision, | ||||||
|  |                         &mut dimensions, | ||||||
|  |                         &mut url, | ||||||
|  |                         &mut query, | ||||||
|  |                         &mut input_field, | ||||||
|  |                         &mut path_to_embeddings, | ||||||
|  |                         &mut embedding_object, | ||||||
|  |                         &mut input_type, | ||||||
|  |                         &mut document_template, | ||||||
|  |                     ) | ||||||
|  |                 } | ||||||
|  |                 if model.apply(new_model) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if revision.apply(new_revision) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if dimensions.apply(new_dimensions) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if url.apply(new_url) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if query.apply(new_query) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if input_field.apply(new_input_field) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if path_to_embeddings.apply(new_path_to_embeddings) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if embedding_object.apply(new_embedding_object) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if input_type.apply(new_input_type) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if document_template.apply(new_document_template) { | ||||||
|  |                     ReindexAction::push_action( | ||||||
|  |                         &mut reindex_action, | ||||||
|  |                         ReindexAction::RegeneratePrompts, | ||||||
|  |                     ); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 distribution.apply(new_distribution); | ||||||
|  |                 api_key.apply(new_api_key); | ||||||
|  |  | ||||||
|  |                 let updated_settings = EmbeddingSettings { | ||||||
|  |                     source, | ||||||
|  |                     model, | ||||||
|  |                     revision, | ||||||
|  |                     api_key, | ||||||
|  |                     dimensions, | ||||||
|  |                     document_template, | ||||||
|  |                     url, | ||||||
|  |                     query, | ||||||
|  |                     input_field, | ||||||
|  |                     path_to_embeddings, | ||||||
|  |                     embedding_object, | ||||||
|  |                     input_type, | ||||||
|  |                     distribution, | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 match reindex_action { | ||||||
|  |                     Some(action) => Self::Reindex { action, updated_settings }, | ||||||
|  |                     None => Self::UpdateWithoutReindex { updated_settings }, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             Setting::Reset => Self::Remove, | ||||||
|  |             Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old }, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl ReindexAction { | ||||||
|  |     fn push_action(this: &mut Option<Self>, other: Self) { | ||||||
|  |         *this = match (*this, other) { | ||||||
|  |             (_, ReindexAction::FullReindex) => Some(ReindexAction::FullReindex), | ||||||
|  |             (Some(ReindexAction::FullReindex), _) => Some(ReindexAction::FullReindex), | ||||||
|  |             (_, ReindexAction::RegeneratePrompts) => Some(ReindexAction::RegeneratePrompts), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[allow(clippy::too_many_arguments)] // private function | ||||||
|  | fn apply_default_for_source( | ||||||
|  |     source: &Setting<EmbedderSource>, | ||||||
|  |     model: &mut Setting<String>, | ||||||
|  |     revision: &mut Setting<String>, | ||||||
|  |     dimensions: &mut Setting<usize>, | ||||||
|  |     url: &mut Setting<String>, | ||||||
|  |     query: &mut Setting<serde_json::Value>, | ||||||
|  |     input_field: &mut Setting<Vec<String>>, | ||||||
|  |     path_to_embeddings: &mut Setting<Vec<String>>, | ||||||
|  |     embedding_object: &mut Setting<Vec<String>>, | ||||||
|  |     input_type: &mut Setting<InputType>, | ||||||
|  |     document_template: &mut Setting<String>, | ||||||
|  | ) { | ||||||
|  |     match source { | ||||||
|  |         Setting::Set(EmbedderSource::HuggingFace) => { | ||||||
|  |             *model = Setting::Reset; | ||||||
|  |             *revision = Setting::Reset; | ||||||
|  |             *dimensions = Setting::NotSet; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::Ollama) => { | ||||||
|  |             *model = Setting::Reset; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::Reset; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { | ||||||
|  |             *model = Setting::Reset; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::NotSet; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::Rest) => { | ||||||
|  |             *model = Setting::NotSet; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::Reset; | ||||||
|  |             *url = Setting::Reset; | ||||||
|  |             *query = Setting::Reset; | ||||||
|  |             *input_field = Setting::Reset; | ||||||
|  |             *path_to_embeddings = Setting::Reset; | ||||||
|  |             *embedding_object = Setting::Reset; | ||||||
|  |             *input_type = Setting::Reset; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::UserProvided) => { | ||||||
|  |             *model = Setting::NotSet; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::Reset; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |             *document_template = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::NotSet => {} | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub fn check_set<T>( | pub fn check_set<T>( | ||||||
|     key: &Setting<T>, |     key: &Setting<T>, | ||||||
|     field: &'static str, |     field: &'static str, | ||||||
| @@ -210,66 +450,6 @@ impl EmbeddingSettings { | |||||||
|             *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) |             *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub(crate) fn apply_and_need_reindex( |  | ||||||
|         old: &mut Setting<EmbeddingSettings>, |  | ||||||
|         new: Setting<EmbeddingSettings>, |  | ||||||
|     ) -> bool { |  | ||||||
|         match (old, new) { |  | ||||||
|             ( |  | ||||||
|                 Setting::Set(EmbeddingSettings { |  | ||||||
|                     source: old_source, |  | ||||||
|                     model: old_model, |  | ||||||
|                     revision: old_revision, |  | ||||||
|                     api_key: old_api_key, |  | ||||||
|                     dimensions: old_dimensions, |  | ||||||
|                     document_template: old_document_template, |  | ||||||
|                     url: old_url, |  | ||||||
|                     query: old_query, |  | ||||||
|                     input_field: old_input_field, |  | ||||||
|                     path_to_embeddings: old_path_to_embeddings, |  | ||||||
|                     embedding_object: old_embedding_object, |  | ||||||
|                     input_type: old_input_type, |  | ||||||
|                     distribution: old_distribution, |  | ||||||
|                 }), |  | ||||||
|                 Setting::Set(EmbeddingSettings { |  | ||||||
|                     source: new_source, |  | ||||||
|                     model: new_model, |  | ||||||
|                     revision: new_revision, |  | ||||||
|                     api_key: new_api_key, |  | ||||||
|                     dimensions: new_dimensions, |  | ||||||
|                     document_template: new_document_template, |  | ||||||
|                     url: new_url, |  | ||||||
|                     query: new_query, |  | ||||||
|                     input_field: new_input_field, |  | ||||||
|                     path_to_embeddings: new_path_to_embeddings, |  | ||||||
|                     embedding_object: new_embedding_object, |  | ||||||
|                     input_type: new_input_type, |  | ||||||
|                     distribution: new_distribution, |  | ||||||
|                 }), |  | ||||||
|             ) => { |  | ||||||
|                 let mut needs_reindex = false; |  | ||||||
|  |  | ||||||
|                 needs_reindex |= old_source.apply(new_source); |  | ||||||
|                 needs_reindex |= old_model.apply(new_model); |  | ||||||
|                 needs_reindex |= old_revision.apply(new_revision); |  | ||||||
|                 needs_reindex |= old_dimensions.apply(new_dimensions); |  | ||||||
|                 needs_reindex |= old_document_template.apply(new_document_template); |  | ||||||
|                 needs_reindex |= old_url.apply(new_url); |  | ||||||
|                 needs_reindex |= old_query.apply(new_query); |  | ||||||
|                 needs_reindex |= old_input_field.apply(new_input_field); |  | ||||||
|                 needs_reindex |= old_path_to_embeddings.apply(new_path_to_embeddings); |  | ||||||
|                 needs_reindex |= old_embedding_object.apply(new_embedding_object); |  | ||||||
|                 needs_reindex |= old_input_type.apply(new_input_type); |  | ||||||
|  |  | ||||||
|                 old_distribution.apply(new_distribution); |  | ||||||
|                 old_api_key.apply(new_api_key); |  | ||||||
|                 needs_reindex |  | ||||||
|             } |  | ||||||
|             (Setting::Reset, Setting::Reset) | (_, Setting::NotSet) => false, |  | ||||||
|             _ => true, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] | #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user