mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Merge #4456
4456: Add Ollama as an embeddings provider r=dureuill a=jakobklemm # Pull Request ## Related issue [Related Discord Thread](https://discord.com/channels/1006923006964154428/1211977150316683305) ## What does this PR do? - Adds Ollama as a provider of Embeddings besides HuggingFace and OpenAI under the name `ollama` - Adds the environment variable `MEILI_OLLAMA_URL` to set the embeddings URL of an Ollama instance with a default value of `http://localhost:11434/api/embeddings` if no variable is set - Changes some of the structs and functions in `openai.rs` to be public so that they can be shared. - Added more error variants for Ollama specific errors - It uses the model `nomic-embed-text` as default, but any string value is allowed, however it won't automatically check if the model actually exists or is an embedding model Tested against Ollama version `v0.1.27` and the `nomic-embed-text` model. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Co-authored-by: Jakob Klemm <jakob@jeykey.net> Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
This commit is contained in:
		| @@ -604,6 +604,7 @@ fn embedder_analytics( | |||||||
|                 EmbedderSource::OpenAi => sources.insert("openAi"), |                 EmbedderSource::OpenAi => sources.insert("openAi"), | ||||||
|                 EmbedderSource::HuggingFace => sources.insert("huggingFace"), |                 EmbedderSource::HuggingFace => sources.insert("huggingFace"), | ||||||
|                 EmbedderSource::UserProvided => sources.insert("userProvided"), |                 EmbedderSource::UserProvided => sources.insert("userProvided"), | ||||||
|  |                 EmbedderSource::Ollama => sources.insert("ollama"), | ||||||
|             }; |             }; | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
|   | |||||||
| @@ -1178,6 +1178,13 @@ pub fn validate_embedding_settings( | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |         EmbedderSource::Ollama => { | ||||||
|  |             // Dimensions get inferred, only model name is required | ||||||
|  |             check_unset(&dimensions, "dimensions", inferred_source, name)?; | ||||||
|  |             check_set(&model, "model", inferred_source, name)?; | ||||||
|  |             check_unset(&api_key, "apiKey", inferred_source, name)?; | ||||||
|  |             check_unset(&revision, "revision", inferred_source, name)?; | ||||||
|  |         } | ||||||
|         EmbedderSource::HuggingFace => { |         EmbedderSource::HuggingFace => { | ||||||
|             check_unset(&api_key, "apiKey", inferred_source, name)?; |             check_unset(&api_key, "apiKey", inferred_source, name)?; | ||||||
|             check_unset(&dimensions, "dimensions", inferred_source, name)?; |             check_unset(&dimensions, "dimensions", inferred_source, name)?; | ||||||
|   | |||||||
| @@ -2,6 +2,7 @@ use std::path::PathBuf; | |||||||
|  |  | ||||||
| use hf_hub::api::sync::ApiError; | use hf_hub::api::sync::ApiError; | ||||||
|  |  | ||||||
|  | use super::ollama::OllamaError; | ||||||
| use crate::error::FaultSource; | use crate::error::FaultSource; | ||||||
| use crate::vector::openai::OpenAiError; | use crate::vector::openai::OpenAiError; | ||||||
|  |  | ||||||
| @@ -71,6 +72,17 @@ pub enum EmbedErrorKind { | |||||||
|     OpenAiRuntimeInit(std::io::Error), |     OpenAiRuntimeInit(std::io::Error), | ||||||
|     #[error("initializing web client for sending embedding requests failed: {0}")] |     #[error("initializing web client for sending embedding requests failed: {0}")] | ||||||
|     InitWebClient(reqwest::Error), |     InitWebClient(reqwest::Error), | ||||||
|  |     // Dedicated Ollama error kinds; these might have to be merged into one cohesive error type for all backends. | ||||||
|  |     #[error("unexpected response from Ollama: {0}")] | ||||||
|  |     OllamaUnexpected(reqwest::Error), | ||||||
|  |     #[error("sent too many requests to Ollama: {0}")] | ||||||
|  |     OllamaTooManyRequests(OllamaError), | ||||||
|  |     #[error("received internal error from Ollama: {0}")] | ||||||
|  |     OllamaInternalServerError(OllamaError), | ||||||
|  |     #[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0}")] | ||||||
|  |     OllamaModelNotFoundError(OllamaError), | ||||||
|  |     #[error("received unhandled HTTP status code {0} from Ollama")] | ||||||
|  |     OllamaUnhandledStatusCode(u16), | ||||||
| } | } | ||||||
|  |  | ||||||
| impl EmbedError { | impl EmbedError { | ||||||
| @@ -129,6 +141,26 @@ impl EmbedError { | |||||||
|     pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self { |     pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self { | ||||||
|         Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime } |         Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn ollama_unexpected(inner: reqwest::Error) -> EmbedError { | ||||||
|  |         Self { kind: EmbedErrorKind::OllamaUnexpected(inner), fault: FaultSource::Bug } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn ollama_model_not_found(inner: OllamaError) -> EmbedError { | ||||||
|  |         Self { kind: EmbedErrorKind::OllamaModelNotFoundError(inner), fault: FaultSource::User } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn ollama_too_many_requests(inner: OllamaError) -> EmbedError { | ||||||
|  |         Self { kind: EmbedErrorKind::OllamaTooManyRequests(inner), fault: FaultSource::Runtime } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn ollama_internal_server_error(inner: OllamaError) -> EmbedError { | ||||||
|  |         Self { kind: EmbedErrorKind::OllamaInternalServerError(inner), fault: FaultSource::Runtime } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn ollama_unhandled_status_code(code: u16) -> EmbedError { | ||||||
|  |         Self { kind: EmbedErrorKind::OllamaUnhandledStatusCode(code), fault: FaultSource::Bug } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, thiserror::Error)] | #[derive(Debug, thiserror::Error)] | ||||||
| @@ -195,6 +227,13 @@ impl NewEmbedderError { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn ollama_could_not_determine_dimension(inner: EmbedError) -> NewEmbedderError { | ||||||
|  |         Self { | ||||||
|  |             kind: NewEmbedderErrorKind::CouldNotDetermineDimension(inner), | ||||||
|  |             fault: FaultSource::User, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self { |     pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self { | ||||||
|         Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User } |         Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User } | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -10,6 +10,8 @@ pub mod manual; | |||||||
| pub mod openai; | pub mod openai; | ||||||
| pub mod settings; | pub mod settings; | ||||||
|  |  | ||||||
|  | pub mod ollama; | ||||||
|  |  | ||||||
| pub use self::error::Error; | pub use self::error::Error; | ||||||
|  |  | ||||||
| pub type Embedding = Vec<f32>; | pub type Embedding = Vec<f32>; | ||||||
| @@ -76,6 +78,7 @@ pub enum Embedder { | |||||||
|     HuggingFace(hf::Embedder), |     HuggingFace(hf::Embedder), | ||||||
|     OpenAi(openai::Embedder), |     OpenAi(openai::Embedder), | ||||||
|     UserProvided(manual::Embedder), |     UserProvided(manual::Embedder), | ||||||
|  |     Ollama(ollama::Embedder), | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] | #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] | ||||||
| @@ -127,6 +130,7 @@ impl IntoIterator for EmbeddingConfigs { | |||||||
| pub enum EmbedderOptions { | pub enum EmbedderOptions { | ||||||
|     HuggingFace(hf::EmbedderOptions), |     HuggingFace(hf::EmbedderOptions), | ||||||
|     OpenAi(openai::EmbedderOptions), |     OpenAi(openai::EmbedderOptions), | ||||||
|  |     Ollama(ollama::EmbedderOptions), | ||||||
|     UserProvided(manual::EmbedderOptions), |     UserProvided(manual::EmbedderOptions), | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -144,6 +148,10 @@ impl EmbedderOptions { | |||||||
|     pub fn openai(api_key: Option<String>) -> Self { |     pub fn openai(api_key: Option<String>) -> Self { | ||||||
|         Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key)) |         Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key)) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn ollama() -> Self { | ||||||
|  |         Self::Ollama(ollama::EmbedderOptions::with_default_model()) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Embedder { | impl Embedder { | ||||||
| @@ -151,6 +159,7 @@ impl Embedder { | |||||||
|         Ok(match options { |         Ok(match options { | ||||||
|             EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?), |             EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?), | ||||||
|             EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?), |             EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?), | ||||||
|  |             EmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?), | ||||||
|             EmbedderOptions::UserProvided(options) => { |             EmbedderOptions::UserProvided(options) => { | ||||||
|                 Self::UserProvided(manual::Embedder::new(options)) |                 Self::UserProvided(manual::Embedder::new(options)) | ||||||
|             } |             } | ||||||
| @@ -167,6 +176,10 @@ impl Embedder { | |||||||
|                 let client = embedder.new_client()?; |                 let client = embedder.new_client()?; | ||||||
|                 embedder.embed(texts, &client).await |                 embedder.embed(texts, &client).await | ||||||
|             } |             } | ||||||
|  |             Embedder::Ollama(embedder) => { | ||||||
|  |                 let client = embedder.new_client()?; | ||||||
|  |                 embedder.embed(texts, &client).await | ||||||
|  |             } | ||||||
|             Embedder::UserProvided(embedder) => embedder.embed(texts), |             Embedder::UserProvided(embedder) => embedder.embed(texts), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -181,6 +194,7 @@ impl Embedder { | |||||||
|         match self { |         match self { | ||||||
|             Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks), |             Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks), | ||||||
|             Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks), |             Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks), | ||||||
|  |             Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks), | ||||||
|             Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks), |             Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -189,6 +203,7 @@ impl Embedder { | |||||||
|         match self { |         match self { | ||||||
|             Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(), |             Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(), | ||||||
|             Embedder::OpenAi(embedder) => embedder.chunk_count_hint(), |             Embedder::OpenAi(embedder) => embedder.chunk_count_hint(), | ||||||
|  |             Embedder::Ollama(embedder) => embedder.chunk_count_hint(), | ||||||
|             Embedder::UserProvided(_) => 1, |             Embedder::UserProvided(_) => 1, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -197,6 +212,7 @@ impl Embedder { | |||||||
|         match self { |         match self { | ||||||
|             Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(), |             Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(), | ||||||
|             Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(), |             Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(), | ||||||
|  |             Embedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(), | ||||||
|             Embedder::UserProvided(_) => 1, |             Embedder::UserProvided(_) => 1, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -205,6 +221,7 @@ impl Embedder { | |||||||
|         match self { |         match self { | ||||||
|             Embedder::HuggingFace(embedder) => embedder.dimensions(), |             Embedder::HuggingFace(embedder) => embedder.dimensions(), | ||||||
|             Embedder::OpenAi(embedder) => embedder.dimensions(), |             Embedder::OpenAi(embedder) => embedder.dimensions(), | ||||||
|  |             Embedder::Ollama(embedder) => embedder.dimensions(), | ||||||
|             Embedder::UserProvided(embedder) => embedder.dimensions(), |             Embedder::UserProvided(embedder) => embedder.dimensions(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -213,6 +230,7 @@ impl Embedder { | |||||||
|         match self { |         match self { | ||||||
|             Embedder::HuggingFace(embedder) => embedder.distribution(), |             Embedder::HuggingFace(embedder) => embedder.distribution(), | ||||||
|             Embedder::OpenAi(embedder) => embedder.distribution(), |             Embedder::OpenAi(embedder) => embedder.distribution(), | ||||||
|  |             Embedder::Ollama(embedder) => embedder.distribution(), | ||||||
|             Embedder::UserProvided(_embedder) => None, |             Embedder::UserProvided(_embedder) => None, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|   | |||||||
							
								
								
									
										307
									
								
								milli/src/vector/ollama.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										307
									
								
								milli/src/vector/ollama.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,307 @@ | |||||||
|  | // Copied from "openai.rs" with the sections I actually understand changed for Ollama. | ||||||
|  | // The common components of the Ollama and OpenAI interfaces might need to be extracted. | ||||||
|  |  | ||||||
|  | use std::fmt::Display; | ||||||
|  |  | ||||||
|  | use reqwest::StatusCode; | ||||||
|  |  | ||||||
|  | use super::error::{EmbedError, NewEmbedderError}; | ||||||
|  | use super::openai::Retry; | ||||||
|  | use super::{DistributionShift, Embedding, Embeddings}; | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub struct Embedder { | ||||||
|  |     headers: reqwest::header::HeaderMap, | ||||||
|  |     options: EmbedderOptions, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] | ||||||
|  | pub struct EmbedderOptions { | ||||||
|  |     pub embedding_model: EmbeddingModel, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive( | ||||||
|  |     Debug, Clone, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize, deserr::Deserr, | ||||||
|  | )] | ||||||
|  | #[deserr(deny_unknown_fields)] | ||||||
|  | pub struct EmbeddingModel { | ||||||
|  |     name: String, | ||||||
|  |     dimensions: usize, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, serde::Serialize)] | ||||||
|  | struct OllamaRequest<'a> { | ||||||
|  |     model: &'a str, | ||||||
|  |     prompt: &'a str, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, serde::Deserialize)] | ||||||
|  | struct OllamaResponse { | ||||||
|  |     embedding: Embedding, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, serde::Deserialize)] | ||||||
|  | pub struct OllamaError { | ||||||
|  |     error: String, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl EmbeddingModel { | ||||||
|  |     pub fn max_token(&self) -> usize { | ||||||
|  |         // this might not be the same for all models | ||||||
|  |         8192 | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn default_dimensions(&self) -> usize { | ||||||
|  |         // Dimensions for nomic-embed-text | ||||||
|  |         768 | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn name(&self) -> String { | ||||||
|  |         self.name.clone() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn from_name(name: &str) -> Self { | ||||||
|  |         Self { name: name.to_string(), dimensions: 0 } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn supports_overriding_dimensions(&self) -> bool { | ||||||
|  |         false | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Default for EmbeddingModel { | ||||||
|  |     fn default() -> Self { | ||||||
|  |         Self { name: "nomic-embed-text".to_string(), dimensions: 0 } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl EmbedderOptions { | ||||||
|  |     pub fn with_default_model() -> Self { | ||||||
|  |         Self { embedding_model: Default::default() } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn with_embedding_model(embedding_model: EmbeddingModel) -> Self { | ||||||
|  |         Self { embedding_model } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Embedder { | ||||||
|  |     pub fn new_client(&self) -> Result<reqwest::Client, EmbedError> { | ||||||
|  |         reqwest::ClientBuilder::new() | ||||||
|  |             .default_headers(self.headers.clone()) | ||||||
|  |             .build() | ||||||
|  |             .map_err(EmbedError::openai_initialize_web_client) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> { | ||||||
|  |         let mut headers = reqwest::header::HeaderMap::new(); | ||||||
|  |         headers.insert( | ||||||
|  |             reqwest::header::CONTENT_TYPE, | ||||||
|  |             reqwest::header::HeaderValue::from_static("application/json"), | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         let mut embedder = Self { options, headers }; | ||||||
|  |  | ||||||
|  |         let rt = tokio::runtime::Builder::new_current_thread() | ||||||
|  |             .enable_io() | ||||||
|  |             .enable_time() | ||||||
|  |             .build() | ||||||
|  |             .map_err(EmbedError::openai_runtime_init) | ||||||
|  |             .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; | ||||||
|  |  | ||||||
|  |         // Get dimensions from Ollama | ||||||
|  |         let request = | ||||||
|  |             OllamaRequest { model: &embedder.options.embedding_model.name(), prompt: "test" }; | ||||||
|  |         // TODO: Refactor into shared error type | ||||||
|  |         let client = embedder | ||||||
|  |             .new_client() | ||||||
|  |             .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; | ||||||
|  |  | ||||||
|  |         rt.block_on(async move { | ||||||
|  |             let response = client | ||||||
|  |                 .post(get_ollama_path()) | ||||||
|  |                 .json(&request) | ||||||
|  |                 .send() | ||||||
|  |                 .await | ||||||
|  |                 .map_err(EmbedError::ollama_unexpected) | ||||||
|  |                 .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; | ||||||
|  |  | ||||||
|  |             // Process error in case model not found | ||||||
|  |             let response = Self::check_response(response).await.map_err(|_err| { | ||||||
|  |                 let e = EmbedError::ollama_model_not_found(OllamaError { | ||||||
|  |                     error: format!("model: {}", embedder.options.embedding_model.name()), | ||||||
|  |                 }); | ||||||
|  |                 NewEmbedderError::ollama_could_not_determine_dimension(e) | ||||||
|  |             })?; | ||||||
|  |  | ||||||
|  |             let response: OllamaResponse = response | ||||||
|  |                 .json() | ||||||
|  |                 .await | ||||||
|  |                 .map_err(EmbedError::ollama_unexpected) | ||||||
|  |                 .map_err(NewEmbedderError::ollama_could_not_determine_dimension)?; | ||||||
|  |  | ||||||
|  |             let embedding = Embeddings::from_single_embedding(response.embedding); | ||||||
|  |  | ||||||
|  |             embedder.options.embedding_model.dimensions = embedding.dimension(); | ||||||
|  |  | ||||||
|  |             tracing::info!( | ||||||
|  |                 "ollama model {} with dimensionality {} added", | ||||||
|  |                 embedder.options.embedding_model.name(), | ||||||
|  |                 embedding.dimension() | ||||||
|  |             ); | ||||||
|  |  | ||||||
|  |             Ok(embedder) | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     async fn check_response(response: reqwest::Response) -> Result<reqwest::Response, Retry> { | ||||||
|  |         if !response.status().is_success() { | ||||||
|  |             // Not the same number of possible error cases covered as with OpenAI. | ||||||
|  |             match response.status() { | ||||||
|  |                 StatusCode::TOO_MANY_REQUESTS => { | ||||||
|  |                     let error_response: OllamaError = response | ||||||
|  |                         .json() | ||||||
|  |                         .await | ||||||
|  |                         .map_err(EmbedError::ollama_unexpected) | ||||||
|  |                         .map_err(Retry::retry_later)?; | ||||||
|  |  | ||||||
|  |                     return Err(Retry::rate_limited(EmbedError::ollama_too_many_requests( | ||||||
|  |                         OllamaError { error: error_response.error }, | ||||||
|  |                     ))); | ||||||
|  |                 } | ||||||
|  |                 StatusCode::SERVICE_UNAVAILABLE => { | ||||||
|  |                     let error_response: OllamaError = response | ||||||
|  |                         .json() | ||||||
|  |                         .await | ||||||
|  |                         .map_err(EmbedError::ollama_unexpected) | ||||||
|  |                         .map_err(Retry::retry_later)?; | ||||||
|  |                     return Err(Retry::retry_later(EmbedError::ollama_internal_server_error( | ||||||
|  |                         OllamaError { error: error_response.error }, | ||||||
|  |                     ))); | ||||||
|  |                 } | ||||||
|  |                 StatusCode::NOT_FOUND => { | ||||||
|  |                     let error_response: OllamaError = response | ||||||
|  |                         .json() | ||||||
|  |                         .await | ||||||
|  |                         .map_err(EmbedError::ollama_unexpected) | ||||||
|  |                         .map_err(Retry::give_up)?; | ||||||
|  |  | ||||||
|  |                     return Err(Retry::give_up(EmbedError::ollama_model_not_found(OllamaError { | ||||||
|  |                         error: error_response.error, | ||||||
|  |                     }))); | ||||||
|  |                 } | ||||||
|  |                 code => { | ||||||
|  |                     return Err(Retry::give_up(EmbedError::ollama_unhandled_status_code( | ||||||
|  |                         code.as_u16(), | ||||||
|  |                     ))); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         Ok(response) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub async fn embed( | ||||||
|  |         &self, | ||||||
|  |         texts: Vec<String>, | ||||||
|  |         client: &reqwest::Client, | ||||||
|  |     ) -> Result<Vec<Embeddings<f32>>, EmbedError> { | ||||||
|  |         // Ollama only embeds one document at a time. | ||||||
|  |         let mut results = Vec::with_capacity(texts.len()); | ||||||
|  |  | ||||||
|  |         // The retry loop is nested inside the loop over texts; this nesting might have to be switched around. | ||||||
|  |         for text in texts { | ||||||
|  |             // Retries copied from openai.rs | ||||||
|  |             for attempt in 0..7 { | ||||||
|  |                 let retry_duration = match self.try_embed(&text, client).await { | ||||||
|  |                     Ok(result) => { | ||||||
|  |                         results.push(result); | ||||||
|  |                         break; | ||||||
|  |                     } | ||||||
|  |                     Err(retry) => { | ||||||
|  |                         tracing::warn!("Failed: {}", retry.error); | ||||||
|  |                         retry.into_duration(attempt) | ||||||
|  |                     } | ||||||
|  |                 }?; | ||||||
|  |                 tracing::warn!( | ||||||
|  |                     "Attempt #{}, retrying after {}ms.", | ||||||
|  |                     attempt, | ||||||
|  |                     retry_duration.as_millis() | ||||||
|  |                 ); | ||||||
|  |                 tokio::time::sleep(retry_duration).await; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(results) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     async fn try_embed( | ||||||
|  |         &self, | ||||||
|  |         text: &str, | ||||||
|  |         client: &reqwest::Client, | ||||||
|  |     ) -> Result<Embeddings<f32>, Retry> { | ||||||
|  |         let request = OllamaRequest { model: &self.options.embedding_model.name(), prompt: text }; | ||||||
|  |         let response = client | ||||||
|  |             .post(get_ollama_path()) | ||||||
|  |             .json(&request) | ||||||
|  |             .send() | ||||||
|  |             .await | ||||||
|  |             .map_err(EmbedError::openai_network) | ||||||
|  |             .map_err(Retry::retry_later)?; | ||||||
|  |  | ||||||
|  |         let response = Self::check_response(response).await?; | ||||||
|  |  | ||||||
|  |         let response: OllamaResponse = response | ||||||
|  |             .json() | ||||||
|  |             .await | ||||||
|  |             .map_err(EmbedError::openai_unexpected) | ||||||
|  |             .map_err(Retry::retry_later)?; | ||||||
|  |  | ||||||
|  |         tracing::trace!("response: {:?}", response.embedding); | ||||||
|  |  | ||||||
|  |         let embedding = Embeddings::from_single_embedding(response.embedding); | ||||||
|  |         Ok(embedding) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn embed_chunks( | ||||||
|  |         &self, | ||||||
|  |         text_chunks: Vec<Vec<String>>, | ||||||
|  |     ) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> { | ||||||
|  |         let rt = tokio::runtime::Builder::new_current_thread() | ||||||
|  |             .enable_io() | ||||||
|  |             .enable_time() | ||||||
|  |             .build() | ||||||
|  |             .map_err(EmbedError::openai_runtime_init)?; | ||||||
|  |         let client = self.new_client()?; | ||||||
|  |         rt.block_on(futures::future::try_join_all( | ||||||
|  |             text_chunks.into_iter().map(|prompts| self.embed(prompts, &client)), | ||||||
|  |         )) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Defaults copied from openai.rs | ||||||
|  |     pub fn chunk_count_hint(&self) -> usize { | ||||||
|  |         10 | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn prompt_count_in_chunk_hint(&self) -> usize { | ||||||
|  |         10 | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn dimensions(&self) -> usize { | ||||||
|  |         self.options.embedding_model.dimensions | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn distribution(&self) -> Option<DistributionShift> { | ||||||
|  |         None | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Display for OllamaError { | ||||||
|  |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||||
|  |         write!(f, "{}", self.error) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn get_ollama_path() -> String { | ||||||
|  |     // Important: a hostname alone is not enough; the value has to be the entire URL of the embeddings endpoint. | ||||||
|  |     std::env::var("MEILI_OLLAMA_URL").unwrap_or("http://localhost:11434/api/embeddings".to_string()) | ||||||
|  | } | ||||||
| @@ -419,12 +419,12 @@ impl Embedder { | |||||||
|  |  | ||||||
| // retrying in case of failure | // retrying in case of failure | ||||||
|  |  | ||||||
| struct Retry { | pub struct Retry { | ||||||
|     error: EmbedError, |     pub error: EmbedError, | ||||||
|     strategy: RetryStrategy, |     strategy: RetryStrategy, | ||||||
| } | } | ||||||
|  |  | ||||||
| enum RetryStrategy { | pub enum RetryStrategy { | ||||||
|     GiveUp, |     GiveUp, | ||||||
|     Retry, |     Retry, | ||||||
|     RetryTokenized, |     RetryTokenized, | ||||||
| @@ -432,23 +432,23 @@ enum RetryStrategy { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl Retry { | impl Retry { | ||||||
|     fn give_up(error: EmbedError) -> Self { |     pub fn give_up(error: EmbedError) -> Self { | ||||||
|         Self { error, strategy: RetryStrategy::GiveUp } |         Self { error, strategy: RetryStrategy::GiveUp } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn retry_later(error: EmbedError) -> Self { |     pub fn retry_later(error: EmbedError) -> Self { | ||||||
|         Self { error, strategy: RetryStrategy::Retry } |         Self { error, strategy: RetryStrategy::Retry } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn retry_tokenized(error: EmbedError) -> Self { |     pub fn retry_tokenized(error: EmbedError) -> Self { | ||||||
|         Self { error, strategy: RetryStrategy::RetryTokenized } |         Self { error, strategy: RetryStrategy::RetryTokenized } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn rate_limited(error: EmbedError) -> Self { |     pub fn rate_limited(error: EmbedError) -> Self { | ||||||
|         Self { error, strategy: RetryStrategy::RetryAfterRateLimit } |         Self { error, strategy: RetryStrategy::RetryAfterRateLimit } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> { |     pub fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> { | ||||||
|         match self.strategy { |         match self.strategy { | ||||||
|             RetryStrategy::GiveUp => Err(self.error), |             RetryStrategy::GiveUp => Err(self.error), | ||||||
|             RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))), |             RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))), | ||||||
| @@ -459,11 +459,11 @@ impl Retry { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn must_tokenize(&self) -> bool { |     pub fn must_tokenize(&self) -> bool { | ||||||
|         matches!(self.strategy, RetryStrategy::RetryTokenized) |         matches!(self.strategy, RetryStrategy::RetryTokenized) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn into_error(self) -> EmbedError { |     pub fn into_error(self) -> EmbedError { | ||||||
|         self.error |         self.error | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| use deserr::Deserr; | use deserr::Deserr; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| use super::openai; | use super::{ollama, openai}; | ||||||
| use crate::prompt::PromptData; | use crate::prompt::PromptData; | ||||||
| use crate::update::Setting; | use crate::update::Setting; | ||||||
| use crate::vector::EmbeddingConfig; | use crate::vector::EmbeddingConfig; | ||||||
| @@ -80,11 +80,15 @@ impl EmbeddingSettings { | |||||||
|             Self::SOURCE => { |             Self::SOURCE => { | ||||||
|                 &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided] |                 &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided] | ||||||
|             } |             } | ||||||
|             Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], |             Self::MODEL => { | ||||||
|  |                 &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] | ||||||
|  |             } | ||||||
|             Self::REVISION => &[EmbedderSource::HuggingFace], |             Self::REVISION => &[EmbedderSource::HuggingFace], | ||||||
|             Self::API_KEY => &[EmbedderSource::OpenAi], |             Self::API_KEY => &[EmbedderSource::OpenAi], | ||||||
|             Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided], |             Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided], | ||||||
|             Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], |             Self::DOCUMENT_TEMPLATE => { | ||||||
|  |                 &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] | ||||||
|  |             } | ||||||
|             _other => unreachable!("unknown field"), |             _other => unreachable!("unknown field"), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -101,6 +105,7 @@ impl EmbeddingSettings { | |||||||
|             EmbedderSource::HuggingFace => { |             EmbedderSource::HuggingFace => { | ||||||
|                 &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] |                 &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] | ||||||
|             } |             } | ||||||
|  |             EmbedderSource::Ollama => &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE], | ||||||
|             EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS], |             EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS], | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -134,6 +139,7 @@ pub enum EmbedderSource { | |||||||
|     #[default] |     #[default] | ||||||
|     OpenAi, |     OpenAi, | ||||||
|     HuggingFace, |     HuggingFace, | ||||||
|  |     Ollama, | ||||||
|     UserProvided, |     UserProvided, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -143,6 +149,7 @@ impl std::fmt::Display for EmbedderSource { | |||||||
|             EmbedderSource::OpenAi => "openAi", |             EmbedderSource::OpenAi => "openAi", | ||||||
|             EmbedderSource::HuggingFace => "huggingFace", |             EmbedderSource::HuggingFace => "huggingFace", | ||||||
|             EmbedderSource::UserProvided => "userProvided", |             EmbedderSource::UserProvided => "userProvided", | ||||||
|  |             EmbedderSource::Ollama => "ollama", | ||||||
|         }; |         }; | ||||||
|         f.write_str(s) |         f.write_str(s) | ||||||
|     } |     } | ||||||
| @@ -192,7 +199,15 @@ impl From<EmbeddingConfig> for EmbeddingSettings { | |||||||
|                 model: Setting::Set(options.embedding_model.name().to_owned()), |                 model: Setting::Set(options.embedding_model.name().to_owned()), | ||||||
|                 revision: Setting::NotSet, |                 revision: Setting::NotSet, | ||||||
|                 api_key: options.api_key.map(Setting::Set).unwrap_or_default(), |                 api_key: options.api_key.map(Setting::Set).unwrap_or_default(), | ||||||
|                 dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(), |                 dimensions: Setting::Set(options.dimensions.unwrap_or_default()), | ||||||
|  |                 document_template: Setting::Set(prompt.template), | ||||||
|  |             }, | ||||||
|  |             super::EmbedderOptions::Ollama(options) => Self { | ||||||
|  |                 source: Setting::Set(EmbedderSource::Ollama), | ||||||
|  |                 model: Setting::Set(options.embedding_model.name().to_owned()), | ||||||
|  |                 revision: Setting::NotSet, | ||||||
|  |                 api_key: Setting::NotSet, | ||||||
|  |                 dimensions: Setting::NotSet, | ||||||
|                 document_template: Setting::Set(prompt.template), |                 document_template: Setting::Set(prompt.template), | ||||||
|             }, |             }, | ||||||
|             super::EmbedderOptions::UserProvided(options) => Self { |             super::EmbedderOptions::UserProvided(options) => Self { | ||||||
| @@ -229,6 +244,14 @@ impl From<EmbeddingSettings> for EmbeddingConfig { | |||||||
|                     } |                     } | ||||||
|                     this.embedder_options = super::EmbedderOptions::OpenAi(options); |                     this.embedder_options = super::EmbedderOptions::OpenAi(options); | ||||||
|                 } |                 } | ||||||
|  |                 EmbedderSource::Ollama => { | ||||||
|  |                     let mut options: ollama::EmbedderOptions = | ||||||
|  |                         super::ollama::EmbedderOptions::with_default_model(); | ||||||
|  |                     if let Some(model) = model.set() { | ||||||
|  |                         options.embedding_model = super::ollama::EmbeddingModel::from_name(&model); | ||||||
|  |                     } | ||||||
|  |                     this.embedder_options = super::EmbedderOptions::Ollama(options); | ||||||
|  |                 } | ||||||
|                 EmbedderSource::HuggingFace => { |                 EmbedderSource::HuggingFace => { | ||||||
|                     let mut options = super::hf::EmbedderOptions::default(); |                     let mut options = super::hf::EmbedderOptions::default(); | ||||||
|                     if let Some(model) = model.set() { |                     if let Some(model) = model.set() { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user