mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Add composite embedder
This commit is contained in:
		
							
								
								
									
										280
									
								
								crates/milli/src/vector/composite.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										280
									
								
								crates/milli/src/vector/composite.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,280 @@
 | 
			
		||||
use std::time::Instant;
 | 
			
		||||
 | 
			
		||||
use arroy::Distance;
 | 
			
		||||
 | 
			
		||||
use super::error::CompositeEmbedderContainsHuggingFace;
 | 
			
		||||
use super::{
 | 
			
		||||
    hf, manual, ollama, openai, rest, DistributionShift, EmbedError, Embedding, NewEmbedderError,
 | 
			
		||||
};
 | 
			
		||||
use crate::ThreadPoolNoAbort;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub enum SubEmbedder {
 | 
			
		||||
    /// An embedder based on running local models, fetched from the Hugging Face Hub.
 | 
			
		||||
    HuggingFace(hf::Embedder),
 | 
			
		||||
    /// An embedder based on making embedding queries against the OpenAI API.
 | 
			
		||||
    OpenAi(openai::Embedder),
 | 
			
		||||
    /// An embedder based on the user providing the embeddings in the documents and queries.
 | 
			
		||||
    UserProvided(manual::Embedder),
 | 
			
		||||
    /// An embedder based on making embedding queries against an <https://ollama.com> embedding server.
 | 
			
		||||
    Ollama(ollama::Embedder),
 | 
			
		||||
    /// An embedder based on making embedding queries against a generic JSON/REST embedding server.
 | 
			
		||||
    Rest(rest::Embedder),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
 | 
			
		||||
pub enum SubEmbedderOptions {
 | 
			
		||||
    HuggingFace(hf::EmbedderOptions),
 | 
			
		||||
    OpenAi(openai::EmbedderOptions),
 | 
			
		||||
    Ollama(ollama::EmbedderOptions),
 | 
			
		||||
    UserProvided(manual::EmbedderOptions),
 | 
			
		||||
    Rest(rest::EmbedderOptions),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl SubEmbedderOptions {
 | 
			
		||||
    pub fn distribution(&self) -> Option<DistributionShift> {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedderOptions::HuggingFace(embedder_options) => embedder_options.distribution,
 | 
			
		||||
            SubEmbedderOptions::OpenAi(embedder_options) => embedder_options.distribution,
 | 
			
		||||
            SubEmbedderOptions::Ollama(embedder_options) => embedder_options.distribution,
 | 
			
		||||
            SubEmbedderOptions::UserProvided(embedder_options) => embedder_options.distribution,
 | 
			
		||||
            SubEmbedderOptions::Rest(embedder_options) => embedder_options.distribution,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub struct Embedder {
 | 
			
		||||
    pub(super) search: SubEmbedder,
 | 
			
		||||
    pub(super) index: SubEmbedder,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
 | 
			
		||||
pub struct EmbedderOptions {
 | 
			
		||||
    pub search: SubEmbedderOptions,
 | 
			
		||||
    pub index: SubEmbedderOptions,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Embedder {
 | 
			
		||||
    pub fn new(
 | 
			
		||||
        EmbedderOptions { search, index }: EmbedderOptions,
 | 
			
		||||
    ) -> Result<Self, NewEmbedderError> {
 | 
			
		||||
        let search = SubEmbedder::new(search)?;
 | 
			
		||||
        let index = SubEmbedder::new(index)?;
 | 
			
		||||
 | 
			
		||||
        // check dimensions
 | 
			
		||||
        if search.dimensions() != index.dimensions() {
 | 
			
		||||
            return Err(NewEmbedderError::composite_dimensions_mismatch(
 | 
			
		||||
                search.dimensions(),
 | 
			
		||||
                index.dimensions(),
 | 
			
		||||
            ));
 | 
			
		||||
        }
 | 
			
		||||
        // check similarity
 | 
			
		||||
        let search_embeddings = search
 | 
			
		||||
            .embed(
 | 
			
		||||
                vec![
 | 
			
		||||
                    "test".into(),
 | 
			
		||||
                    "a brave dog".into(),
 | 
			
		||||
                    "This is a sample text. It is meant to compare similarity.".into(),
 | 
			
		||||
                ],
 | 
			
		||||
                None,
 | 
			
		||||
            )
 | 
			
		||||
            .map_err(|error| NewEmbedderError::composite_test_embedding_failed(error, "search"))?;
 | 
			
		||||
 | 
			
		||||
        let index_embeddings = index
 | 
			
		||||
            .embed(
 | 
			
		||||
                vec![
 | 
			
		||||
                    "test".into(),
 | 
			
		||||
                    "a brave dog".into(),
 | 
			
		||||
                    "This is a sample text. It is meant to compare similarity.".into(),
 | 
			
		||||
                ],
 | 
			
		||||
                None,
 | 
			
		||||
            )
 | 
			
		||||
            .map_err(|error| {
 | 
			
		||||
                NewEmbedderError::composite_test_embedding_failed(error, "indexing")
 | 
			
		||||
            })?;
 | 
			
		||||
 | 
			
		||||
        let hint = configuration_hint(&search, &index);
 | 
			
		||||
 | 
			
		||||
        check_similarity(search_embeddings, index_embeddings, hint)?;
 | 
			
		||||
 | 
			
		||||
        Ok(Self { search, index })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Indicates the dimensions of a single embedding produced by the embedder.
 | 
			
		||||
    pub fn dimensions(&self) -> usize {
 | 
			
		||||
        // can use the dimensions of any embedder since they should match
 | 
			
		||||
        self.index.dimensions()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// An optional distribution used to apply an affine transformation to the similarity score of a document.
 | 
			
		||||
    pub fn distribution(&self) -> Option<DistributionShift> {
 | 
			
		||||
        // 3 cases here:
 | 
			
		||||
        // 1. distribution provided by user => use that one, which was stored in search
 | 
			
		||||
        // 2. no user-provided distribution, distribution in search embedder => use that one
 | 
			
		||||
        // 2. no user-provided distribution, no distribution in search embedder => use the distribution in indexing embedder
 | 
			
		||||
        self.search.distribution().or_else(|| self.index.distribution())
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl SubEmbedder {
 | 
			
		||||
    pub fn new(options: SubEmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
 | 
			
		||||
        Ok(match options {
 | 
			
		||||
            SubEmbedderOptions::HuggingFace(options) => {
 | 
			
		||||
                Self::HuggingFace(hf::Embedder::new(options)?)
 | 
			
		||||
            }
 | 
			
		||||
            SubEmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
 | 
			
		||||
            SubEmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
 | 
			
		||||
            SubEmbedderOptions::UserProvided(options) => {
 | 
			
		||||
                Self::UserProvided(manual::Embedder::new(options))
 | 
			
		||||
            }
 | 
			
		||||
            SubEmbedderOptions::Rest(options) => {
 | 
			
		||||
                Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
 | 
			
		||||
            }
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn embed(
 | 
			
		||||
        &self,
 | 
			
		||||
        texts: Vec<String>,
 | 
			
		||||
        deadline: Option<Instant>,
 | 
			
		||||
    ) -> std::result::Result<Vec<Embedding>, EmbedError> {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(embedder) => embedder.embed(texts),
 | 
			
		||||
            SubEmbedder::OpenAi(embedder) => embedder.embed(&texts, deadline),
 | 
			
		||||
            SubEmbedder::Ollama(embedder) => embedder.embed(&texts, deadline),
 | 
			
		||||
            SubEmbedder::UserProvided(embedder) => embedder.embed(&texts),
 | 
			
		||||
            SubEmbedder::Rest(embedder) => embedder.embed(texts, deadline),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Embed multiple chunks of texts.
 | 
			
		||||
    ///
 | 
			
		||||
    /// Each chunk is composed of one or multiple texts.
 | 
			
		||||
    pub fn embed_index(
 | 
			
		||||
        &self,
 | 
			
		||||
        text_chunks: Vec<Vec<String>>,
 | 
			
		||||
        threads: &ThreadPoolNoAbort,
 | 
			
		||||
    ) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
 | 
			
		||||
            SubEmbedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads),
 | 
			
		||||
            SubEmbedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads),
 | 
			
		||||
            SubEmbedder::UserProvided(embedder) => embedder.embed_index(text_chunks),
 | 
			
		||||
            SubEmbedder::Rest(embedder) => embedder.embed_index(text_chunks, threads),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Non-owning variant of [`Self::embed_index`].
 | 
			
		||||
    pub fn embed_index_ref(
 | 
			
		||||
        &self,
 | 
			
		||||
        texts: &[&str],
 | 
			
		||||
        threads: &ThreadPoolNoAbort,
 | 
			
		||||
    ) -> std::result::Result<Vec<Embedding>, EmbedError> {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
 | 
			
		||||
            SubEmbedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads),
 | 
			
		||||
            SubEmbedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads),
 | 
			
		||||
            SubEmbedder::UserProvided(embedder) => embedder.embed_index_ref(texts),
 | 
			
		||||
            SubEmbedder::Rest(embedder) => embedder.embed_index_ref(texts, threads),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Indicates the preferred number of chunks to pass to [`Self::embed_chunks`]
 | 
			
		||||
    pub fn chunk_count_hint(&self) -> usize {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(embedder) => embedder.chunk_count_hint(),
 | 
			
		||||
            SubEmbedder::OpenAi(embedder) => embedder.chunk_count_hint(),
 | 
			
		||||
            SubEmbedder::Ollama(embedder) => embedder.chunk_count_hint(),
 | 
			
		||||
            SubEmbedder::UserProvided(_) => 100,
 | 
			
		||||
            SubEmbedder::Rest(embedder) => embedder.chunk_count_hint(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Indicates the preferred number of texts in a single chunk passed to [`Self::embed`]
 | 
			
		||||
    pub fn prompt_count_in_chunk_hint(&self) -> usize {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(),
 | 
			
		||||
            SubEmbedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(),
 | 
			
		||||
            SubEmbedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(),
 | 
			
		||||
            SubEmbedder::UserProvided(_) => 1,
 | 
			
		||||
            SubEmbedder::Rest(embedder) => embedder.prompt_count_in_chunk_hint(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn uses_document_template(&self) -> bool {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(_)
 | 
			
		||||
            | SubEmbedder::OpenAi(_)
 | 
			
		||||
            | SubEmbedder::Ollama(_)
 | 
			
		||||
            | SubEmbedder::Rest(_) => true,
 | 
			
		||||
            SubEmbedder::UserProvided(_) => false,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Indicates the dimensions of a single embedding produced by the embedder.
 | 
			
		||||
    pub fn dimensions(&self) -> usize {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(embedder) => embedder.dimensions(),
 | 
			
		||||
            SubEmbedder::OpenAi(embedder) => embedder.dimensions(),
 | 
			
		||||
            SubEmbedder::Ollama(embedder) => embedder.dimensions(),
 | 
			
		||||
            SubEmbedder::UserProvided(embedder) => embedder.dimensions(),
 | 
			
		||||
            SubEmbedder::Rest(embedder) => embedder.dimensions(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// An optional distribution used to apply an affine transformation to the similarity score of a document.
 | 
			
		||||
    pub fn distribution(&self) -> Option<DistributionShift> {
 | 
			
		||||
        match self {
 | 
			
		||||
            SubEmbedder::HuggingFace(embedder) => embedder.distribution(),
 | 
			
		||||
            SubEmbedder::OpenAi(embedder) => embedder.distribution(),
 | 
			
		||||
            SubEmbedder::Ollama(embedder) => embedder.distribution(),
 | 
			
		||||
            SubEmbedder::UserProvided(embedder) => embedder.distribution(),
 | 
			
		||||
            SubEmbedder::Rest(embedder) => embedder.distribution(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn check_similarity(
 | 
			
		||||
    left: Vec<Embedding>,
 | 
			
		||||
    right: Vec<Embedding>,
 | 
			
		||||
    hint: CompositeEmbedderContainsHuggingFace,
 | 
			
		||||
) -> Result<(), NewEmbedderError> {
 | 
			
		||||
    if left.len() != right.len() {
 | 
			
		||||
        return Err(NewEmbedderError::composite_embedding_count_mismatch(left.len(), right.len()));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for (left, right) in left.into_iter().zip(right) {
 | 
			
		||||
        let left = arroy::internals::UnalignedVector::from_slice(&left);
 | 
			
		||||
        let right = arroy::internals::UnalignedVector::from_slice(&right);
 | 
			
		||||
        let left = arroy::internals::Leaf {
 | 
			
		||||
            header: arroy::distances::Cosine::new_header(&left),
 | 
			
		||||
            vector: left,
 | 
			
		||||
        };
 | 
			
		||||
        let right = arroy::internals::Leaf {
 | 
			
		||||
            header: arroy::distances::Cosine::new_header(&right),
 | 
			
		||||
            vector: right,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let distance = arroy::distances::Cosine::built_distance(&left, &right);
 | 
			
		||||
 | 
			
		||||
        if distance > super::MAX_COMPOSITE_DISTANCE {
 | 
			
		||||
            return Err(NewEmbedderError::composite_embedding_value_mismatch(distance, hint));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn configuration_hint(
 | 
			
		||||
    search: &SubEmbedder,
 | 
			
		||||
    index: &SubEmbedder,
 | 
			
		||||
) -> CompositeEmbedderContainsHuggingFace {
 | 
			
		||||
    match (search, index) {
 | 
			
		||||
        (SubEmbedder::HuggingFace(_), SubEmbedder::HuggingFace(_)) => {
 | 
			
		||||
            CompositeEmbedderContainsHuggingFace::Both
 | 
			
		||||
        }
 | 
			
		||||
        (SubEmbedder::HuggingFace(_), _) => CompositeEmbedderContainsHuggingFace::Search,
 | 
			
		||||
        (_, SubEmbedder::HuggingFace(_)) => CompositeEmbedderContainsHuggingFace::Indexing,
 | 
			
		||||
        _ => CompositeEmbedderContainsHuggingFace::None,
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -6,6 +6,7 @@ use hf_hub::api::sync::ApiError;
 | 
			
		||||
 | 
			
		||||
use super::parsed_vectors::ParsedVectorsDiff;
 | 
			
		||||
use super::rest::ConfigurationSource;
 | 
			
		||||
use super::MAX_COMPOSITE_DISTANCE;
 | 
			
		||||
use crate::error::FaultSource;
 | 
			
		||||
use crate::update::new::vector_document::VectorDocument;
 | 
			
		||||
use crate::{FieldDistribution, PanicCatched};
 | 
			
		||||
@@ -335,6 +336,77 @@ impl NewEmbedderError {
 | 
			
		||||
    pub(crate) fn ollama_unsupported_url(url: String) -> NewEmbedderError {
 | 
			
		||||
        Self { kind: NewEmbedderErrorKind::OllamaUnsupportedUrl(url), fault: FaultSource::User }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub(crate) fn composite_dimensions_mismatch(
 | 
			
		||||
        search_dimensions: usize,
 | 
			
		||||
        index_dimensions: usize,
 | 
			
		||||
    ) -> NewEmbedderError {
 | 
			
		||||
        Self {
 | 
			
		||||
            kind: NewEmbedderErrorKind::CompositeDimensionsMismatch {
 | 
			
		||||
                search_dimensions,
 | 
			
		||||
                index_dimensions,
 | 
			
		||||
            },
 | 
			
		||||
            fault: FaultSource::User,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub(crate) fn composite_test_embedding_failed(
 | 
			
		||||
        inner: EmbedError,
 | 
			
		||||
        failing_embedder: &'static str,
 | 
			
		||||
    ) -> NewEmbedderError {
 | 
			
		||||
        Self {
 | 
			
		||||
            kind: NewEmbedderErrorKind::CompositeTestEmbeddingFailed { inner, failing_embedder },
 | 
			
		||||
            fault: FaultSource::Runtime,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub(crate) fn composite_embedding_count_mismatch(
 | 
			
		||||
        search_count: usize,
 | 
			
		||||
        index_count: usize,
 | 
			
		||||
    ) -> NewEmbedderError {
 | 
			
		||||
        Self {
 | 
			
		||||
            kind: NewEmbedderErrorKind::CompositeEmbeddingCountMismatch {
 | 
			
		||||
                search_count,
 | 
			
		||||
                index_count,
 | 
			
		||||
            },
 | 
			
		||||
            fault: FaultSource::Runtime,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub(crate) fn composite_embedding_value_mismatch(
 | 
			
		||||
        distance: f32,
 | 
			
		||||
        hint: CompositeEmbedderContainsHuggingFace,
 | 
			
		||||
    ) -> NewEmbedderError {
 | 
			
		||||
        Self {
 | 
			
		||||
            kind: NewEmbedderErrorKind::CompositeEmbeddingValueMismatch { distance, hint },
 | 
			
		||||
            fault: FaultSource::User,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Clone, Copy)]
 | 
			
		||||
pub enum CompositeEmbedderContainsHuggingFace {
 | 
			
		||||
    Both,
 | 
			
		||||
    Search,
 | 
			
		||||
    Indexing,
 | 
			
		||||
    None,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl std::fmt::Display for CompositeEmbedderContainsHuggingFace {
 | 
			
		||||
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
			
		||||
        match self {
 | 
			
		||||
            CompositeEmbedderContainsHuggingFace::Both => f.write_str(
 | 
			
		||||
                "\n  - Make sure the `model`, `revision` and `pooling` of both embedders match.",
 | 
			
		||||
            ),
 | 
			
		||||
            CompositeEmbedderContainsHuggingFace::Search => f.write_str(
 | 
			
		||||
                "\n  - Consider trying a different `pooling` method for the search embedder.",
 | 
			
		||||
            ),
 | 
			
		||||
            CompositeEmbedderContainsHuggingFace::Indexing => f.write_str(
 | 
			
		||||
                "\n  - Consider trying a different `pooling` method for the indexing embedder.",
 | 
			
		||||
            ),
 | 
			
		||||
            CompositeEmbedderContainsHuggingFace::None => Ok(()),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, thiserror::Error)]
 | 
			
		||||
@@ -419,6 +491,14 @@ pub enum NewEmbedderErrorKind {
 | 
			
		||||
    CouldNotParseTemplate(String),
 | 
			
		||||
    #[error("unsupported Ollama URL.\n  - For `ollama` sources, the URL must end with `/api/embed` or `/api/embeddings`\n  - Got `{0}`")]
 | 
			
		||||
    OllamaUnsupportedUrl(String),
 | 
			
		||||
    #[error("error while generating test embeddings.\n  - the dimensions of embeddings produced at search time and at indexing time don't match.\n  - Search time dimensions: {search_dimensions}\n  - Indexing time dimensions: {index_dimensions}\n  - Note: Dimensions of embeddings produced by both embedders are required to match.")]
 | 
			
		||||
    CompositeDimensionsMismatch { search_dimensions: usize, index_dimensions: usize },
 | 
			
		||||
    #[error("error while generating test embeddings.\n  - could not generate test embedding with embedder at {failing_embedder} time.\n  - Embedding failed with {inner}")]
 | 
			
		||||
    CompositeTestEmbeddingFailed { inner: EmbedError, failing_embedder: &'static str },
 | 
			
		||||
    #[error("error while generating test embeddings.\n  - the number of generated embeddings differs.\n  - {search_count} embeddings for the search time embedder.\n  - {index_count} embeddings for the indexing time embedder.")]
 | 
			
		||||
    CompositeEmbeddingCountMismatch { search_count: usize, index_count: usize },
 | 
			
		||||
    #[error("error while generating test embeddings.\n  - the embeddings produced at search time and indexing time are not similar enough.\n  - angular distance {distance}\n  - Meilisearch requires a maximum distance of {MAX_COMPOSITE_DISTANCE}.\n  - Note: check that both embedders produce similar embeddings.{hint}")]
 | 
			
		||||
    CompositeEmbeddingValueMismatch { distance: f32, hint: CompositeEmbedderContainsHuggingFace },
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub struct PossibleEmbeddingMistakes {
 | 
			
		||||
 
 | 
			
		||||
@@ -15,6 +15,7 @@ use self::error::{EmbedError, NewEmbedderError};
 | 
			
		||||
use crate::prompt::{Prompt, PromptData};
 | 
			
		||||
use crate::ThreadPoolNoAbort;
 | 
			
		||||
 | 
			
		||||
pub mod composite;
 | 
			
		||||
pub mod error;
 | 
			
		||||
pub mod hf;
 | 
			
		||||
pub mod json_template;
 | 
			
		||||
@@ -31,6 +32,7 @@ pub use self::error::Error;
 | 
			
		||||
pub type Embedding = Vec<f32>;
 | 
			
		||||
 | 
			
		||||
pub const REQUEST_PARALLELISM: usize = 40;
 | 
			
		||||
pub const MAX_COMPOSITE_DISTANCE: f32 = 0.01;
 | 
			
		||||
 | 
			
		||||
pub struct ArroyWrapper {
 | 
			
		||||
    quantized: bool,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user