diff --git a/crates/milli/src/vector/db.rs b/crates/milli/src/vector/db.rs index d445b47c0..bce685e05 100644 --- a/crates/milli/src/vector/db.rs +++ b/crates/milli/src/vector/db.rs @@ -12,6 +12,12 @@ use crate::vector::settings::RemoveFragments; use crate::vector::EmbeddingConfig; use crate::{CboRoaringBitmapCodec, DocumentId, UserError}; +/// DB representation of an embedder configuration. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Deserialize, Serialize)] pub struct IndexEmbeddingConfig { pub name: String, diff --git a/crates/milli/src/vector/embedder/composite.rs b/crates/milli/src/vector/embedder/composite.rs index c34c31b41..dee6a35f1 100644 --- a/crates/milli/src/vector/embedder/composite.rs +++ b/crates/milli/src/vector/embedder/composite.rs @@ -24,6 +24,12 @@ pub enum SubEmbedder { Rest(rest::Embedder), } +/// Options of a subembedder, specific to each kind of embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub enum SubEmbedderOptions { HuggingFace(hf::EmbedderOptions), @@ -51,6 +57,12 @@ pub struct Embedder { pub(super) index: SubEmbedder, } +/// Options of a composite embedder, specific to each kind of embedder. 
+/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub search: SubEmbedderOptions, diff --git a/crates/milli/src/vector/embedder/hf.rs b/crates/milli/src/vector/embedder/hf.rs index 18f80dec1..13ec46cca 100644 --- a/crates/milli/src/vector/embedder/hf.rs +++ b/crates/milli/src/vector/embedder/hf.rs @@ -30,6 +30,12 @@ enum WeightSource { Pytorch, } +/// Inert embedder options for a hf embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub model: String, diff --git a/crates/milli/src/vector/embedder/manual.rs b/crates/milli/src/vector/embedder/manual.rs index 132aab0bf..1ca8c7205 100644 --- a/crates/milli/src/vector/embedder/manual.rs +++ b/crates/milli/src/vector/embedder/manual.rs @@ -7,6 +7,12 @@ pub struct Embedder { distribution: Option, } +/// Inert embedder options for a manual embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub dimensions: usize, diff --git a/crates/milli/src/vector/embedder/mod.rs b/crates/milli/src/vector/embedder/mod.rs index b7f7b1de4..c6d04059b 100644 --- a/crates/milli/src/vector/embedder/mod.rs +++ b/crates/milli/src/vector/embedder/mod.rs @@ -35,6 +35,11 @@ pub enum Embedder { } /// Configuration for an embedder. 
+/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] pub struct EmbeddingConfig { /// Options of the embedder, specific to each kind of embedder @@ -53,6 +58,11 @@ impl EmbeddingConfig { } /// Options of an embedder, specific to each kind of embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub enum EmbedderOptions { HuggingFace(hf::EmbedderOptions), diff --git a/crates/milli/src/vector/embedder/ollama.rs b/crates/milli/src/vector/embedder/ollama.rs index 6e2dc185f..311893bc4 100644 --- a/crates/milli/src/vector/embedder/ollama.rs +++ b/crates/milli/src/vector/embedder/ollama.rs @@ -16,6 +16,12 @@ pub struct Embedder { rest_embedder: RestEmbedder, } +/// Inert embedder options for an ollama embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub embedding_model: String, diff --git a/crates/milli/src/vector/embedder/openai.rs b/crates/milli/src/vector/embedder/openai.rs index 4fec228e4..03fc1dda3 100644 --- a/crates/milli/src/vector/embedder/openai.rs +++ b/crates/milli/src/vector/embedder/openai.rs @@ -13,6 +13,12 @@ use crate::vector::error::{EmbedError, EmbedErrorKind, NewEmbedderError}; use crate::vector::{Embedding, REQUEST_PARALLELISM}; use crate::ThreadPoolNoAbort; +/// Inert embedder options for an openai embedder. 
+/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub url: Option, diff --git a/crates/milli/src/vector/embedder/rest.rs b/crates/milli/src/vector/embedder/rest.rs index 7c0213c76..f1c8ad1e0 100644 --- a/crates/milli/src/vector/embedder/rest.rs +++ b/crates/milli/src/vector/embedder/rest.rs @@ -133,6 +133,12 @@ impl RequestData { } } +/// Inert embedder options for a rest embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB. Any modification should either go +/// through a dumpless upgrade or be backward-compatible. #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] pub struct EmbedderOptions { pub api_key: Option, @@ -140,7 +146,9 @@ pub struct EmbedderOptions { pub dimensions: Option, pub url: String, pub request: Value, + #[serde(default)] // backward compatibility pub search_fragments: BTreeMap, + #[serde(default)] // backward compatibility pub indexing_fragments: BTreeMap, pub response: Value, pub headers: BTreeMap,