From ce136ec0c14effd3e2ed6c97e7ee5985dea8d0f2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 10 Sep 2025 09:43:39 +0200 Subject: [PATCH 1/2] Support missing `search_fragments` and `indexing_fragments` --- crates/milli/src/vector/embedder/rest.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/milli/src/vector/embedder/rest.rs b/crates/milli/src/vector/embedder/rest.rs index 7c0213c76..54e56faea 100644 --- a/crates/milli/src/vector/embedder/rest.rs +++ b/crates/milli/src/vector/embedder/rest.rs @@ -140,7 +140,9 @@ pub struct EmbedderOptions { pub dimensions: Option, pub url: String, pub request: Value, + #[serde(default)] // backward compatibility pub search_fragments: BTreeMap, + #[serde(default)] // backward compatibility pub indexing_fragments: BTreeMap, pub response: Value, pub headers: BTreeMap, From fd795c513bae816fd8129d498736ee3afedd5ba6 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 10 Sep 2025 09:44:41 +0200 Subject: [PATCH 2/2] add documentation warnings --- crates/milli/src/vector/db.rs | 6 ++++++ crates/milli/src/vector/embedder/composite.rs | 12 ++++++++++++ crates/milli/src/vector/embedder/hf.rs | 6 ++++++ crates/milli/src/vector/embedder/manual.rs | 6 ++++++ crates/milli/src/vector/embedder/mod.rs | 10 ++++++++++ crates/milli/src/vector/embedder/ollama.rs | 6 ++++++ crates/milli/src/vector/embedder/openai.rs | 6 ++++++ crates/milli/src/vector/embedder/rest.rs | 6 ++++++ 8 files changed, 58 insertions(+) diff --git a/crates/milli/src/vector/db.rs b/crates/milli/src/vector/db.rs index d445b47c0..bce685e05 100644 --- a/crates/milli/src/vector/db.rs +++ b/crates/milli/src/vector/db.rs @@ -12,6 +12,12 @@ use crate::vector::settings::RemoveFragments; use crate::vector::EmbeddingConfig; use crate::{CboRoaringBitmapCodec, DocumentId, UserError}; +/// DB representation of an embedder configuration. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Deserialize, Serialize)] pub struct IndexEmbeddingConfig { pub name: String, diff --git a/crates/milli/src/vector/embedder/composite.rs b/crates/milli/src/vector/embedder/composite.rs index c34c31b41..dee6a35f1 100644 --- a/crates/milli/src/vector/embedder/composite.rs +++ b/crates/milli/src/vector/embedder/composite.rs @@ -24,6 +24,12 @@ pub enum SubEmbedder { Rest(rest::Embedder), } +/// Options of a subembedder, specific to each kind of embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub enum SubEmbedderOptions { HuggingFace(hf::EmbedderOptions), @@ -51,6 +57,12 @@ pub struct Embedder { pub(super) index: SubEmbedder, } +/// Options of a composite embedder, specific to each kind of embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub search: SubEmbedderOptions, diff --git a/crates/milli/src/vector/embedder/hf.rs b/crates/milli/src/vector/embedder/hf.rs index 18f80dec1..13ec46cca 100644 --- a/crates/milli/src/vector/embedder/hf.rs +++ b/crates/milli/src/vector/embedder/hf.rs @@ -30,6 +30,12 @@ enum WeightSource { Pytorch, } +/// Inert embedder options for a hf embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub model: String, diff --git a/crates/milli/src/vector/embedder/manual.rs b/crates/milli/src/vector/embedder/manual.rs index 132aab0bf..1ca8c7205 100644 --- a/crates/milli/src/vector/embedder/manual.rs +++ b/crates/milli/src/vector/embedder/manual.rs @@ -7,6 +7,12 @@ pub struct Embedder { distribution: Option, } +/// Inert embedder options for a manual embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub dimensions: usize, diff --git a/crates/milli/src/vector/embedder/mod.rs b/crates/milli/src/vector/embedder/mod.rs index b7f7b1de4..c6d04059b 100644 --- a/crates/milli/src/vector/embedder/mod.rs +++ b/crates/milli/src/vector/embedder/mod.rs @@ -35,6 +35,11 @@ pub enum Embedder { } /// Configuration for an embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] pub struct EmbeddingConfig { /// Options of the embedder, specific to each kind of embedder @@ -53,6 +58,11 @@ impl EmbeddingConfig { } /// Options of an embedder, specific to each kind of embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub enum EmbedderOptions { HuggingFace(hf::EmbedderOptions), diff --git a/crates/milli/src/vector/embedder/ollama.rs b/crates/milli/src/vector/embedder/ollama.rs index 6e2dc185f..311893bc4 100644 --- a/crates/milli/src/vector/embedder/ollama.rs +++ b/crates/milli/src/vector/embedder/ollama.rs @@ -16,6 +16,12 @@ pub struct Embedder { rest_embedder: RestEmbedder, } +/// Inert embedder options for an ollama embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub embedding_model: String, diff --git a/crates/milli/src/vector/embedder/openai.rs b/crates/milli/src/vector/embedder/openai.rs index 4fec228e4..03fc1dda3 100644 --- a/crates/milli/src/vector/embedder/openai.rs +++ b/crates/milli/src/vector/embedder/openai.rs @@ -13,6 +13,12 @@ use crate::vector::error::{EmbedError, EmbedErrorKind, NewEmbedderError}; use crate::vector::{Embedding, REQUEST_PARALLELISM}; use crate::ThreadPoolNoAbort; +/// Inert embedder options for an openai embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub url: Option, diff --git a/crates/milli/src/vector/embedder/rest.rs b/crates/milli/src/vector/embedder/rest.rs index 54e56faea..f1c8ad1e0 100644 --- a/crates/milli/src/vector/embedder/rest.rs +++ b/crates/milli/src/vector/embedder/rest.rs @@ -133,6 +133,12 @@ impl RequestData { } } +/// Inert embedder options for a rest embedder. +/// +/// # Warning +/// +/// This type is serialized in and deserialized from the DB, any modification should either go +/// through dumpless upgrade or be backward-compatible #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] pub struct EmbedderOptions { pub api_key: Option,