Merge branch 'request-fragments-test' into fragment-filters

This commit is contained in:
Mubelotix
2025-07-07 18:45:34 +02:00
5 changed files with 2374 additions and 5 deletions

View File

@ -35,7 +35,7 @@ pub struct Server<State = Owned> {
pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap()); pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
impl Server<Owned> { impl Server<Owned> {
fn into_shared(self) -> Server<Shared> { pub fn into_shared(self) -> Server<Shared> {
Server { service: self.service, _dir: self._dir, _marker: PhantomData } Server { service: self.service, _dir: self._dir, _marker: PhantomData }
} }
@ -327,7 +327,7 @@ impl<State> Server<State> {
self.service.get(url).await self.service.get(url).await
} }
pub(super) fn _index(&self, uid: impl AsRef<str>) -> Index<'_> { pub fn _index(&self, uid: impl AsRef<str>) -> Index<'_> {
Index { Index {
uid: uid.as_ref().to_string(), uid: uid.as_ref().to_string(),
service: &self.service, service: &self.service,

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
mod binary_quantized; mod binary_quantized;
mod fragments;
#[cfg(feature = "test-ollama")] #[cfg(feature = "test-ollama")]
mod ollama; mod ollama;
mod openai; mod openai;

View File

@ -101,6 +101,10 @@ impl<T> Setting<T> {
matches!(self, Self::NotSet) matches!(self, Self::NotSet)
} }
/// Returns `true` if `self` is the `Reset` variant, `false` otherwise.
pub const fn is_reset(&self) -> bool {
matches!(self, Self::Reset)
}
/// If `Self` is `Reset`, then map self to `Set` with the provided `val`. /// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
pub fn or_reset(self, val: T) -> Self { pub fn or_reset(self, val: T) -> Self {
match self { match self {
@ -1213,6 +1217,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// new config // new config
EitherOrBoth::Right((name, mut setting)) => { EitherOrBoth::Right((name, mut setting)) => {
tracing::debug!(embedder = name, "new embedder"); tracing::debug!(embedder = name, "new embedder");
// if we are asked to reset an embedder that doesn't exist, just ignore it
if setting.is_reset() {
continue;
}
// apply the default source in case the source was not set so that it gets validated // apply the default source in case the source was not set so that it gets validated
crate::vector::settings::EmbeddingSettings::apply_default_source(&mut setting); crate::vector::settings::EmbeddingSettings::apply_default_source(&mut setting);
crate::vector::settings::EmbeddingSettings::apply_default_openai_model( crate::vector::settings::EmbeddingSettings::apply_default_openai_model(

View File

@ -59,12 +59,24 @@ pub struct EmbedderOptions {
impl Embedder { impl Embedder {
pub fn new( pub fn new(
EmbedderOptions { search, index }: EmbedderOptions, EmbedderOptions { search: search_options, index: index_options }: EmbedderOptions,
cache_cap: usize, cache_cap: usize,
) -> Result<Self, NewEmbedderError> { ) -> Result<Self, NewEmbedderError> {
let search = SubEmbedder::new(search, cache_cap)?; // don't check similarity if one child is a rest embedder with fragments
// FIXME: skipping the check isn't ideal but we are unsure how to handle fragments in this context
let mut skip_similarity_check = false;
for options in [&search_options, &index_options] {
if let SubEmbedderOptions::Rest(options) = &options {
if !options.search_fragments.is_empty() || !options.indexing_fragments.is_empty() {
skip_similarity_check = true;
break;
}
}
}
let search = SubEmbedder::new(search_options, cache_cap)?;
// cache is only used at search // cache is only used at search
let index = SubEmbedder::new(index, 0)?; let index = SubEmbedder::new(index_options, 0)?;
// check dimensions // check dimensions
if search.dimensions() != index.dimensions() { if search.dimensions() != index.dimensions() {
@ -73,7 +85,12 @@ impl Embedder {
index.dimensions(), index.dimensions(),
)); ));
} }
// check similarity // check similarity
if skip_similarity_check {
return Ok(Self { search, index });
}
let search_embeddings = search let search_embeddings = search
.embed( .embed(
vec![ vec![