Merge branch 'request-fragments-test' into fragment-filters

This commit is contained in:
Mubelotix
2025-07-07 18:45:34 +02:00
5 changed files with 2374 additions and 5 deletions

View File

@@ -35,7 +35,7 @@ pub struct Server<State = Owned> {
pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
impl Server<Owned> {
fn into_shared(self) -> Server<Shared> {
/// Consumes this owned server and returns a `Server<Shared>` that reuses the
/// same `service` and `_dir`; only the type-level state marker changes.
pub fn into_shared(self) -> Server<Shared> {
Server { service: self.service, _dir: self._dir, _marker: PhantomData }
}
@@ -327,7 +327,7 @@ impl<State> Server<State> {
self.service.get(url).await
}
pub(super) fn _index(&self, uid: impl AsRef<str>) -> Index<'_> {
pub fn _index(&self, uid: impl AsRef<str>) -> Index<'_> {
Index {
uid: uid.as_ref().to_string(),
service: &self.service,

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,5 @@
mod binary_quantized;
mod fragments;
#[cfg(feature = "test-ollama")]
mod ollama;
mod openai;

View File

@@ -101,6 +101,10 @@ impl<T> Setting<T> {
matches!(self, Self::NotSet)
}
/// Returns `true` if `Self` is `Reset`, and `false` for any other variant.
pub const fn is_reset(&self) -> bool {
matches!(self, Self::Reset)
}
/// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
pub fn or_reset(self, val: T) -> Self {
match self {
@@ -1213,6 +1217,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// new config
EitherOrBoth::Right((name, mut setting)) => {
tracing::debug!(embedder = name, "new embedder");
// if we are asked to reset an embedder that doesn't exist, just ignore it
if setting.is_reset() {
continue;
}
// apply the default source in case the source was not set so that it gets validated
crate::vector::settings::EmbeddingSettings::apply_default_source(&mut setting);
crate::vector::settings::EmbeddingSettings::apply_default_openai_model(

View File

@@ -59,12 +59,24 @@ pub struct EmbedderOptions {
impl Embedder {
pub fn new(
EmbedderOptions { search, index }: EmbedderOptions,
EmbedderOptions { search: search_options, index: index_options }: EmbedderOptions,
cache_cap: usize,
) -> Result<Self, NewEmbedderError> {
let search = SubEmbedder::new(search, cache_cap)?;
// don't check similarity if one child is a rest embedder with fragments
// FIXME: skipping the check isn't ideal but we are unsure how to handle fragments in this context
let mut skip_similarity_check = false;
for options in [&search_options, &index_options] {
if let SubEmbedderOptions::Rest(options) = &options {
if !options.search_fragments.is_empty() || !options.indexing_fragments.is_empty() {
skip_similarity_check = true;
break;
}
}
}
let search = SubEmbedder::new(search_options, cache_cap)?;
// cache is only used at search
let index = SubEmbedder::new(index, 0)?;
let index = SubEmbedder::new(index_options, 0)?;
// check dimensions
if search.dimensions() != index.dimensions() {
@@ -73,7 +85,12 @@ impl Embedder {
index.dimensions(),
));
}
// check similarity
if skip_similarity_check {
return Ok(Self { search, index });
}
let search_embeddings = search
.embed(
vec![