mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-26 00:01:00 +00:00
Merge pull request #5734 from meilisearch/request-fragments-test
Tests for multimodal
This commit is contained in:
@ -3,8 +3,12 @@ pub mod index;
|
||||
pub mod server;
|
||||
pub mod service;
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
fmt::{self, Display},
|
||||
};
|
||||
|
||||
use actix_http::StatusCode;
|
||||
#[allow(unused)]
|
||||
pub use index::GetAllDocumentsOptions;
|
||||
use meili_snap::json_string;
|
||||
@ -13,6 +17,10 @@ use serde::{Deserialize, Serialize};
|
||||
#[allow(unused)]
|
||||
pub use server::{default_settings, Server};
|
||||
use tokio::sync::OnceCell;
|
||||
use wiremock::{
|
||||
matchers::{method, path},
|
||||
Mock, MockServer, Request, ResponseTemplate,
|
||||
};
|
||||
|
||||
use crate::common::index::Index;
|
||||
|
||||
@ -508,3 +516,166 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
|
||||
static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
|
||||
let (server, uid) = INDEX
|
||||
.get_or_init(|| async {
|
||||
let (server, uid, _) = init_fragments_index().await;
|
||||
(server.into_shared(), uid)
|
||||
})
|
||||
.await;
|
||||
server._index(uid).to_shared()
|
||||
}
|
||||
|
||||
async fn fragment_mock_server() -> String {
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
("kefir", [0.5, -0.5, 0.0]),
|
||||
("intel", [1.0, 1.0, 0.0]),
|
||||
("dustin", [-0.5, 0.5, 0.0]),
|
||||
("bulldog", [0.0, 0.0, 1.0]),
|
||||
("labrador", [0.0, 0.0, -1.0]),
|
||||
("{{ doc.", [-9999.0, -9999.0, -9999.0]), // If a template didn't render
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/"))
|
||||
.respond_with(move |req: &Request| {
|
||||
let text = String::from_utf8_lossy(&req.body).to_string();
|
||||
|
||||
let mut data = [0.0, 0.0, 0.0];
|
||||
for (inner_text, inner_data) in &text_to_embedding {
|
||||
if text.contains(inner_text) {
|
||||
for (i, &value) in inner_data.iter().enumerate() {
|
||||
data[i] += value;
|
||||
}
|
||||
}
|
||||
}
|
||||
ResponseTemplate::new(200).set_body_json(json!({ "data": data }))
|
||||
})
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
|
||||
mock_server.uri()
|
||||
}
|
||||
|
||||
pub async fn init_fragments_index() -> (Server<Owned>, String, crate::common::Value) {
|
||||
let url = fragment_mock_server().await;
|
||||
let server = Server::new().await;
|
||||
let index = server.unique_index();
|
||||
|
||||
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
|
||||
assert_eq!(code, StatusCode::OK);
|
||||
|
||||
// Configure the index to use our mock embedder
|
||||
let settings = json!({
|
||||
"embedders": {
|
||||
"rest": {
|
||||
"source": "rest",
|
||||
"url": url,
|
||||
"dimensions": 3,
|
||||
"request": "{{fragment}}",
|
||||
"response": {
|
||||
"data": "{{embedding}}"
|
||||
},
|
||||
"indexingFragments": {
|
||||
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
|
||||
"basic": {"value": "{{ doc.name }} is a dog"},
|
||||
},
|
||||
"searchFragments": {
|
||||
"justBreed": {"value": "It's a {{ media.breed }}"},
|
||||
"justName": {"value": "{{ media.name }} is a dog"},
|
||||
"query": {"value": "Some pre-prompt for query {{ q }}"},
|
||||
}
|
||||
},
|
||||
},
|
||||
});
|
||||
let (response, code) = index.update_settings(settings.clone()).await;
|
||||
assert_eq!(code, StatusCode::ACCEPTED);
|
||||
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// Send documents
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir"},
|
||||
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
|
||||
{"id": 2, "name": "intel", "breed": "labrador"},
|
||||
{"id": 3, "name": "dustin", "breed": "bulldog"},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, StatusCode::ACCEPTED);
|
||||
|
||||
let _task = index.wait_task(value.uid()).await.succeeded();
|
||||
|
||||
let uid = index.uid.clone();
|
||||
(server, uid, settings)
|
||||
}
|
||||
|
||||
pub async fn init_fragments_index_composite() -> (Server<Owned>, String, crate::common::Value) {
|
||||
let url = fragment_mock_server().await;
|
||||
let server = Server::new().await;
|
||||
let index = server.unique_index();
|
||||
|
||||
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
|
||||
assert_eq!(code, StatusCode::OK);
|
||||
|
||||
let (_response, code) = server.set_features(json!({"compositeEmbedders": true})).await;
|
||||
assert_eq!(code, StatusCode::OK);
|
||||
|
||||
// Configure the index to use our mock embedder
|
||||
let settings = json!({
|
||||
"embedders": {
|
||||
"rest": {
|
||||
"source": "composite",
|
||||
"searchEmbedder": {
|
||||
"source": "rest",
|
||||
"url": url,
|
||||
"dimensions": 3,
|
||||
"request": "{{fragment}}",
|
||||
"response": {
|
||||
"data": "{{embedding}}"
|
||||
},
|
||||
"searchFragments": {
|
||||
"query": {"value": "Some pre-prompt for query {{ q }}"},
|
||||
}
|
||||
},
|
||||
"indexingEmbedder": {
|
||||
"source": "rest",
|
||||
"url": url,
|
||||
"dimensions": 3,
|
||||
"request": "{{fragment}}",
|
||||
"response": {
|
||||
"data": "{{embedding}}"
|
||||
},
|
||||
"indexingFragments": {
|
||||
"withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"},
|
||||
"basic": {"value": "{{ doc.name }} is a dog"},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
let (response, code) = index.update_settings(settings.clone()).await;
|
||||
assert_eq!(code, StatusCode::ACCEPTED);
|
||||
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// Send documents
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir"},
|
||||
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
|
||||
{"id": 2, "name": "intel", "breed": "labrador"},
|
||||
{"id": 3, "name": "dustin", "breed": "bulldog"},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, StatusCode::ACCEPTED);
|
||||
|
||||
index.wait_task(value.uid()).await.succeeded();
|
||||
|
||||
let uid = index.uid.clone();
|
||||
(server, uid, settings)
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ pub struct Server<State = Owned> {
|
||||
pub static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
|
||||
|
||||
impl Server<Owned> {
|
||||
fn into_shared(self) -> Server<Shared> {
|
||||
pub(super) fn into_shared(self) -> Server<Shared> {
|
||||
Server { service: self.service, _dir: self._dir, _marker: PhantomData }
|
||||
}
|
||||
|
||||
|
@ -2499,7 +2499,7 @@ pub struct LocalMeiliParams {
|
||||
|
||||
/// A server that exploits [`MockServer`] to provide a URL for testing the network.
|
||||
pub struct LocalMeili {
|
||||
mock_server: MockServer,
|
||||
mock_server: &'static MockServer,
|
||||
}
|
||||
|
||||
impl LocalMeili {
|
||||
@ -2508,7 +2508,7 @@ impl LocalMeili {
|
||||
}
|
||||
|
||||
pub async fn with_params(server: Arc<Server>, params: LocalMeiliParams) -> Self {
|
||||
let mock_server = MockServer::start().await;
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
// tokio won't let us execute asynchronous code from a sync function inside of an async test,
|
||||
// so instead we spawn another thread that will call the service on a brand new tokio runtime
|
||||
@ -2572,7 +2572,7 @@ impl LocalMeili {
|
||||
response.set_body_json(value)
|
||||
}
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
Self { mock_server }
|
||||
}
|
||||
|
2120
crates/meilisearch/tests/vector/fragments.rs
Normal file
2120
crates/meilisearch/tests/vector/fragments.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,5 @@
|
||||
mod binary_quantized;
|
||||
mod fragments;
|
||||
#[cfg(feature = "test-ollama")]
|
||||
mod ollama;
|
||||
mod openai;
|
||||
|
@ -136,7 +136,7 @@ fn long_text() -> &'static str {
|
||||
})
|
||||
}
|
||||
|
||||
async fn create_mock_tokenized() -> (MockServer, Value) {
|
||||
async fn create_mock_tokenized() -> (&'static MockServer, Value) {
|
||||
create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false, false).await
|
||||
}
|
||||
|
||||
@ -145,8 +145,8 @@ async fn create_mock_with_template(
|
||||
model_dimensions: ModelDimensions,
|
||||
fallible: bool,
|
||||
slow: bool,
|
||||
) -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
) -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
const API_KEY: &str = "my-api-key";
|
||||
const API_KEY_BEARER: &str = "Bearer my-api-key";
|
||||
|
||||
@ -299,7 +299,7 @@ async fn create_mock_with_template(
|
||||
}
|
||||
}))
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
@ -321,27 +321,27 @@ const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée
|
||||
Un chien nommé {{doc.name}}, né en {{doc.birthyear}}
|
||||
{%- endif %}, de race {{doc.breed}}."#;
|
||||
|
||||
async fn create_mock() -> (MockServer, Value) {
|
||||
async fn create_mock() -> (&'static MockServer, Value) {
|
||||
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false, false).await
|
||||
}
|
||||
|
||||
async fn create_mock_dimensions() -> (MockServer, Value) {
|
||||
async fn create_mock_dimensions() -> (&'static MockServer, Value) {
|
||||
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false, false).await
|
||||
}
|
||||
|
||||
async fn create_mock_small_embedding_model() -> (MockServer, Value) {
|
||||
async fn create_mock_small_embedding_model() -> (&'static MockServer, Value) {
|
||||
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false, false).await
|
||||
}
|
||||
|
||||
async fn create_mock_legacy_embedding_model() -> (MockServer, Value) {
|
||||
async fn create_mock_legacy_embedding_model() -> (&'static MockServer, Value) {
|
||||
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false, false).await
|
||||
}
|
||||
|
||||
async fn create_fallible_mock() -> (MockServer, Value) {
|
||||
async fn create_fallible_mock() -> (&'static MockServer, Value) {
|
||||
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, false).await
|
||||
}
|
||||
|
||||
async fn create_slow_mock() -> (MockServer, Value) {
|
||||
async fn create_slow_mock() -> (&'static MockServer, Value) {
|
||||
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, true).await
|
||||
}
|
||||
|
||||
|
@ -12,8 +12,8 @@ use crate::common::Value;
|
||||
use crate::json;
|
||||
use crate::vector::{get_server_vector, GetAllDocumentsOptions};
|
||||
|
||||
async fn create_mock() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
async fn create_mock() -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
@ -32,7 +32,7 @@ async fn create_mock() -> (MockServer, Value) {
|
||||
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
|
||||
)
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
@ -50,8 +50,8 @@ async fn create_mock() -> (MockServer, Value) {
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
async fn create_mock_default_template() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
async fn create_mock_default_template() -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
@ -73,7 +73,7 @@ async fn create_mock_default_template() -> (MockServer, Value) {
|
||||
.set_body_json(json!({"error": "text not found", "text": text})),
|
||||
}
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
@ -106,8 +106,8 @@ struct SingleResponse {
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
async fn create_mock_multiple() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
async fn create_mock_multiple() -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
@ -146,7 +146,7 @@ async fn create_mock_multiple() -> (MockServer, Value) {
|
||||
|
||||
ResponseTemplate::new(200).set_body_json(response)
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
@ -176,8 +176,8 @@ struct SingleRequest {
|
||||
input: String,
|
||||
}
|
||||
|
||||
async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
async fn create_mock_single_response_in_array() -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
@ -212,7 +212,7 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||
|
||||
ResponseTemplate::new(200).set_body_json(response)
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
@ -236,8 +236,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
async fn create_mock_raw_with_custom_header() -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
@ -277,7 +277,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
|
||||
ResponseTemplate::new(200).set_body_json(output)
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
@ -293,8 +293,8 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
async fn create_mock_raw() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
async fn create_mock_raw() -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
@ -321,7 +321,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
|
||||
|
||||
ResponseTemplate::new(200).set_body_json(output)
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
@ -337,8 +337,8 @@ async fn create_mock_raw() -> (MockServer, Value) {
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (&'static MockServer, Value) {
|
||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||
let count = AtomicUsize::new(0);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
@ -355,7 +355,7 @@ async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value)
|
||||
ResponseTemplate::new(500).set_body_string("Service Unavailable")
|
||||
}
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.mount(mock_server)
|
||||
.await;
|
||||
|
||||
let url = mock_server.uri();
|
||||
|
@ -101,6 +101,10 @@ impl<T> Setting<T> {
|
||||
matches!(self, Self::NotSet)
|
||||
}
|
||||
|
||||
pub const fn is_reset(&self) -> bool {
|
||||
matches!(self, Self::Reset)
|
||||
}
|
||||
|
||||
/// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
|
||||
pub fn or_reset(self, val: T) -> Self {
|
||||
match self {
|
||||
@ -1213,6 +1217,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
// new config
|
||||
EitherOrBoth::Right((name, mut setting)) => {
|
||||
tracing::debug!(embedder = name, "new embedder");
|
||||
// if we are asked to reset an embedder that doesn't exist, just ignore it
|
||||
if setting.is_reset() {
|
||||
continue;
|
||||
}
|
||||
// apply the default source in case the source was not set so that it gets validated
|
||||
crate::vector::settings::EmbeddingSettings::apply_default_source(&mut setting);
|
||||
crate::vector::settings::EmbeddingSettings::apply_default_openai_model(
|
||||
|
@ -59,12 +59,24 @@ pub struct EmbedderOptions {
|
||||
|
||||
impl Embedder {
|
||||
pub fn new(
|
||||
EmbedderOptions { search, index }: EmbedderOptions,
|
||||
EmbedderOptions { search: search_options, index: index_options }: EmbedderOptions,
|
||||
cache_cap: usize,
|
||||
) -> Result<Self, NewEmbedderError> {
|
||||
let search = SubEmbedder::new(search, cache_cap)?;
|
||||
// don't check similarity if one child is a rest embedder with fragments
|
||||
// FIXME: skipping the check isn't ideal but we are unsure how to handle fragments in this context
|
||||
let mut skip_similarity_check = false;
|
||||
for options in [&search_options, &index_options] {
|
||||
if let SubEmbedderOptions::Rest(options) = &options {
|
||||
if !options.search_fragments.is_empty() || !options.indexing_fragments.is_empty() {
|
||||
skip_similarity_check = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let search = SubEmbedder::new(search_options, cache_cap)?;
|
||||
// cache is only used at search
|
||||
let index = SubEmbedder::new(index, 0)?;
|
||||
let index = SubEmbedder::new(index_options, 0)?;
|
||||
|
||||
// check dimensions
|
||||
if search.dimensions() != index.dimensions() {
|
||||
@ -73,7 +85,12 @@ impl Embedder {
|
||||
index.dimensions(),
|
||||
));
|
||||
}
|
||||
|
||||
// check similarity
|
||||
if skip_similarity_check {
|
||||
return Ok(Self { search, index });
|
||||
}
|
||||
|
||||
let search_embeddings = search
|
||||
.embed(
|
||||
vec![
|
||||
|
Reference in New Issue
Block a user