mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-04 03:36:30 +00:00
Refactor to support less combinations
This commit is contained in:
@ -981,7 +981,7 @@ async fn vector_filter_specific_fragment_user_provided() {
|
|||||||
.await;
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"message": "Index `[uuid]`: Vector filter cannot have both `other` and `userProvided`.\n31:43 _vectors.rest.fragments.other.userProvided EXISTS",
|
"message": "Index `[uuid]`: Vector filter cannot have both `fragments` and `userProvided`.\n15:24 _vectors.rest.fragments.other.userProvided EXISTS",
|
||||||
"code": "invalid_search_filter",
|
"code": "invalid_search_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
@ -1156,32 +1156,45 @@ async fn vector_filter_or_combination() {
|
|||||||
async fn vector_filter_regenerate() {
|
async fn vector_filter_regenerate() {
|
||||||
let index = shared_index_for_fragments().await;
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
for selector in ["_vectors.rest.regenerate", "_vectors.rest.fragments.basic.regenerate"] {
|
let (value, _code) = index
|
||||||
let (value, _code) = index
|
.search_post(json!({
|
||||||
.search_post(json!({
|
"filter": format!("_vectors.rest.regenerate EXISTS"),
|
||||||
"filter": format!("{selector} EXISTS"),
|
"attributesToRetrieve": ["name"]
|
||||||
"attributesToRetrieve": ["name"]
|
}))
|
||||||
}))
|
.await;
|
||||||
.await;
|
snapshot!(value, @r#"
|
||||||
snapshot!(value, @r#"
|
{
|
||||||
{
|
"hits": [
|
||||||
"hits": [
|
{
|
||||||
{
|
"name": "kefir"
|
||||||
"name": "kefir"
|
},
|
||||||
},
|
{
|
||||||
{
|
"name": "intel"
|
||||||
"name": "intel"
|
},
|
||||||
},
|
{
|
||||||
{
|
"name": "dustin"
|
||||||
"name": "dustin"
|
}
|
||||||
}
|
],
|
||||||
],
|
"query": "",
|
||||||
"query": "",
|
"processingTimeMs": "[duration]",
|
||||||
"processingTimeMs": "[duration]",
|
"limit": 20,
|
||||||
"limit": 20,
|
"offset": 0,
|
||||||
"offset": 0,
|
"estimatedTotalHits": 3
|
||||||
"estimatedTotalHits": 3
|
|
||||||
}
|
|
||||||
"#);
|
|
||||||
}
|
}
|
||||||
|
"#);
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": format!("_vectors.rest.fragments.basic.regenerate EXISTS"),
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"message": "Index `[uuid]`: Vector filter cannot have both `fragments` and `regenerate`.\n15:24 _vectors.rest.fragments.basic.regenerate EXISTS",
|
||||||
|
"code": "invalid_search_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
}
|
}
|
||||||
|
@ -8,16 +8,17 @@ use crate::Index;
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum VectorFilterInner<'a> {
|
enum VectorFilterInner<'a> {
|
||||||
Fragment { embedder_token: Token<'a>, fragment_token: Token<'a> },
|
Fragment(Token<'a>),
|
||||||
DocumentTemplate { embedder_token: Token<'a> },
|
DocumentTemplate,
|
||||||
UserProvided { embedder_token: Token<'a> },
|
UserProvided,
|
||||||
FullEmbedder { embedder_token: Token<'a> },
|
Regenerate,
|
||||||
|
None,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(super) struct VectorFilter<'a> {
|
pub(super) struct VectorFilter<'a> {
|
||||||
inner: Option<VectorFilterInner<'a>>,
|
embedder: Option<Token<'a>>,
|
||||||
regenerate: bool,
|
inner: VectorFilterInner<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
@ -31,8 +32,10 @@ pub enum VectorFilterError<'a> {
|
|||||||
#[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")]
|
#[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")]
|
||||||
MissingFragmentName(Token<'a>),
|
MissingFragmentName(Token<'a>),
|
||||||
|
|
||||||
#[error("Vector filter cannot have both `{}` and `{}`.", _0.0.value(), _0.1.value())]
|
#[error("Vector filter cannot have both {}.", {
|
||||||
ExclusiveOptions(Box<(Token<'a>, Token<'a>)>),
|
_0.iter().map(|t| format!("`{}`", t.value())).collect::<Vec<_>>().join(" and ")
|
||||||
|
})]
|
||||||
|
ExclusiveOptions(Vec<Token<'a>>),
|
||||||
|
|
||||||
#[error("Vector filter has leftover token: `{}`.", _0.value())]
|
#[error("Vector filter has leftover token: `{}`.", _0.value())]
|
||||||
LeftoverToken(Token<'a>),
|
LeftoverToken(Token<'a>),
|
||||||
@ -73,7 +76,12 @@ impl<'a> From<VectorFilterError<'a>> for Error {
|
|||||||
InvalidPrefix(token) | MissingFragmentName(token) | LeftoverToken(token) => {
|
InvalidPrefix(token) | MissingFragmentName(token) | LeftoverToken(token) => {
|
||||||
token.clone().as_external_error(err).into()
|
token.clone().as_external_error(err).into()
|
||||||
}
|
}
|
||||||
ExclusiveOptions(tokens) => tokens.1.clone().as_external_error(err).into(),
|
ExclusiveOptions(tokens) => tokens
|
||||||
|
.first()
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_else(|| Token::from("")) // Should never happen: tokens is never created empty
|
||||||
|
.as_external_error(err)
|
||||||
|
.into(),
|
||||||
EmbedderDoesNotExist { embedder: token, .. }
|
EmbedderDoesNotExist { embedder: token, .. }
|
||||||
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
|
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
|
||||||
}
|
}
|
||||||
@ -92,11 +100,8 @@ impl<'a> VectorFilter<'a> {
|
|||||||
/// - `_vectors.{embedder_name}`
|
/// - `_vectors.{embedder_name}`
|
||||||
/// - `_vectors.{embedder_name}.regenerate`
|
/// - `_vectors.{embedder_name}.regenerate`
|
||||||
/// - `_vectors.{embedder_name}.userProvided`
|
/// - `_vectors.{embedder_name}.userProvided`
|
||||||
/// - `_vectors.{embedder_name}.userProvided.regenerate`
|
|
||||||
/// - `_vectors.{embedder_name}.documentTemplate`
|
/// - `_vectors.{embedder_name}.documentTemplate`
|
||||||
/// - `_vectors.{embedder_name}.documentTemplate.regenerate`
|
|
||||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
||||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}.regenerate`
|
|
||||||
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
|
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
|
||||||
let mut split = s.split(".").peekable();
|
let mut split = s.split(".").peekable();
|
||||||
|
|
||||||
@ -108,54 +113,53 @@ impl<'a> VectorFilter<'a> {
|
|||||||
|
|
||||||
let embedder_name = split.next();
|
let embedder_name = split.next();
|
||||||
|
|
||||||
let mut fragment_name = None;
|
let mut fragment_tokens = None;
|
||||||
if split.peek().map(|t| t.value()) == Some("fragments") {
|
if split.peek().map(|t| t.value()) == Some("fragments") {
|
||||||
let token = split.next().expect("it was peeked before");
|
let token = split.next().expect("it was peeked before");
|
||||||
|
let name = split.next().ok_or_else(|| MissingFragmentName(token.clone()))?;
|
||||||
|
|
||||||
fragment_name = Some(split.next().ok_or(MissingFragmentName(token))?);
|
fragment_tokens = Some((token, name));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut remaining_tokens = split.collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut user_provided_token = None;
|
let mut user_provided_token = None;
|
||||||
if split.peek().map(|t| t.value()) == Some("userProvided") {
|
if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "userProvided") {
|
||||||
user_provided_token = split.next();
|
user_provided_token = Some(remaining_tokens.remove(position));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut document_template_token = None;
|
let mut document_template_token = None;
|
||||||
if split.peek().map(|t| t.value()) == Some("documentTemplate") {
|
if let Some(position) =
|
||||||
document_template_token = split.next();
|
remaining_tokens.iter().position(|t| t.value() == "documentTemplate")
|
||||||
|
{
|
||||||
|
document_template_token = Some(remaining_tokens.remove(position));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut regenerate_token = None;
|
let mut regenerate_token = None;
|
||||||
if split.peek().map(|t| t.value()) == Some("regenerate") {
|
if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "regenerate") {
|
||||||
regenerate_token = split.next();
|
regenerate_token = Some(remaining_tokens.remove(position));
|
||||||
}
|
}
|
||||||
|
|
||||||
let inner = match (fragment_name, user_provided_token, document_template_token) {
|
if !remaining_tokens.is_empty() {
|
||||||
(Some(fragment_name), None, None) => Some(VectorFilterInner::Fragment {
|
return Err(LeftoverToken(remaining_tokens.remove(0)));
|
||||||
embedder_token: embedder_name
|
|
||||||
.expect("embedder name comes before fragment so it's always Some"),
|
|
||||||
fragment_token: fragment_name,
|
|
||||||
}),
|
|
||||||
(None, Some(_), None) => Some(VectorFilterInner::UserProvided {
|
|
||||||
embedder_token: embedder_name
|
|
||||||
.expect("embedder name comes before userProvided so it's always Some"),
|
|
||||||
}),
|
|
||||||
(None, None, Some(_)) => Some(VectorFilterInner::DocumentTemplate {
|
|
||||||
embedder_token: embedder_name
|
|
||||||
.expect("embedder name comes before documentTemplate so it's always Some"),
|
|
||||||
}),
|
|
||||||
(Some(a), Some(b), _) | (_, Some(a), Some(b)) | (Some(a), None, Some(b)) => {
|
|
||||||
return Err(ExclusiveOptions(Box::new((a, b))));
|
|
||||||
}
|
|
||||||
(None, None, None) => embedder_name
|
|
||||||
.map(|embedder_token| VectorFilterInner::FullEmbedder { embedder_token }),
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(next) = split.next() {
|
|
||||||
return Err(LeftoverToken(next))?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self { inner, regenerate: regenerate_token.is_some() })
|
let inner =
|
||||||
|
match (fragment_tokens, user_provided_token, document_template_token, regenerate_token)
|
||||||
|
{
|
||||||
|
(Some((_token, name)), None, None, None) => VectorFilterInner::Fragment(name),
|
||||||
|
(None, Some(_), None, None) => VectorFilterInner::UserProvided,
|
||||||
|
(None, None, Some(_), None) => VectorFilterInner::DocumentTemplate,
|
||||||
|
(None, None, None, Some(_)) => VectorFilterInner::Regenerate,
|
||||||
|
(None, None, None, None) => VectorFilterInner::None,
|
||||||
|
(a, b, c, d) => {
|
||||||
|
let a = a.map(|(token, _)| token);
|
||||||
|
let present = [a, b, c, d].into_iter().flatten().collect();
|
||||||
|
return Err(ExclusiveOptions(present));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Self { inner, embedder: embedder_name })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn evaluate(
|
pub(super) fn evaluate(
|
||||||
@ -167,19 +171,16 @@ impl<'a> VectorFilter<'a> {
|
|||||||
let index_embedding_configs = index.embedding_configs();
|
let index_embedding_configs = index.embedding_configs();
|
||||||
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||||
|
|
||||||
let inners = match self.inner {
|
let embedders = match self.embedder {
|
||||||
Some(inner) => vec![inner],
|
Some(embedder) => vec![embedder],
|
||||||
None => embedding_configs
|
None => {
|
||||||
.iter()
|
embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect()
|
||||||
.map(|config| VectorFilterInner::FullEmbedder {
|
}
|
||||||
embedder_token: Token::from(config.name.as_str()),
|
|
||||||
})
|
|
||||||
.collect(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut docids = inners
|
let mut docids = embedders
|
||||||
.iter()
|
.iter()
|
||||||
.map(|i| i.evaluate_inner(rtxn, index, &embedding_configs, self.regenerate))
|
.map(|e| self.inner.evaluate(rtxn, index, e, &embedding_configs))
|
||||||
.union()?;
|
.union()?;
|
||||||
|
|
||||||
if let Some(universe) = universe {
|
if let Some(universe) = universe {
|
||||||
@ -191,19 +192,13 @@ impl<'a> VectorFilter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl VectorFilterInner<'_> {
|
impl VectorFilterInner<'_> {
|
||||||
fn evaluate_inner(
|
fn evaluate(
|
||||||
&self,
|
&self,
|
||||||
rtxn: &heed::RoTxn<'_>,
|
rtxn: &heed::RoTxn<'_>,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
|
embedder: &Token<'_>,
|
||||||
embedding_configs: &[IndexEmbeddingConfig],
|
embedding_configs: &[IndexEmbeddingConfig],
|
||||||
regenerate: bool,
|
|
||||||
) -> crate::Result<RoaringBitmap> {
|
) -> crate::Result<RoaringBitmap> {
|
||||||
let embedder = match self {
|
|
||||||
VectorFilterInner::Fragment { embedder_token, .. } => embedder_token,
|
|
||||||
VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token,
|
|
||||||
VectorFilterInner::UserProvided { embedder_token } => embedder_token,
|
|
||||||
VectorFilterInner::FullEmbedder { embedder_token } => embedder_token,
|
|
||||||
};
|
|
||||||
let embedder_name = embedder.value();
|
let embedder_name = embedder.value();
|
||||||
let available_embedders =
|
let available_embedders =
|
||||||
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||||
@ -224,8 +219,8 @@ impl VectorFilterInner<'_> {
|
|||||||
embedding_config.config.quantized(),
|
embedding_config.config.quantized(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut docids = match self {
|
let docids = match self {
|
||||||
VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => {
|
VectorFilterInner::Fragment(fragment) => {
|
||||||
let fragment_name = fragment.value();
|
let fragment_name = fragment.value();
|
||||||
let fragment_config = embedding_config
|
let fragment_config = embedding_config
|
||||||
.fragments
|
.fragments
|
||||||
@ -245,7 +240,7 @@ impl VectorFilterInner<'_> {
|
|||||||
|
|
||||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||||
}
|
}
|
||||||
VectorFilterInner::DocumentTemplate { .. } => {
|
VectorFilterInner::DocumentTemplate => {
|
||||||
if !embedding_config.fragments.as_slice().is_empty() {
|
if !embedding_config.fragments.as_slice().is_empty() {
|
||||||
return Ok(RoaringBitmap::new());
|
return Ok(RoaringBitmap::new());
|
||||||
}
|
}
|
||||||
@ -255,22 +250,23 @@ impl VectorFilterInner<'_> {
|
|||||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
stats.documents - user_provided_docsids.clone()
|
stats.documents - user_provided_docsids.clone()
|
||||||
}
|
}
|
||||||
VectorFilterInner::UserProvided { .. } => {
|
VectorFilterInner::UserProvided => {
|
||||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||||
user_provided_docsids.clone()
|
user_provided_docsids.clone()
|
||||||
}
|
}
|
||||||
VectorFilterInner::FullEmbedder { .. } => {
|
VectorFilterInner::Regenerate => {
|
||||||
|
let mut stats = ArroyStats::default();
|
||||||
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
|
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||||
|
stats.documents - skip_regenerate
|
||||||
|
}
|
||||||
|
VectorFilterInner::None => {
|
||||||
let mut stats = ArroyStats::default();
|
let mut stats = ArroyStats::default();
|
||||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
stats.documents
|
stats.documents
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if regenerate {
|
|
||||||
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
|
||||||
docids -= skip_regenerate;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user