mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
Format
This commit is contained in:
@ -1152,19 +1152,18 @@ async fn vector_filter_or_combination() {
|
|||||||
"#);
|
"#);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn vector_filter_regenerate() {
|
async fn vector_filter_regenerate() {
|
||||||
let index = shared_index_for_fragments().await;
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
for selector in ["_vectors.rest.regenerate", "_vectors.rest.fragments.basic.regenerate"] {
|
for selector in ["_vectors.rest.regenerate", "_vectors.rest.fragments.basic.regenerate"] {
|
||||||
let (value, _code) = index
|
let (value, _code) = index
|
||||||
.search_post(json!({
|
.search_post(json!({
|
||||||
"filter": format!("{selector} EXISTS"),
|
"filter": format!("{selector} EXISTS"),
|
||||||
"attributesToRetrieve": ["name"]
|
"attributesToRetrieve": ["name"]
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [
|
"hits": [
|
||||||
{
|
{
|
||||||
@ -1186,4 +1185,3 @@ async fn vector_filter_regenerate() {
|
|||||||
"#);
|
"#);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,108 +14,49 @@ enum VectorFilterInner<'a> {
|
|||||||
FullEmbedder { embedder_token: Token<'a> },
|
FullEmbedder { embedder_token: Token<'a> },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VectorFilterInner<'_> {
|
|
||||||
fn evaluate_inner(
|
|
||||||
&self,
|
|
||||||
rtxn: &heed::RoTxn<'_>,
|
|
||||||
index: &Index,
|
|
||||||
embedding_configs: &[IndexEmbeddingConfig],
|
|
||||||
regenerate: bool,
|
|
||||||
) -> crate::Result<RoaringBitmap> {
|
|
||||||
let embedder = match self {
|
|
||||||
VectorFilterInner::Fragment { embedder_token, .. } => embedder_token,
|
|
||||||
VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token,
|
|
||||||
VectorFilterInner::UserProvided { embedder_token } => embedder_token,
|
|
||||||
VectorFilterInner::FullEmbedder { embedder_token } => embedder_token,
|
|
||||||
};
|
|
||||||
let embedder_name = embedder.value();
|
|
||||||
let available_embedders =
|
|
||||||
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let embedding_config = embedding_configs
|
|
||||||
.iter()
|
|
||||||
.find(|config| config.name == embedder_name)
|
|
||||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
|
||||||
|
|
||||||
let embedder_info = index
|
|
||||||
.embedding_configs()
|
|
||||||
.embedder_info(rtxn, embedder_name)?
|
|
||||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
|
||||||
|
|
||||||
let arroy_wrapper = ArroyWrapper::new(
|
|
||||||
index.vector_arroy,
|
|
||||||
embedder_info.embedder_id,
|
|
||||||
embedding_config.config.quantized(),
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut docids = match self {
|
|
||||||
VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => {
|
|
||||||
let fragment_name = fragment.value();
|
|
||||||
let fragment_config = embedding_config
|
|
||||||
.fragments
|
|
||||||
.as_slice()
|
|
||||||
.iter()
|
|
||||||
.find(|fragment| fragment.name == fragment_name)
|
|
||||||
.ok_or_else(|| FragmentDoesNotExist {
|
|
||||||
embedder,
|
|
||||||
fragment,
|
|
||||||
available: embedding_config
|
|
||||||
.fragments
|
|
||||||
.as_slice()
|
|
||||||
.iter()
|
|
||||||
.map(|f| f.name.clone())
|
|
||||||
.collect(),
|
|
||||||
})?;
|
|
||||||
|
|
||||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
|
||||||
}
|
|
||||||
VectorFilterInner::DocumentTemplate { .. } => {
|
|
||||||
if !embedding_config.fragments.as_slice().is_empty() {
|
|
||||||
return Ok(RoaringBitmap::new());
|
|
||||||
}
|
|
||||||
|
|
||||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
|
||||||
let mut stats = ArroyStats::default();
|
|
||||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
|
||||||
stats.documents - user_provided_docsids.clone()
|
|
||||||
}
|
|
||||||
VectorFilterInner::UserProvided { .. } => {
|
|
||||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
|
||||||
user_provided_docsids.clone()
|
|
||||||
}
|
|
||||||
VectorFilterInner::FullEmbedder { .. } => {
|
|
||||||
let mut stats = ArroyStats::default();
|
|
||||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
|
||||||
stats.documents
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if regenerate {
|
|
||||||
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
|
||||||
docids -= skip_regenerate;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(docids)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(super) struct VectorFilter<'a> {
|
pub(super) struct VectorFilter<'a> {
|
||||||
inner: Option<VectorFilterInner<'a>>,
|
inner: Option<VectorFilterInner<'a>>,
|
||||||
regenerate: bool,
|
regenerate: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum VectorFilterError<'a> {
|
pub enum VectorFilterError<'a> {
|
||||||
|
#[error("Vector filter cannot be empty.")]
|
||||||
EmptyFilter,
|
EmptyFilter,
|
||||||
|
|
||||||
|
#[error("Vector filter must start with `_vectors` but found `{}`.", _0.value())]
|
||||||
InvalidPrefix(Token<'a>),
|
InvalidPrefix(Token<'a>),
|
||||||
|
|
||||||
|
#[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")]
|
||||||
MissingFragmentName(Token<'a>),
|
MissingFragmentName(Token<'a>),
|
||||||
|
|
||||||
|
#[error("Vector filter cannot have both `{}` and `{}`.", _0.0.value(), _0.1.value())]
|
||||||
ExclusiveOptions(Box<(Token<'a>, Token<'a>)>),
|
ExclusiveOptions(Box<(Token<'a>, Token<'a>)>),
|
||||||
|
|
||||||
|
#[error("Vector filter has leftover token: `{}`.", _0.value())]
|
||||||
LeftoverToken(Token<'a>),
|
LeftoverToken(Token<'a>),
|
||||||
EmbedderDoesNotExist {
|
|
||||||
embedder: &'a Token<'a>,
|
#[error("The embedder `{}` does not exist. {}", embedder.value(), {
|
||||||
available: Vec<String>,
|
if available.is_empty() {
|
||||||
},
|
String::from("This index does not have any configured embedders.")
|
||||||
|
} else {
|
||||||
|
let mut available = available.clone();
|
||||||
|
available.sort_unstable();
|
||||||
|
format!("Available embedders are: {}.", available.iter().map(|e| format!("`{e}`")).collect::<Vec<_>>().join(", "))
|
||||||
|
}
|
||||||
|
})]
|
||||||
|
EmbedderDoesNotExist { embedder: &'a Token<'a>, available: Vec<String> },
|
||||||
|
|
||||||
|
#[error("The fragment `{}` does not exist on embedder `{}`. {}", fragment.value(), embedder.value(), {
|
||||||
|
if available.is_empty() {
|
||||||
|
String::from("This embedder does not have any configured fragments.")
|
||||||
|
} else {
|
||||||
|
let mut available = available.clone();
|
||||||
|
available.sort_unstable();
|
||||||
|
format!("Available fragments on this embedder are: {}.", available.iter().map(|f| format!("`{f}`")).collect::<Vec<_>>().join(", "))
|
||||||
|
}
|
||||||
|
})]
|
||||||
FragmentDoesNotExist {
|
FragmentDoesNotExist {
|
||||||
embedder: &'a Token<'a>,
|
embedder: &'a Token<'a>,
|
||||||
fragment: &'a Token<'a>,
|
fragment: &'a Token<'a>,
|
||||||
@ -125,78 +66,6 @@ pub enum VectorFilterError<'a> {
|
|||||||
|
|
||||||
use VectorFilterError::*;
|
use VectorFilterError::*;
|
||||||
|
|
||||||
impl std::error::Error for VectorFilterError<'_> {}
|
|
||||||
|
|
||||||
impl std::fmt::Display for VectorFilterError<'_> {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
EmptyFilter => {
|
|
||||||
write!(f, "Vector filter cannot be empty.")
|
|
||||||
}
|
|
||||||
InvalidPrefix(prefix) => {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"Vector filter must start with `_vectors` but found `{}`.",
|
|
||||||
prefix.value()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
MissingFragmentName(_token) => {
|
|
||||||
write!(f, "Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")
|
|
||||||
}
|
|
||||||
ExclusiveOptions(tokens) => {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"Vector filter cannot have both `{}` and `{}`.",
|
|
||||||
tokens.0.value(),
|
|
||||||
tokens.1.value()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
LeftoverToken(token) => {
|
|
||||||
write!(f, "Vector filter has leftover token: `{}`.", token.value())
|
|
||||||
}
|
|
||||||
EmbedderDoesNotExist { embedder, available } => {
|
|
||||||
write!(f, "The embedder `{}` does not exist.", embedder.value())?;
|
|
||||||
if available.is_empty() {
|
|
||||||
write!(f, " This index does not have configured embedders.")
|
|
||||||
} else {
|
|
||||||
write!(f, " Available embedders are: ")?;
|
|
||||||
let mut available = available.clone();
|
|
||||||
available.sort_unstable();
|
|
||||||
for (idx, embedder) in available.iter().enumerate() {
|
|
||||||
write!(f, "`{embedder}`")?;
|
|
||||||
if idx != available.len() - 1 {
|
|
||||||
write!(f, ", ")?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
write!(f, ".")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FragmentDoesNotExist { embedder, fragment, available } => {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"The fragment `{}` does not exist on embedder `{}`.",
|
|
||||||
fragment.value(),
|
|
||||||
embedder.value(),
|
|
||||||
)?;
|
|
||||||
if available.is_empty() {
|
|
||||||
write!(f, " This embedder does not have configured fragments.")
|
|
||||||
} else {
|
|
||||||
write!(f, " Available fragments on this embedder are: ")?;
|
|
||||||
let mut available = available.clone();
|
|
||||||
available.sort_unstable();
|
|
||||||
for (idx, fragment) in available.iter().enumerate() {
|
|
||||||
write!(f, "`{fragment}`")?;
|
|
||||||
if idx != available.len() - 1 {
|
|
||||||
write!(f, ", ")?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
write!(f, ".")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> From<VectorFilterError<'a>> for Error {
|
impl<'a> From<VectorFilterError<'a>> for Error {
|
||||||
fn from(err: VectorFilterError<'a>) -> Self {
|
fn from(err: VectorFilterError<'a>) -> Self {
|
||||||
match &err {
|
match &err {
|
||||||
@ -320,3 +189,88 @@ impl<'a> VectorFilter<'a> {
|
|||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl VectorFilterInner<'_> {
|
||||||
|
fn evaluate_inner(
|
||||||
|
&self,
|
||||||
|
rtxn: &heed::RoTxn<'_>,
|
||||||
|
index: &Index,
|
||||||
|
embedding_configs: &[IndexEmbeddingConfig],
|
||||||
|
regenerate: bool,
|
||||||
|
) -> crate::Result<RoaringBitmap> {
|
||||||
|
let embedder = match self {
|
||||||
|
VectorFilterInner::Fragment { embedder_token, .. } => embedder_token,
|
||||||
|
VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token,
|
||||||
|
VectorFilterInner::UserProvided { embedder_token } => embedder_token,
|
||||||
|
VectorFilterInner::FullEmbedder { embedder_token } => embedder_token,
|
||||||
|
};
|
||||||
|
let embedder_name = embedder.value();
|
||||||
|
let available_embedders =
|
||||||
|
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let embedding_config = embedding_configs
|
||||||
|
.iter()
|
||||||
|
.find(|config| config.name == embedder_name)
|
||||||
|
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||||
|
|
||||||
|
let embedder_info = index
|
||||||
|
.embedding_configs()
|
||||||
|
.embedder_info(rtxn, embedder_name)?
|
||||||
|
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||||
|
|
||||||
|
let arroy_wrapper = ArroyWrapper::new(
|
||||||
|
index.vector_arroy,
|
||||||
|
embedder_info.embedder_id,
|
||||||
|
embedding_config.config.quantized(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut docids = match self {
|
||||||
|
VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => {
|
||||||
|
let fragment_name = fragment.value();
|
||||||
|
let fragment_config = embedding_config
|
||||||
|
.fragments
|
||||||
|
.as_slice()
|
||||||
|
.iter()
|
||||||
|
.find(|fragment| fragment.name == fragment_name)
|
||||||
|
.ok_or_else(|| FragmentDoesNotExist {
|
||||||
|
embedder,
|
||||||
|
fragment,
|
||||||
|
available: embedding_config
|
||||||
|
.fragments
|
||||||
|
.as_slice()
|
||||||
|
.iter()
|
||||||
|
.map(|f| f.name.clone())
|
||||||
|
.collect(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||||
|
}
|
||||||
|
VectorFilterInner::DocumentTemplate { .. } => {
|
||||||
|
if !embedding_config.fragments.as_slice().is_empty() {
|
||||||
|
return Ok(RoaringBitmap::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||||
|
let mut stats = ArroyStats::default();
|
||||||
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
|
stats.documents - user_provided_docsids.clone()
|
||||||
|
}
|
||||||
|
VectorFilterInner::UserProvided { .. } => {
|
||||||
|
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||||
|
user_provided_docsids.clone()
|
||||||
|
}
|
||||||
|
VectorFilterInner::FullEmbedder { .. } => {
|
||||||
|
let mut stats = ArroyStats::default();
|
||||||
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
|
stats.documents
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if regenerate {
|
||||||
|
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||||
|
docids -= skip_regenerate;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(docids)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user