mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 00:31:02 +00:00
Format
This commit is contained in:
@ -1152,7 +1152,6 @@ async fn vector_filter_or_combination() {
|
||||
"#);
|
||||
}
|
||||
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn vector_filter_regenerate() {
|
||||
let index = shared_index_for_fragments().await;
|
||||
@ -1186,4 +1185,3 @@ async fn vector_filter_regenerate() {
|
||||
"#);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,108 +14,49 @@ enum VectorFilterInner<'a> {
|
||||
FullEmbedder { embedder_token: Token<'a> },
|
||||
}
|
||||
|
||||
impl VectorFilterInner<'_> {
|
||||
fn evaluate_inner(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
embedding_configs: &[IndexEmbeddingConfig],
|
||||
regenerate: bool,
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let embedder = match self {
|
||||
VectorFilterInner::Fragment { embedder_token, .. } => embedder_token,
|
||||
VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token,
|
||||
VectorFilterInner::UserProvided { embedder_token } => embedder_token,
|
||||
VectorFilterInner::FullEmbedder { embedder_token } => embedder_token,
|
||||
};
|
||||
let embedder_name = embedder.value();
|
||||
let available_embedders =
|
||||
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||
|
||||
let embedding_config = embedding_configs
|
||||
.iter()
|
||||
.find(|config| config.name == embedder_name)
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
|
||||
let embedder_info = index
|
||||
.embedding_configs()
|
||||
.embedder_info(rtxn, embedder_name)?
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
|
||||
let arroy_wrapper = ArroyWrapper::new(
|
||||
index.vector_arroy,
|
||||
embedder_info.embedder_id,
|
||||
embedding_config.config.quantized(),
|
||||
);
|
||||
|
||||
let mut docids = match self {
|
||||
VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => {
|
||||
let fragment_name = fragment.value();
|
||||
let fragment_config = embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.find(|fragment| fragment.name == fragment_name)
|
||||
.ok_or_else(|| FragmentDoesNotExist {
|
||||
embedder,
|
||||
fragment,
|
||||
available: embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|f| f.name.clone())
|
||||
.collect(),
|
||||
})?;
|
||||
|
||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||
}
|
||||
VectorFilterInner::DocumentTemplate { .. } => {
|
||||
if !embedding_config.fragments.as_slice().is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents - user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilterInner::UserProvided { .. } => {
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilterInner::FullEmbedder { .. } => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents
|
||||
}
|
||||
};
|
||||
|
||||
if regenerate {
|
||||
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||
docids -= skip_regenerate;
|
||||
}
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
}
|
||||
|
||||
/// A vector filter: an optional inner filter paired with a `regenerate` flag.
#[derive(Debug)]
|
||||
pub(super) struct VectorFilter<'a> {
|
||||
// NOTE(review): what a `None` inner filter means is not visible in this excerpt — confirm against the parser.
inner: Option<VectorFilterInner<'a>>,
|
||||
// NOTE(review): presumably forwarded to `VectorFilterInner::evaluate_inner`, where `true` removes the embedder's skip-regenerate docids from the result — confirm.
regenerate: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum VectorFilterError<'a> {
|
||||
#[error("Vector filter cannot be empty.")]
|
||||
EmptyFilter,
|
||||
|
||||
#[error("Vector filter must start with `_vectors` but found `{}`.", _0.value())]
|
||||
InvalidPrefix(Token<'a>),
|
||||
|
||||
#[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")]
|
||||
MissingFragmentName(Token<'a>),
|
||||
|
||||
#[error("Vector filter cannot have both `{}` and `{}`.", _0.0.value(), _0.1.value())]
|
||||
ExclusiveOptions(Box<(Token<'a>, Token<'a>)>),
|
||||
|
||||
#[error("Vector filter has leftover token: `{}`.", _0.value())]
|
||||
LeftoverToken(Token<'a>),
|
||||
EmbedderDoesNotExist {
|
||||
embedder: &'a Token<'a>,
|
||||
available: Vec<String>,
|
||||
},
|
||||
|
||||
#[error("The embedder `{}` does not exist. {}", embedder.value(), {
|
||||
if available.is_empty() {
|
||||
String::from("This index does not have any configured embedders.")
|
||||
} else {
|
||||
let mut available = available.clone();
|
||||
available.sort_unstable();
|
||||
format!("Available embedders are: {}.", available.iter().map(|e| format!("`{e}`")).collect::<Vec<_>>().join(", "))
|
||||
}
|
||||
})]
|
||||
EmbedderDoesNotExist { embedder: &'a Token<'a>, available: Vec<String> },
|
||||
|
||||
#[error("The fragment `{}` does not exist on embedder `{}`. {}", fragment.value(), embedder.value(), {
|
||||
if available.is_empty() {
|
||||
String::from("This embedder does not have any configured fragments.")
|
||||
} else {
|
||||
let mut available = available.clone();
|
||||
available.sort_unstable();
|
||||
format!("Available fragments on this embedder are: {}.", available.iter().map(|f| format!("`{f}`")).collect::<Vec<_>>().join(", "))
|
||||
}
|
||||
})]
|
||||
FragmentDoesNotExist {
|
||||
embedder: &'a Token<'a>,
|
||||
fragment: &'a Token<'a>,
|
||||
@ -125,78 +66,6 @@ pub enum VectorFilterError<'a> {
|
||||
|
||||
use VectorFilterError::*;
|
||||
|
||||
// Empty impl: `std::error::Error` only needs the type's `Debug` and `Display` implementations.
impl std::error::Error for VectorFilterError<'_> {}
|
||||
|
||||
impl std::fmt::Display for VectorFilterError<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
EmptyFilter => {
|
||||
write!(f, "Vector filter cannot be empty.")
|
||||
}
|
||||
InvalidPrefix(prefix) => {
|
||||
write!(
|
||||
f,
|
||||
"Vector filter must start with `_vectors` but found `{}`.",
|
||||
prefix.value()
|
||||
)
|
||||
}
|
||||
MissingFragmentName(_token) => {
|
||||
write!(f, "Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")
|
||||
}
|
||||
ExclusiveOptions(tokens) => {
|
||||
write!(
|
||||
f,
|
||||
"Vector filter cannot have both `{}` and `{}`.",
|
||||
tokens.0.value(),
|
||||
tokens.1.value()
|
||||
)
|
||||
}
|
||||
LeftoverToken(token) => {
|
||||
write!(f, "Vector filter has leftover token: `{}`.", token.value())
|
||||
}
|
||||
EmbedderDoesNotExist { embedder, available } => {
|
||||
write!(f, "The embedder `{}` does not exist.", embedder.value())?;
|
||||
if available.is_empty() {
|
||||
write!(f, " This index does not have configured embedders.")
|
||||
} else {
|
||||
write!(f, " Available embedders are: ")?;
|
||||
let mut available = available.clone();
|
||||
available.sort_unstable();
|
||||
for (idx, embedder) in available.iter().enumerate() {
|
||||
write!(f, "`{embedder}`")?;
|
||||
if idx != available.len() - 1 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
}
|
||||
write!(f, ".")
|
||||
}
|
||||
}
|
||||
FragmentDoesNotExist { embedder, fragment, available } => {
|
||||
write!(
|
||||
f,
|
||||
"The fragment `{}` does not exist on embedder `{}`.",
|
||||
fragment.value(),
|
||||
embedder.value(),
|
||||
)?;
|
||||
if available.is_empty() {
|
||||
write!(f, " This embedder does not have configured fragments.")
|
||||
} else {
|
||||
write!(f, " Available fragments on this embedder are: ")?;
|
||||
let mut available = available.clone();
|
||||
available.sort_unstable();
|
||||
for (idx, fragment) in available.iter().enumerate() {
|
||||
write!(f, "`{fragment}`")?;
|
||||
if idx != available.len() - 1 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
}
|
||||
write!(f, ".")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<VectorFilterError<'a>> for Error {
|
||||
fn from(err: VectorFilterError<'a>) -> Self {
|
||||
match &err {
|
||||
@ -320,3 +189,88 @@ impl<'a> VectorFilter<'a> {
|
||||
Ok(docids)
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorFilterInner<'_> {
|
||||
fn evaluate_inner(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
embedding_configs: &[IndexEmbeddingConfig],
|
||||
regenerate: bool,
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let embedder = match self {
|
||||
VectorFilterInner::Fragment { embedder_token, .. } => embedder_token,
|
||||
VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token,
|
||||
VectorFilterInner::UserProvided { embedder_token } => embedder_token,
|
||||
VectorFilterInner::FullEmbedder { embedder_token } => embedder_token,
|
||||
};
|
||||
let embedder_name = embedder.value();
|
||||
let available_embedders =
|
||||
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||
|
||||
let embedding_config = embedding_configs
|
||||
.iter()
|
||||
.find(|config| config.name == embedder_name)
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
|
||||
let embedder_info = index
|
||||
.embedding_configs()
|
||||
.embedder_info(rtxn, embedder_name)?
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
|
||||
let arroy_wrapper = ArroyWrapper::new(
|
||||
index.vector_arroy,
|
||||
embedder_info.embedder_id,
|
||||
embedding_config.config.quantized(),
|
||||
);
|
||||
|
||||
let mut docids = match self {
|
||||
VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => {
|
||||
let fragment_name = fragment.value();
|
||||
let fragment_config = embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.find(|fragment| fragment.name == fragment_name)
|
||||
.ok_or_else(|| FragmentDoesNotExist {
|
||||
embedder,
|
||||
fragment,
|
||||
available: embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|f| f.name.clone())
|
||||
.collect(),
|
||||
})?;
|
||||
|
||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||
}
|
||||
VectorFilterInner::DocumentTemplate { .. } => {
|
||||
if !embedding_config.fragments.as_slice().is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents - user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilterInner::UserProvided { .. } => {
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilterInner::FullEmbedder { .. } => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents
|
||||
}
|
||||
};
|
||||
|
||||
if regenerate {
|
||||
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||
docids -= skip_regenerate;
|
||||
}
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user