Rename HannoyStats to VectorStoreStats

The stats can be provided by any vector store backend, not only hannoy.
This commit is contained in:
Louis Dureuil
2025-09-03 14:45:31 +02:00
parent b05bcf2c13
commit c32c74671d
5 changed files with 13 additions and 13 deletions

View File

@ -143,10 +143,10 @@ impl IndexStats {
/// ///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> { pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let hannoy_stats = index.hannoy_stats(rtxn)?; let vector_store_stats = index.vector_store_stats(rtxn)?;
Ok(IndexStats { Ok(IndexStats {
number_of_embeddings: Some(hannoy_stats.number_of_embeddings), number_of_embeddings: Some(vector_store_stats.number_of_embeddings),
number_of_embedded_documents: Some(hannoy_stats.documents.len()), number_of_embedded_documents: Some(vector_store_stats.documents.len()),
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(), documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
number_of_documents: None, number_of_documents: None,
database_size: index.on_disk_size()?, database_size: index.on_disk_size()?,

View File

@ -31,7 +31,7 @@ use crate::prompt::PromptData;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::update::new::StdResult; use crate::update::new::StdResult;
use crate::vector::db::IndexEmbeddingConfigs; use crate::vector::db::IndexEmbeddingConfigs;
use crate::vector::{Embedding, HannoyStats, VectorStore, VectorStoreBackend}; use crate::vector::{Embedding, VectorStore, VectorStoreBackend, VectorStoreStats};
use crate::{ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@ -1825,8 +1825,8 @@ impl Index {
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 }) Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
} }
pub fn hannoy_stats(&self, rtxn: &RoTxn<'_>) -> Result<HannoyStats> { pub fn vector_store_stats(&self, rtxn: &RoTxn<'_>) -> Result<VectorStoreStats> {
let mut stats = HannoyStats::default(); let mut stats = VectorStoreStats::default();
let embedding_configs = self.embedding_configs(); let embedding_configs = self.embedding_configs();
let backend = self.get_vector_store(rtxn)?; let backend = self.get_vector_store(rtxn)?;

View File

@ -3,7 +3,7 @@ use roaring::{MultiOps, RoaringBitmap};
use crate::error::{DidYouMean, Error}; use crate::error::{DidYouMean, Error};
use crate::vector::db::IndexEmbeddingConfig; use crate::vector::db::IndexEmbeddingConfig;
use crate::vector::{HannoyStats, VectorStore}; use crate::vector::{VectorStoreStats, VectorStore};
use crate::Index; use crate::Index;
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
@ -134,7 +134,7 @@ fn evaluate_inner(
} }
let user_provided_docids = embedder_info.embedding_status.user_provided_docids(); let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
let mut stats = HannoyStats::default(); let mut stats = VectorStoreStats::default();
vector_store.aggregate_stats(rtxn, &mut stats)?; vector_store.aggregate_stats(rtxn, &mut stats)?;
stats.documents - user_provided_docids.clone() stats.documents - user_provided_docids.clone()
} }
@ -143,13 +143,13 @@ fn evaluate_inner(
user_provided_docids.clone() user_provided_docids.clone()
} }
VectorFilter::Regenerate => { VectorFilter::Regenerate => {
let mut stats = HannoyStats::default(); let mut stats = VectorStoreStats::default();
vector_store.aggregate_stats(rtxn, &mut stats)?; vector_store.aggregate_stats(rtxn, &mut stats)?;
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids(); let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
stats.documents - skip_regenerate stats.documents - skip_regenerate
} }
VectorFilter::None => { VectorFilter::None => {
let mut stats = HannoyStats::default(); let mut stats = VectorStoreStats::default();
vector_store.aggregate_stats(rtxn, &mut stats)?; vector_store.aggregate_stats(rtxn, &mut stats)?;
stats.documents stats.documents
} }

View File

@ -19,7 +19,7 @@ pub use distribution::DistributionShift;
pub use embedder::{Embedder, EmbedderOptions, EmbeddingConfig, SearchQuery}; pub use embedder::{Embedder, EmbedderOptions, EmbeddingConfig, SearchQuery};
pub use embeddings::Embeddings; pub use embeddings::Embeddings;
pub use runtime::{RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment}; pub use runtime::{RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment};
pub use store::{HannoyStats, VectorStore, VectorStoreBackend}; pub use store::{VectorStore, VectorStoreBackend, VectorStoreStats};
pub const REQUEST_PARALLELISM: usize = 40; pub const REQUEST_PARALLELISM: usize = 40;

View File

@ -645,7 +645,7 @@ impl VectorStore {
pub fn aggregate_stats( pub fn aggregate_stats(
&self, &self,
rtxn: &RoTxn, rtxn: &RoTxn,
stats: &mut HannoyStats, stats: &mut VectorStoreStats,
) -> Result<(), crate::Error> { ) -> Result<(), crate::Error> {
if self.backend == VectorStoreBackend::Arroy { if self.backend == VectorStoreBackend::Arroy {
if self.quantized { if self.quantized {
@ -1161,7 +1161,7 @@ where
} }
#[derive(Debug, Default, Clone)] #[derive(Debug, Default, Clone)]
pub struct HannoyStats { pub struct VectorStoreStats {
pub number_of_embeddings: u64, pub number_of_embeddings: u64,
pub documents: RoaringBitmap, pub documents: RoaringBitmap,
} }