mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Add embedders stats
This commit is contained in:
		| @@ -22,7 +22,7 @@ use crate::heed_codec::version::VersionCodec; | ||||
| use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; | ||||
| use crate::order_by_map::OrderByMap; | ||||
| use crate::proximity::ProximityPrecision; | ||||
| use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; | ||||
| use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig}; | ||||
| use crate::{ | ||||
|     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, | ||||
|     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, | ||||
| @@ -1731,6 +1731,18 @@ impl Index { | ||||
|         let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default(); | ||||
|         Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 }) | ||||
|     } | ||||
|  | ||||
|     pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> { | ||||
|         let mut stats = ArroyStats::default(); | ||||
|         let embedding_configs = self.embedding_configs(rtxn)?; | ||||
|         for config in embedding_configs { | ||||
|             let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); | ||||
|             let reader = | ||||
|                 ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized()); | ||||
|             reader.aggregate_stats(rtxn, &mut stats)?; | ||||
|         } | ||||
|         Ok(stats) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Deserialize, Serialize)] | ||||
|   | ||||
| @@ -410,8 +410,43 @@ impl ArroyWrapper { | ||||
|     fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> { | ||||
|         self.database.remap_data_type() | ||||
|     } | ||||
|  | ||||
|     pub fn aggregate_stats( | ||||
|         &self, | ||||
|         rtxn: &RoTxn, | ||||
|         stats: &mut ArroyStats, | ||||
|     ) -> Result<(), arroy::Error> { | ||||
|         if self.quantized { | ||||
|             for reader in self.readers(rtxn, self.quantized_db()) { | ||||
|                 let reader = reader?; | ||||
|                 let documents = reader.item_ids(); | ||||
|                 if documents.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|                 stats.documents |= documents; | ||||
|                 stats.number_of_embeddings += documents.len() as u64; | ||||
|             } | ||||
|         } else { | ||||
|             for reader in self.readers(rtxn, self.angular_db()) { | ||||
|                 let reader = reader?; | ||||
|                 let documents = reader.item_ids(); | ||||
|                 if documents.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|                 stats.documents |= documents; | ||||
|                 stats.number_of_embeddings += documents.len() as u64; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Default, Clone)] | ||||
| pub struct ArroyStats { | ||||
|     pub number_of_embeddings: u64, | ||||
|     pub documents: RoaringBitmap, | ||||
| } | ||||
| /// One or multiple embeddings stored consecutively in a flat vector. | ||||
| pub struct Embeddings<F> { | ||||
|     data: Vec<F>, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user