mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Merge #5341
5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- force dumpless upgrade to recompute stats
- add tests

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -410,8 +410,43 @@ impl ArroyWrapper { | ||||
|     /// Returns `self.database` with its data type remapped so that it is | ||||
|     /// typed as an `arroy::Database<BinaryQuantizedCosine>`. | ||||
|     fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> { | ||||
|         self.database.remap_data_type() | ||||
|     } | ||||
|  | ||||
|     /// Accumulates the item ids of this wrapper's readers into `stats`. | ||||
|     /// | ||||
|     /// Readers are obtained from `self.readers`, over `self.quantized_db()` | ||||
|     /// when `self.quantized` is set and over `self.angular_db()` otherwise. | ||||
|     /// For each reader, its item ids are unioned into `stats.documents` and | ||||
|     /// their count is added to `stats.number_of_embeddings`. Iteration stops | ||||
|     /// at the first reader whose item ids are empty. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the first `arroy::Error` yielded while iterating the readers. | ||||
|     pub fn aggregate_stats( | ||||
|         &self, | ||||
|         rtxn: &RoTxn, | ||||
|         stats: &mut ArroyStats, | ||||
|     ) -> Result<(), arroy::Error> { | ||||
|         if !self.quantized { | ||||
|             for opened in self.readers(rtxn, self.angular_db()) { | ||||
|                 let store = opened?; | ||||
|                 let item_ids = store.item_ids(); | ||||
|                 // An empty reader ends the scan; nothing after it is visited. | ||||
|                 if item_ids.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|                 stats.number_of_embeddings += item_ids.len(); | ||||
|                 stats.documents |= item_ids; | ||||
|             } | ||||
|         } else { | ||||
|             for opened in self.readers(rtxn, self.quantized_db()) { | ||||
|                 let store = opened?; | ||||
|                 let item_ids = store.item_ids(); | ||||
|                 // An empty reader ends the scan; nothing after it is visited. | ||||
|                 if item_ids.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|                 stats.number_of_embeddings += item_ids.len(); | ||||
|                 stats.documents |= item_ids; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Accumulator filled by `ArroyWrapper::aggregate_stats`. | ||||
| #[derive(Clone, Debug, Default)] | ||||
| pub struct ArroyStats { | ||||
|     /// Running sum of the item-id counts of every reader aggregated so far. | ||||
|     pub number_of_embeddings: u64, | ||||
|     /// Union of the item ids of every reader aggregated so far. | ||||
|     pub documents: RoaringBitmap, | ||||
| } | ||||
| /// One or multiple embeddings stored consecutively in a flat vector. | ||||
| pub struct Embeddings<F> { | ||||
|     data: Vec<F>, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user