feat: add the document fields repartition (per-attribute field counts) to the index stats

This commit is contained in:
Quentin de Quelen
2019-09-17 15:41:20 +02:00
parent 97cf5cca2a
commit 80caa8b60d
4 changed files with 105 additions and 3 deletions

View File

@ -1,7 +1,8 @@
use std::convert::TryInto;
use std::collections::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use meilidb_schema::{Schema, SchemaAttr};
use rocksdb::DBVector;
use crate::document_attr_key::DocumentAttrKey;
@ -54,6 +55,20 @@ impl DocumentsIndex {
Ok(DocumentFieldsIter(iter))
}
/// Counts how many keys of the documents index belong to each schema attribute.
///
/// Every key yielded by the underlying iterator is decoded into a
/// `DocumentAttrKey` and tallied by its `attribute`. The attribute ids are
/// then translated to their names through `schema`.
///
/// # Errors
///
/// Returns the error produced when the iterator over the underlying store
/// cannot be created.
pub fn documents_fields_repartition(&self, schema: Schema) -> RocksDbResult<HashMap<String, u64>> {
    // First pass: tally the keys per attribute id.
    let mut counts_by_attr = HashMap::new();
    for entry in DocumentsKeysIter(self.0.iter()?) {
        *counts_by_attr.entry(entry.attribute).or_insert(0u64) += 1;
    }

    // Second pass: swap each attribute id for its name in the schema.
    let counts_by_name: HashMap<String, u64> = counts_by_attr
        .into_iter()
        .map(|(attr, count)| (schema.attribute_name(attr).to_owned(), count))
        .collect();

    Ok(counts_by_name)
}
pub fn len(&self) -> RocksDbResult<u64> {
let mut last_document_id = None;
let mut count = 0;
@ -88,3 +103,20 @@ impl Iterator for DocumentFieldsIter<'_> {
}
}
}
/// Iterator over the keys of the documents index, with the stored values
/// discarded.
///
/// Wraps a raw `crate::CfIter` and decodes each key into a `DocumentAttrKey`.
pub struct DocumentsKeysIter<'a>(crate::CfIter<'a>);

impl Iterator for DocumentsKeysIter<'_> {
    type Item = DocumentAttrKey;

    /// Yields the next decoded key, or `None` once the wrapped iterator is
    /// exhausted.
    ///
    /// # Panics
    ///
    /// Panics if a key's bytes cannot be converted into the fixed-size array
    /// expected by `DocumentAttrKey::from_be_bytes`.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(|(key, _)| {
            // A key that does not convert is a corrupted or foreign entry,
            // which is a broken invariant rather than a recoverable error.
            let array = key
                .as_ref()
                .try_into()
                .expect("documents index key must convert to the DocumentAttrKey byte array");
            DocumentAttrKey::from_be_bytes(array)
        })
    }
}

View File

@ -1,4 +1,4 @@
use std::collections::{HashSet, BTreeMap};
use std::collections::{HashMap, HashSet, BTreeMap};
use std::convert::TryInto;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
@ -155,11 +155,12 @@ fn last_update_id(
Ok(uikey.max(urikey).unwrap_or(0))
}
#[derive(Copy, Clone)]
/// Summary figures describing an index, as assembled by `Index::stats`.
///
/// Only `Clone` is derived: the `HashMap` field is not `Copy`, so the struct
/// cannot be `Copy` either.
#[derive(Clone)]
pub struct IndexStats {
    /// Length of the cached `words` collection.
    pub number_of_words: usize,
    /// Number of documents, as recorded in the index cache.
    pub number_of_documents: u64,
    /// Length of the cached ranked map.
    pub number_attrs_in_ranked_map: usize,
    /// For each schema attribute name, the number of documents-index keys
    /// carrying that attribute.
    pub documents_fields_repartition: HashMap<String, u64>,
}
#[derive(Clone)]
@ -275,10 +276,12 @@ impl Index {
/// Builds an `IndexStats` snapshot for this index.
///
/// The word, document and ranked-map figures are read from the loaded
/// cache. The per-attribute repartition is recomputed on every call by
/// `documents_fields_repartition`, which iterates over the keys of the
/// documents index.
///
/// # Errors
///
/// Propagates any error returned while computing the fields repartition.
pub fn stats(&self) -> RocksDbResult<IndexStats> {
    let snapshot = self.cache.load();

    // The callee takes the schema by value, hence the clone.
    let schema = snapshot.schema.clone();
    let repartition = self.documents_index.documents_fields_repartition(schema)?;

    let stats = IndexStats {
        number_of_words: snapshot.words.len(),
        number_of_documents: snapshot.number_of_documents,
        number_attrs_in_ranked_map: snapshot.ranked_map.len(),
        documents_fields_repartition: repartition,
    };
    Ok(stats)
}