implement index mock

This commit is contained in:
mpostma
2021-10-04 12:15:21 +02:00
parent 607e28749a
commit 4835d82a0b
10 changed files with 386 additions and 377 deletions

View File

@@ -13,7 +13,7 @@ use crate::index::update_handler::UpdateHandler;
use crate::index::updates::apply_settings_to_builder;
use super::error::Result;
use super::{Index, Settings, Unchecked};
use super::{index::Index, Settings, Unchecked};
#[derive(Serialize, Deserialize)]
struct DumpMeta {

View File

@@ -1,287 +1,294 @@
use std::collections::{BTreeSet, HashSet};
use std::fs::create_dir_all;
use std::marker::PhantomData;
use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::{EnvOpenOptions, RoTxn};
use milli::update::Setting;
use milli::{obkv_to_json, FieldDistribution, FieldId};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
use uuid::Uuid;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::EnvSizer;
use self::error::IndexError;
use self::update_handler::UpdateHandler;
pub mod error;
pub mod update_handler;
mod dump;
mod search;
mod updates;
mod index;
pub type Document = Map<String, Value>;
pub use index::{Document, IndexMeta, IndexStats};
/// Metadata describing an index: its creation and last-update timestamps and its
/// optional primary key. Field names are (de)serialized in camelCase.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
/// When the index was created. Unlike the other fields, this one is not `pub`.
created_at: DateTime<Utc>,
/// When the index was last updated.
pub updated_at: DateTime<Utc>,
/// Name of the primary key field, or `None` when the index has none.
pub primary_key: Option<String>,
}
#[cfg(not(test))]
pub use index::Index;
/// Statistics reported for an index. Field names are serialized in camelCase.
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
/// Size of the index; skipped during serialization.
#[serde(skip)]
pub size: u64,
/// Number of documents stored in the index.
pub number_of_documents: u64,
/// Whether the current index is performing an update. It is initially `None` when the
/// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is
/// later set to either true or false, when we retrieve the information from the `UpdateStore`.
pub is_indexing: Option<bool>,
/// Field distribution of the index, as reported by milli.
pub field_distribution: FieldDistribution,
}
#[cfg(test)]
pub use test::MockIndex as Index;
impl IndexMeta {
pub fn new(index: &Index) -> Result<Self> {
let txn = index.read_txn()?;
Self::new_txn(index, &txn)
#[cfg(test)]
mod test {
use std::any::Any;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Mutex;
use std::{path::Path, sync::Arc};
use serde_json::{Map, Value};
use uuid::Uuid;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::index_controller::updates::status::{Failed, Processed, Processing};
use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
use super::index::Index;
use super::error::Result;
use super::update_handler::UpdateHandler;
/// Test-only replacement for `Index`: either a real index or a stub-backed fake.
#[derive(Debug, Clone)]
pub enum MockIndex {
/// A real `Index`; method calls are forwarded to it.
Vrai(Index),
/// A shared `FauxIndex` whose behaviour comes from the stubs registered on it.
Faux(Arc<FauxIndex>),
}
/// Builds the metadata by reading the creation date, the last-update date and the
/// primary key of `index` through the already-open read transaction `txn`.
///
/// Any error returned by one of the three reads is propagated to the caller.
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
    // Field initializers run in source order, so the reads happen in the order written here.
    Ok(Self {
        created_at: index.created_at(txn)?,
        updated_at: index.updated_at(txn)?,
        primary_key: index.primary_key(txn)?.map(String::from),
    })
}
}
/// Handle on a milli index together with the stores needed to update it.
///
/// Only `uuid` takes part in the derived `Debug` output; the other fields are ignored.
#[derive(Clone, derivative::Derivative)]
#[derivative(Debug)]
pub struct Index {
/// Identifier of this index.
pub uuid: Uuid,
/// The underlying milli index, shared behind an `Arc`.
#[derivative(Debug = "ignore")]
pub inner: Arc<milli::Index>,
/// Shared update file store handed to the index when it is opened.
#[derivative(Debug = "ignore")]
update_file_store: Arc<UpdateFileStore>,
/// Shared update handler handed to the index when it is opened.
#[derivative(Debug = "ignore")]
update_handler: Arc<UpdateHandler>,
}
/// Gives an `Index` direct access to the methods of the wrapped `milli::Index`.
impl Deref for Index {
    type Target = milli::Index;

    /// Borrows the milli index held in `inner`.
    fn deref(&self) -> &milli::Index {
        // `&Arc<milli::Index>` coerces to `&milli::Index` at the return site.
        &self.inner
    }
}
impl Index {
pub fn open(
path: impl AsRef<Path>,
size: usize,
update_file_store: Arc<UpdateFileStore>,
uuid: Uuid,
update_handler: Arc<UpdateHandler>,
) -> Result<Self> {
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
let inner = Arc::new(milli::Index::new(options, &path)?);
Ok(Index {
inner,
update_file_store,
uuid,
update_handler,
})
/// A stubbed method: a closure taking `A` and returning `R`, plus call-count bookkeeping.
pub struct Stub<A, R> {
/// Name of the stubbed method, used in panic messages.
name: String,
/// Remaining number of allowed calls; `None` means no limit is tracked.
times: Option<usize>,
/// The closure executed on each call.
stub: Box<dyn Fn(A) -> R + Sync + Send>,
/// When `true`, dropping the stub panics unless `times` has reached `Some(0)`.
exact: bool,
}
pub fn stats(&self) -> Result<IndexStats> {
let rtxn = self.read_txn()?;
Ok(IndexStats {
size: self.size(),
number_of_documents: self.number_of_documents(&rtxn)?,
is_indexing: None,
field_distribution: self.field_distribution(&rtxn)?,
})
impl<A, R> Drop for Stub<A, R> {
    /// Verifies the call count of an `exact` stub when it goes out of scope.
    ///
    /// # Panics
    ///
    /// Panics when the stub is `exact` and its remaining call count is not `Some(0)`,
    /// unless the current thread is already panicking.
    fn drop(&mut self) {
        // A panic raised while the thread is already unwinding aborts the whole process,
        // which would hide the original failure. Skip the check in that case.
        if std::thread::panicking() {
            return;
        }
        if self.exact && !matches!(self.times, Some(0)) {
            panic!("{} not called the correct amount of times", self.name);
        }
    }
}
/// Returns the metadata of this index by delegating to `IndexMeta::new`.
pub fn meta(&self) -> Result<IndexMeta> {
IndexMeta::new(self)
}
pub fn settings(&self) -> Result<Settings<Checked>> {
let txn = self.read_txn()?;
self.settings_txn(&txn)
impl<A, R> Stub<A, R> {
    /// Runs the stubbed closure with `args` and returns its result.
    ///
    /// When a call limit is tracked, it is decremented before the closure runs.
    ///
    /// # Panics
    ///
    /// Panics if the tracked limit has already reached zero.
    fn call(&mut self, args: A) -> R {
        if let Some(remaining) = self.times.as_mut() {
            if *remaining == 0 {
                panic!("{} called to many times", self.name);
            }
            *remaining -= 1;
        }
        (self.stub)(args)
    }
}
pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> {
let displayed_attributes = self
.displayed_fields(txn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let searchable_attributes = self
.searchable_fields(txn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();
let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();
let criteria = self
.criteria(txn)?
.into_iter()
.map(|c| c.to_string())
.collect();
let stop_words = self
.stop_words(txn)?
.map(|stop_words| -> Result<BTreeSet<_>> {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
.unwrap_or_else(BTreeSet::new);
let distinct_field = self.distinct_field(txn)?.map(String::from);
// In milli, each word in the synonyms map was split on its separator. Since we lost
// this information, we are going to put a space between words.
let synonyms = self
.synonyms(txn)?
.iter()
.map(|(key, values)| {
(
key.join(" "),
values.iter().map(|value| value.join(" ")).collect(),
)
})
.collect();
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
searchable_attributes: match searchable_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria),
stop_words: Setting::Set(stop_words),
distinct_attribute: match distinct_field {
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
_kind: PhantomData,
})
/// Registry of stubs keyed by method name.
#[derive(Debug, Default)]
struct StubStore {
// Stubs are stored type-erased as `dyn Any` because each one has its own argument
// and return types.
inner: Arc<Mutex<HashMap<String, Box<dyn Any + Sync + Send>>>>
}
pub fn retrieve_documents<S: AsRef<str>>(
&self,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Vec<Map<String, Value>>> {
let txn = self.read_txn()?;
/// Fake index whose behaviour is defined by the stubs registered in its store.
#[derive(Debug, Default)]
pub struct FauxIndex {
/// Stubs registered for this fake index, looked up by method name.
store: StubStore,
}
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
let mut documents = Vec::new();
for entry in iter {
let (_id, obkv) = entry?;
let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
documents.push(object);
impl StubStore {
/// Stores `stub` under `name`, type-erased as `dyn Any`.
///
/// # Panics
///
/// Panics if the store's mutex is poisoned.
pub fn insert<A: 'static, R: 'static>(&self, name: String, stub: Stub<A, R>) {
    let mut stubs = self.inner.lock().unwrap();
    let erased: Box<dyn Any + Sync + Send> = Box::new(stub);
    stubs.insert(name, erased);
}
Ok(documents)
/// Looks up the stub stored under `name` and reinterprets it as a `Stub<A, B>`.
///
/// Returns `None` when nothing is stored under `name`. Panics if the mutex is poisoned.
///
/// NOTE(review): the pointer cast below does not check that the stored value really is a
/// `Stub<A, B>`; asking for the wrong `A`/`B` yields a reference to a value of another
/// type. `Any::downcast_mut` would check this but requires `'static` bounds on `A`/`B`.
///
/// NOTE(review): the returned `&mut` is rebuilt from a raw pointer and outlives the
/// mutex guard taken here, so the lock does not protect accesses made through it.
/// Confirm that callers never hold two such references to the same stub.
pub fn get_mut<A, B>(&self, name: &str) -> Option<&mut Stub<A, B>> {
let mut lock = self.inner.lock().unwrap();
match lock.get_mut(name) {
Some(s) => {
// Unchecked cast from the type-erased box to the requested stub type.
let s = s.as_mut() as *mut dyn Any as *mut Stub<A, B>;
// Going through the raw pointer detaches the borrow from `lock`.
Some(unsafe { &mut *s })
}
None => None,
}
}
}
pub fn retrieve_document<S: AsRef<str>>(
&self,
doc_id: String,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Map<String, Value>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
let internal_id = self
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;
let document = self
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d)
.ok_or(IndexError::DocumentNotFound(doc_id))?;
let document = obkv_to_json(&fields_to_display, &fields_ids_map, document)?;
Ok(document)
/// Builder collecting the options of a stub before it is registered in a `StubStore`.
pub struct StubBuilder<'a> {
/// Name under which the stub will be registered.
name: String,
/// Store that receives the stub when `then` is called.
store: &'a StubStore,
/// Call limit to give the stub, if any.
times: Option<usize>,
/// Whether the stub must be marked as `exact`.
exact: bool,
}
pub fn size(&self) -> u64 {
self.env.size()
impl<'a> StubBuilder<'a> {
    /// Sets the call limit of the stub to `times`, leaving the `exact` flag untouched.
    #[must_use]
    pub fn times(self, times: usize) -> Self {
        Self {
            times: Some(times),
            ..self
        }
    }

    /// Sets the call limit of the stub to `times` and marks the stub as `exact`.
    #[must_use]
    pub fn exact(self, times: usize) -> Self {
        Self {
            times: Some(times),
            exact: true,
            ..self
        }
    }

    /// Finishes the builder: wraps `f` in a `Stub` carrying the collected options and
    /// registers it in the store under the builder's name.
    pub fn then<A: 'static, R: 'static>(self, f: impl Fn(A) -> R + Sync + Send + 'static) {
        let Self {
            name,
            store,
            times,
            exact,
        } = self;
        // The name is needed twice: inside the stub and as the key in the store.
        let stub = Stub {
            name: name.clone(),
            times,
            stub: Box::new(f),
            exact,
        };
        store.insert(name, stub);
    }
}
fn fields_to_display<S: AsRef<str>>(
&self,
txn: &heed::RoTxn,
attributes_to_retrieve: &Option<Vec<S>>,
fields_ids_map: &milli::FieldsIdsMap,
) -> Result<Vec<FieldId>> {
let mut displayed_fields_ids = match self.displayed_fields_ids(txn)? {
Some(ids) => ids.into_iter().collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
impl FauxIndex {
/// Starts building a stub for the method called `name`.
///
/// The returned builder targets this fake's store and starts with no call limit and
/// the `exact` flag off.
pub fn when(&self, name: &str) -> StubBuilder {
    let name = name.to_owned();
    let store = &self.store;
    StubBuilder {
        name,
        store,
        times: None,
        exact: false,
    }
}
let attributes_to_retrieve_ids = match attributes_to_retrieve {
Some(attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f.as_ref()))
.collect::<HashSet<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
Ok(displayed_fields_ids)
/// Returns the stub registered under `name`.
///
/// # Panics
///
/// Panics with "unexpected call to {name}" when no stub is registered under `name`.
pub fn get<'a, A, R>(&'a self, name: &str) -> &'a mut Stub<A, R> {
    self.store
        .get_mut(name)
        .unwrap_or_else(|| panic!("unexpected call to {}", name))
}
}
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid));
create_dir_all(&dst)?;
dst.push("data.mdb");
let _txn = self.write_txn()?;
self.inner
.env
.copy_to_path(dst, heed::CompactionOption::Enabled)?;
Ok(())
/// Mirror of the `Index` API. The `Vrai` variant forwards every call to the real index.
/// For the `Faux` variant only `snapshot` is backed by a stub; the other methods are
/// still `todo!()`.
impl MockIndex {
    /// Wraps a configured `FauxIndex` in the `Faux` variant.
    pub fn faux(faux: FauxIndex) -> Self {
        let shared = Arc::new(faux);
        Self::Faux(shared)
    }

    /// Opens a real index with `Index::open` and wraps it in the `Vrai` variant.
    pub fn open(
        path: impl AsRef<Path>,
        size: usize,
        update_file_store: Arc<UpdateFileStore>,
        uuid: Uuid,
        update_handler: Arc<UpdateHandler>,
    ) -> Result<Self> {
        let real = Index::open(path, size, update_file_store, uuid, update_handler)?;
        Ok(Self::Vrai(real))
    }

    /// Forwards to `Index::load_dump`; no mock variant is involved.
    pub fn load_dump(
        src: impl AsRef<Path>,
        dst: impl AsRef<Path>,
        size: usize,
        update_handler: &UpdateHandler,
    ) -> anyhow::Result<()> {
        Index::load_dump(src, dst, size, update_handler)?;
        Ok(())
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn handle_update(&self, update: Processing) -> std::result::Result<Processed, Failed> {
        match self {
            Self::Vrai(real) => real.handle_update(update),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn uuid(&self) -> Uuid {
        match self {
            Self::Vrai(real) => real.uuid(),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn stats(&self) -> Result<IndexStats> {
        match self {
            Self::Vrai(real) => real.stats(),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn meta(&self) -> Result<IndexMeta> {
        match self {
            Self::Vrai(real) => real.meta(),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn settings(&self) -> Result<Settings<Checked>> {
        match self {
            Self::Vrai(real) => real.settings(),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn retrieve_documents<S: AsRef<str>>(
        &self,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> Result<Vec<Map<String, Value>>> {
        match self {
            Self::Vrai(real) => real.retrieve_documents(offset, limit, attributes_to_retrieve),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn retrieve_document<S: AsRef<str>>(
        &self,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> Result<Map<String, Value>> {
        match self {
            Self::Vrai(real) => real.retrieve_document(doc_id, attributes_to_retrieve),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn size(&self) -> u64 {
        match self {
            Self::Vrai(real) => real.size(),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index, or calls the stub registered as `"snapshot"` with the
    /// path when the index is a fake.
    pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
        match self {
            Self::Vrai(real) => real.snapshot(path),
            Self::Faux(fake) => fake.get("snapshot").call(path.as_ref()),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn inner(&self) -> &milli::Index {
        match self {
            Self::Vrai(real) => real.inner(),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn update_primary_key(&self, primary_key: Option<String>) -> Result<IndexMeta> {
        match self {
            Self::Vrai(real) => real.update_primary_key(primary_key),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
        match self {
            Self::Vrai(real) => real.perform_search(query),
            Self::Faux(_) => todo!(),
        }
    }

    /// Forwards to the real index; not yet stubbed for `Faux`.
    pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
        match self {
            Self::Vrai(real) => real.dump(path),
            Self::Faux(_) => todo!(),
        }
    }
}
/// Registers a `snapshot` stub that must be called exactly twice, then calls it twice
/// through a `MockIndex`.
#[test]
fn test_faux_index() {
    let stubbed = FauxIndex::default();
    stubbed
        .when("snapshot")
        .exact(2)
        .then(|path: &Path| -> Result<()> {
            println!("path: {}", path.display());
            Ok(())
        });

    let index = MockIndex::faux(stubbed);
    let target = PathBuf::from("hello");
    for _ in 0..2 {
        index.snapshot(&target).unwrap();
    }
}
}

View File

@@ -12,10 +12,9 @@ use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use crate::index::error::FacetError;
use crate::index::IndexError;
use super::error::Result;
use super::Index;
use super::error::{Result, IndexError};
use super::index::Index;
pub type Document = IndexMap<String, Value>;
type MatchesInfo = BTreeMap<String, Vec<MatchInfo>>;

View File

@@ -12,7 +12,7 @@ use crate::index_controller::updates::status::{Failed, Processed, Processing, Up
use crate::Update;
use super::error::{IndexError, Result};
use super::{Index, IndexMeta};
use super::index::{Index, IndexMeta};
fn serialize_with_wildcard<S>(
field: &Setting<Vec<String>>,