Introduce a basically working rkv based MeiliDB

This commit is contained in:
Clément Renault
2019-10-02 17:34:32 +02:00
parent 905bc5c1a6
commit 39e0d9fc4a
22 changed files with 2287 additions and 0 deletions

26
src/store/mod.rs Normal file
View File

@ -0,0 +1,26 @@
mod words;
mod synonyms;
pub use self::words::Words;
pub use self::synonyms::Synonyms;
// Fixed string keys for well-known entries of an index.
// `WORDS_KEY` is the key under which `Words` stores and reads the serialized
// words FST in the main store.
// NOTE(review): the other keys are not referenced by the code visible here;
// presumably they address the schema, synonyms FST, ranked map and document
// count in the same main store — confirm against their users.
const SCHEMA_KEY: &str = "schema";
const WORDS_KEY: &str = "words";
const SYNONYMS_KEY: &str = "synonyms";
const RANKED_MAP_KEY: &str = "ranked-map";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
/// Tells whether the address of the first byte of `bytes` is a multiple of `align`.
///
/// Only the start address is inspected; the length of the slice is irrelevant.
///
/// # Panics
///
/// Panics if `align` is zero (remainder by zero).
fn aligned_to(bytes: &[u8], align: usize) -> bool {
    let address = bytes.as_ptr() as usize;
    address % align == 0
}
/// Opens the stores backing the index called `name`, creating them if needed,
/// and returns the `Words` and `Synonyms` handles built on top of them.
///
/// Three stores are opened, in this order:
/// - `name` itself, the main store, shared by both returned handles;
/// - `{name}-words-indexes`;
/// - `{name}-synonyms`.
///
/// # Errors
///
/// Returns the first `rkv::StoreError` reported while opening a store.
pub fn create(env: &rkv::Rkv, name: &str) -> Result<(Words, Synonyms), rkv::StoreError> {
    let words_indexes_name = format!("{}-words-indexes", name);
    let synonyms_name = format!("{}-synonyms", name);

    let main = env.open_single(name, rkv::StoreOptions::create())?;
    let words_indexes = env.open_single(words_indexes_name.as_str(), rkv::StoreOptions::create())?;
    let synonyms = env.open_single(synonyms_name.as_str(), rkv::StoreOptions::create())?;

    Ok((
        Words { main, words_indexes },
        Synonyms { main, synonyms },
    ))
}

23
src/store/synonyms.rs Normal file
View File

@ -0,0 +1,23 @@
/// Handle on the stores related to the synonyms of an index.
///
/// Built by `store::create`.
pub struct Synonyms {
    // Main store of the index (the one opened under the index name).
    pub(crate) main: rkv::SingleStore,
    // Store opened under `{name}-synonyms`.
    pub(crate) synonyms: rkv::SingleStore,
}
impl Synonyms {
    /// Returns the set of words that have synonyms.
    ///
    /// Placeholder: the stores are not consulted yet and an empty set is
    /// always returned.
    pub fn synonyms_fst<T>(&self, reader: &T) -> Result<fst::Set, rkv::StoreError>
    where
        T: rkv::Readable,
    {
        let empty = fst::Set::default();
        Ok(empty)
    }

    /// Returns the alternatives registered for `word`.
    ///
    /// # Panics
    ///
    /// Not implemented yet: every call panics.
    pub fn alternatives_to<T>(
        &self,
        reader: &T,
        word: &[u8],
    ) -> Result<Option<fst::Set>, rkv::StoreError>
    where
        T: rkv::Readable,
    {
        unimplemented!()
    }
}

91
src/store/words.rs Normal file
View File

@ -0,0 +1,91 @@
use std::borrow::Cow;
use std::sync::Arc;
use std::{mem, ptr};
use zerocopy::{AsBytes, LayoutVerified};
use crate::DocIndex;
use crate::store::aligned_to;
use crate::store::WORDS_KEY;
/// Handle on the stores related to the indexed words of an index.
///
/// Built by `store::create`.
pub struct Words {
    // Main store of the index (the one opened under the index name);
    // holds the serialized words FST under `WORDS_KEY`.
    pub(crate) main: rkv::SingleStore,
    // Store opened under `{name}-words-indexes`.
    pub(crate) words_indexes: rkv::SingleStore,
}
impl Words {
    /// Stores the serialized `fst` (the set of all indexed words) in the main
    /// store under `WORDS_KEY`, replacing any previous value.
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` reported by the write.
    pub fn put_words_fst(
        &self,
        writer: &mut rkv::Writer,
        fst: &fst::Set,
    ) -> Result<(), rkv::StoreError>
    {
        let blob = rkv::Value::Blob(fst.as_fst().as_bytes());
        self.main.put(writer, WORDS_KEY, &blob)
    }

    /// Loads the set of all indexed words from the main store.
    ///
    /// An index in which no FST has been stored yet yields an empty set, so
    /// that a freshly created index can be queried without panicking.
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` reported by the read.
    ///
    /// # Panics
    ///
    /// Panics if the value stored under `WORDS_KEY` is not a blob or is not a
    /// valid FST; both denote a corrupted store.
    pub fn words_fst<T: rkv::Readable>(
        &self,
        reader: &T,
    ) -> Result<fst::Set, rkv::StoreError>
    {
        match self.main.get(reader, WORDS_KEY)? {
            Some(rkv::Value::Blob(bytes)) => {
                let len = bytes.len();
                // The bytes are copied out of the store so that the returned
                // set does not borrow from the reader.
                let bytes = Arc::from(bytes);
                let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
                Ok(fst::Set::from(fst))
            },
            Some(value) => panic!("invalid type {:?}", value),
            None => Ok(fst::Set::default()),
        }
    }

    /// Stores the postings (`words_indexes`) of `word`, replacing any previous
    /// value.
    ///
    /// Postings live in the dedicated `words_indexes` store, not in the main
    /// one: words are arbitrary byte strings and would otherwise share a
    /// keyspace with the fixed keys of the main store (a document containing
    /// the word "words" would overwrite the words FST).
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` reported by the write.
    pub fn put_words_indexes(
        &self,
        writer: &mut rkv::Writer,
        word: &[u8],
        words_indexes: &[DocIndex],
    ) -> Result<(), rkv::StoreError>
    {
        let blob = rkv::Value::Blob(words_indexes.as_bytes());
        self.words_indexes.put(writer, word, &blob)
    }

    /// Fetches the postings of `word` from the `words_indexes` store.
    ///
    /// The stored bytes are reinterpreted in place when they happen to be
    /// correctly aligned for `DocIndex` (`Cow::Borrowed`); otherwise they are
    /// copied into a freshly allocated, aligned vector (`Cow::Owned`).
    ///
    /// Returns `Ok(None)` when the word is unknown, and also when the stored
    /// blob length is not a multiple of the size of `DocIndex`.
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` reported by the read.
    ///
    /// # Panics
    ///
    /// Panics if the stored value is not a blob.
    pub fn word_indexes<'a, T: rkv::Readable>(
        &self,
        reader: &'a T,
        word: &[u8],
    ) -> Result<Option<Cow<'a, [DocIndex]>>, rkv::StoreError>
    {
        let bytes = match self.words_indexes.get(reader, word)? {
            Some(rkv::Value::Blob(bytes)) => bytes,
            Some(value) => panic!("invalid type {:?}", value),
            None => return Ok(None),
        };

        match LayoutVerified::new_slice(bytes) {
            Some(layout) => Ok(Some(Cow::Borrowed(layout.into_slice()))),
            None => {
                let len = bytes.len();
                let elem_size = mem::size_of::<DocIndex>();

                // ensure that it is the alignment that is wrong
                // and the length is valid
                if len % elem_size == 0 && !aligned_to(bytes, mem::align_of::<DocIndex>()) {
                    let elems = len / elem_size;
                    let mut vec = Vec::<DocIndex>::with_capacity(elems);

                    // SAFETY: `vec` owns room for `elems` elements, i.e. exactly
                    // `len` bytes, and being a fresh allocation it cannot overlap
                    // `bytes`. After the copy the first `elems` elements are
                    // fully initialized. Any byte pattern is assumed to be a
                    // valid `DocIndex`, which the zero-copy reinterpretation
                    // above (`LayoutVerified::into_slice`) relies on as well.
                    unsafe {
                        let dst = vec.as_mut_ptr() as *mut u8;
                        ptr::copy_nonoverlapping(bytes.as_ptr(), dst, len);
                        vec.set_len(elems);
                    }

                    Ok(Some(Cow::Owned(vec)))
                } else {
                    Ok(None)
                }
            },
        }
    }
}