feat: Move the Schema to its own workspace crate

This commit is contained in:
Clément Renault
2019-05-29 15:26:18 +02:00
parent ab2ca15c5c
commit 994a0e78f1
22 changed files with 43 additions and 30 deletions

View File

@ -9,15 +9,13 @@ arc-swap = "0.3.11"
bincode = "1.1.2"
deunicode = "1.0.0"
hashbrown = { version = "0.2.2", features = ["serde"] }
linked-hash-map = { version = "0.5.2", features = ["serde_impl"] }
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] }
sdset = "0.3.2"
serde = { version = "1.0.91", features = ["derive"] }
serde_json = { version = "1.0.39", features = ["preserve_order"] }
rocksdb = { version = "0.12.2", default-features = false }
toml = { version = "0.5.0", features = ["preserve_order"] }
zerocopy = "0.2.2"
[dependencies.rmp-serde]
@ -30,3 +28,4 @@ branch = "arc-byte-slice"
[dev-dependencies]
tempfile = "3.0.7"
serde_json = "1.0.39"

View File

@ -1,11 +1,11 @@
use std::convert::TryInto;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rocksdb::DBVector;
use crate::database::raw_index::InnerRawIndex;
use crate::document_attr_key::DocumentAttrKey;
use crate::schema::SchemaAttr;
/// Newtype around an [`InnerRawIndex`]; the wrapped index is only reachable
/// from inside this crate (`pub(crate)` field).
#[derive(Clone)]
pub struct DocumentsIndex(pub(crate) InnerRawIndex);
@ -52,7 +52,7 @@ impl DocumentsIndex {
let from = rocksdb::IteratorMode::Start;
let iterator = self.0.iterator(from)?;
for (key, value) in iterator {
for (key, _) in iterator {
let slice = key.as_ref().try_into().unwrap();
let document_id = DocumentAttrKey::from_be_bytes(slice).document_id;

View File

@ -5,11 +5,11 @@ use std::sync::Arc;
use arc_swap::{ArcSwap, Lease};
use meilidb_core::criterion::Criteria;
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
use meilidb_schema::Schema;
use rmp_serde::decode::Error as RmpError;
use serde::de;
use crate::ranked_map::RankedMap;
use crate::schema::Schema;
use crate::serde::Deserializer;
use super::{Error, CustomSettings};

View File

@ -1,8 +1,9 @@
use std::sync::Arc;
use meilidb_schema::Schema;
use crate::database::raw_index::InnerRawIndex;
use crate::ranked_map::RankedMap;
use crate::schema::Schema;
use super::Error;

View File

@ -2,8 +2,7 @@ use std::collections::hash_map::Entry;
use std::collections::{HashSet, HashMap};
use std::path::Path;
use std::sync::{Arc, RwLock};
use crate::Schema;
use meilidb_schema::Schema;
mod custom_settings;
mod docs_words_index;

View File

@ -1,5 +1,5 @@
use meilidb_core::DocumentId;
use crate::schema::SchemaAttr;
use meilidb_schema::SchemaAttr;
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocumentAttrKey {

View File

@ -3,11 +3,10 @@ use std::convert::TryFrom;
use deunicode::deunicode_with_tofu;
use meilidb_core::{DocumentId, DocIndex};
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
use sdset::SetBuf;
use crate::SchemaAttr;
type Word = Vec<u8>; // TODO make it be a SmallVec
pub struct Indexer {

View File

@ -4,10 +4,8 @@ mod indexer;
mod number;
mod ranked_map;
mod serde;
pub mod schema;
pub use rocksdb;
pub use self::database::{Database, Index, CustomSettings};
pub use self::number::Number;
pub use self::ranked_map::RankedMap;
pub use self::schema::{Schema, SchemaAttr};

View File

@ -2,8 +2,9 @@ use std::io::{Read, Write};
use hashbrown::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use crate::{SchemaAttr, Number};
use crate::Number;
/// Newtype around a `hashbrown::HashMap` that associates a
/// `(DocumentId, SchemaAttr)` pair with a [`Number`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);

View File

@ -1,315 +0,0 @@
use std::collections::{HashMap, BTreeMap};
use std::io::{Read, Write};
use std::error::Error;
use std::{fmt, u16};
use std::ops::BitOr;
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use linked_hash_map::LinkedHashMap;
/// Property set with only the `stored` flag raised.
pub const STORED: SchemaProps = SchemaProps {
    stored: true,
    indexed: false,
    ranked: false,
};

/// Property set with only the `indexed` flag raised.
pub const INDEXED: SchemaProps = SchemaProps {
    stored: false,
    indexed: true,
    ranked: false,
};

/// Property set with only the `ranked` flag raised.
pub const RANKED: SchemaProps = SchemaProps {
    stored: false,
    indexed: false,
    ranked: true,
};
/// Set of boolean flags attached to a schema attribute: `stored`, `indexed`
/// and `ranked`.
///
/// Every flag carries `#[serde(default)]`, so a flag that is absent from the
/// serialized form deserializes to `false`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaProps {
// Read back through `is_stored`.
#[serde(default)]
stored: bool,
// Read back through `is_indexed`.
#[serde(default)]
indexed: bool,
// Read back through `is_ranked`.
#[serde(default)]
ranked: bool,
}
impl SchemaProps {
pub fn is_stored(self) -> bool {
self.stored
}
pub fn is_indexed(self) -> bool {
self.indexed
}
pub fn is_ranked(self) -> bool {
self.ranked
}
}
impl BitOr for SchemaProps {
    type Output = Self;

    /// Combines two property sets: a flag is raised in the result as soon as
    /// it is raised in `self` or in `other`.
    fn bitor(self, other: Self) -> Self::Output {
        let stored = self.stored || other.stored;
        let indexed = self.indexed || other.indexed;
        let ranked = self.ranked || other.ranked;

        SchemaProps { stored, indexed, ranked }
    }
}
/// Mutable description of a schema: an identifier name plus the attributes in
/// the order they were inserted.
///
/// This is also the type that is (de)serialized whenever a `Schema` is read
/// from or written to TOML, JSON or bincode.
#[derive(Serialize, Deserialize)]
pub struct SchemaBuilder {
// Name returned by `Schema::identifier_name` once the schema is built.
identifier: String,
// Insertion-ordered map; an attribute's position becomes its `SchemaAttr`.
attributes: LinkedHashMap<String, SchemaProps>,
}
impl SchemaBuilder {
    /// Creates a builder without any attribute whose identifier is `name`.
    pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder {
        SchemaBuilder {
            identifier: name.into(),
            attributes: LinkedHashMap::new(),
        }
    }

    /// Registers the attribute `name` with the given `props` and returns its
    /// `SchemaAttr`, which is the number of attributes registered before it.
    ///
    /// # Panics
    ///
    /// - if an attribute named `name` was already registered;
    /// - if the builder already holds `u16::max_value() + 1` attributes, as the
    ///   next position would not fit in the `u16` backing a `SchemaAttr`.
    pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
        let len = self.attributes.len();

        // Without this guard `len as u16` would silently wrap around and the
        // new attribute would share its id with an already registered one.
        if len > u16::max_value() as usize {
            panic!("Too many attributes.")
        }

        if self.attributes.insert(name.into(), props).is_some() {
            panic!("Field already inserted.")
        }

        SchemaAttr(len as u16)
    }

    /// Consumes the builder and produces the corresponding `Schema`.
    ///
    /// Attributes are numbered by their position in insertion order.
    pub fn build(self) -> Schema {
        let SchemaBuilder { identifier, attributes } = self;

        let mut attrs = HashMap::with_capacity(attributes.len());
        let mut props = Vec::with_capacity(attributes.len());

        // NOTE(review): a builder filled through `new_attribute` cannot exceed
        // the `u16` range, but a deserialized one could; `i as u16` would then
        // wrap. Confirm whether such inputs must be rejected upstream.
        for (i, (name, prop)) in attributes.into_iter().enumerate() {
            attrs.insert(name.clone(), SchemaAttr(i as u16));
            props.push((name, prop));
        }

        Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) }
    }
}
/// Immutable schema produced by `SchemaBuilder::build`.
///
/// The data sits behind an `Arc`, so cloning a `Schema` clones the pointer and
/// not the attribute tables.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Schema {
inner: Arc<InnerSchema>,
}
/// Data shared by every clone of a `Schema`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct InnerSchema {
// Name of the identifier given to the builder.
identifier: String,
// Attribute name -> attribute id.
attrs: HashMap<String, SchemaAttr>,
// Attribute name and properties, indexed by attribute id (`SchemaAttr.0`).
props: Vec<(String, SchemaProps)>,
}
impl Schema {
pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer)?;
let builder: SchemaBuilder = toml::from_slice(&buffer)?;
Ok(builder.build())
}
pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
let builder = SchemaBuilder { identifier, attributes };
let string = toml::to_string_pretty(&builder)?;
writer.write_all(string.as_bytes())?;
Ok(())
}
pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer)?;
let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
Ok(builder.build())
}
pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
let builder = SchemaBuilder { identifier, attributes };
let string = serde_json::to_string_pretty(&builder)?;
writer.write_all(string.as_bytes())?;
Ok(())
}
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
let builder: SchemaBuilder = bincode::deserialize_from(reader)?;
Ok(builder.build())
}
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
let builder = SchemaBuilder { identifier, attributes };
bincode::serialize_into(writer, &builder)
}
fn attributes_ordered(&self) -> LinkedHashMap<String, SchemaProps> {
let mut ordered = BTreeMap::new();
for (name, attr) in &self.inner.attrs {
let (_, props) = self.inner.props[attr.0 as usize];
ordered.insert(attr.0, (name, props));
}
let mut attributes = LinkedHashMap::with_capacity(ordered.len());
for (_, (name, props)) in ordered {
attributes.insert(name.clone(), props);
}
attributes
}
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
let (_, props) = self.inner.props[attr.0 as usize];
props
}
pub fn identifier_name(&self) -> &str {
&self.inner.identifier
}
pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
self.inner.attrs.get(name.as_ref()).cloned()
}
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
let (name, _) = &self.inner.props[attr.0 as usize];
name
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item=(&str, SchemaAttr, SchemaProps)> + 'a {
self.inner.props.iter()
.map(move |(name, prop)| {
let attr = self.inner.attrs.get(name).unwrap();
(name.as_str(), *attr, *prop)
})
}
}
/// Identifier of a schema attribute, backed by a `u16`.
///
/// `SchemaBuilder` hands these out as the position of the attribute in
/// insertion order.
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub u16);
impl SchemaAttr {
pub const fn new(value: u16) -> SchemaAttr {
SchemaAttr(value)
}
pub const fn min() -> SchemaAttr {
SchemaAttr(u16::min_value())
}
pub const fn max() -> SchemaAttr {
SchemaAttr(u16::max_value())
}
pub fn next(self) -> Option<SchemaAttr> {
self.0.checked_add(1).map(SchemaAttr)
}
pub fn prev(self) -> Option<SchemaAttr> {
self.0.checked_sub(1).map(SchemaAttr)
}
}
impl fmt::Display for SchemaAttr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.0.fmt(f)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::error::Error;

    /// Schema shared by every round-trip test: identifier `id` and three
    /// attributes `alpha` (stored), `beta` (stored + indexed), `gamma` (indexed).
    fn sample_schema() -> Schema {
        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("alpha", STORED);
        builder.new_attribute("beta", STORED | INDEXED);
        builder.new_attribute("gamma", INDEXED);
        builder.build()
    }

    /// A schema written to bincode and read back must equal the original.
    #[test]
    fn serialize_deserialize() -> bincode::Result<()> {
        let schema = sample_schema();

        let mut buffer = Vec::new();
        schema.write_to_bin(&mut buffer)?;
        let schema2 = Schema::read_from_bin(buffer.as_slice())?;

        assert_eq!(schema, schema2);

        Ok(())
    }

    /// TOML round trip, then parsing of a hand-written TOML document that
    /// omits the flags left to `false`.
    #[test]
    fn serialize_deserialize_toml() -> Result<(), Box<dyn Error>> {
        let schema = sample_schema();

        let mut buffer = Vec::new();
        schema.to_toml(&mut buffer)?;
        let schema2 = Schema::from_toml(buffer.as_slice())?;

        assert_eq!(schema, schema2);

        let data = r#"
identifier = "id"
[attributes."alpha"]
stored = true
[attributes."beta"]
stored = true
indexed = true
[attributes."gamma"]
indexed = true
"#;
        let schema2 = Schema::from_toml(data.as_bytes())?;
        assert_eq!(schema, schema2);

        Ok(())
    }

    /// JSON round trip, then parsing of a hand-written JSON document that
    /// omits the flags left to `false`.
    #[test]
    fn serialize_deserialize_json() -> Result<(), Box<dyn Error>> {
        let schema = sample_schema();

        let mut buffer = Vec::new();
        schema.to_json(&mut buffer)?;
        let schema2 = Schema::from_json(buffer.as_slice())?;

        assert_eq!(schema, schema2);

        let data = r#"
{
"identifier": "id",
"attributes": {
"alpha": {
"stored": true
},
"beta": {
"stored": true,
"indexed": true
},
"gamma": {
"indexed": true
}
}
}"#;
        let schema2 = Schema::from_json(data.as_bytes())?;
        assert_eq!(schema, schema2);

        Ok(())
    }
}

View File

@ -2,12 +2,12 @@ use std::collections::HashSet;
use std::io::Cursor;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
use rmp_serde::decode::{Error as RmpError};
use serde::{de, forward_to_deserialize_any};
use crate::database::Index;
use crate::SchemaAttr;
pub struct Deserializer<'a> {
pub document_id: DocumentId,

View File

@ -1,9 +1,9 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use serde::ser;
use serde::Serialize;
use crate::indexer::Indexer as RawIndexer;
use crate::schema::SchemaAttr;
use super::{SerializerError, ConvertToString};
pub struct Indexer<'a> {

View File

@ -26,11 +26,11 @@ use std::collections::BTreeMap;
use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::encode::Error as RmpError;
use serde::ser;
use crate::number::ParseNumberError;
use crate::schema::SchemaAttr;
#[derive(Debug)]
pub enum SerializerError {

View File

@ -1,9 +1,9 @@
use meilidb_core::DocumentId;
use meilidb_schema::Schema;
use serde::ser;
use crate::indexer::Indexer as RawIndexer;
use crate::ranked_map::RankedMap;
use crate::schema::Schema;
use super::{RamDocumentStore, SerializerError, ConvertToString, ConvertToNumber, Indexer};
pub struct Serializer<'a> {

View File

@ -1,6 +1,6 @@
use serde_json::json;
use meilidb_data::{Database, Schema};
use meilidb_data::schema::{SchemaBuilder, STORED, INDEXED};
use meilidb_data::Database;
use meilidb_schema::{Schema, SchemaBuilder, STORED, INDEXED};
fn simple_schema() -> Schema {
let mut builder = SchemaBuilder::with_identifier("objectId");