feat: Implemented a basic deserialization

This commit is contained in:
Clément Renault
2018-12-03 22:26:24 +01:00
parent 2a35d72fe2
commit b2cec98805
9 changed files with 655 additions and 60 deletions

View File

@@ -1,13 +1,15 @@
use std::error::Error; use std::error::Error;
use std::marker; use std::{fmt, marker};
use rocksdb::rocksdb::{DB, Snapshot}; use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey};
use rocksdb::rocksdb_options::ReadOptions;
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
use crate::index::schema::Schema;
use crate::blob::positive::PositiveBlob;
use crate::database::deserializer::{Deserializer, DeserializerError}; use crate::database::deserializer::{Deserializer, DeserializerError};
use crate::database::{DATA_INDEX, DATA_SCHEMA}; use crate::database::{DATA_INDEX, DATA_SCHEMA};
use crate::blob::positive::PositiveBlob;
use crate::index::schema::Schema;
use crate::database::{DocumentKey, DocumentKeyAttr};
use crate::DocumentId; use crate::DocumentId;
// FIXME Do not panic! // FIXME Do not panic!
@@ -40,6 +42,10 @@ impl<'a> DatabaseView<'a> {
self.snapshot self.snapshot
} }
/// Looks up the raw value stored under `key` in this view's snapshot.
///
/// Any error reported by the snapshot lookup is boxed and returned as-is.
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
    let value = self.snapshot.get(key)?;
    Ok(value)
}
// TODO create an enum error type // TODO create an enum error type
pub fn retrieve_document<D>(&self, id: DocumentId) -> Result<D, Box<Error>> pub fn retrieve_document<D>(&self, id: DocumentId) -> Result<D, Box<Error>>
where D: DeserializeOwned where D: DeserializeOwned
@@ -60,6 +66,36 @@ impl<'a> DatabaseView<'a> {
} }
} }
/// Debug representation: `DatabaseView(<schema>, [<document key attrs>...])`.
///
/// With the alternate flag (`{:#?}`) the opening parenthesis and the separator
/// are followed by a newline instead of staying on one line.
impl<'a> fmt::Debug for DatabaseView<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Walk the snapshot from the key built for document id 0 onwards.
        let lower = DocumentKey::new(0);
        let mut options = ReadOptions::new();
        options.set_iterate_lower_bound(lower.as_ref());

        let mut iter = self.snapshot.iter_opt(options);
        iter.seek(SeekKey::Start);

        // Only the keys are shown; the stored values are ignored.
        let entries = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key));

        if f.alternate() {
            writeln!(f, "DatabaseView(")?;
        } else {
            write!(f, "DatabaseView(")?;
        }

        // Fully qualified call: only the `fmt` module is imported here, so a
        // bare `.fmt(f)` method call has no formatting trait in scope to resolve to.
        fmt::Debug::fmt(&self.schema, f)?;

        if f.alternate() {
            writeln!(f, ",")?;
        } else {
            write!(f, ", ")?;
        }

        f.debug_list().entries(entries).finish()?;
        write!(f, ")")
    }
}
// TODO this is just an iter::Map !!! // TODO this is just an iter::Map !!!
pub struct DocumentIter<'a, D, I> { pub struct DocumentIter<'a, D, I> {
database_view: &'a DatabaseView<'a>, database_view: &'a DatabaseView<'a>,

View File

@@ -1,11 +1,11 @@
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use rocksdb::rocksdb::{DB, Snapshot}; use rocksdb::rocksdb::{DB, Snapshot, SeekKey};
use rocksdb::rocksdb_options::ReadOptions; use rocksdb::rocksdb_options::ReadOptions;
use serde::de::value::MapDeserializer;
use serde::forward_to_deserialize_any; use serde::forward_to_deserialize_any;
use serde::de::Visitor; use serde::de::value::MapDeserializer;
use serde::de::{self, Visitor, IntoDeserializer};
use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
use crate::index::schema::Schema; use crate::index::schema::Schema;
@@ -23,7 +23,7 @@ impl<'a> Deserializer<'a> {
} }
} }
impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
type Error = DeserializerError; type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
@@ -35,8 +35,7 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
forward_to_deserialize_any! { forward_to_deserialize_any! {
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
bytes byte_buf unit_struct tuple_struct bytes byte_buf unit_struct tuple_struct
identifier tuple ignored_any option newtype_struct enum identifier tuple ignored_any option newtype_struct enum struct
struct
} }
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error> fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
@@ -48,14 +47,20 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
options.set_iterate_lower_bound(lower.as_ref()); options.set_iterate_lower_bound(lower.as_ref());
options.set_iterate_upper_bound(upper.as_ref()); options.set_iterate_upper_bound(upper.as_ref());
let mut db_iter = self.snapshot.iter_opt(options); let mut iter = self.snapshot.iter_opt(options);
let iter = db_iter.map(|(key, value)| { iter.seek(SeekKey::Start);
if iter.kv().is_none() {
// FIXME return an error
}
let iter = iter.map(|(key, value)| {
// retrieve the schema attribute name // retrieve the schema attribute name
// from the schema attribute number // from the schema attribute number
let document_key_attr = DocumentKeyAttr::from_bytes(&key); let document_key_attr = DocumentKeyAttr::from_bytes(&key);
let schema_attr = document_key_attr.attribute(); let schema_attr = document_key_attr.attribute();
let attribute_name = self.schema.attribute_name(schema_attr); let attribute_name = self.schema.attribute_name(schema_attr);
(attribute_name, value) (attribute_name, Value(value))
}); });
let map_deserializer = MapDeserializer::new(iter); let map_deserializer = MapDeserializer::new(iter);
@@ -63,12 +68,101 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
} }
} }
/// Raw bytes of one stored attribute, as read back from the database iterator.
struct Value(Vec<u8>);

impl<'de> IntoDeserializer<'de, DeserializerError> for Value {
    // `Value` acts as its own deserializer, so the conversion is the identity.
    type Deserializer = Value;

    fn into_deserializer(self) -> Value {
        self
    }
}
/// Generates `deserialize_*` methods that decode `self.0` with bincode as the
/// given type, then hand the decoded value to the visitor through the matching
/// method of that value's own serde deserializer.
///
/// Bincode failures are converted with `de::Error::custom`.
macro_rules! forward_to_bincode_values {
    ($($ty:ident => $de_method:ident,)*) => {
        $(
            fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
            where V: de::Visitor<'de>
            {
                let decoded = bincode::deserialize::<$ty>(&self.0)
                    .map_err(<Self::Error as de::Error>::custom)?;
                decoded.into_deserializer().$de_method(visitor)
            }
        )*
    }
}
/// Deserializes one stored attribute value.
///
/// Typed requests (strings, byte buffers, numbers, `bool`, `char`) decode the
/// raw bytes with bincode; everything else falls back to `deserialize_any`.
///
/// NOTE(review): the update builder in this commit stores `value.as_bytes()`;
/// confirm that the bytes reaching this impl really are bincode-encoded.
impl<'de> de::Deserializer<'de> for Value {
    type Error = DeserializerError;

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: Visitor<'de>
    {
        // No type hint available: expose the raw bytes through serde's
        // deserializer for `Vec<u8>`.
        self.0.into_deserializer().deserialize_any(visitor)
    }

    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: Visitor<'de>
    {
        // Borrowed strings are handled exactly like owned ones.
        self.deserialize_string(visitor)
    }

    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: Visitor<'de>
    {
        // Borrowed byte slices are handled exactly like owned buffers.
        self.deserialize_byte_buf(visitor)
    }

    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: Visitor<'de>
    {
        // `Vec<u8>` is not a single identifier, so it cannot go through
        // `forward_to_bincode_values!` and is spelled out by hand.
        match bincode::deserialize::<Vec<u8>>(&self.0) {
            Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor),
            Err(e) => Err(de::Error::custom(e)),
        }
    }

    forward_to_bincode_values! {
        String => deserialize_string,
        char => deserialize_char,
        bool => deserialize_bool,
        u8 => deserialize_u8,
        u16 => deserialize_u16,
        u32 => deserialize_u32,
        u64 => deserialize_u64,
        i8 => deserialize_i8,
        i16 => deserialize_i16,
        i32 => deserialize_i32,
        i64 => deserialize_i64,
        f32 => deserialize_f32,
        f64 => deserialize_f64,
    }

    forward_to_deserialize_any! {
        unit seq map
        unit_struct tuple_struct
        identifier tuple ignored_any option newtype_struct enum struct
    }
}
#[derive(Debug)] #[derive(Debug)]
pub enum DeserializerError { pub enum DeserializerError {
Custom(String), Custom(String),
} }
impl serde::de::Error for DeserializerError { impl de::Error for DeserializerError {
fn custom<T: fmt::Display>(msg: T) -> Self { fn custom<T: fmt::Display>(msg: T) -> Self {
DeserializerError::Custom(msg.to_string()) DeserializerError::Custom(msg.to_string())
} }

View File

@@ -1,5 +1,6 @@
use std::io::{Cursor, Read, Write}; use std::io::{Cursor, Read, Write};
use std::mem::size_of; use std::mem::size_of;
use std::fmt;
use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt}; use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt};
@@ -48,6 +49,14 @@ impl AsRef<[u8]> for DocumentKey {
} }
} }
/// Debug output shows the decoded document id rather than the raw key bytes.
impl fmt::Debug for DocumentKey {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let document_id = self.document_id();
        let mut out = f.debug_struct("DocumentKey");
        out.field("document_id", &document_id);
        out.finish()
    }
}
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]); pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]);
@@ -94,3 +103,12 @@ impl AsRef<[u8]> for DocumentKeyAttr {
&self.0 &self.0
} }
} }
/// Debug output shows the decoded document id and the attribute number
/// rather than the raw key bytes.
impl fmt::Debug for DocumentKeyAttr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let document_id = self.document_id();
        let attribute = self.attribute().as_u32();
        let mut out = f.debug_struct("DocumentKeyAttr");
        out.field("document_id", &document_id);
        out.field("attribute", &attribute);
        out.finish()
    }
}

View File

@@ -1,11 +1,13 @@
use std::error::Error; use std::error::Error;
use std::path::Path; use std::path::Path;
use std::fmt;
use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions}; use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions};
use rocksdb::{DB, MergeOperands}; use rocksdb::{DB, DBVector, MergeOperands, SeekKey};
use rocksdb::rocksdb::Writable; use rocksdb::rocksdb::Writable;
pub use crate::database::database_view::DatabaseView; pub use crate::database::database_view::DatabaseView;
pub use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
use crate::index::update::Update; use crate::index::update::Update;
use crate::index::schema::Schema; use crate::index::schema::Schema;
use crate::blob::{self, Blob}; use crate::blob::{self, Blob};
@@ -30,6 +32,7 @@ impl Database {
let path = path.to_string_lossy(); let path = path.to_string_lossy();
let mut opts = DBOptions::new(); let mut opts = DBOptions::new();
opts.create_if_missing(true); opts.create_if_missing(true);
// opts.error_if_exists(true); // FIXME pull request that
let mut cf_opts = ColumnFamilyOptions::new(); let mut cf_opts = ColumnFamilyOptions::new();
cf_opts.add_merge_operator("data-index merge operator", merge_indexes); cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
@@ -80,14 +83,40 @@ impl Database {
Ok(()) Ok(())
} }
/// Looks up the raw value stored under `key` directly in the database.
///
/// Any error reported by the underlying lookup is boxed and returned as-is.
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
    let value = self.0.get(key)?;
    Ok(value)
}
/// Flushes the underlying database, boxing any error it reports.
///
/// The call passes `true` to the database flush.
/// NOTE(review): presumably "wait for completion" — confirm against the rocksdb binding.
pub fn flush(&self) -> Result<(), Box<Error>> {
    self.0.flush(true)?;
    Ok(())
}
pub fn view(&self) -> Result<DatabaseView, Box<Error>> { pub fn view(&self) -> Result<DatabaseView, Box<Error>> {
let snapshot = self.0.snapshot(); let snapshot = self.0.snapshot();
DatabaseView::new(snapshot) DatabaseView::new(snapshot)
} }
} }
/// Debug representation: `Database([<key>, <key>, ...])`.
///
/// Every key of the database is printed as a lossily UTF-8 decoded, quoted
/// string; the stored values are not shown.
impl fmt::Debug for Database {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Database([")?;

        let mut iter = self.0.iter();
        iter.seek(SeekKey::Start);

        let mut first = true;
        // Values are intentionally ignored: only the keys are displayed.
        for (key, _) in &mut iter {
            if !first {
                write!(f, ", ")?;
            }
            first = false;

            let key = String::from_utf8_lossy(&key);
            write!(f, "{:?}", key)?;
        }

        write!(f, "])")
    }
}
fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> { fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
if key != DATA_INDEX { panic!("The merge operator only supports \"data-index\" merging") } if key != DATA_INDEX {
panic!("The merge operator only supports \"data-index\" merging")
}
let capacity = { let capacity = {
let remaining = operands.size_hint().0; let remaining = operands.size_hint().0;
@@ -109,3 +138,90 @@ fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut Merge
let blob = op.merge().expect("BUG: could not merge blobs"); let blob = op.merge().expect("BUG: could not merge blobs");
bincode::serialize(&blob).expect("BUG: could not serialize merged blob") bincode::serialize(&blob).expect("BUG: could not serialize merged blob")
} }
// Tests for the database: build a schema, write documents through a positive
// update file, ingest it and read the documents back through a view.
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
use std::path::PathBuf;
use serde_derive::{Serialize, Deserialize};
use tempfile::tempdir;
use crate::tokenizer::DefaultBuilder;
use crate::index::update::PositiveUpdateBuilder;
use crate::index::schema::{Schema, SchemaBuilder, STORED, INDEXED};
// Ingests an update file containing two documents, then retrieves both
// documents from a view and prints them.
#[test]
fn ingest_update_file() -> Result<(), Box<Error>> {
// Everything lives in a temporary directory that is closed at the end.
let dir = tempdir()?;
let rocksdb_path = dir.path().join("rocksdb.rdb");
// Document type written to the database.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
struct SimpleDoc {
title: String,
description: String,
}
// Attribute handles returned by the schema builder; only the commented-out
// `update_field` calls below use them.
let title;
let description;
// Both attributes are declared as stored and indexed.
let schema = {
let mut builder = SchemaBuilder::new();
title = builder.new_attribute("title", STORED | INDEXED);
description = builder.new_attribute("description", STORED | INDEXED);
builder.build()
};
let database = Database::create(&rocksdb_path, schema.clone())?;
let tokenizer_builder = DefaultBuilder::new();
let update_path = dir.path().join("update.sst");
let doc0 = SimpleDoc {
title: String::from("I am a title"),
description: String::from("I am a description"),
};
let doc1 = SimpleDoc {
title: String::from("I am the second title"),
description: String::from("I am the second description"),
};
// Build the update file by serializing whole documents; the former
// field-by-field calls are kept below, commented out.
let mut update = {
let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder);
// builder.update_field(0, title, doc0.title.clone());
// builder.update_field(0, description, doc0.description.clone());
// builder.update_field(1, title, doc1.title.clone());
// builder.update_field(1, description, doc1.description.clone());
builder.update(0, &doc0).unwrap();
builder.update(1, &doc1).unwrap();
builder.build()?
};
update.set_move(true);
database.ingest_update_file(update)?;
let view = database.view()?;
println!("{:?}", view);
// Type used to read the documents back.
// NOTE(review): `title` is a `char` here while `SimpleDoc::title` is a
// `String`, and the equality assertions below are disabled — this looks
// like work in progress; confirm the intended types before re-enabling them.
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
struct DeSimpleDoc {
title: char,
}
let de_doc0: DeSimpleDoc = view.retrieve_document(0)?;
let de_doc1: DeSimpleDoc = view.retrieve_document(1)?;
println!("{:?}", de_doc0);
println!("{:?}", de_doc1);
// assert_eq!(doc0, de_doc0);
// assert_eq!(doc1, de_doc1);
Ok(dir.close()?)
}
}

View File

@@ -111,7 +111,11 @@ impl Schema {
} }
pub fn attribute_name(&self, attr: SchemaAttr) -> &str { pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
unimplemented!("cannot retrieve the attribute name by its attribute number") // FIXME complexity is insane !
for (key, &value) in &self.attrs {
if value == attr { return &key }
}
panic!("schema attribute name not found for {:?}", attr)
} }
} }

View File

@@ -13,9 +13,6 @@ mod positive;
pub use self::positive::{PositiveUpdateBuilder, NewState}; pub use self::positive::{PositiveUpdateBuilder, NewState};
pub use self::negative::NegativeUpdateBuilder; pub use self::negative::NegativeUpdateBuilder;
const DOC_KEY_LEN: usize = 4 + std::mem::size_of::<u64>();
const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + std::mem::size_of::<u32>();
pub struct Update { pub struct Update {
path: PathBuf, path: PathBuf,
can_be_moved: bool, can_be_moved: bool,
@@ -30,6 +27,10 @@ impl Update {
Ok(Update { path: path.into(), can_be_moved: true }) Ok(Update { path: path.into(), can_be_moved: true })
} }
/// Overrides the `can_be_moved` flag reported by `can_be_moved()`.
pub fn set_move(&mut self, can_be_moved: bool) {
    self.can_be_moved = can_be_moved;
}
pub fn can_be_moved(&self) -> bool { pub fn can_be_moved(&self) -> bool {
self.can_be_moved self.can_be_moved
} }
@@ -38,27 +39,3 @@ impl Update {
self.path self.path
} }
} }
// "doc-{ID_8_BYTES}"
fn raw_document_key(id: DocumentId) -> [u8; DOC_KEY_LEN] {
let mut key = [0; DOC_KEY_LEN];
let mut wtr = Cursor::new(&mut key[..]);
wtr.write_all(b"doc-").unwrap();
wtr.write_u64::<NetworkEndian>(id).unwrap();
key
}
// "doc-{ID_8_BYTES}-{ATTR_4_BYTES}"
fn raw_document_key_attr(id: DocumentId, attr: SchemaAttr) -> [u8; DOC_KEY_ATTR_LEN] {
let mut key = [0; DOC_KEY_ATTR_LEN];
let raw_key = raw_document_key(id);
let mut wtr = Cursor::new(&mut key[..]);
wtr.write_all(&raw_key).unwrap();
wtr.write_all(b"-").unwrap();
wtr.write_u32::<NetworkEndian>(attr.as_u32()).unwrap();
key
}

View File

@@ -4,7 +4,8 @@ use std::error::Error;
use ::rocksdb::rocksdb_options; use ::rocksdb::rocksdb_options;
use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder; use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder;
use crate::index::update::{Update, raw_document_key}; use crate::index::update::Update;
use crate::database::{DocumentKey, DocumentKeyAttr};
use crate::blob::{Blob, NegativeBlob}; use crate::blob::{Blob, NegativeBlob};
use crate::index::DATA_INDEX; use crate::index::DATA_INDEX;
use crate::DocumentId; use crate::DocumentId;
@@ -48,9 +49,9 @@ impl NegativeUpdateBuilder {
}; };
for &document_id in negative_blob.as_ref() { for &document_id in negative_blob.as_ref() {
let start = raw_document_key(document_id); let start = DocumentKey::new(document_id);
let end = raw_document_key(document_id + 1); let end = DocumentKey::new(document_id + 1);
file_writer.delete_range(&start, &end)?; file_writer.delete_range(start.as_ref(), end.as_ref())?;
} }
file_writer.finish()?; file_writer.finish()?;

View File

@@ -1,12 +1,15 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::path::PathBuf; use std::path::PathBuf;
use std::error::Error; use std::error::Error;
use std::fmt;
use ::rocksdb::rocksdb_options; use ::rocksdb::rocksdb_options;
use serde::ser::{self, Serialize};
use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder; use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder;
use crate::index::schema::{SchemaProps, Schema, SchemaAttr}; use crate::index::schema::{SchemaProps, Schema, SchemaAttr};
use crate::index::update::{Update, raw_document_key_attr}; use crate::index::update::Update;
use crate::database::{DocumentKey, DocumentKeyAttr};
use crate::blob::positive::PositiveBlob; use crate::blob::positive::PositiveBlob;
use crate::tokenizer::TokenizerBuilder; use crate::tokenizer::TokenizerBuilder;
use crate::{DocumentId, DocIndex}; use crate::{DocumentId, DocIndex};
@@ -14,10 +17,7 @@ use crate::index::DATA_INDEX;
use crate::blob::Blob; use crate::blob::Blob;
pub enum NewState { pub enum NewState {
Updated { Updated { value: String },
value: String,
props: SchemaProps,
},
Removed, Removed,
} }
@@ -38,10 +38,19 @@ impl<B> PositiveUpdateBuilder<B> {
} }
} }
/// Records the fields of `document` as pending updates for document `id`.
///
/// The document is run through the schema-aware `Serializer`, which writes
/// into this builder's `new_states`; serialization errors are boxed and returned.
pub fn update<T: Serialize>(&mut self, id: DocumentId, document: &T) -> Result<(), Box<Error>> {
    let serializer = Serializer {
        document_id: id,
        schema: &self.schema,
        new_states: &mut self.new_states,
    };

    document.serialize(serializer)?;
    Ok(())
}
// TODO value must be a field that can be indexed // TODO value must be a field that can be indexed
pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) { pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) {
let state = NewState::Updated { value, props: self.schema.props(field) }; self.new_states.insert((id, field), NewState::Updated { value });
self.new_states.insert((id, field), state);
} }
pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) { pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) {
@@ -49,6 +58,298 @@ impl<B> PositiveUpdateBuilder<B> {
} }
} }
/// Errors produced while serializing a document into field updates.
#[derive(Debug)]
pub enum SerializerError {
/// The document names an attribute that the schema does not know.
SchemaDontMatch { attribute: String },
/// The value is of a kind that cannot be serialized as a document;
/// `name` describes that kind.
UnserializableType { name: &'static str },
/// Free-form error message, built by `ser::Error::custom`.
Custom(String),
}
/// Lets serde report arbitrary failures as `SerializerError::Custom`.
impl ser::Error for SerializerError {
    fn custom<T: fmt::Display>(msg: T) -> Self {
        let message = msg.to_string();
        SerializerError::Custom(message)
    }
}
/// Human-readable messages for each serializer error.
impl fmt::Display for SerializerError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            SerializerError::SchemaDontMatch { attribute } => {
                write!(f, "serialized document try to specify the \
                           {:?} attribute that is not known by the schema", attribute)
            },
            SerializerError::UnserializableType { name } => {
                // The trailing `\` keeps the message on a single line; without
                // it the source line break ends up inside the error text.
                write!(f, "Only struct and map types are considered valid documents and \
                           can be serialized, not {} types directly.", name)
            },
            SerializerError::Custom(s) => f.write_str(s),
        }
    }
}

impl Error for SerializerError {}
/// Serde serializer that turns one document into per-attribute states.
struct Serializer<'a> {
// Schema used to resolve attribute names.
schema: &'a Schema,
// Identifier of the document being serialized.
document_id: DocumentId,
// Destination map of pending states, keyed by (document id, attribute).
new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
}
/// Generates serializer methods that reject a primitive type.
///
/// Each `ty => method` pair expands to a `method(self, _: ty)` that always
/// returns `SerializerError::UnserializableType` carrying the name of `ty`.
macro_rules! forward_to_unserializable_type {
    ($($ty:ident => $se_method:ident,)*) => {
        $(
            fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
                // `stringify!` is required: macro variables are not expanded
                // inside string literals, so `"$ty"` would stay `$ty` verbatim.
                Err(SerializerError::UnserializableType { name: stringify!($ty) })
            }
        )*
    }
}
/// Top-level document serializer.
///
/// Only structs and maps are accepted as documents (newtype structs are
/// unwrapped and their inner value is serialized in their place). Every other
/// kind of value is rejected with `SerializerError::UnserializableType`.
impl<'a> ser::Serializer for Serializer<'a> {
    type Ok = ();
    type Error = SerializerError;

    // Sequence- and tuple-like values are never valid documents.
    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeMap = MapSerializer<'a>;
    type SerializeStruct = StructSerializer<'a>;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    // Primitive values cannot be documents on their own.
    forward_to_unserializable_type! {
        bool => serialize_bool,
        char => serialize_char,

        i8 => serialize_i8,
        i16 => serialize_i16,
        i32 => serialize_i32,
        i64 => serialize_i64,

        u8 => serialize_u8,
        u16 => serialize_u16,
        u32 => serialize_u32,
        u64 => serialize_u64,

        f32 => serialize_f32,
        f64 => serialize_f64,
    }

    fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { name: "str" })
    }

    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { name: "&[u8]" })
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { name: "Option" })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
        Err(SerializerError::UnserializableType { name: "Option" })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { name: "()" })
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { name: "unit struct" })
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
        Err(SerializerError::UnserializableType { name: "unit variant" })
    }

    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
        // A newtype wrapper is transparent: serialize the wrapped value instead.
        value.serialize(self)
    }

    fn serialize_newtype_variant<T: ?Sized>(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    {
        Err(SerializerError::UnserializableType { name: "newtype variant" })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Err(SerializerError::UnserializableType { name: "sequence" })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        Err(SerializerError::UnserializableType { name: "tuple" })
    }

    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
        Err(SerializerError::UnserializableType { name: "tuple struct" })
    }

    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { name: "tuple variant" })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        // Hand the schema, document id and state map over to the map serializer.
        Ok(MapSerializer {
            schema: self.schema,
            document_id: self.document_id,
            new_states: self.new_states,
        })
    }

    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
        // Hand the schema, document id and state map over to the struct serializer.
        Ok(StructSerializer {
            schema: self.schema,
            document_id: self.document_id,
            new_states: self.new_states,
        })
    }

    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { name: "struct variant" })
    }
}
// Records one document field as a pending update.
//
// Looks `name` up in the schema: an unknown attribute yields
// `SerializerError::SchemaDontMatch`; a known attribute that is not marked as
// stored is accepted and ignored.
//
// NOTE(review): converting `value` into the stored string is not implemented
// yet — the `unimplemented!()` below panics for every stored attribute.
fn serialize_field<T: ?Sized>(
schema: &Schema,
document_id: DocumentId,
new_states: &mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
name: &str,
value: &T
) -> Result<(), SerializerError>
where T: Serialize,
{
match schema.attribute(name) {
Some(attr) => {
if schema.props(attr).is_stored() {
// TODO: turn the `value` parameter into the stored representation.
let value = unimplemented!();
new_states.insert((document_id, attr), NewState::Updated { value });
}
Ok(())
},
None => Err(SerializerError::SchemaDontMatch { attribute: name.to_owned() }),
}
}
struct StructSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T
) -> Result<(), Self::Error>
where T: Serialize,
{
serialize_field(self.schema, self.document_id, self.new_states, key, value)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
// Serializes the entries of a map document.
//
// NOTE(review): this serializer is only partially implemented, see the
// notes on the individual methods.
struct MapSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
// pending_key: Option<String>,
}
impl<'a> ser::SerializeMap for MapSerializer<'a> {
type Ok = ();
type Error = SerializerError;
// Serializing a key on its own is always rejected.
// NOTE(review): the reported type name is "setmap" — confirm whether that
// wording is intended.
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: Serialize
{
Err(SerializerError::UnserializableType { name: "setmap" })
}
// Not implemented yet: calling this panics.
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: Serialize
{
unimplemented!()
}
// Nothing is left to do once every entry has been visited.
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
// Meant to forward the entry to the free `serialize_field` function.
// NOTE(review): converting `key` into an attribute name is not implemented
// yet, so this currently panics before `serialize_field` is reached.
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V
) -> Result<(), Self::Error>
where K: Serialize, V: Serialize,
{
let key = unimplemented!();
serialize_field(self.schema, self.document_id, self.new_states, key, value)
}
}
// struct MapKeySerializer;
// impl ser::Serializer for MapKeySerializer {
// type Ok = String;
// type Error = SerializerError;
// #[inline]
// fn serialize_str(self, value: &str) -> Result<()> {
// unimplemented!()
// }
// }
impl<B> PositiveUpdateBuilder<B> impl<B> PositiveUpdateBuilder<B>
where B: TokenizerBuilder where B: TokenizerBuilder
{ {
@@ -60,8 +361,9 @@ where B: TokenizerBuilder
let mut builder = UnorderedPositiveBlobBuilder::memory(); let mut builder = UnorderedPositiveBlobBuilder::memory();
for ((document_id, attr), state) in &self.new_states { for ((document_id, attr), state) in &self.new_states {
let props = self.schema.props(*attr);
let value = match state { let value = match state {
NewState::Updated { value, props } if props.is_indexed() => value, NewState::Updated { value } if props.is_indexed() => value,
_ => continue, _ => continue,
}; };
@@ -95,12 +397,13 @@ where B: TokenizerBuilder
// write all the documents fields updates // write all the documents fields updates
for ((id, attr), state) in self.new_states { for ((id, attr), state) in self.new_states {
let key = raw_document_key_attr(id, attr); let key = DocumentKeyAttr::new(id, attr);
let props = self.schema.props(attr);
match state { match state {
NewState::Updated { value, props } => if props.is_stored() { NewState::Updated { value } => if props.is_stored() {
file_writer.put(&key, value.as_bytes())? file_writer.put(key.as_ref(), value.as_bytes())?
}, },
NewState::Removed => file_writer.delete(&key)?, NewState::Removed => file_writer.delete(key.as_ref())?,
} }
} }

View File

@@ -1,6 +1,52 @@
use std::mem; use std::mem;
use self::Separator::*; use self::Separator::*;
/// Tokenizer over one or several owned strings; `I` is the container that
/// holds the text to tokenize.
struct MegaTokenizer<I> {
    strings: I,
}

/// A single string is stored as `Some(string)`.
impl From<String> for MegaTokenizer<Option<String>> {
    fn from(string: String) -> Self {
        let strings = Some(string);
        Self { strings }
    }
}

/// A list of strings is stored as-is.
impl From<Vec<String>> for MegaTokenizer<Vec<String>> {
    fn from(strings: Vec<String>) -> Self {
        Self { strings }
    }
}

/// Yields `(index, word)` pairs.
impl<I> Iterator for MegaTokenizer<I> {
    type Item = (usize, String);

    fn next(&mut self) -> Option<Self::Item> {
        // Tokenization itself is still to be written.
        unimplemented!()
    }
}
// Describes the expected output of `MegaTokenizer`: words come out with an
// increasing index, and the index jumps (1 -> 8 below) when moving on to the
// next string of a list.
// NOTE(review): `MegaTokenizer::next` is still `unimplemented!()`, so this test
// currently panics on the first `next()` call.
#[test]
fn xxx() {
// Single string input.
let s1 = "hello world!";
let mut s1 = MegaTokenizer::from(s1.to_owned());
assert_eq!(s1.next(), Some((0, "hello".into())));
assert_eq!(s1.next(), Some((1, "world".into())));
assert_eq!(s1.next(), None);
// Multiple strings input.
let v1 = vec!["Vin Diesel".to_owned(), "Quentin Tarantino".to_owned()];
let mut v1 = MegaTokenizer::from(v1);
assert_eq!(v1.next(), Some((0, "Vin".into())));
assert_eq!(v1.next(), Some((1, "Diesel".into())));
assert_eq!(v1.next(), Some((8, "Quentin".into())));
assert_eq!(v1.next(), Some((9, "Tarantino".into())));
assert_eq!(v1.next(), None);
}
pub trait TokenizerBuilder { pub trait TokenizerBuilder {
fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=(usize, &'a str)> + 'a>; fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=(usize, &'a str)> + 'a>;
} }