mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	feat: Implemented a basic deserialization
This commit is contained in:
		| @@ -1,13 +1,15 @@ | |||||||
| use std::error::Error; | use std::error::Error; | ||||||
| use std::marker; | use std::{fmt, marker}; | ||||||
|  |  | ||||||
| use rocksdb::rocksdb::{DB, Snapshot}; | use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey}; | ||||||
|  | use rocksdb::rocksdb_options::ReadOptions; | ||||||
| use serde::de::DeserializeOwned; | use serde::de::DeserializeOwned; | ||||||
|  |  | ||||||
| use crate::index::schema::Schema; |  | ||||||
| use crate::blob::positive::PositiveBlob; |  | ||||||
| use crate::database::deserializer::{Deserializer, DeserializerError}; | use crate::database::deserializer::{Deserializer, DeserializerError}; | ||||||
| use crate::database::{DATA_INDEX, DATA_SCHEMA}; | use crate::database::{DATA_INDEX, DATA_SCHEMA}; | ||||||
|  | use crate::blob::positive::PositiveBlob; | ||||||
|  | use crate::index::schema::Schema; | ||||||
|  | use crate::database::{DocumentKey, DocumentKeyAttr}; | ||||||
| use crate::DocumentId; | use crate::DocumentId; | ||||||
|  |  | ||||||
| // FIXME Do not panic! | // FIXME Do not panic! | ||||||
| @@ -40,6 +42,10 @@ impl<'a> DatabaseView<'a> { | |||||||
|         self.snapshot |         self.snapshot | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> { | ||||||
|  |         Ok(self.snapshot.get(key)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // TODO create an enum error type |     // TODO create an enum error type | ||||||
|     pub fn retrieve_document<D>(&self, id: DocumentId) -> Result<D, Box<Error>> |     pub fn retrieve_document<D>(&self, id: DocumentId) -> Result<D, Box<Error>> | ||||||
|     where D: DeserializeOwned |     where D: DeserializeOwned | ||||||
| @@ -60,6 +66,36 @@ impl<'a> DatabaseView<'a> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl<'a> fmt::Debug for DatabaseView<'a> { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|  |         let mut options = ReadOptions::new(); | ||||||
|  |         let lower = DocumentKey::new(0); | ||||||
|  |         options.set_iterate_lower_bound(lower.as_ref()); | ||||||
|  |  | ||||||
|  |         let mut iter = self.snapshot.iter_opt(options); | ||||||
|  |         iter.seek(SeekKey::Start); | ||||||
|  |         let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key)); | ||||||
|  |  | ||||||
|  |         if f.alternate() { | ||||||
|  |             writeln!(f, "DatabaseView(")?; | ||||||
|  |         } else { | ||||||
|  |             write!(f, "DatabaseView(")?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         self.schema.fmt(f)?; | ||||||
|  |  | ||||||
|  |         if f.alternate() { | ||||||
|  |             writeln!(f, ",")?; | ||||||
|  |         } else { | ||||||
|  |             write!(f, ", ")?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         f.debug_list().entries(iter).finish()?; | ||||||
|  |  | ||||||
|  |         write!(f, ")") | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| // TODO this is just an iter::Map !!! | // TODO this is just an iter::Map !!! | ||||||
| pub struct DocumentIter<'a, D, I> { | pub struct DocumentIter<'a, D, I> { | ||||||
|     database_view: &'a DatabaseView<'a>, |     database_view: &'a DatabaseView<'a>, | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
| use std::error::Error; | use std::error::Error; | ||||||
| use std::fmt; | use std::fmt; | ||||||
|  |  | ||||||
| use rocksdb::rocksdb::{DB, Snapshot}; | use rocksdb::rocksdb::{DB, Snapshot, SeekKey}; | ||||||
| use rocksdb::rocksdb_options::ReadOptions; | use rocksdb::rocksdb_options::ReadOptions; | ||||||
| use serde::de::value::MapDeserializer; |  | ||||||
| use serde::forward_to_deserialize_any; | use serde::forward_to_deserialize_any; | ||||||
| use serde::de::Visitor; | use serde::de::value::MapDeserializer; | ||||||
|  | use serde::de::{self, Visitor, IntoDeserializer}; | ||||||
|  |  | ||||||
| use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; | use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; | ||||||
| use crate::index::schema::Schema; | use crate::index::schema::Schema; | ||||||
| @@ -23,7 +23,7 @@ impl<'a> Deserializer<'a> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { | impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> { | ||||||
|     type Error = DeserializerError; |     type Error = DeserializerError; | ||||||
|  |  | ||||||
|     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> |     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
| @@ -35,8 +35,7 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { | |||||||
|     forward_to_deserialize_any! { |     forward_to_deserialize_any! { | ||||||
|         bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq |         bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq | ||||||
|         bytes byte_buf unit_struct tuple_struct |         bytes byte_buf unit_struct tuple_struct | ||||||
|         identifier tuple ignored_any option newtype_struct enum |         identifier tuple ignored_any option newtype_struct enum struct | ||||||
|         struct |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error> |     fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
| @@ -48,14 +47,20 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { | |||||||
|         options.set_iterate_lower_bound(lower.as_ref()); |         options.set_iterate_lower_bound(lower.as_ref()); | ||||||
|         options.set_iterate_upper_bound(upper.as_ref()); |         options.set_iterate_upper_bound(upper.as_ref()); | ||||||
|  |  | ||||||
|         let mut db_iter = self.snapshot.iter_opt(options); |         let mut iter = self.snapshot.iter_opt(options); | ||||||
|         let iter = db_iter.map(|(key, value)| { |         iter.seek(SeekKey::Start); | ||||||
|  |  | ||||||
|  |         if iter.kv().is_none() { | ||||||
|  |             // FIXME return an error | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let iter = iter.map(|(key, value)| { | ||||||
|             // retrieve the schema attribute name |             // retrieve the schema attribute name | ||||||
|             // from the schema attribute number |             // from the schema attribute number | ||||||
|             let document_key_attr = DocumentKeyAttr::from_bytes(&key); |             let document_key_attr = DocumentKeyAttr::from_bytes(&key); | ||||||
|             let schema_attr = document_key_attr.attribute(); |             let schema_attr = document_key_attr.attribute(); | ||||||
|             let attribute_name = self.schema.attribute_name(schema_attr); |             let attribute_name = self.schema.attribute_name(schema_attr); | ||||||
|             (attribute_name, value) |             (attribute_name, Value(value)) | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|         let map_deserializer = MapDeserializer::new(iter); |         let map_deserializer = MapDeserializer::new(iter); | ||||||
| @@ -63,12 +68,101 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | struct Value(Vec<u8>); | ||||||
|  |  | ||||||
|  | impl<'de> IntoDeserializer<'de, DeserializerError> for Value { | ||||||
|  |     type Deserializer = Self; | ||||||
|  |  | ||||||
|  |     fn into_deserializer(self) -> Self::Deserializer { | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | macro_rules! forward_to_bincode_values { | ||||||
|  |     ($($ty:ident => $de_method:ident,)*) => { | ||||||
|  |         $( | ||||||
|  |             fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
|  |                 where V: de::Visitor<'de> | ||||||
|  |             { | ||||||
|  |                 match bincode::deserialize::<$ty>(&self.0) { | ||||||
|  |                     Ok(val) => val.into_deserializer().$de_method(visitor), | ||||||
|  |                     Err(e) => Err(de::Error::custom(e)), | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         )* | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'de, 'a> de::Deserializer<'de> for Value { | ||||||
|  |     type Error = DeserializerError; | ||||||
|  |  | ||||||
|  |     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
|  |     where V: Visitor<'de> | ||||||
|  |     { | ||||||
|  |         self.0.into_deserializer().deserialize_any(visitor) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
|  |     where V: Visitor<'de> | ||||||
|  |     { | ||||||
|  |         self.deserialize_string(visitor) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
|  |     where V: Visitor<'de> | ||||||
|  |     { | ||||||
|  |         match bincode::deserialize::<String>(&self.0) { | ||||||
|  |             Ok(val) => val.into_deserializer().deserialize_string(visitor), | ||||||
|  |             Err(e) => Err(de::Error::custom(e)), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
|  |     where V: Visitor<'de> | ||||||
|  |     { | ||||||
|  |         self.deserialize_byte_buf(visitor) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||||
|  |     where V: Visitor<'de> | ||||||
|  |     { | ||||||
|  |         match bincode::deserialize::<Vec<u8>>(&self.0) { | ||||||
|  |             Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor), | ||||||
|  |             Err(e) => Err(de::Error::custom(e)), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     forward_to_bincode_values! { | ||||||
|  |         char => deserialize_char, | ||||||
|  |         bool => deserialize_bool, | ||||||
|  |  | ||||||
|  |         u8  => deserialize_u8, | ||||||
|  |         u16 => deserialize_u16, | ||||||
|  |         u32 => deserialize_u32, | ||||||
|  |         u64 => deserialize_u64, | ||||||
|  |  | ||||||
|  |         i8  => deserialize_i8, | ||||||
|  |         i16 => deserialize_i16, | ||||||
|  |         i32 => deserialize_i32, | ||||||
|  |         i64 => deserialize_i64, | ||||||
|  |  | ||||||
|  |         f32 => deserialize_f32, | ||||||
|  |         f64 => deserialize_f64, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     forward_to_deserialize_any! { | ||||||
|  |         unit seq map | ||||||
|  |         unit_struct tuple_struct | ||||||
|  |         identifier tuple ignored_any option newtype_struct enum struct | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Debug)] | #[derive(Debug)] | ||||||
| pub enum DeserializerError { | pub enum DeserializerError { | ||||||
|     Custom(String), |     Custom(String), | ||||||
| } | } | ||||||
|  |  | ||||||
| impl serde::de::Error for DeserializerError { | impl de::Error for DeserializerError { | ||||||
|     fn custom<T: fmt::Display>(msg: T) -> Self { |     fn custom<T: fmt::Display>(msg: T) -> Self { | ||||||
|         DeserializerError::Custom(msg.to_string()) |         DeserializerError::Custom(msg.to_string()) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| use std::io::{Cursor, Read, Write}; | use std::io::{Cursor, Read, Write}; | ||||||
| use std::mem::size_of; | use std::mem::size_of; | ||||||
|  | use std::fmt; | ||||||
|  |  | ||||||
| use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt}; | use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt}; | ||||||
|  |  | ||||||
| @@ -48,6 +49,14 @@ impl AsRef<[u8]> for DocumentKey { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl fmt::Debug for DocumentKey { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|  |         f.debug_struct("DocumentKey") | ||||||
|  |             .field("document_id", &self.document_id()) | ||||||
|  |             .finish() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Copy, Clone)] | #[derive(Copy, Clone)] | ||||||
| pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]); | pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]); | ||||||
|  |  | ||||||
| @@ -94,3 +103,12 @@ impl AsRef<[u8]> for DocumentKeyAttr { | |||||||
|         &self.0 |         &self.0 | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl fmt::Debug for DocumentKeyAttr { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|  |         f.debug_struct("DocumentKeyAttr") | ||||||
|  |             .field("document_id", &self.document_id()) | ||||||
|  |             .field("attribute", &self.attribute().as_u32()) | ||||||
|  |             .finish() | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,11 +1,13 @@ | |||||||
| use std::error::Error; | use std::error::Error; | ||||||
| use std::path::Path; | use std::path::Path; | ||||||
|  | use std::fmt; | ||||||
|  |  | ||||||
| use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions}; | use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions}; | ||||||
| use rocksdb::{DB, MergeOperands}; | use rocksdb::{DB, DBVector, MergeOperands, SeekKey}; | ||||||
| use rocksdb::rocksdb::Writable; | use rocksdb::rocksdb::Writable; | ||||||
|  |  | ||||||
| pub use crate::database::database_view::DatabaseView; | pub use crate::database::database_view::DatabaseView; | ||||||
|  | pub use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; | ||||||
| use crate::index::update::Update; | use crate::index::update::Update; | ||||||
| use crate::index::schema::Schema; | use crate::index::schema::Schema; | ||||||
| use crate::blob::{self, Blob}; | use crate::blob::{self, Blob}; | ||||||
| @@ -30,6 +32,7 @@ impl Database { | |||||||
|         let path = path.to_string_lossy(); |         let path = path.to_string_lossy(); | ||||||
|         let mut opts = DBOptions::new(); |         let mut opts = DBOptions::new(); | ||||||
|         opts.create_if_missing(true); |         opts.create_if_missing(true); | ||||||
|  |         // opts.error_if_exists(true); // FIXME pull request that | ||||||
|  |  | ||||||
|         let mut cf_opts = ColumnFamilyOptions::new(); |         let mut cf_opts = ColumnFamilyOptions::new(); | ||||||
|         cf_opts.add_merge_operator("data-index merge operator", merge_indexes); |         cf_opts.add_merge_operator("data-index merge operator", merge_indexes); | ||||||
| @@ -80,14 +83,40 @@ impl Database { | |||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> { | ||||||
|  |         Ok(self.0.get(key)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn flush(&self) -> Result<(), Box<Error>> { | ||||||
|  |         Ok(self.0.flush(true)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn view(&self) -> Result<DatabaseView, Box<Error>> { |     pub fn view(&self) -> Result<DatabaseView, Box<Error>> { | ||||||
|         let snapshot = self.0.snapshot(); |         let snapshot = self.0.snapshot(); | ||||||
|         DatabaseView::new(snapshot) |         DatabaseView::new(snapshot) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl fmt::Debug for Database { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|  |         write!(f, "Database([")?; | ||||||
|  |         let mut iter = self.0.iter(); | ||||||
|  |         iter.seek(SeekKey::Start); | ||||||
|  |         let mut first = true; | ||||||
|  |         for (key, value) in &mut iter { | ||||||
|  |             if !first { write!(f, ", ")?; } | ||||||
|  |             first = false; | ||||||
|  |             let key = String::from_utf8_lossy(&key); | ||||||
|  |             write!(f, "{:?}", key)?; | ||||||
|  |         } | ||||||
|  |         write!(f, "])") | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> { | fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> { | ||||||
|     if key != DATA_INDEX { panic!("The merge operator only supports \"data-index\" merging") } |     if key != DATA_INDEX { | ||||||
|  |         panic!("The merge operator only supports \"data-index\" merging") | ||||||
|  |     } | ||||||
|  |  | ||||||
|     let capacity = { |     let capacity = { | ||||||
|         let remaining = operands.size_hint().0; |         let remaining = operands.size_hint().0; | ||||||
| @@ -109,3 +138,90 @@ fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut Merge | |||||||
|     let blob = op.merge().expect("BUG: could not merge blobs"); |     let blob = op.merge().expect("BUG: could not merge blobs"); | ||||||
|     bincode::serialize(&blob).expect("BUG: could not serialize merged blob") |     bincode::serialize(&blob).expect("BUG: could not serialize merged blob") | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[cfg(test)] | ||||||
|  | mod tests { | ||||||
|  |     use super::*; | ||||||
|  |     use std::error::Error; | ||||||
|  |     use std::path::PathBuf; | ||||||
|  |  | ||||||
|  |     use serde_derive::{Serialize, Deserialize}; | ||||||
|  |     use tempfile::tempdir; | ||||||
|  |  | ||||||
|  |     use crate::tokenizer::DefaultBuilder; | ||||||
|  |     use crate::index::update::PositiveUpdateBuilder; | ||||||
|  |     use crate::index::schema::{Schema, SchemaBuilder, STORED, INDEXED}; | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn ingest_update_file() -> Result<(), Box<Error>> { | ||||||
|  |         let dir = tempdir()?; | ||||||
|  |  | ||||||
|  |         let rocksdb_path = dir.path().join("rocksdb.rdb"); | ||||||
|  |  | ||||||
|  |         #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] | ||||||
|  |         struct SimpleDoc { | ||||||
|  |             title: String, | ||||||
|  |             description: String, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let title; | ||||||
|  |         let description; | ||||||
|  |         let schema = { | ||||||
|  |             let mut builder = SchemaBuilder::new(); | ||||||
|  |             title = builder.new_attribute("title", STORED | INDEXED); | ||||||
|  |             description = builder.new_attribute("description", STORED | INDEXED); | ||||||
|  |             builder.build() | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let database = Database::create(&rocksdb_path, schema.clone())?; | ||||||
|  |         let tokenizer_builder = DefaultBuilder::new(); | ||||||
|  |  | ||||||
|  |         let update_path = dir.path().join("update.sst"); | ||||||
|  |  | ||||||
|  |         let doc0 = SimpleDoc { | ||||||
|  |             title: String::from("I am a title"), | ||||||
|  |             description: String::from("I am a description"), | ||||||
|  |         }; | ||||||
|  |         let doc1 = SimpleDoc { | ||||||
|  |             title: String::from("I am the second title"), | ||||||
|  |             description: String::from("I am the second description"), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let mut update = { | ||||||
|  |             let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder); | ||||||
|  |  | ||||||
|  |             // builder.update_field(0, title, doc0.title.clone()); | ||||||
|  |             // builder.update_field(0, description, doc0.description.clone()); | ||||||
|  |  | ||||||
|  |             // builder.update_field(1, title, doc1.title.clone()); | ||||||
|  |             // builder.update_field(1, description, doc1.description.clone()); | ||||||
|  |  | ||||||
|  |             builder.update(0, &doc0).unwrap(); | ||||||
|  |             builder.update(1, &doc1).unwrap(); | ||||||
|  |  | ||||||
|  |             builder.build()? | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         update.set_move(true); | ||||||
|  |         database.ingest_update_file(update)?; | ||||||
|  |         let view = database.view()?; | ||||||
|  |  | ||||||
|  |         println!("{:?}", view); | ||||||
|  |  | ||||||
|  |         #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] | ||||||
|  |         struct DeSimpleDoc { | ||||||
|  |             title: char, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let de_doc0: DeSimpleDoc = view.retrieve_document(0)?; | ||||||
|  |         let de_doc1: DeSimpleDoc = view.retrieve_document(1)?; | ||||||
|  |  | ||||||
|  |         println!("{:?}", de_doc0); | ||||||
|  |         println!("{:?}", de_doc1); | ||||||
|  |  | ||||||
|  |         // assert_eq!(doc0, de_doc0); | ||||||
|  |         // assert_eq!(doc1, de_doc1); | ||||||
|  |  | ||||||
|  |         Ok(dir.close()?) | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -111,7 +111,11 @@ impl Schema { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn attribute_name(&self, attr: SchemaAttr) -> &str { |     pub fn attribute_name(&self, attr: SchemaAttr) -> &str { | ||||||
|         unimplemented!("cannot retrieve the attribute name by its attribute number") |         // FIXME complexity is insane ! | ||||||
|  |         for (key, &value) in &self.attrs { | ||||||
|  |             if value == attr { return &key } | ||||||
|  |         } | ||||||
|  |         panic!("schema attribute name not found for {:?}", attr) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -13,9 +13,6 @@ mod positive; | |||||||
| pub use self::positive::{PositiveUpdateBuilder, NewState}; | pub use self::positive::{PositiveUpdateBuilder, NewState}; | ||||||
| pub use self::negative::NegativeUpdateBuilder; | pub use self::negative::NegativeUpdateBuilder; | ||||||
|  |  | ||||||
| const DOC_KEY_LEN:      usize = 4 + std::mem::size_of::<u64>(); |  | ||||||
| const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + std::mem::size_of::<u32>(); |  | ||||||
|  |  | ||||||
| pub struct Update { | pub struct Update { | ||||||
|     path: PathBuf, |     path: PathBuf, | ||||||
|     can_be_moved: bool, |     can_be_moved: bool, | ||||||
| @@ -30,6 +27,10 @@ impl Update { | |||||||
|         Ok(Update { path: path.into(), can_be_moved: true }) |         Ok(Update { path: path.into(), can_be_moved: true }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn set_move(&mut self, can_be_moved: bool) { | ||||||
|  |         self.can_be_moved = can_be_moved | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn can_be_moved(&self) -> bool { |     pub fn can_be_moved(&self) -> bool { | ||||||
|         self.can_be_moved |         self.can_be_moved | ||||||
|     } |     } | ||||||
| @@ -38,27 +39,3 @@ impl Update { | |||||||
|         self.path |         self.path | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| // "doc-{ID_8_BYTES}" |  | ||||||
| fn raw_document_key(id: DocumentId) -> [u8; DOC_KEY_LEN] { |  | ||||||
|     let mut key = [0; DOC_KEY_LEN]; |  | ||||||
|  |  | ||||||
|     let mut wtr = Cursor::new(&mut key[..]); |  | ||||||
|     wtr.write_all(b"doc-").unwrap(); |  | ||||||
|     wtr.write_u64::<NetworkEndian>(id).unwrap(); |  | ||||||
|  |  | ||||||
|     key |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // "doc-{ID_8_BYTES}-{ATTR_4_BYTES}" |  | ||||||
| fn raw_document_key_attr(id: DocumentId, attr: SchemaAttr) -> [u8; DOC_KEY_ATTR_LEN] { |  | ||||||
|     let mut key = [0; DOC_KEY_ATTR_LEN]; |  | ||||||
|     let raw_key = raw_document_key(id); |  | ||||||
|  |  | ||||||
|     let mut wtr = Cursor::new(&mut key[..]); |  | ||||||
|     wtr.write_all(&raw_key).unwrap(); |  | ||||||
|     wtr.write_all(b"-").unwrap(); |  | ||||||
|     wtr.write_u32::<NetworkEndian>(attr.as_u32()).unwrap(); |  | ||||||
|  |  | ||||||
|     key |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -4,7 +4,8 @@ use std::error::Error; | |||||||
| use ::rocksdb::rocksdb_options; | use ::rocksdb::rocksdb_options; | ||||||
|  |  | ||||||
| use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder; | use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder; | ||||||
| use crate::index::update::{Update, raw_document_key}; | use crate::index::update::Update; | ||||||
|  | use crate::database::{DocumentKey, DocumentKeyAttr}; | ||||||
| use crate::blob::{Blob, NegativeBlob}; | use crate::blob::{Blob, NegativeBlob}; | ||||||
| use crate::index::DATA_INDEX; | use crate::index::DATA_INDEX; | ||||||
| use crate::DocumentId; | use crate::DocumentId; | ||||||
| @@ -48,9 +49,9 @@ impl NegativeUpdateBuilder { | |||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         for &document_id in negative_blob.as_ref() { |         for &document_id in negative_blob.as_ref() { | ||||||
|             let start = raw_document_key(document_id); |             let start = DocumentKey::new(document_id); | ||||||
|             let end = raw_document_key(document_id + 1); |             let end = DocumentKey::new(document_id + 1); | ||||||
|             file_writer.delete_range(&start, &end)?; |             file_writer.delete_range(start.as_ref(), end.as_ref())?; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         file_writer.finish()?; |         file_writer.finish()?; | ||||||
|   | |||||||
| @@ -1,12 +1,15 @@ | |||||||
| use std::collections::BTreeMap; | use std::collections::BTreeMap; | ||||||
| use std::path::PathBuf; | use std::path::PathBuf; | ||||||
| use std::error::Error; | use std::error::Error; | ||||||
|  | use std::fmt; | ||||||
|  |  | ||||||
| use ::rocksdb::rocksdb_options; | use ::rocksdb::rocksdb_options; | ||||||
|  | use serde::ser::{self, Serialize}; | ||||||
|  |  | ||||||
| use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder; | use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder; | ||||||
| use crate::index::schema::{SchemaProps, Schema, SchemaAttr}; | use crate::index::schema::{SchemaProps, Schema, SchemaAttr}; | ||||||
| use crate::index::update::{Update, raw_document_key_attr}; | use crate::index::update::Update; | ||||||
|  | use crate::database::{DocumentKey, DocumentKeyAttr}; | ||||||
| use crate::blob::positive::PositiveBlob; | use crate::blob::positive::PositiveBlob; | ||||||
| use crate::tokenizer::TokenizerBuilder; | use crate::tokenizer::TokenizerBuilder; | ||||||
| use crate::{DocumentId, DocIndex}; | use crate::{DocumentId, DocIndex}; | ||||||
| @@ -14,10 +17,7 @@ use crate::index::DATA_INDEX; | |||||||
| use crate::blob::Blob; | use crate::blob::Blob; | ||||||
|  |  | ||||||
| pub enum NewState { | pub enum NewState { | ||||||
|     Updated { |     Updated { value: String }, | ||||||
|         value: String, |  | ||||||
|         props: SchemaProps, |  | ||||||
|     }, |  | ||||||
|     Removed, |     Removed, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -38,10 +38,19 @@ impl<B> PositiveUpdateBuilder<B> { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn update<T: Serialize>(&mut self, id: DocumentId, document: &T) -> Result<(), Box<Error>> { | ||||||
|  |         let serializer = Serializer { | ||||||
|  |             schema: &self.schema, | ||||||
|  |             document_id: id, | ||||||
|  |             new_states: &mut self.new_states | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         Ok(ser::Serialize::serialize(document, serializer)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // TODO value must be a field that can be indexed |     // TODO value must be a field that can be indexed | ||||||
|     pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) { |     pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) { | ||||||
|         let state = NewState::Updated { value, props: self.schema.props(field) }; |         self.new_states.insert((id, field), NewState::Updated { value }); | ||||||
|         self.new_states.insert((id, field), state); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) { |     pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) { | ||||||
| @@ -49,6 +58,298 @@ impl<B> PositiveUpdateBuilder<B> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub enum SerializerError { | ||||||
|  |     SchemaDontMatch { attribute: String }, | ||||||
|  |     UnserializableType { name: &'static str }, | ||||||
|  |     Custom(String), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl ser::Error for SerializerError { | ||||||
|  |     fn custom<T: fmt::Display>(msg: T) -> Self { | ||||||
|  |         SerializerError::Custom(msg.to_string()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for SerializerError { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|  |         match self { | ||||||
|  |             SerializerError::SchemaDontMatch { attribute } => { | ||||||
|  |                 write!(f, "serialized document try to specify the \ | ||||||
|  |                            {:?} attribute that is not known by the schema", attribute) | ||||||
|  |             }, | ||||||
|  |             SerializerError::UnserializableType { name } => { | ||||||
|  |                 write!(f, "Only struct and map types are considered valid documents and | ||||||
|  |                            can be serialized, not {} types directly.", name) | ||||||
|  |             }, | ||||||
|  |             SerializerError::Custom(s) => f.write_str(&s), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Error for SerializerError {} | ||||||
|  |  | ||||||
|  | struct Serializer<'a> { | ||||||
|  |     schema: &'a Schema, | ||||||
|  |     document_id: DocumentId, | ||||||
|  |     new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | macro_rules! forward_to_unserializable_type { | ||||||
|  |     ($($ty:ident => $se_method:ident,)*) => { | ||||||
|  |         $( | ||||||
|  |             fn $se_method(self, v: $ty) -> Result<Self::Ok, Self::Error> { | ||||||
|  |                 Err(SerializerError::UnserializableType { name: "$ty" }) | ||||||
|  |             } | ||||||
|  |         )* | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> ser::Serializer for Serializer<'a> { | ||||||
|  |     type Ok = (); | ||||||
|  |     type Error = SerializerError; | ||||||
|  |     type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>; | ||||||
|  |     type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; | ||||||
|  |     type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||||
|  |     type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||||
|  |     type SerializeMap = MapSerializer<'a>; | ||||||
|  |     type SerializeStruct = StructSerializer<'a>; | ||||||
|  |     type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||||
|  |  | ||||||
|  |     forward_to_unserializable_type! { | ||||||
|  |         bool => serialize_bool, | ||||||
|  |         char => serialize_char, | ||||||
|  |  | ||||||
|  |         i8  => serialize_i8, | ||||||
|  |         i16 => serialize_i16, | ||||||
|  |         i32 => serialize_i32, | ||||||
|  |         i64 => serialize_i64, | ||||||
|  |  | ||||||
|  |         u8  => serialize_u8, | ||||||
|  |         u16 => serialize_u16, | ||||||
|  |         u32 => serialize_u32, | ||||||
|  |         u64 => serialize_u64, | ||||||
|  |  | ||||||
|  |         f32 => serialize_f32, | ||||||
|  |         f64 => serialize_f64, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "str" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "&[u8]" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_none(self) -> Result<Self::Ok, Self::Error> { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "Option" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error> | ||||||
|  |     where T: Serialize, | ||||||
|  |     { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "Option" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "()" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "unit struct" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_unit_variant( | ||||||
|  |         self, | ||||||
|  |         _name: &'static str, | ||||||
|  |         _variant_index: u32, | ||||||
|  |         _variant: &'static str | ||||||
|  |     ) -> Result<Self::Ok, Self::Error> | ||||||
|  |     { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "unit variant" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_newtype_struct<T: ?Sized>( | ||||||
|  |         self, | ||||||
|  |         _name: &'static str, | ||||||
|  |         value: &T | ||||||
|  |     ) -> Result<Self::Ok, Self::Error> | ||||||
|  |     where T: Serialize, | ||||||
|  |     { | ||||||
|  |         value.serialize(self) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_newtype_variant<T: ?Sized>( | ||||||
|  |         self, | ||||||
|  |         _name: &'static str, | ||||||
|  |         _variant_index: u32, | ||||||
|  |         _variant: &'static str, | ||||||
|  |         _value: &T | ||||||
|  |     ) -> Result<Self::Ok, Self::Error> | ||||||
|  |     where T: Serialize, | ||||||
|  |     { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "newtype variant" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "sequence" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "tuple" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_tuple_struct( | ||||||
|  |         self, | ||||||
|  |         _name: &'static str, | ||||||
|  |         _len: usize | ||||||
|  |     ) -> Result<Self::SerializeTupleStruct, Self::Error> | ||||||
|  |     { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "tuple struct" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_tuple_variant( | ||||||
|  |         self, | ||||||
|  |         _name: &'static str, | ||||||
|  |         _variant_index: u32, | ||||||
|  |         _variant: &'static str, | ||||||
|  |         _len: usize | ||||||
|  |     ) -> Result<Self::SerializeTupleVariant, Self::Error> | ||||||
|  |     { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "tuple variant" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { | ||||||
|  |         Ok(MapSerializer { | ||||||
|  |             schema: self.schema, | ||||||
|  |             document_id: self.document_id, | ||||||
|  |             new_states: self.new_states, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_struct( | ||||||
|  |         self, | ||||||
|  |         _name: &'static str, | ||||||
|  |         _len: usize | ||||||
|  |     ) -> Result<Self::SerializeStruct, Self::Error> | ||||||
|  |     { | ||||||
|  |         Ok(StructSerializer { | ||||||
|  |             schema: self.schema, | ||||||
|  |             document_id: self.document_id, | ||||||
|  |             new_states: self.new_states, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_struct_variant( | ||||||
|  |         self, | ||||||
|  |         _name: &'static str, | ||||||
|  |         _variant_index: u32, | ||||||
|  |         _variant: &'static str, | ||||||
|  |         _len: usize | ||||||
|  |     ) -> Result<Self::SerializeStructVariant, Self::Error> | ||||||
|  |     { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "struct variant" }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn serialize_field<T: ?Sized>( | ||||||
|  |     schema: &Schema, | ||||||
|  |     document_id: DocumentId, | ||||||
|  |     new_states: &mut BTreeMap<(DocumentId, SchemaAttr), NewState>, | ||||||
|  |     name: &str, | ||||||
|  |     value: &T | ||||||
|  | ) -> Result<(), SerializerError> | ||||||
|  | where T: Serialize, | ||||||
|  | { | ||||||
|  |     match schema.attribute(name) { | ||||||
|  |         Some(attr) => { | ||||||
|  |             if schema.props(attr).is_stored() { | ||||||
|  |                 let value = unimplemented!(); | ||||||
|  |                 new_states.insert((document_id, attr), NewState::Updated { value }); | ||||||
|  |             } | ||||||
|  |             Ok(()) | ||||||
|  |         }, | ||||||
|  |         None => Err(SerializerError::SchemaDontMatch { attribute: name.to_owned() }), | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | struct StructSerializer<'a> { | ||||||
|  |     schema: &'a Schema, | ||||||
|  |     document_id: DocumentId, | ||||||
|  |     new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> ser::SerializeStruct for StructSerializer<'a> { | ||||||
|  |     type Ok = (); | ||||||
|  |     type Error = SerializerError; | ||||||
|  |  | ||||||
|  |     fn serialize_field<T: ?Sized>( | ||||||
|  |         &mut self, | ||||||
|  |         key: &'static str, | ||||||
|  |         value: &T | ||||||
|  |     ) -> Result<(), Self::Error> | ||||||
|  |     where T: Serialize, | ||||||
|  |     { | ||||||
|  |         serialize_field(self.schema, self.document_id, self.new_states, key, value) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn end(self) -> Result<Self::Ok, Self::Error> { | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | struct MapSerializer<'a> { | ||||||
|  |     schema: &'a Schema, | ||||||
|  |     document_id: DocumentId, | ||||||
|  |     new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>, | ||||||
|  |     // pending_key: Option<String>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> ser::SerializeMap for MapSerializer<'a> { | ||||||
|  |     type Ok = (); | ||||||
|  |     type Error = SerializerError; | ||||||
|  |  | ||||||
|  |     fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error> | ||||||
|  |     where T: Serialize | ||||||
|  |     { | ||||||
|  |         Err(SerializerError::UnserializableType { name: "setmap" }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> | ||||||
|  |     where T: Serialize | ||||||
|  |     { | ||||||
|  |         unimplemented!() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn end(self) -> Result<Self::Ok, Self::Error> { | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn serialize_entry<K: ?Sized, V: ?Sized>( | ||||||
|  |         &mut self, | ||||||
|  |         key: &K, | ||||||
|  |         value: &V | ||||||
|  |     ) -> Result<(), Self::Error> | ||||||
|  |     where K: Serialize, V: Serialize, | ||||||
|  |     { | ||||||
|  |         let key = unimplemented!(); | ||||||
|  |         serialize_field(self.schema, self.document_id, self.new_states, key, value) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // struct MapKeySerializer; | ||||||
|  |  | ||||||
|  | // impl ser::Serializer for MapKeySerializer { | ||||||
|  | //     type Ok = String; | ||||||
|  | //     type Error = SerializerError; | ||||||
|  |  | ||||||
|  | //     #[inline] | ||||||
|  | //     fn serialize_str(self, value: &str) -> Result<()> { | ||||||
|  | //         unimplemented!() | ||||||
|  | //     } | ||||||
|  | // } | ||||||
|  |  | ||||||
| impl<B> PositiveUpdateBuilder<B> | impl<B> PositiveUpdateBuilder<B> | ||||||
| where B: TokenizerBuilder | where B: TokenizerBuilder | ||||||
| { | { | ||||||
| @@ -60,8 +361,9 @@ where B: TokenizerBuilder | |||||||
|  |  | ||||||
|         let mut builder = UnorderedPositiveBlobBuilder::memory(); |         let mut builder = UnorderedPositiveBlobBuilder::memory(); | ||||||
|         for ((document_id, attr), state) in &self.new_states { |         for ((document_id, attr), state) in &self.new_states { | ||||||
|  |             let props = self.schema.props(*attr); | ||||||
|             let value = match state { |             let value = match state { | ||||||
|                 NewState::Updated { value, props } if props.is_indexed() => value, |                 NewState::Updated { value } if props.is_indexed() => value, | ||||||
|                 _ => continue, |                 _ => continue, | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
| @@ -95,12 +397,13 @@ where B: TokenizerBuilder | |||||||
|  |  | ||||||
|         // write all the documents fields updates |         // write all the documents fields updates | ||||||
|         for ((id, attr), state) in self.new_states { |         for ((id, attr), state) in self.new_states { | ||||||
|             let key = raw_document_key_attr(id, attr); |             let key = DocumentKeyAttr::new(id, attr); | ||||||
|  |             let props = self.schema.props(attr); | ||||||
|             match state { |             match state { | ||||||
|                 NewState::Updated { value, props } => if props.is_stored() { |                 NewState::Updated { value } => if props.is_stored() { | ||||||
|                     file_writer.put(&key, value.as_bytes())? |                     file_writer.put(key.as_ref(), value.as_bytes())? | ||||||
|                 }, |                 }, | ||||||
|                 NewState::Removed => file_writer.delete(&key)?, |                 NewState::Removed => file_writer.delete(key.as_ref())?, | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,6 +1,52 @@ | |||||||
| use std::mem; | use std::mem; | ||||||
| use self::Separator::*; | use self::Separator::*; | ||||||
|  |  | ||||||
/// Tokenizer over a source of owned strings (work in progress).
///
/// `I` is the container holding the strings; it is built through the `From`
/// impls below from either one `String` or a `Vec<String>`.
struct MegaTokenizer<I> {
    strings: I,
}

// A single string is stored as `Some(string)`.
impl From<String> for MegaTokenizer<Option<String>> {
    fn from(string: String) -> Self {
        let strings = Some(string);
        Self { strings }
    }
}

// A vector of strings is stored as-is.
impl From<Vec<String>> for MegaTokenizer<Vec<String>> {
    fn from(strings: Vec<String>) -> Self {
        Self { strings }
    }
}

// Items are `(usize, String)` pairs. Judging by the `xxx` test in this file,
// they are presumably (word index, word); the iteration itself is not
// implemented yet and panics.
impl<I> Iterator for MegaTokenizer<I> {
    type Item = (usize, String);

    fn next(&mut self) -> Option<Self::Item> {
        unimplemented!()
    }
}
|  |  | ||||||
// Expected behaviour of `MegaTokenizer`, for one string and for several.
#[test]
fn xxx() {
    // One string: words come out with consecutive indexes.
    let mut single = MegaTokenizer::from("hello world!".to_owned());
    for (index, word) in vec![(0, "hello"), (1, "world")] {
        assert_eq!(single.next(), Some((index, word.into())));
    }
    assert_eq!(single.next(), None);

    // Several strings: the index jumps to 8 at the start of the second one.
    let names = vec!["Vin Diesel".to_owned(), "Quentin Tarantino".to_owned()];
    let mut several = MegaTokenizer::from(names);
    let expected = vec![(0, "Vin"), (1, "Diesel"), (8, "Quentin"), (9, "Tarantino")];
    for (index, word) in expected {
        assert_eq!(several.next(), Some((index, word.into())));
    }
    assert_eq!(several.next(), None);
}
|  |  | ||||||
| pub trait TokenizerBuilder { | pub trait TokenizerBuilder { | ||||||
|     fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=(usize, &'a str)> + 'a>; |     fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=(usize, &'a str)> + 'a>; | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user