mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	chore: Remove the database module from meilidb
This commit is contained in:
		| @@ -1,46 +0,0 @@ | ||||
| use std::collections::{HashSet, HashMap}; | ||||
| use serde_derive::{Serialize, Deserialize}; | ||||
|  | ||||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "lowercase")] | ||||
| pub enum RankingOrdering { | ||||
|     Asc, | ||||
|     Dsc | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] | ||||
| pub struct AccessToken { | ||||
|     pub read_key: String, | ||||
|     pub write_key: String, | ||||
|     pub admin_key: String, | ||||
| } | ||||
|  | ||||
|  | ||||
| #[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] | ||||
| pub struct Config { | ||||
|     pub stop_words: Option<HashSet<String>>, | ||||
|     pub ranking_order: Option<Vec<String>>, | ||||
|     pub distinct_field: Option<String>, | ||||
|     pub ranking_rules: Option<HashMap<String, RankingOrdering>>, | ||||
|     pub access_token: Option<AccessToken>, | ||||
| } | ||||
|  | ||||
| impl Config { | ||||
|     pub fn update_with(&mut self, new: Config) { | ||||
|         if let Some(stop_words) = new.stop_words { | ||||
|             self.stop_words = Some(stop_words); | ||||
|         }; | ||||
|         if let Some(ranking_order) = new.ranking_order { | ||||
|             self.ranking_order = Some(ranking_order); | ||||
|         }; | ||||
|         if let Some(distinct_field) = new.distinct_field { | ||||
|             self.distinct_field = Some(distinct_field); | ||||
|         }; | ||||
|         if let Some(ranking_rules) = new.ranking_rules { | ||||
|             self.ranking_rules = Some(ranking_rules); | ||||
|         }; | ||||
|         if let Some(access_token) = new.access_token { | ||||
|             self.access_token = Some(access_token); | ||||
|         }; | ||||
|     } | ||||
| } | ||||
| @@ -1,149 +0,0 @@ | ||||
| use std::io::{Cursor, Read, Write}; | ||||
| use std::mem::size_of; | ||||
| use std::fmt; | ||||
|  | ||||
| use byteorder::{BigEndian, WriteBytesExt, ReadBytesExt}; | ||||
|  | ||||
| use crate::database::schema::SchemaAttr; | ||||
| use meilidb_core::DocumentId; | ||||
|  | ||||
| const DOC_KEY_LEN:      usize = 4 + size_of::<u64>(); | ||||
| const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::<u16>(); | ||||
|  | ||||
| #[derive(Copy, Clone)] | ||||
| pub struct DocumentKey([u8; DOC_KEY_LEN]); | ||||
|  | ||||
| impl DocumentKey { | ||||
|     pub fn new(id: DocumentId) -> DocumentKey { | ||||
|         let mut buffer = [0; DOC_KEY_LEN]; | ||||
|  | ||||
|         let mut wtr = Cursor::new(&mut buffer[..]); | ||||
|         wtr.write_all(b"doc-").unwrap(); | ||||
|         wtr.write_u64::<BigEndian>(id.0).unwrap(); | ||||
|  | ||||
|         DocumentKey(buffer) | ||||
|     } | ||||
|  | ||||
|     pub fn from_bytes(mut bytes: &[u8]) -> DocumentKey { | ||||
|         assert!(bytes.len() >= DOC_KEY_LEN); | ||||
|         assert_eq!(&bytes[..4], b"doc-"); | ||||
|  | ||||
|         let mut buffer = [0; DOC_KEY_LEN]; | ||||
|         bytes.read_exact(&mut buffer).unwrap(); | ||||
|  | ||||
|         DocumentKey(buffer) | ||||
|     } | ||||
|  | ||||
|     pub fn with_attribute(&self, attr: SchemaAttr) -> DocumentKeyAttr { | ||||
|         DocumentKeyAttr::new(self.document_id(), attr) | ||||
|     } | ||||
|  | ||||
|     pub fn with_attribute_min(&self) -> DocumentKeyAttr { | ||||
|         DocumentKeyAttr::new(self.document_id(), SchemaAttr::min()) | ||||
|     } | ||||
|  | ||||
|     pub fn with_attribute_max(&self) -> DocumentKeyAttr { | ||||
|         DocumentKeyAttr::new(self.document_id(), SchemaAttr::max()) | ||||
|     } | ||||
|  | ||||
|     pub fn document_id(&self) -> DocumentId { | ||||
|         let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap(); | ||||
|         DocumentId(id) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl AsRef<[u8]> for DocumentKey { | ||||
|     fn as_ref(&self) -> &[u8] { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Debug for DocumentKey { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         f.debug_struct("DocumentKey") | ||||
|             .field("document_id", &self.document_id()) | ||||
|             .finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] | ||||
| pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]); | ||||
|  | ||||
| impl DocumentKeyAttr { | ||||
|     pub fn new(id: DocumentId, attr: SchemaAttr) -> DocumentKeyAttr { | ||||
|         let mut buffer = [0; DOC_KEY_ATTR_LEN]; | ||||
|         let DocumentKey(raw_key) = DocumentKey::new(id); | ||||
|  | ||||
|         let mut wtr = Cursor::new(&mut buffer[..]); | ||||
|         wtr.write_all(&raw_key).unwrap(); | ||||
|         wtr.write_all(b"-").unwrap(); | ||||
|         wtr.write_u16::<BigEndian>(attr.0).unwrap(); | ||||
|  | ||||
|         DocumentKeyAttr(buffer) | ||||
|     } | ||||
|  | ||||
|     pub fn with_attribute_min(id: DocumentId) -> DocumentKeyAttr { | ||||
|         DocumentKeyAttr::new(id, SchemaAttr::min()) | ||||
|     } | ||||
|  | ||||
|     pub fn with_attribute_max(id: DocumentId) -> DocumentKeyAttr { | ||||
|         DocumentKeyAttr::new(id, SchemaAttr::max()) | ||||
|     } | ||||
|  | ||||
|     pub fn from_bytes(mut bytes: &[u8]) -> DocumentKeyAttr { | ||||
|         assert!(bytes.len() >= DOC_KEY_ATTR_LEN); | ||||
|         assert_eq!(&bytes[..4], b"doc-"); | ||||
|  | ||||
|         let mut buffer = [0; DOC_KEY_ATTR_LEN]; | ||||
|         bytes.read_exact(&mut buffer).unwrap(); | ||||
|  | ||||
|         DocumentKeyAttr(buffer) | ||||
|     } | ||||
|  | ||||
|     pub fn document_id(&self) -> DocumentId { | ||||
|         let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap(); | ||||
|         DocumentId(id) | ||||
|     } | ||||
|  | ||||
|     pub fn attribute(&self) -> SchemaAttr { | ||||
|         let offset = 4 + size_of::<u64>() + 1; | ||||
|         let value = (&self.0[offset..]).read_u16::<BigEndian>().unwrap(); | ||||
|         SchemaAttr::new(value) | ||||
|     } | ||||
|  | ||||
|     pub fn into_document_key(self) -> DocumentKey { | ||||
|         DocumentKey::new(self.document_id()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl AsRef<[u8]> for DocumentKeyAttr { | ||||
|     fn as_ref(&self) -> &[u8] { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Debug for DocumentKeyAttr { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         f.debug_struct("DocumentKeyAttr") | ||||
|             .field("document_id", &self.document_id()) | ||||
|             .field("attribute", &self.attribute().0) | ||||
|             .finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn keep_as_ref_order() { | ||||
|         for (a, b) in (0..).zip(1..).take(u16::max_value() as usize - 1) { | ||||
|             let id = DocumentId(0); | ||||
|             let a = DocumentKeyAttr::new(id, SchemaAttr(a)); | ||||
|             let b = DocumentKeyAttr::new(id, SchemaAttr(b)); | ||||
|  | ||||
|             assert!(a < b); | ||||
|             assert!(a.as_ref() < b.as_ref()); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,899 +0,0 @@ | ||||
| use std::time::Instant; | ||||
| use std::error::Error; | ||||
| use std::ffi::OsStr; | ||||
| use std::sync::Arc; | ||||
| use std::fs; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::atomic::{AtomicBool, Ordering}; | ||||
| use std::ops::{Deref, DerefMut}; | ||||
|  | ||||
| use rocksdb::rocksdb_options::{DBOptions, ColumnFamilyOptions}; | ||||
| use rocksdb::rocksdb::{Writable, Snapshot}; | ||||
| use rocksdb::{DB, MergeOperands}; | ||||
| use size_format::SizeFormatterBinary; | ||||
| use arc_swap::ArcSwap; | ||||
| use lockfree::map::Map; | ||||
| use hashbrown::HashMap; | ||||
| use log::{info, error, warn}; | ||||
|  | ||||
| use crate::database::schema::SchemaAttr; | ||||
| use meilidb_core::shared_data_cursor::FromSharedDataCursor; | ||||
| use meilidb_core::write_to_bytes::WriteToBytes; | ||||
| use meilidb_core::{Index, DocumentId}; | ||||
|  | ||||
| use self::update::{ReadIndexEvent, ReadRankedMapEvent}; | ||||
|  | ||||
| pub use self::config::Config; | ||||
| pub use self::document_key::{DocumentKey, DocumentKeyAttr}; | ||||
| pub use self::view::{DatabaseView, DocumentIter}; | ||||
| pub use self::update::Update; | ||||
| pub use self::serde::SerializerError; | ||||
| pub use self::schema::Schema; | ||||
| pub use self::number::{Number, ParseNumberError}; | ||||
|  | ||||
| pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>; | ||||
|  | ||||
| const DATA_INDEX:      &[u8] = b"data-index"; | ||||
| const DATA_RANKED_MAP: &[u8] = b"data-ranked-map"; | ||||
| const DATA_SCHEMA:     &[u8] = b"data-schema"; | ||||
| const CONFIG:          &[u8] = b"config"; | ||||
|  | ||||
| pub mod config; | ||||
| pub mod schema; | ||||
| mod number; | ||||
| mod document_key; | ||||
| mod serde; | ||||
| mod update; | ||||
| mod view; | ||||
|  | ||||
| fn retrieve_data_schema<D>(snapshot: &Snapshot<D>) -> Result<Schema, Box<Error>> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     match snapshot.get(DATA_SCHEMA)? { | ||||
|         Some(vector) => Ok(Schema::read_from_bin(&*vector)?), | ||||
|         None => Err(String::from("BUG: no schema found in the database").into()), | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn retrieve_data_index<D>(snapshot: &Snapshot<D>) -> Result<Index, Box<Error>> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     let start = Instant::now(); | ||||
|     let vector = snapshot.get(DATA_INDEX)?; | ||||
|     info!("loading index from kv-store took {:.2?}", start.elapsed()); | ||||
|  | ||||
|     match vector { | ||||
|         Some(vector) => { | ||||
|             let start = Instant::now(); | ||||
|  | ||||
|             let bytes = vector.as_ref().to_vec(); | ||||
|             info!("index size is {}B", SizeFormatterBinary::new(bytes.len() as u64)); | ||||
|  | ||||
|             let event = ReadIndexEvent::from_bytes(bytes)?; | ||||
|             let index = event.updated_documents().expect("BUG: invalid event deserialized"); | ||||
|  | ||||
|             info!("loading index from bytes took {:.2?}", start.elapsed()); | ||||
|  | ||||
|             Ok(index) | ||||
|         }, | ||||
|         None => Ok(Index::default()), | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn retrieve_data_ranked_map<D>(snapshot: &Snapshot<D>) -> Result<RankedMap, Box<Error>> | ||||
| where D: Deref<Target=DB>, | ||||
| { | ||||
|     let start = Instant::now(); | ||||
|     let vector = snapshot.get(DATA_RANKED_MAP)?; | ||||
|     info!("loading ranked map from kv-store took {:.2?}", start.elapsed()); | ||||
|  | ||||
|     match vector { | ||||
|         Some(vector) => { | ||||
|             let start = Instant::now(); | ||||
|  | ||||
|             let bytes = vector.as_ref().to_vec(); | ||||
|             info!("ranked map size is {}B", SizeFormatterBinary::new(bytes.len() as u64)); | ||||
|  | ||||
|             let event = ReadRankedMapEvent::from_bytes(bytes)?; | ||||
|             let ranked_map = event.updated_documents().expect("BUG: invalid event deserialized"); | ||||
|  | ||||
|             info!("loading ranked map from bytes took {:.2?}", start.elapsed()); | ||||
|  | ||||
|             Ok(ranked_map) | ||||
|         }, | ||||
|         None => Ok(RankedMap::new()), | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn retrieve_config<D>(snapshot: &Snapshot<D>) -> Result<Config, Box<Error>> | ||||
| where D: Deref<Target=DB>, | ||||
| { | ||||
|     match snapshot.get(CONFIG)? { | ||||
|         Some(vector) => Ok(bincode::deserialize(&*vector)?), | ||||
|         None => Ok(Config::default()), | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn merge_indexes(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> { | ||||
|     use self::update::ReadIndexEvent::{self, *}; | ||||
|     use self::update::WriteIndexEvent; | ||||
|  | ||||
|     let mut index = Index::default(); | ||||
|     for bytes in existing.into_iter().chain(operands) { | ||||
|         match ReadIndexEvent::from_bytes(bytes.to_vec()).unwrap() { | ||||
|             RemovedDocuments(d) => index = index.remove_documents(d.as_ref()), | ||||
|             UpdatedDocuments(i) => index = index.union(&i), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     WriteIndexEvent::UpdatedDocuments(&index).into_bytes() | ||||
| } | ||||
|  | ||||
| fn merge_ranked_maps(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> { | ||||
|     use self::update::ReadRankedMapEvent::{self, *}; | ||||
|     use self::update::WriteRankedMapEvent; | ||||
|  | ||||
|     let mut ranked_map = RankedMap::default(); | ||||
|     for bytes in existing.into_iter().chain(operands) { | ||||
|         match ReadRankedMapEvent::from_bytes(bytes.to_vec()).unwrap() { | ||||
|             RemovedDocuments(d) => ranked_map.retain(|(k, _), _| !d.as_ref().binary_search(k).is_ok()), | ||||
|             UpdatedDocuments(i) => ranked_map.extend(i), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     WriteRankedMapEvent::UpdatedDocuments(&ranked_map).into_bytes() | ||||
| } | ||||
|  | ||||
| fn merge_operator(key: &[u8], existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> { | ||||
|     match key { | ||||
|         DATA_INDEX      => merge_indexes(existing, operands), | ||||
|         DATA_RANKED_MAP => merge_ranked_maps(existing, operands), | ||||
|         key             => panic!("The merge operator does not support merging {:?}", key), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct IndexUpdate { | ||||
|     index: String, | ||||
|     update: Update, | ||||
| } | ||||
|  | ||||
| impl Deref for IndexUpdate { | ||||
|     type Target = Update; | ||||
|  | ||||
|     fn deref(&self) -> &Update { | ||||
|         &self.update | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DerefMut for IndexUpdate { | ||||
|     fn deref_mut(&mut self) -> &mut Update { | ||||
|         &mut self.update | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct DatabaseIndex { | ||||
|     db: Arc<DB>, | ||||
|  | ||||
|     // This view is updated each time the DB ingests an update. | ||||
|     view: ArcSwap<DatabaseView<Arc<DB>>>, | ||||
|  | ||||
|     // The path of the mdb folder stored on disk. | ||||
|     path: PathBuf, | ||||
|  | ||||
|     // must_die false by default, must be set as true when the Index is dropped. | ||||
|     // It is used to erase the folder saved on disk when the user request to delete an index. | ||||
|     must_die: AtomicBool, | ||||
| } | ||||
|  | ||||
| impl DatabaseIndex { | ||||
|     fn create<P: AsRef<Path>>(path: P, schema: &Schema) -> Result<DatabaseIndex, Box<Error>> { | ||||
|         let path = path.as_ref(); | ||||
|         if path.exists() { | ||||
|             return Err(format!("File already exists at path: {}, cannot create database.", | ||||
|                                 path.display()).into()) | ||||
|         } | ||||
|  | ||||
|         let path_lossy = path.to_string_lossy(); | ||||
|         let mut opts = DBOptions::new(); | ||||
|         opts.create_if_missing(true); | ||||
|         // opts.error_if_exists(true); // FIXME pull request that | ||||
|  | ||||
|         let mut cf_opts = ColumnFamilyOptions::new(); | ||||
|         cf_opts.add_merge_operator("data merge operator", merge_operator); | ||||
|  | ||||
|         let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?; | ||||
|  | ||||
|         let mut schema_bytes = Vec::new(); | ||||
|         schema.write_to_bin(&mut schema_bytes)?; | ||||
|         db.put(DATA_SCHEMA, &schema_bytes)?; | ||||
|  | ||||
|         let db = Arc::new(db); | ||||
|         let snapshot = Snapshot::new(db.clone()); | ||||
|         let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?)); | ||||
|  | ||||
|         Ok(DatabaseIndex { | ||||
|             db: db, | ||||
|             view: view, | ||||
|             path: path.to_path_buf(), | ||||
|             must_die: AtomicBool::new(false) | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn open<P: AsRef<Path>>(path: P) -> Result<DatabaseIndex, Box<Error>> { | ||||
|         let path_lossy = path.as_ref().to_string_lossy(); | ||||
|  | ||||
|         let mut opts = DBOptions::new(); | ||||
|         opts.create_if_missing(false); | ||||
|  | ||||
|         let mut cf_opts = ColumnFamilyOptions::new(); | ||||
|         cf_opts.add_merge_operator("data merge operator", merge_operator); | ||||
|  | ||||
|         let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?; | ||||
|  | ||||
|         // FIXME create a generic function to do that ! | ||||
|         let _schema = match db.get(DATA_SCHEMA)? { | ||||
|             Some(value) => Schema::read_from_bin(&*value)?, | ||||
|             None => return Err(String::from("Database does not contain a schema").into()), | ||||
|         }; | ||||
|  | ||||
|         let db = Arc::new(db); | ||||
|         let snapshot = Snapshot::new(db.clone()); | ||||
|         let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?)); | ||||
|  | ||||
|         Ok(DatabaseIndex { | ||||
|             db: db, | ||||
|             view: view, | ||||
|             path: path.as_ref().to_path_buf(), | ||||
|             must_die: AtomicBool::new(false) | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn must_die(&self) { | ||||
|         self.must_die.store(true, Ordering::Relaxed) | ||||
|     } | ||||
|  | ||||
|     fn start_update(&self) -> Result<Update, Box<Error>> { | ||||
|         let schema = match self.db.get(DATA_SCHEMA)? { | ||||
|             Some(value) => Schema::read_from_bin(&*value)?, | ||||
|             None => panic!("Database does not contain a schema"), | ||||
|         }; | ||||
|  | ||||
|         Ok(Update::new(schema)) | ||||
|     } | ||||
|  | ||||
|     fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> { | ||||
|         let batch = update.build()?; | ||||
|         self.db.write(batch)?; | ||||
|         self.db.compact_range(None, None); | ||||
|         self.db.flush(true)?; | ||||
|  | ||||
|         let snapshot = Snapshot::new(self.db.clone()); | ||||
|         let view = Arc::new(DatabaseView::new(snapshot)?); | ||||
|         self.view.store(view.clone()); | ||||
|  | ||||
|         Ok(view) | ||||
|     } | ||||
|  | ||||
|     fn view(&self) -> Arc<DatabaseView<Arc<DB>>> { | ||||
|         self.view.load() | ||||
|     } | ||||
|  | ||||
|     fn get_config(&self) -> Config { | ||||
|         self.view().config().clone() | ||||
|     } | ||||
|  | ||||
|     fn update_config(&self, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{ | ||||
|         let data = bincode::serialize(&config)?; | ||||
|         self.db.put(CONFIG, &data)?; | ||||
|  | ||||
|         let snapshot = Snapshot::new(self.db.clone()); | ||||
|         let view = Arc::new(DatabaseView::new(snapshot)?); | ||||
|         self.view.store(view.clone()); | ||||
|  | ||||
|         Ok(view) | ||||
|     } | ||||
|  | ||||
|     fn path(&self) -> &Path { | ||||
|         self.path.as_path() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Drop for DatabaseIndex { | ||||
|     fn drop(&mut self) { | ||||
|         if self.must_die.load(Ordering::Relaxed) { | ||||
|             if let Err(err) = fs::remove_dir_all(&self.path) { | ||||
|                 error!("Impossible to remove mdb when Database is dropped; {}", err); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct Database { | ||||
|     indexes: Map<String, Arc<DatabaseIndex>>, | ||||
|     path: PathBuf, | ||||
| } | ||||
|  | ||||
| impl Database { | ||||
|     pub fn create<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> { | ||||
|         Ok(Database { | ||||
|             indexes: Map::new(), | ||||
|             path: path.as_ref().to_path_buf(), | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> { | ||||
|         let entries = fs::read_dir(&path)?; | ||||
|  | ||||
|         let indexes = Map::new(); | ||||
|         for entry in entries { | ||||
|             let path = match entry { | ||||
|                 Ok(p) => p.path(), | ||||
|                 Err(err) => { | ||||
|                     warn!("Impossible to retrieve the path from an entry; {}", err); | ||||
|                     continue | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             let name = match path.file_stem().and_then(OsStr::to_str) { | ||||
|                 Some(name) => name.to_owned(), | ||||
|                 None => continue | ||||
|             }; | ||||
|  | ||||
|             let db = match DatabaseIndex::open(path.clone()) { | ||||
|                 Ok(db) => db, | ||||
|                 Err(err) => { | ||||
|                     warn!("Impossible to open the database; {}", err); | ||||
|                     continue | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             info!("Load database {}", name); | ||||
|             indexes.insert(name, Arc::new(db)); | ||||
|         } | ||||
|  | ||||
|         Ok(Database { | ||||
|             indexes: indexes, | ||||
|             path: path.as_ref().to_path_buf(), | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn create_index(&self, name: &str, schema: &Schema) -> Result<(), Box<Error>> { | ||||
|         let index_path = self.path.join(name); | ||||
|  | ||||
|         if index_path.exists() { | ||||
|             return Err("Index already exists".into()); | ||||
|         } | ||||
|  | ||||
|         let index = DatabaseIndex::create(index_path, schema)?; | ||||
|         self.indexes.insert(name.to_owned(), Arc::new(index)); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn delete_index(&self, name: &str) -> Result<(), Box<Error>> { | ||||
|         let index_guard = self.indexes.remove(name).ok_or("Index not found")?; | ||||
|         index_guard.val().must_die(); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn list_indexes(&self) -> Vec<String> { | ||||
|         self.indexes.iter().map(|g| g.key().clone()).collect() | ||||
|     } | ||||
|  | ||||
|     pub fn start_update(&self, index: &str) -> Result<IndexUpdate, Box<Error>> { | ||||
|         let index_guard = self.indexes.get(index).ok_or("Index not found")?; | ||||
|         let update = index_guard.val().start_update()?; | ||||
|  | ||||
|         Ok(IndexUpdate { index: index.to_owned(), update }) | ||||
|     } | ||||
|  | ||||
|     pub fn commit_update(&self, update: IndexUpdate)-> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> { | ||||
|         let index_guard = self.indexes.get(&update.index).ok_or("Index not found")?; | ||||
|  | ||||
|         index_guard.val().commit_update(update.update) | ||||
|     } | ||||
|  | ||||
|     pub fn view(&self, index: &str) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> { | ||||
|         let index_guard = self.indexes.get(index).ok_or("Index not found")?; | ||||
|  | ||||
|         Ok(index_guard.val().view()) | ||||
|     } | ||||
|  | ||||
|     pub fn get_config(&self, index: &str) -> Result<Config, Box<Error>> { | ||||
|         let index_guard = self.indexes.get(index).ok_or("Index not found")?; | ||||
|  | ||||
|         Ok(index_guard.val().get_config()) | ||||
|     } | ||||
|  | ||||
|     pub fn update_config(&self, index: &str, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{ | ||||
|         let index_guard = self.indexes.get(index).ok_or("Index not found")?; | ||||
|  | ||||
|         Ok(index_guard.val().update_config(config)?) | ||||
|     } | ||||
|  | ||||
|     pub fn path(&self) -> &Path { | ||||
|         self.path.as_path() | ||||
|     } | ||||
|  | ||||
|     pub fn index_path(&self, index: &str) -> Result<PathBuf, Box<Error>> { | ||||
|         let index_guard = self.indexes.get(index).ok_or("Index not found")?; | ||||
|         let path = index_guard.val().path(); | ||||
|         Ok(path.to_path_buf()) | ||||
|     } | ||||
|  | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use std::collections::HashSet; | ||||
|     use std::error::Error; | ||||
|  | ||||
|     use serde_derive::{Serialize, Deserialize}; | ||||
|  | ||||
|     use crate::database::schema::{SchemaBuilder, STORED, INDEXED}; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn ingest_one_easy_update() -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let meilidb_path = dir.path().join("meilidb.mdb"); | ||||
|         let meilidb_index_name = "default"; | ||||
|  | ||||
|         #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] | ||||
|         struct SimpleDoc { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|             timestamp: u64, | ||||
|         } | ||||
|  | ||||
|         let schema = { | ||||
|             let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|             builder.new_attribute("id", STORED); | ||||
|             builder.new_attribute("title", STORED | INDEXED); | ||||
|             builder.new_attribute("description", STORED | INDEXED); | ||||
|             builder.new_attribute("timestamp", STORED); | ||||
|             builder.build() | ||||
|         }; | ||||
|  | ||||
|         let database = Database::create(&meilidb_path)?; | ||||
|  | ||||
|         database.create_index(meilidb_index_name, &schema)?; | ||||
|  | ||||
|         let doc0 = SimpleDoc { | ||||
|             id: 0, | ||||
|             title: String::from("I am a title"), | ||||
|             description: String::from("I am a description"), | ||||
|             timestamp: 1234567, | ||||
|         }; | ||||
|         let doc1 = SimpleDoc { | ||||
|             id: 1, | ||||
|             title: String::from("I am the second title"), | ||||
|             description: String::from("I am the second description"), | ||||
|             timestamp: 7654321, | ||||
|         }; | ||||
|  | ||||
|         let mut builder = database.start_update(meilidb_index_name)?; | ||||
|  | ||||
|         let docid0 = builder.update_document(&doc0, &stop_words)?; | ||||
|         let docid1 = builder.update_document(&doc1, &stop_words)?; | ||||
|  | ||||
|         let view = database.commit_update(builder)?; | ||||
|  | ||||
|         let de_doc0: SimpleDoc = view.document_by_id(docid0)?; | ||||
|         let de_doc1: SimpleDoc = view.document_by_id(docid1)?; | ||||
|  | ||||
|         assert_eq!(doc0, de_doc0); | ||||
|         assert_eq!(doc1, de_doc1); | ||||
|  | ||||
|         Ok(dir.close()?) | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn ingest_two_easy_updates() -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let meilidb_path = dir.path().join("meilidb.mdb"); | ||||
|         let meilidb_index_name = "default"; | ||||
|  | ||||
|         #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] | ||||
|         struct SimpleDoc { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|             timestamp: u64, | ||||
|         } | ||||
|  | ||||
|         let schema = { | ||||
|             let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|             builder.new_attribute("id", STORED); | ||||
|             builder.new_attribute("title", STORED | INDEXED); | ||||
|             builder.new_attribute("description", STORED | INDEXED); | ||||
|             builder.new_attribute("timestamp", STORED); | ||||
|             builder.build() | ||||
|         }; | ||||
|  | ||||
|         let database = Database::create(&meilidb_path)?; | ||||
|  | ||||
|         database.create_index(meilidb_index_name, &schema)?; | ||||
|  | ||||
|         let doc0 = SimpleDoc { | ||||
|             id: 0, | ||||
|             title: String::from("I am a title"), | ||||
|             description: String::from("I am a description"), | ||||
|             timestamp: 1234567, | ||||
|         }; | ||||
|         let doc1 = SimpleDoc { | ||||
|             id: 1, | ||||
|             title: String::from("I am the second title"), | ||||
|             description: String::from("I am the second description"), | ||||
|             timestamp: 7654321, | ||||
|         }; | ||||
|         let doc2 = SimpleDoc { | ||||
|             id: 2, | ||||
|             title: String::from("I am the third title"), | ||||
|             description: String::from("I am the third description"), | ||||
|             timestamp: 7654321, | ||||
|         }; | ||||
|         let doc3 = SimpleDoc { | ||||
|             id: 3, | ||||
|             title: String::from("I am the fourth title"), | ||||
|             description: String::from("I am the fourth description"), | ||||
|             timestamp: 7654321, | ||||
|         }; | ||||
|  | ||||
|         let mut builder = database.start_update(meilidb_index_name)?; | ||||
|         let docid0 = builder.update_document(&doc0, &stop_words)?; | ||||
|         let docid1 = builder.update_document(&doc1, &stop_words)?; | ||||
|         database.commit_update(builder)?; | ||||
|  | ||||
|         let mut builder = database.start_update(meilidb_index_name)?; | ||||
|         let docid2 = builder.update_document(&doc2, &stop_words)?; | ||||
|         let docid3 = builder.update_document(&doc3, &stop_words)?; | ||||
|         let view = database.commit_update(builder)?; | ||||
|  | ||||
|         let de_doc0: SimpleDoc = view.document_by_id(docid0)?; | ||||
|         let de_doc1: SimpleDoc = view.document_by_id(docid1)?; | ||||
|  | ||||
|         assert_eq!(doc0, de_doc0); | ||||
|         assert_eq!(doc1, de_doc1); | ||||
|  | ||||
|         let de_doc2: SimpleDoc = view.document_by_id(docid2)?; | ||||
|         let de_doc3: SimpleDoc = view.document_by_id(docid3)?; | ||||
|  | ||||
|         assert_eq!(doc2, de_doc2); | ||||
|         assert_eq!(doc3, de_doc3); | ||||
|  | ||||
|         Ok(dir.close()?) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(all(feature = "nightly", test))] | ||||
| mod bench { | ||||
|     extern crate test; | ||||
|  | ||||
|     use std::collections::HashSet; | ||||
|     use std::error::Error; | ||||
|     use std::iter::repeat_with; | ||||
|     use self::test::Bencher; | ||||
|  | ||||
|     use rand::distributions::Alphanumeric; | ||||
|     use rand_xorshift::XorShiftRng; | ||||
|     use rand::{Rng, SeedableRng}; | ||||
|     use serde_derive::Serialize; | ||||
|     use rand::seq::SliceRandom; | ||||
|  | ||||
|     use crate::tokenizer::DefaultBuilder; | ||||
|     use crate::database::schema::*; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
|     fn random_sentences<R: Rng>(number: usize, rng: &mut R) -> String { | ||||
|         let mut words = String::new(); | ||||
|  | ||||
|         for i in 0..number { | ||||
|             let word_len = rng.gen_range(1, 12); | ||||
|             let iter = repeat_with(|| rng.sample(Alphanumeric)).take(word_len); | ||||
|             words.extend(iter); | ||||
|  | ||||
|             if i == number - 1 { // last word | ||||
|                 let final_ = [".", "?", "!", "..."].choose(rng).cloned(); | ||||
|                 words.extend(final_); | ||||
|             } else { | ||||
|                 let middle = [",", ", "].choose(rng).cloned(); | ||||
|                 words.extend(middle); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         words | ||||
|     } | ||||
|  | ||||
|     #[bench] | ||||
|     fn open_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("title", STORED | INDEXED); | ||||
|         builder.new_attribute("description", STORED | INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let db_path = dir.path().join("bench.mdb"); | ||||
|         let index_name = "default"; | ||||
|  | ||||
|         let database = Database::create(&db_path)?; | ||||
|         database.create_index(index_name, &schema)?; | ||||
|  | ||||
|         #[derive(Serialize)] | ||||
|         struct Document { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|         } | ||||
|  | ||||
|         let mut builder = database.start_update(index_name)?; | ||||
|         let mut rng = XorShiftRng::seed_from_u64(42); | ||||
|  | ||||
|         for i in 0..300 { | ||||
|             let document = Document { | ||||
|                 id: i, | ||||
|                 title: random_sentences(rng.gen_range(1, 8), &mut rng), | ||||
|                 description: random_sentences(rng.gen_range(20, 200), &mut rng), | ||||
|             }; | ||||
|             builder.update_document(&document, &stop_words)?; | ||||
|         } | ||||
|  | ||||
|         database.commit_update(builder)?; | ||||
|  | ||||
|         drop(database); | ||||
|  | ||||
|         bench.iter(|| { | ||||
|             let database = Database::open(db_path.clone()).unwrap(); | ||||
|             test::black_box(|| database); | ||||
|         }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[bench] | ||||
|     fn open_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("title", STORED | INDEXED); | ||||
|         builder.new_attribute("description", STORED | INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let db_path = dir.path().join("bench.mdb"); | ||||
|         let index_name = "default"; | ||||
|  | ||||
|         let database = Database::create(&db_path)?; | ||||
|         database.create_index(index_name, &schema)?; | ||||
|  | ||||
|         #[derive(Serialize)] | ||||
|         struct Document { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|         } | ||||
|  | ||||
|         let mut builder = database.start_update(index_name)?; | ||||
|         let mut rng = XorShiftRng::seed_from_u64(42); | ||||
|  | ||||
|         for i in 0..3000 { | ||||
|             let document = Document { | ||||
|                 id: i, | ||||
|                 title: random_sentences(rng.gen_range(1, 8), &mut rng), | ||||
|                 description: random_sentences(rng.gen_range(20, 200), &mut rng), | ||||
|             }; | ||||
|             builder.update_document(&document, &stop_words)?; | ||||
|         } | ||||
|  | ||||
|         database.commit_update(builder)?; | ||||
|  | ||||
|         drop(database); | ||||
|  | ||||
|         bench.iter(|| { | ||||
|             let database = Database::open(db_path.clone()).unwrap(); | ||||
|             test::black_box(|| database); | ||||
|         }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[bench] | ||||
|     #[ignore] | ||||
|     fn open_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("title", STORED | INDEXED); | ||||
|         builder.new_attribute("description", STORED | INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let db_path = dir.path().join("bench.mdb"); | ||||
|         let index_name = "default"; | ||||
|  | ||||
|         let database = Database::create(&db_path)?; | ||||
|         database.create_index(index_name, &schema)?; | ||||
|  | ||||
|         #[derive(Serialize)] | ||||
|         struct Document { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|         } | ||||
|  | ||||
|         let mut builder = database.start_update(index_name)?; | ||||
|         let mut rng = XorShiftRng::seed_from_u64(42); | ||||
|  | ||||
|         for i in 0..30_000 { | ||||
|             let document = Document { | ||||
|                 id: i, | ||||
|                 title: random_sentences(rng.gen_range(1, 8), &mut rng), | ||||
|                 description: random_sentences(rng.gen_range(20, 200), &mut rng), | ||||
|             }; | ||||
|             builder.update_document(&document, &stop_words)?; | ||||
|         } | ||||
|  | ||||
|         database.commit_update(builder)?; | ||||
|  | ||||
|         drop(database); | ||||
|  | ||||
|         bench.iter(|| { | ||||
|             let database = Database::open(db_path.clone()).unwrap(); | ||||
|             test::black_box(|| database); | ||||
|         }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[bench] | ||||
|     fn search_oneletter_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("title", STORED | INDEXED); | ||||
|         builder.new_attribute("description", STORED | INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let db_path = dir.path().join("bench.mdb"); | ||||
|         let index_name = "default"; | ||||
|  | ||||
|         let database = Database::create(&db_path)?; | ||||
|         database.create_index(index_name, &schema)?; | ||||
|  | ||||
|         #[derive(Serialize)] | ||||
|         struct Document { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|         } | ||||
|  | ||||
|         let mut builder = database.start_update(index_name)?; | ||||
|         let mut rng = XorShiftRng::seed_from_u64(42); | ||||
|  | ||||
|         for i in 0..300 { | ||||
|             let document = Document { | ||||
|                 id: i, | ||||
|                 title: random_sentences(rng.gen_range(1, 8), &mut rng), | ||||
|                 description: random_sentences(rng.gen_range(20, 200), &mut rng), | ||||
|             }; | ||||
|             builder.update_document(&document, &stop_words)?; | ||||
|         } | ||||
|  | ||||
|         let view = database.commit_update(builder)?; | ||||
|  | ||||
|         bench.iter(|| { | ||||
|             for q in &["a", "b", "c", "d", "e"] { | ||||
|                 let documents = view.query_builder().query(q, 0..20); | ||||
|                 test::black_box(|| documents); | ||||
|             } | ||||
|         }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[bench] | ||||
|     fn search_oneletter_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("title", STORED | INDEXED); | ||||
|         builder.new_attribute("description", STORED | INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let db_path = dir.path().join("bench.mdb"); | ||||
|         let index_name = "default"; | ||||
|  | ||||
|         let database = Database::create(&db_path)?; | ||||
|         database.create_index(index_name, &schema)?; | ||||
|  | ||||
|         #[derive(Serialize)] | ||||
|         struct Document { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|         } | ||||
|  | ||||
|         let mut builder = database.start_update(index_name)?; | ||||
|         let mut rng = XorShiftRng::seed_from_u64(42); | ||||
|  | ||||
|         for i in 0..3000 { | ||||
|             let document = Document { | ||||
|                 id: i, | ||||
|                 title: random_sentences(rng.gen_range(1, 8), &mut rng), | ||||
|                 description: random_sentences(rng.gen_range(20, 200), &mut rng), | ||||
|             }; | ||||
|             builder.update_document(&document, &stop_words)?; | ||||
|         } | ||||
|  | ||||
|         let view = database.commit_update(builder)?; | ||||
|  | ||||
|         bench.iter(|| { | ||||
|             for q in &["a", "b", "c", "d", "e"] { | ||||
|                 let documents = view.query_builder().query(q, 0..20); | ||||
|                 test::black_box(|| documents); | ||||
|             } | ||||
|         }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[bench] | ||||
|     #[ignore] | ||||
|     fn search_oneletter_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> { | ||||
|         let dir = tempfile::tempdir()?; | ||||
|         let stop_words = HashSet::new(); | ||||
|  | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("title", STORED | INDEXED); | ||||
|         builder.new_attribute("description", STORED | INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let db_path = dir.path().join("bench.mdb"); | ||||
|         let index_name = "default"; | ||||
|  | ||||
|         let database = Database::create(&db_path)?; | ||||
|         database.create_index(index_name, &schema)?; | ||||
|  | ||||
|         #[derive(Serialize)] | ||||
|         struct Document { | ||||
|             id: u64, | ||||
|             title: String, | ||||
|             description: String, | ||||
|         } | ||||
|  | ||||
|         let mut builder = database.start_update(index_name)?; | ||||
|         let mut rng = XorShiftRng::seed_from_u64(42); | ||||
|  | ||||
|         for i in 0..30_000 { | ||||
|             let document = Document { | ||||
|                 id: i, | ||||
|                 title: random_sentences(rng.gen_range(1, 8), &mut rng), | ||||
|                 description: random_sentences(rng.gen_range(20, 200), &mut rng), | ||||
|             }; | ||||
|             builder.update_document(&document, &stop_words)?; | ||||
|         } | ||||
|  | ||||
|         let view = database.commit_update(builder)?; | ||||
|  | ||||
|         bench.iter(|| { | ||||
|             for q in &["a", "b", "c", "d", "e"] { | ||||
|                 let documents = view.query_builder().query(q, 0..20); | ||||
|                 test::black_box(|| documents); | ||||
|             } | ||||
|         }); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| @@ -1,98 +0,0 @@ | ||||
| use std::cmp::Ordering; | ||||
| use std::str::FromStr; | ||||
| use std::fmt; | ||||
|  | ||||
| use serde_derive::{Serialize, Deserialize}; | ||||
|  | ||||
| #[derive(Serialize, Deserialize)] | ||||
| #[derive(Debug, Copy, Clone)] | ||||
| pub enum Number { | ||||
|     Unsigned(u64), | ||||
|     Signed(i64), | ||||
|     Float(f64), | ||||
| } | ||||
|  | ||||
| impl FromStr for Number { | ||||
|     type Err = ParseNumberError; | ||||
|  | ||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||
|         if let Ok(unsigned) = u64::from_str(s) { | ||||
|             return Ok(Number::Unsigned(unsigned)) | ||||
|         } | ||||
|  | ||||
|         if let Ok(signed) = i64::from_str(s) { | ||||
|             return Ok(Number::Signed(signed)) | ||||
|         } | ||||
|  | ||||
|         if let Ok(float) = f64::from_str(s) { | ||||
|             if float == 0.0 || float.is_normal() { | ||||
|                 return Ok(Number::Float(float)) | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Err(ParseNumberError) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl PartialOrd for Number { | ||||
|     fn partial_cmp(&self, other: &Number) -> Option<Ordering> { | ||||
|         Some(self.cmp(other)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Ord for Number { | ||||
|     fn cmp(&self, other: &Number) -> Ordering { | ||||
|         use Number::*; | ||||
|         match (self, other) { | ||||
|             (Unsigned(s), Unsigned(o)) => s.cmp(o), | ||||
|             (Unsigned(s), Signed(o)) => { | ||||
|                 let s = i128::from(*s); | ||||
|                 let o = i128::from(*o); | ||||
|                 s.cmp(&o) | ||||
|             }, | ||||
|             (Unsigned(s), Float(o)) => { | ||||
|                 let s = *s as f64; | ||||
|                 s.partial_cmp(&o).unwrap_or(Ordering::Equal) | ||||
|             }, | ||||
|  | ||||
|             (Signed(s), Unsigned(o)) => { | ||||
|                 let s = i128::from(*s); | ||||
|                 let o = i128::from(*o); | ||||
|                 s.cmp(&o) | ||||
|             }, | ||||
|             (Signed(s), Signed(o)) => s.cmp(o), | ||||
|             (Signed(s), Float(o)) => { | ||||
|                 let s = *s as f64; | ||||
|                 s.partial_cmp(o).unwrap_or(Ordering::Equal) | ||||
|             }, | ||||
|  | ||||
|             (Float(s), Unsigned(o)) => { | ||||
|                 let o = *o as f64; | ||||
|                 s.partial_cmp(&o).unwrap_or(Ordering::Equal) | ||||
|             }, | ||||
|             (Float(s), Signed(o)) => { | ||||
|                 let o = *o as f64; | ||||
|                 s.partial_cmp(&o).unwrap_or(Ordering::Equal) | ||||
|             }, | ||||
|             (Float(s), Float(o)) => { | ||||
|                 s.partial_cmp(o).unwrap_or(Ordering::Equal) | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl PartialEq for Number { | ||||
|     fn eq(&self, other: &Number) -> bool { | ||||
|         self.cmp(other) == Ordering::Equal | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Eq for Number { } | ||||
|  | ||||
| pub struct ParseNumberError; | ||||
|  | ||||
| impl fmt::Display for ParseNumberError { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         f.write_str("can not parse number") | ||||
|     } | ||||
| } | ||||
| @@ -1,319 +0,0 @@ | ||||
| use std::collections::{HashMap, BTreeMap}; | ||||
| use std::io::{Read, Write}; | ||||
| use std::error::Error; | ||||
| use std::{fmt, u16}; | ||||
| use std::ops::BitOr; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use serde_derive::{Serialize, Deserialize}; | ||||
| use linked_hash_map::LinkedHashMap; | ||||
|  | ||||
| use crate::database::serde::find_id::FindDocumentIdSerializer; | ||||
| use crate::database::serde::SerializerError; | ||||
| use meilidb_core::DocumentId; | ||||
|  | ||||
| pub const STORED: SchemaProps  = SchemaProps { stored: true,  indexed: false, ranked: false }; | ||||
| pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true,  ranked: false }; | ||||
| pub const RANKED: SchemaProps  = SchemaProps { stored: false, indexed: false, ranked: true  }; | ||||
|  | ||||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] | ||||
| pub struct SchemaProps { | ||||
|     #[serde(default)] | ||||
|     stored: bool, | ||||
|  | ||||
|     #[serde(default)] | ||||
|     indexed: bool, | ||||
|  | ||||
|     #[serde(default)] | ||||
|     ranked: bool, | ||||
| } | ||||
|  | ||||
| impl SchemaProps { | ||||
|     pub fn is_stored(self) -> bool { | ||||
|         self.stored | ||||
|     } | ||||
|  | ||||
|     pub fn is_indexed(self) -> bool { | ||||
|         self.indexed | ||||
|     } | ||||
|  | ||||
|     pub fn is_ranked(self) -> bool { | ||||
|         self.ranked | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BitOr for SchemaProps { | ||||
|     type Output = Self; | ||||
|  | ||||
|     fn bitor(self, other: Self) -> Self::Output { | ||||
|         SchemaProps { | ||||
|             stored: self.stored | other.stored, | ||||
|             indexed: self.indexed | other.indexed, | ||||
|             ranked: self.ranked | other.ranked, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Deserialize)] | ||||
| pub struct SchemaBuilder { | ||||
|     identifier: String, | ||||
|     attributes: LinkedHashMap<String, SchemaProps>, | ||||
| } | ||||
|  | ||||
| impl SchemaBuilder { | ||||
|     pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder { | ||||
|         SchemaBuilder { | ||||
|             identifier: name.into(), | ||||
|             attributes: LinkedHashMap::new(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr { | ||||
|         let len = self.attributes.len(); | ||||
|         if self.attributes.insert(name.into(), props).is_some() { | ||||
|             panic!("Field already inserted.") | ||||
|         } | ||||
|         SchemaAttr(len as u16) | ||||
|     } | ||||
|  | ||||
|     pub fn build(self) -> Schema { | ||||
|         let mut attrs = HashMap::new(); | ||||
|         let mut props = Vec::new(); | ||||
|  | ||||
|         for (i, (name, prop)) in self.attributes.into_iter().enumerate() { | ||||
|             attrs.insert(name.clone(), SchemaAttr(i as u16)); | ||||
|             props.push((name, prop)); | ||||
|         } | ||||
|  | ||||
|         let identifier = self.identifier; | ||||
|         Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Eq)] | ||||
| pub struct Schema { | ||||
|     inner: Arc<InnerSchema>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Eq)] | ||||
| struct InnerSchema { | ||||
|     identifier: String, | ||||
|     attrs: HashMap<String, SchemaAttr>, | ||||
|     props: Vec<(String, SchemaProps)>, | ||||
| } | ||||
|  | ||||
| impl Schema { | ||||
|     pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> { | ||||
|         let mut buffer = Vec::new(); | ||||
|         reader.read_to_end(&mut buffer)?; | ||||
|         let builder: SchemaBuilder = toml::from_slice(&buffer)?; | ||||
|         Ok(builder.build()) | ||||
|     } | ||||
|  | ||||
|     pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> { | ||||
|         let identifier = self.inner.identifier.clone(); | ||||
|         let attributes = self.attributes_ordered(); | ||||
|         let builder = SchemaBuilder { identifier, attributes }; | ||||
|  | ||||
|         let string = toml::to_string_pretty(&builder)?; | ||||
|         writer.write_all(string.as_bytes())?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> { | ||||
|         let mut buffer = Vec::new(); | ||||
|         reader.read_to_end(&mut buffer)?; | ||||
|         let builder: SchemaBuilder = serde_json::from_slice(&buffer)?; | ||||
|         Ok(builder.build()) | ||||
|     } | ||||
|  | ||||
|     pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> { | ||||
|         let identifier = self.inner.identifier.clone(); | ||||
|         let attributes = self.attributes_ordered(); | ||||
|         let builder = SchemaBuilder { identifier, attributes }; | ||||
|         let string = serde_json::to_string_pretty(&builder)?; | ||||
|         writer.write_all(string.as_bytes())?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> { | ||||
|         let builder: SchemaBuilder = bincode::deserialize_from(reader)?; | ||||
|         Ok(builder.build()) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> { | ||||
|         let identifier = self.inner.identifier.clone(); | ||||
|         let attributes = self.attributes_ordered(); | ||||
|         let builder = SchemaBuilder { identifier, attributes }; | ||||
|  | ||||
|         bincode::serialize_into(writer, &builder) | ||||
|     } | ||||
|  | ||||
|     fn attributes_ordered(&self) -> LinkedHashMap<String, SchemaProps> { | ||||
|         let mut ordered = BTreeMap::new(); | ||||
|         for (name, attr) in &self.inner.attrs { | ||||
|             let (_, props) = self.inner.props[attr.0 as usize]; | ||||
|             ordered.insert(attr.0, (name, props)); | ||||
|         } | ||||
|  | ||||
|         let mut attributes = LinkedHashMap::with_capacity(ordered.len()); | ||||
|         for (_, (name, props)) in ordered { | ||||
|             attributes.insert(name.clone(), props); | ||||
|         } | ||||
|  | ||||
|         attributes | ||||
|     } | ||||
|  | ||||
|     pub fn document_id<T>(&self, document: T) -> Result<DocumentId, SerializerError> | ||||
|     where T: serde::Serialize, | ||||
|     { | ||||
|         let id_attribute_name = &self.inner.identifier; | ||||
|         let serializer = FindDocumentIdSerializer { id_attribute_name }; | ||||
|         document.serialize(serializer) | ||||
|     } | ||||
|  | ||||
|     pub fn props(&self, attr: SchemaAttr) -> SchemaProps { | ||||
|         let (_, props) = self.inner.props[attr.0 as usize]; | ||||
|         props | ||||
|     } | ||||
|  | ||||
|     pub fn identifier_name(&self) -> &str { | ||||
|         &self.inner.identifier | ||||
|     } | ||||
|  | ||||
|     pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> { | ||||
|         self.inner.attrs.get(name.as_ref()).cloned() | ||||
|     } | ||||
|  | ||||
|     pub fn attribute_name(&self, attr: SchemaAttr) -> &str { | ||||
|         let (name, _) = &self.inner.props[attr.0 as usize]; | ||||
|         name | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Deserialize)] | ||||
| #[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] | ||||
| pub struct SchemaAttr(pub u16); | ||||
|  | ||||
| impl SchemaAttr { | ||||
|     pub fn new(value: u16) -> SchemaAttr { | ||||
|         SchemaAttr(value) | ||||
|     } | ||||
|  | ||||
|     pub fn min() -> SchemaAttr { | ||||
|         SchemaAttr(0) | ||||
|     } | ||||
|  | ||||
|     pub fn next(self) -> Option<SchemaAttr> { | ||||
|         self.0.checked_add(1).map(SchemaAttr) | ||||
|     } | ||||
|  | ||||
|     pub fn prev(self) -> Option<SchemaAttr> { | ||||
|         self.0.checked_sub(1).map(SchemaAttr) | ||||
|     } | ||||
|  | ||||
|     pub fn max() -> SchemaAttr { | ||||
|         SchemaAttr(u16::MAX) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for SchemaAttr { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         self.0.fmt(f) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|     use std::error::Error; | ||||
|  | ||||
|     #[test] | ||||
|     fn serialize_deserialize() -> bincode::Result<()> { | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("alpha", STORED); | ||||
|         builder.new_attribute("beta", STORED | INDEXED); | ||||
|         builder.new_attribute("gamma", INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let mut buffer = Vec::new(); | ||||
|  | ||||
|         schema.write_to_bin(&mut buffer)?; | ||||
|         let schema2 = Schema::read_from_bin(buffer.as_slice())?; | ||||
|  | ||||
|         assert_eq!(schema, schema2); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn serialize_deserialize_toml() -> Result<(), Box<Error>> { | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("alpha", STORED); | ||||
|         builder.new_attribute("beta", STORED | INDEXED); | ||||
|         builder.new_attribute("gamma", INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let mut buffer = Vec::new(); | ||||
|         schema.to_toml(&mut buffer)?; | ||||
|  | ||||
|         let schema2 = Schema::from_toml(buffer.as_slice())?; | ||||
|         assert_eq!(schema, schema2); | ||||
|  | ||||
|         let data = r#" | ||||
|             identifier = "id" | ||||
|  | ||||
|             [attributes."alpha"] | ||||
|             stored = true | ||||
|  | ||||
|             [attributes."beta"] | ||||
|             stored = true | ||||
|             indexed = true | ||||
|  | ||||
|             [attributes."gamma"] | ||||
|             indexed = true | ||||
|         "#; | ||||
|         let schema2 = Schema::from_toml(data.as_bytes())?; | ||||
|         assert_eq!(schema, schema2); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn serialize_deserialize_json() -> Result<(), Box<Error>> { | ||||
|         let mut builder = SchemaBuilder::with_identifier("id"); | ||||
|         builder.new_attribute("alpha", STORED); | ||||
|         builder.new_attribute("beta", STORED | INDEXED); | ||||
|         builder.new_attribute("gamma", INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let mut buffer = Vec::new(); | ||||
|         schema.to_json(&mut buffer)?; | ||||
|  | ||||
|         let schema2 = Schema::from_json(buffer.as_slice())?; | ||||
|         assert_eq!(schema, schema2); | ||||
|  | ||||
|         let data = r#" | ||||
|             { | ||||
|                 "identifier": "id", | ||||
|                 "attributes": { | ||||
|                     "alpha": { | ||||
|                         "stored": true | ||||
|                     }, | ||||
|                     "beta": { | ||||
|                         "stored": true, | ||||
|                         "indexed": true | ||||
|                     }, | ||||
|                     "gamma": { | ||||
|                         "indexed": true | ||||
|                     } | ||||
|                 } | ||||
|             }"#; | ||||
|         let schema2 = Schema::from_json(data.as_bytes())?; | ||||
|         assert_eq!(schema, schema2); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| @@ -1,186 +0,0 @@ | ||||
| use std::error::Error; | ||||
| use std::ops::Deref; | ||||
| use std::fmt; | ||||
|  | ||||
| use rocksdb::rocksdb::{DB, Snapshot, SeekKey}; | ||||
| use rocksdb::rocksdb_options::ReadOptions; | ||||
| use serde::forward_to_deserialize_any; | ||||
| use serde::de::value::MapDeserializer; | ||||
| use serde::de::{self, Visitor, IntoDeserializer}; | ||||
|  | ||||
| use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; | ||||
| use crate::database::schema::Schema; | ||||
| use meilidb_core::DocumentId; | ||||
|  | ||||
| pub struct Deserializer<'a, D> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     snapshot: &'a Snapshot<D>, | ||||
|     schema: &'a Schema, | ||||
|     document_id: DocumentId, | ||||
| } | ||||
|  | ||||
| impl<'a, D> Deserializer<'a, D> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     pub fn new(snapshot: &'a Snapshot<D>, schema: &'a Schema, doc: DocumentId) -> Self { | ||||
|         Deserializer { snapshot, schema, document_id: doc } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'de, 'a, 'b, D> de::Deserializer<'de> for &'b mut Deserializer<'a, D> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     type Error = DeserializerError; | ||||
|  | ||||
|     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|     where V: Visitor<'de> | ||||
|     { | ||||
|         self.deserialize_map(visitor) | ||||
|     } | ||||
|  | ||||
|     forward_to_deserialize_any! { | ||||
|         bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq | ||||
|         bytes byte_buf unit_struct tuple_struct | ||||
|         identifier tuple ignored_any option newtype_struct enum struct | ||||
|     } | ||||
|  | ||||
|     fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|     where V: Visitor<'de> | ||||
|     { | ||||
|         let mut options = ReadOptions::new(); | ||||
|         let lower = DocumentKey::new(self.document_id); | ||||
|         let upper = lower.with_attribute_max(); | ||||
|         options.set_iterate_lower_bound(lower.as_ref()); | ||||
|         options.set_iterate_upper_bound(upper.as_ref()); | ||||
|  | ||||
|         let mut iter = self.snapshot.iter_opt(options); | ||||
|         iter.seek(SeekKey::Start); | ||||
|  | ||||
|         if iter.kv().is_none() { | ||||
|             // FIXME return an error | ||||
|         } | ||||
|  | ||||
|         let iter = iter.map(|(key, value)| { | ||||
|             // retrieve the schema attribute name | ||||
|             // from the schema attribute number | ||||
|             let document_key_attr = DocumentKeyAttr::from_bytes(&key); | ||||
|             let schema_attr = document_key_attr.attribute(); | ||||
|             let attribute_name = self.schema.attribute_name(schema_attr); | ||||
|             (attribute_name, Value(value)) | ||||
|         }); | ||||
|  | ||||
|         let map_deserializer = MapDeserializer::new(iter); | ||||
|         visitor.visit_map(map_deserializer) | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct Value(Vec<u8>); | ||||
|  | ||||
| impl<'de> IntoDeserializer<'de, DeserializerError> for Value { | ||||
|     type Deserializer = Self; | ||||
|  | ||||
|     fn into_deserializer(self) -> Self::Deserializer { | ||||
|         self | ||||
|     } | ||||
| } | ||||
|  | ||||
| macro_rules! forward_to_bincode_values { | ||||
|     ($($ty:ident => $de_method:ident,)*) => { | ||||
|         $( | ||||
|             fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|                 where V: de::Visitor<'de> | ||||
|             { | ||||
|                 match bincode::deserialize::<$ty>(&self.0) { | ||||
|                     Ok(val) => val.into_deserializer().$de_method(visitor), | ||||
|                     Err(e) => Err(de::Error::custom(e)), | ||||
|                 } | ||||
|             } | ||||
|         )* | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'de, 'a> de::Deserializer<'de> for Value { | ||||
|     type Error = DeserializerError; | ||||
|  | ||||
|     fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|     where V: Visitor<'de> | ||||
|     { | ||||
|         self.0.into_deserializer().deserialize_any(visitor) | ||||
|     } | ||||
|  | ||||
|     fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|     where V: Visitor<'de> | ||||
|     { | ||||
|         self.deserialize_string(visitor) | ||||
|     } | ||||
|  | ||||
|     fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|     where V: Visitor<'de> | ||||
|     { | ||||
|         match bincode::deserialize::<String>(&self.0) { | ||||
|             Ok(val) => val.into_deserializer().deserialize_string(visitor), | ||||
|             Err(e) => Err(de::Error::custom(e)), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|     where V: Visitor<'de> | ||||
|     { | ||||
|         self.deserialize_byte_buf(visitor) | ||||
|     } | ||||
|  | ||||
|     fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error> | ||||
|     where V: Visitor<'de> | ||||
|     { | ||||
|         match bincode::deserialize::<Vec<u8>>(&self.0) { | ||||
|             Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor), | ||||
|             Err(e) => Err(de::Error::custom(e)), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     forward_to_bincode_values! { | ||||
|         char => deserialize_char, | ||||
|         bool => deserialize_bool, | ||||
|  | ||||
|         u8  => deserialize_u8, | ||||
|         u16 => deserialize_u16, | ||||
|         u32 => deserialize_u32, | ||||
|         u64 => deserialize_u64, | ||||
|  | ||||
|         i8  => deserialize_i8, | ||||
|         i16 => deserialize_i16, | ||||
|         i32 => deserialize_i32, | ||||
|         i64 => deserialize_i64, | ||||
|  | ||||
|         f32 => deserialize_f32, | ||||
|         f64 => deserialize_f64, | ||||
|     } | ||||
|  | ||||
|     forward_to_deserialize_any! { | ||||
|         unit seq map | ||||
|         unit_struct tuple_struct | ||||
|         identifier tuple ignored_any option newtype_struct enum struct | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum DeserializerError { | ||||
|     Custom(String), | ||||
| } | ||||
|  | ||||
| impl de::Error for DeserializerError { | ||||
|     fn custom<T: fmt::Display>(msg: T) -> Self { | ||||
|         DeserializerError::Custom(msg.to_string()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for DeserializerError { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         match self { | ||||
|             DeserializerError::Custom(s) => f.write_str(&s), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Error for DeserializerError {} | ||||
| @@ -1,243 +0,0 @@ | ||||
| use serde::Serialize; | ||||
| use serde::ser; | ||||
|  | ||||
| use crate::database::serde::key_to_string::KeyToStringSerializer; | ||||
| use crate::database::serde::{SerializerError, calculate_hash}; | ||||
| use meilidb_core::DocumentId; | ||||
|  | ||||
| pub struct FindDocumentIdSerializer<'a> { | ||||
|     pub id_attribute_name: &'a str, | ||||
| } | ||||
|  | ||||
| impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> { | ||||
|     type Ok = DocumentId; | ||||
|     type Error = SerializerError; | ||||
|     type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeMap = FindDocumentIdMapSerializer<'a>; | ||||
|     type SerializeStruct = FindDocumentIdStructSerializer<'a>; | ||||
|     type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|  | ||||
|     forward_to_unserializable_type! { | ||||
|         bool => serialize_bool, | ||||
|         char => serialize_char, | ||||
|  | ||||
|         i8  => serialize_i8, | ||||
|         i16 => serialize_i16, | ||||
|         i32 => serialize_i32, | ||||
|         i64 => serialize_i64, | ||||
|  | ||||
|         u8  => serialize_u8, | ||||
|         u16 => serialize_u16, | ||||
|         u32 => serialize_u32, | ||||
|         u64 => serialize_u64, | ||||
|  | ||||
|         f32 => serialize_f32, | ||||
|         f64 => serialize_f64, | ||||
|     } | ||||
|  | ||||
|     fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "str" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "&[u8]" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_none(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "()" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "unit struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "unit variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_struct<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         value.serialize(self) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_variant<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "newtype variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "sequence" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { | ||||
|         Ok(FindDocumentIdMapSerializer { | ||||
|             id_attribute_name: self.id_attribute_name, | ||||
|             document_id: None, | ||||
|             current_key_name: None, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStruct, Self::Error> | ||||
|     { | ||||
|         Ok(FindDocumentIdStructSerializer { | ||||
|             id_attribute_name: self.id_attribute_name, | ||||
|             document_id: None, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStructVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct variant" }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct FindDocumentIdMapSerializer<'a> { | ||||
|     id_attribute_name: &'a str, | ||||
|     document_id: Option<DocumentId>, | ||||
|     current_key_name: Option<String>, | ||||
| } | ||||
|  | ||||
| impl<'a> ser::SerializeMap for FindDocumentIdMapSerializer<'a> { | ||||
|     type Ok = DocumentId; | ||||
|     type Error = SerializerError; | ||||
|  | ||||
|     fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         let key = key.serialize(KeyToStringSerializer)?; | ||||
|         self.current_key_name = Some(key); | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         let key = self.current_key_name.take().unwrap(); | ||||
|         self.serialize_entry(&key, value) | ||||
|     } | ||||
|  | ||||
|     fn serialize_entry<K: ?Sized, V: ?Sized>( | ||||
|         &mut self, | ||||
|         key: &K, | ||||
|         value: &V | ||||
|     ) -> Result<(), Self::Error> | ||||
|     where K: Serialize, V: Serialize, | ||||
|     { | ||||
|         let key = key.serialize(KeyToStringSerializer)?; | ||||
|  | ||||
|         if self.id_attribute_name == key { | ||||
|             // TODO is it possible to have multiple ids? | ||||
|             let id = bincode::serialize(value).unwrap(); | ||||
|             let hash = calculate_hash(&id); | ||||
|             self.document_id = Some(DocumentId(hash)); | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn end(self) -> Result<Self::Ok, Self::Error> { | ||||
|         match self.document_id { | ||||
|             Some(document_id) => Ok(document_id), | ||||
|             None => Err(SerializerError::DocumentIdNotFound) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct FindDocumentIdStructSerializer<'a> { | ||||
|     id_attribute_name: &'a str, | ||||
|     document_id: Option<DocumentId>, | ||||
| } | ||||
|  | ||||
| impl<'a> ser::SerializeStruct for FindDocumentIdStructSerializer<'a> { | ||||
|     type Ok = DocumentId; | ||||
|     type Error = SerializerError; | ||||
|  | ||||
|     fn serialize_field<T: ?Sized>( | ||||
|         &mut self, | ||||
|         key: &'static str, | ||||
|         value: &T | ||||
|     ) -> Result<(), Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         if self.id_attribute_name == key { | ||||
|             // TODO can it be possible to have multiple ids? | ||||
|             let id = bincode::serialize(value).unwrap(); | ||||
|             let hash = calculate_hash(&id); | ||||
|             self.document_id = Some(DocumentId(hash)); | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn end(self) -> Result<Self::Ok, Self::Error> { | ||||
|         match self.document_id { | ||||
|             Some(document_id) => Ok(document_id), | ||||
|             None => Err(SerializerError::DocumentIdNotFound) | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,190 +0,0 @@ | ||||
| use std::collections::HashSet; | ||||
|  | ||||
| use serde::Serialize; | ||||
| use serde::ser; | ||||
| use meilidb_core::{DocumentId, DocIndex}; | ||||
| use meilidb_tokenizer::{Tokenizer, Token, is_cjk}; | ||||
|  | ||||
| use crate::database::update::DocumentUpdate; | ||||
| use crate::database::serde::SerializerError; | ||||
| use crate::database::schema::SchemaAttr; | ||||
|  | ||||
| pub struct IndexerSerializer<'a, 'b> { | ||||
|     pub update: &'a mut DocumentUpdate<'b>, | ||||
|     pub document_id: DocumentId, | ||||
|     pub attribute: SchemaAttr, | ||||
|     pub stop_words: &'a HashSet<String>, | ||||
| } | ||||
|  | ||||
| impl<'a, 'b> ser::Serializer for IndexerSerializer<'a, 'b> { | ||||
|     type Ok = (); | ||||
|     type Error = SerializerError; | ||||
|     type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeMap = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|  | ||||
|     forward_to_unserializable_type! { | ||||
|         bool => serialize_bool, | ||||
|         char => serialize_char, | ||||
|  | ||||
|         i8  => serialize_i8, | ||||
|         i16 => serialize_i16, | ||||
|         i32 => serialize_i32, | ||||
|         i64 => serialize_i64, | ||||
|  | ||||
|         u8  => serialize_u8, | ||||
|         u16 => serialize_u16, | ||||
|         u32 => serialize_u32, | ||||
|         u64 => serialize_u64, | ||||
|  | ||||
|         f32 => serialize_f32, | ||||
|         f64 => serialize_f64, | ||||
|     } | ||||
|  | ||||
|     fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> { | ||||
|         for token in Tokenizer::new(v) { | ||||
|             let Token { word, word_index, char_index } = token; | ||||
|             let document_id = self.document_id; | ||||
|  | ||||
|             // FIXME must u32::try_from instead | ||||
|             let attribute = self.attribute.0; | ||||
|             let word_index = word_index as u16; | ||||
|  | ||||
|             // insert the exact representation | ||||
|             let word_lower = word.to_lowercase(); | ||||
|             let length = word.chars().count() as u16; | ||||
|  | ||||
|             if self.stop_words.contains(&word_lower) { continue } | ||||
|  | ||||
|             // and the unidecoded lowercased version | ||||
|             if !word_lower.chars().any(is_cjk) { | ||||
|                 let word_unidecoded = unidecode::unidecode(word).to_lowercase(); | ||||
|                 let word_unidecoded = word_unidecoded.trim(); | ||||
|                 if word_lower != word_unidecoded { | ||||
|                     let char_index = char_index as u16; | ||||
|                     let char_length = length; | ||||
|  | ||||
|                     let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length }; | ||||
|                     self.update.insert_doc_index(word_unidecoded.as_bytes().to_vec(), doc_index)?; | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             let char_index = char_index as u16; | ||||
|             let char_length = length; | ||||
|  | ||||
|             let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length }; | ||||
|             self.update.insert_doc_index(word_lower.into_bytes(), doc_index)?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "&[u8]" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_none(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "()" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "unit struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "unit variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_struct<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         value.serialize(self) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_variant<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "newtype variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "seq" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "map" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStructVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct variant" }) | ||||
|     } | ||||
| } | ||||
| @@ -1,146 +0,0 @@ | ||||
| use serde::Serialize; | ||||
| use serde::ser; | ||||
|  | ||||
| use crate::database::serde::SerializerError; | ||||
|  | ||||
| pub struct KeyToStringSerializer; | ||||
|  | ||||
| impl ser::Serializer for KeyToStringSerializer { | ||||
|     type Ok = String; | ||||
|     type Error = SerializerError; | ||||
|     type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeMap = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|  | ||||
|     forward_to_unserializable_type! { | ||||
|         bool => serialize_bool, | ||||
|         char => serialize_char, | ||||
|  | ||||
|         i8  => serialize_i8, | ||||
|         i16 => serialize_i16, | ||||
|         i32 => serialize_i32, | ||||
|         i64 => serialize_i64, | ||||
|  | ||||
|         u8  => serialize_u8, | ||||
|         u16 => serialize_u16, | ||||
|         u32 => serialize_u32, | ||||
|         u64 => serialize_u64, | ||||
|  | ||||
|         f32 => serialize_f32, | ||||
|         f64 => serialize_f64, | ||||
|     } | ||||
|  | ||||
|     fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(value.to_string()) | ||||
|     } | ||||
|  | ||||
|     fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "&[u8]" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_none(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "()" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "unit struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "unit variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_struct<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         value.serialize(self) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_variant<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "newtype variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "sequence" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "map" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStructVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct variant" }) | ||||
|     } | ||||
| } | ||||
| @@ -1,65 +0,0 @@ | ||||
| use std::collections::hash_map::DefaultHasher; | ||||
| use std::hash::{Hash, Hasher}; | ||||
| use std::error::Error; | ||||
| use std::fmt; | ||||
|  | ||||
| use serde::ser; | ||||
|  | ||||
| macro_rules! forward_to_unserializable_type { | ||||
|     ($($ty:ident => $se_method:ident,)*) => { | ||||
|         $( | ||||
|             fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> { | ||||
|                 Err(SerializerError::UnserializableType { name: "$ty" }) | ||||
|             } | ||||
|         )* | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub mod find_id; | ||||
| pub mod key_to_string; | ||||
| pub mod value_to_number; | ||||
| pub mod serializer; | ||||
| pub mod indexer_serializer; | ||||
| pub mod deserializer; | ||||
|  | ||||
| pub fn calculate_hash<T: Hash>(t: &T) -> u64 { | ||||
|     let mut s = DefaultHasher::new(); | ||||
|     t.hash(&mut s); | ||||
|     s.finish() | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum SerializerError { | ||||
|     DocumentIdNotFound, | ||||
|     UnserializableType { name: &'static str }, | ||||
|     Custom(String), | ||||
| } | ||||
|  | ||||
| impl ser::Error for SerializerError { | ||||
|     fn custom<T: fmt::Display>(msg: T) -> Self { | ||||
|         SerializerError::Custom(msg.to_string()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for SerializerError { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         match self { | ||||
|             SerializerError::DocumentIdNotFound => { | ||||
|                 write!(f, "serialized document does not have an id according to the schema") | ||||
|             } | ||||
|             SerializerError::UnserializableType { name } => { | ||||
|                 write!(f, "Only struct and map types are considered valid documents and | ||||
|                            can be serialized, not {} types directly.", name) | ||||
|             }, | ||||
|             SerializerError::Custom(s) => f.write_str(&s), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Error for SerializerError {} | ||||
|  | ||||
| impl From<String> for SerializerError { | ||||
|     fn from(value: String) -> SerializerError { | ||||
|         SerializerError::Custom(value) | ||||
|     } | ||||
| } | ||||
| @@ -1,282 +0,0 @@ | ||||
| use std::collections::HashSet; | ||||
|  | ||||
| use serde::Serialize; | ||||
| use serde::ser; | ||||
|  | ||||
| use crate::database::serde::indexer_serializer::IndexerSerializer; | ||||
| use crate::database::serde::key_to_string::KeyToStringSerializer; | ||||
| use crate::database::serde::value_to_number::ValueToNumberSerializer; | ||||
| use crate::database::update::DocumentUpdate; | ||||
| use crate::database::serde::SerializerError; | ||||
| use crate::database::schema::Schema; | ||||
| use meilidb_core::DocumentId; | ||||
|  | ||||
| pub struct Serializer<'a, 'b> { | ||||
|     pub schema: &'a Schema, | ||||
|     pub update: &'a mut DocumentUpdate<'b>, | ||||
|     pub document_id: DocumentId, | ||||
|     pub stop_words: &'a HashSet<String>, | ||||
| } | ||||
|  | ||||
| impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { | ||||
|     type Ok = (); | ||||
|     type Error = SerializerError; | ||||
|     type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeMap = MapSerializer<'a, 'b>; | ||||
|     type SerializeStruct = StructSerializer<'a, 'b>; | ||||
|     type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|  | ||||
|     forward_to_unserializable_type! { | ||||
|         bool => serialize_bool, | ||||
|         char => serialize_char, | ||||
|  | ||||
|         i8  => serialize_i8, | ||||
|         i16 => serialize_i16, | ||||
|         i32 => serialize_i32, | ||||
|         i64 => serialize_i64, | ||||
|  | ||||
|         u8  => serialize_u8, | ||||
|         u16 => serialize_u16, | ||||
|         u32 => serialize_u32, | ||||
|         u64 => serialize_u64, | ||||
|  | ||||
|         f32 => serialize_f32, | ||||
|         f64 => serialize_f64, | ||||
|     } | ||||
|  | ||||
|     fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "str" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "&[u8]" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_none(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "()" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "unit struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "unit variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_struct<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         value.serialize(self) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_variant<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "newtype variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "sequence" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { | ||||
|         Ok(MapSerializer { | ||||
|             schema: self.schema, | ||||
|             document_id: self.document_id, | ||||
|             update: self.update, | ||||
|             stop_words: self.stop_words, | ||||
|             current_key_name: None, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStruct, Self::Error> | ||||
|     { | ||||
|         Ok(StructSerializer { | ||||
|             schema: self.schema, | ||||
|             document_id: self.document_id, | ||||
|             update: self.update, | ||||
|             stop_words: self.stop_words, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStructVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct variant" }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct MapSerializer<'a, 'b> { | ||||
|     pub schema: &'a Schema, | ||||
|     pub document_id: DocumentId, | ||||
|     pub update: &'a mut DocumentUpdate<'b>, | ||||
|     pub stop_words: &'a HashSet<String>, | ||||
|     pub current_key_name: Option<String>, | ||||
| } | ||||
|  | ||||
| impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> { | ||||
|     type Ok = (); | ||||
|     type Error = SerializerError; | ||||
|  | ||||
|     fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         let key = key.serialize(KeyToStringSerializer)?; | ||||
|         self.current_key_name = Some(key); | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         let key = self.current_key_name.take().unwrap(); | ||||
|         self.serialize_entry(&key, value) | ||||
|     } | ||||
|  | ||||
|     fn serialize_entry<K: ?Sized, V: ?Sized>( | ||||
|         &mut self, | ||||
|         key: &K, | ||||
|         value: &V, | ||||
|     ) -> Result<(), Self::Error> | ||||
|     where K: Serialize, V: Serialize, | ||||
|     { | ||||
|         let key = key.serialize(KeyToStringSerializer)?; | ||||
|  | ||||
|         if let Some(attr) = self.schema.attribute(key) { | ||||
|             let props = self.schema.props(attr); | ||||
|             if props.is_stored() { | ||||
|                 let value = bincode::serialize(value).unwrap(); | ||||
|                 self.update.insert_attribute_value(attr, &value)?; | ||||
|             } | ||||
|             if props.is_indexed() { | ||||
|                 let serializer = IndexerSerializer { | ||||
|                     update: self.update, | ||||
|                     document_id: self.document_id, | ||||
|                     attribute: attr, | ||||
|                     stop_words: self.stop_words, | ||||
|                 }; | ||||
|                 value.serialize(serializer)?; | ||||
|             } | ||||
|             if props.is_ranked() { | ||||
|                 let number = value.serialize(ValueToNumberSerializer)?; | ||||
|                 self.update.register_ranked_attribute(attr, number)?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn end(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct StructSerializer<'a, 'b> { | ||||
|     pub schema: &'a Schema, | ||||
|     pub document_id: DocumentId, | ||||
|     pub update: &'a mut DocumentUpdate<'b>, | ||||
|     pub stop_words: &'a HashSet<String>, | ||||
| } | ||||
|  | ||||
| impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { | ||||
|     type Ok = (); | ||||
|     type Error = SerializerError; | ||||
|  | ||||
|     fn serialize_field<T: ?Sized>( | ||||
|         &mut self, | ||||
|         key: &'static str, | ||||
|         value: &T | ||||
|     ) -> Result<(), Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         if let Some(attr) = self.schema.attribute(key) { | ||||
|             let props = self.schema.props(attr); | ||||
|             if props.is_stored() { | ||||
|                 let value = bincode::serialize(value).unwrap(); | ||||
|                 self.update.insert_attribute_value(attr, &value)?; | ||||
|             } | ||||
|             if props.is_indexed() { | ||||
|                 let serializer = IndexerSerializer { | ||||
|                     update: self.update, | ||||
|                     document_id: self.document_id, | ||||
|                     attribute: attr, | ||||
|                     stop_words: self.stop_words, | ||||
|                 }; | ||||
|                 value.serialize(serializer)?; | ||||
|             } | ||||
|             if props.is_ranked() { | ||||
|                 let integer = value.serialize(ValueToNumberSerializer)?; | ||||
|                 self.update.register_ranked_attribute(attr, integer)?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn end(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| @@ -1,176 +0,0 @@ | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use serde::Serialize; | ||||
| use serde::{ser, ser::Error}; | ||||
|  | ||||
| use crate::database::serde::SerializerError; | ||||
| use crate::database::Number; | ||||
|  | ||||
| pub struct ValueToNumberSerializer; | ||||
|  | ||||
| impl ser::Serializer for ValueToNumberSerializer { | ||||
|     type Ok = Number; | ||||
|     type Error = SerializerError; | ||||
|     type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeMap = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>; | ||||
|     type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; | ||||
|  | ||||
|     forward_to_unserializable_type! { | ||||
|         bool => serialize_bool, | ||||
|         char => serialize_char, | ||||
|     } | ||||
|  | ||||
|     fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Signed(value as i64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Signed(value as i64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Signed(value as i64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Signed(value as i64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Unsigned(value as u64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Unsigned(value as u64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Unsigned(value as u64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Unsigned(value as u64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Float(value as f64)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> { | ||||
|         Ok(Number::Float(value)) | ||||
|     } | ||||
|  | ||||
|     fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> { | ||||
|         Number::from_str(value).map_err(SerializerError::custom) | ||||
|     } | ||||
|  | ||||
|     fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "&[u8]" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_none(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "Option" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "()" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "unit struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_unit_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "unit variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_struct<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         value.serialize(self) | ||||
|     } | ||||
|  | ||||
|     fn serialize_newtype_variant<T: ?Sized>( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _value: &T | ||||
|     ) -> Result<Self::Ok, Self::Error> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "newtype variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "sequence" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_tuple_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeTupleVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "tuple variant" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { | ||||
|         Err(SerializerError::UnserializableType { name: "map" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStruct, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct" }) | ||||
|     } | ||||
|  | ||||
|     fn serialize_struct_variant( | ||||
|         self, | ||||
|         _name: &'static str, | ||||
|         _variant_index: u32, | ||||
|         _variant: &'static str, | ||||
|         _len: usize | ||||
|     ) -> Result<Self::SerializeStructVariant, Self::Error> | ||||
|     { | ||||
|         Err(SerializerError::UnserializableType { name: "struct variant" }) | ||||
|     } | ||||
| } | ||||
| @@ -1,55 +0,0 @@ | ||||
| use std::error::Error; | ||||
|  | ||||
| use byteorder::{ReadBytesExt, WriteBytesExt}; | ||||
| use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor}; | ||||
| use meilidb_core::write_to_bytes::WriteToBytes; | ||||
| use meilidb_core::data::DocIds; | ||||
|  | ||||
| use crate::database::Index; | ||||
|  | ||||
| pub enum WriteIndexEvent<'a> { | ||||
|     RemovedDocuments(&'a DocIds), | ||||
|     UpdatedDocuments(&'a Index), | ||||
| } | ||||
|  | ||||
| impl<'a> WriteToBytes for WriteIndexEvent<'a> { | ||||
|     fn write_to_bytes(&self, bytes: &mut Vec<u8>) { | ||||
|         match self { | ||||
|             WriteIndexEvent::RemovedDocuments(doc_ids) => { | ||||
|                 let _ = bytes.write_u8(0); | ||||
|                 doc_ids.write_to_bytes(bytes); | ||||
|             }, | ||||
|             WriteIndexEvent::UpdatedDocuments(index) => { | ||||
|                 let _ = bytes.write_u8(1); | ||||
|                 index.write_to_bytes(bytes); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub enum ReadIndexEvent { | ||||
|     RemovedDocuments(DocIds), | ||||
|     UpdatedDocuments(Index), | ||||
| } | ||||
|  | ||||
| impl ReadIndexEvent { | ||||
|     pub fn updated_documents(self) -> Option<Index> { | ||||
|         use ReadIndexEvent::*; | ||||
|         match self { | ||||
|             RemovedDocuments(_) => None, | ||||
|             UpdatedDocuments(index) => Some(index), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl FromSharedDataCursor for ReadIndexEvent { | ||||
|     type Error = Box<Error>; | ||||
|  | ||||
|     fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> { | ||||
|         match cursor.read_u8()? { | ||||
|             0 => DocIds::from_shared_data_cursor(cursor).map(ReadIndexEvent::RemovedDocuments), | ||||
|             1 => Index::from_shared_data_cursor(cursor).map(ReadIndexEvent::UpdatedDocuments), | ||||
|             _ => unreachable!(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,234 +0,0 @@ | ||||
| use std::collections::{HashSet, BTreeMap}; | ||||
| use std::error::Error; | ||||
|  | ||||
| use rocksdb::rocksdb::{Writable, WriteBatch}; | ||||
| use hashbrown::hash_map::HashMap; | ||||
| use sdset::{Set, SetBuf}; | ||||
| use serde::Serialize; | ||||
| use meilidb_core::write_to_bytes::WriteToBytes; | ||||
| use meilidb_core::data::DocIds; | ||||
| use meilidb_core::{IndexBuilder, DocumentId, DocIndex}; | ||||
|  | ||||
| use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; | ||||
| use crate::database::serde::serializer::Serializer; | ||||
| use crate::database::serde::SerializerError; | ||||
| use crate::database::schema::SchemaAttr; | ||||
| use crate::database::schema::Schema; | ||||
| use crate::database::{DATA_INDEX, DATA_RANKED_MAP}; | ||||
| use crate::database::{RankedMap, Number}; | ||||
|  | ||||
| pub use self::index_event::{ReadIndexEvent, WriteIndexEvent}; | ||||
| pub use self::ranked_map_event::{ReadRankedMapEvent, WriteRankedMapEvent}; | ||||
|  | ||||
| mod index_event; | ||||
| mod ranked_map_event; | ||||
|  | ||||
| pub type Token = Vec<u8>; // TODO could be replaced by a SmallVec | ||||
|  | ||||
| pub struct Update { | ||||
|     schema: Schema, | ||||
|     raw_builder: RawUpdateBuilder, | ||||
| } | ||||
|  | ||||
| impl Update { | ||||
|     pub(crate) fn new(schema: Schema) -> Update { | ||||
|         Update { schema, raw_builder: RawUpdateBuilder::new() } | ||||
|     } | ||||
|  | ||||
|     pub fn update_document<T>( | ||||
|         &mut self, | ||||
|         document: T, | ||||
|         stop_words: &HashSet<String>, | ||||
|     ) -> Result<DocumentId, SerializerError> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         let document_id = self.schema.document_id(&document)?; | ||||
|  | ||||
|         let serializer = Serializer { | ||||
|             schema: &self.schema, | ||||
|             document_id: document_id, | ||||
|             update: &mut self.raw_builder.document_update(document_id)?, | ||||
|             stop_words: stop_words, | ||||
|         }; | ||||
|  | ||||
|         document.serialize(serializer)?; | ||||
|  | ||||
|         Ok(document_id) | ||||
|     } | ||||
|  | ||||
|     pub fn remove_document<T>(&mut self, document: T) -> Result<DocumentId, SerializerError> | ||||
|     where T: Serialize, | ||||
|     { | ||||
|         let document_id = self.schema.document_id(&document)?; | ||||
|         self.raw_builder.document_update(document_id)?.remove()?; | ||||
|         Ok(document_id) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn build(self) -> Result<WriteBatch, Box<Error>> { | ||||
|         self.raw_builder.build() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Copy, Clone, PartialEq, Eq)] | ||||
| enum UpdateType { | ||||
|     Updated, | ||||
|     Deleted, | ||||
| } | ||||
|  | ||||
| use UpdateType::{Updated, Deleted}; | ||||
|  | ||||
| pub struct RawUpdateBuilder { | ||||
|     documents_update: HashMap<DocumentId, UpdateType>, | ||||
|     documents_ranked_fields: RankedMap, | ||||
|     indexed_words: BTreeMap<Token, Vec<DocIndex>>, | ||||
|     batch: WriteBatch, | ||||
| } | ||||
|  | ||||
| impl RawUpdateBuilder { | ||||
|     pub fn new() -> RawUpdateBuilder { | ||||
|         RawUpdateBuilder { | ||||
|             documents_update: HashMap::new(), | ||||
|             documents_ranked_fields: HashMap::new(), | ||||
|             indexed_words: BTreeMap::new(), | ||||
|             batch: WriteBatch::new(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn document_update(&mut self, document_id: DocumentId) -> Result<DocumentUpdate, SerializerError> { | ||||
|         use serde::ser::Error; | ||||
|  | ||||
|         match self.documents_update.get(&document_id) { | ||||
|             Some(Deleted) | None => Ok(DocumentUpdate { document_id, inner: self }), | ||||
|             Some(Updated) => Err(SerializerError::custom( | ||||
|                 "This document has already been removed and cannot be updated in the same update" | ||||
|             )), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn build(self) -> Result<WriteBatch, Box<Error>> { | ||||
|         // create the list of all the removed documents | ||||
|         let removed_documents = { | ||||
|             let mut document_ids = Vec::new(); | ||||
|             for (id, update_type) in self.documents_update { | ||||
|                 if update_type == Deleted { | ||||
|                     document_ids.push(id); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             document_ids.sort_unstable(); | ||||
|             let setbuf = SetBuf::new_unchecked(document_ids); | ||||
|             DocIds::new(&setbuf) | ||||
|         }; | ||||
|  | ||||
|         // create the Index of all the document updates | ||||
|         let index = { | ||||
|             let mut builder = IndexBuilder::new(); | ||||
|             for (key, mut indexes) in self.indexed_words { | ||||
|                 indexes.sort_unstable(); | ||||
|                 let indexes = Set::new_unchecked(&indexes); | ||||
|                 builder.insert(key, indexes).unwrap(); | ||||
|             } | ||||
|             builder.build() | ||||
|         }; | ||||
|  | ||||
|         // WARN: removed documents must absolutely | ||||
|         //       be merged *before* document updates | ||||
|  | ||||
|         // === index === | ||||
|  | ||||
|         if !removed_documents.is_empty() { | ||||
|             // remove the documents using the appropriate IndexEvent | ||||
|             let event_bytes = WriteIndexEvent::RemovedDocuments(&removed_documents).into_bytes(); | ||||
|             self.batch.merge(DATA_INDEX, &event_bytes)?; | ||||
|         } | ||||
|  | ||||
|         // update the documents using the appropriate IndexEvent | ||||
|         let event_bytes = WriteIndexEvent::UpdatedDocuments(&index).into_bytes(); | ||||
|         self.batch.merge(DATA_INDEX, &event_bytes)?; | ||||
|  | ||||
|         // === ranked map === | ||||
|  | ||||
|         if !removed_documents.is_empty() { | ||||
|             // update the ranked map using the appropriate RankedMapEvent | ||||
|             let event_bytes = WriteRankedMapEvent::RemovedDocuments(&removed_documents).into_bytes(); | ||||
|             self.batch.merge(DATA_RANKED_MAP, &event_bytes)?; | ||||
|         } | ||||
|  | ||||
|         // update the documents using the appropriate IndexEvent | ||||
|         let event_bytes = WriteRankedMapEvent::UpdatedDocuments(&self.documents_ranked_fields).into_bytes(); | ||||
|         self.batch.merge(DATA_RANKED_MAP, &event_bytes)?; | ||||
|  | ||||
|         Ok(self.batch) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct DocumentUpdate<'a> { | ||||
|     document_id: DocumentId, | ||||
|     inner: &'a mut RawUpdateBuilder, | ||||
| } | ||||
|  | ||||
| impl<'a> DocumentUpdate<'a> { | ||||
|     pub fn remove(&mut self) -> Result<(), SerializerError> { | ||||
|         use serde::ser::Error; | ||||
|  | ||||
|         if let Updated = self.inner.documents_update.entry(self.document_id).or_insert(Deleted) { | ||||
|             return Err(SerializerError::custom( | ||||
|                 "This document has already been updated and cannot be removed in the same update" | ||||
|             )); | ||||
|         } | ||||
|  | ||||
|         let start = DocumentKey::new(self.document_id).with_attribute_min(); | ||||
|         let end = DocumentKey::new(self.document_id).with_attribute_max(); // FIXME max + 1 | ||||
|         self.inner.batch.delete_range(start.as_ref(), end.as_ref())?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn insert_attribute_value(&mut self, attr: SchemaAttr, value: &[u8]) -> Result<(), SerializerError> { | ||||
|         use serde::ser::Error; | ||||
|  | ||||
|         if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) { | ||||
|             return Err(SerializerError::custom( | ||||
|                 "This document has already been deleted and cannot be updated in the same update" | ||||
|             )); | ||||
|         } | ||||
|  | ||||
|         let key = DocumentKeyAttr::new(self.document_id, attr); | ||||
|         self.inner.batch.put(key.as_ref(), &value)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn insert_doc_index(&mut self, token: Token, doc_index: DocIndex) -> Result<(), SerializerError> { | ||||
|         use serde::ser::Error; | ||||
|  | ||||
|         if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) { | ||||
|             return Err(SerializerError::custom( | ||||
|                 "This document has already been deleted and cannot be updated in the same update" | ||||
|             )); | ||||
|         } | ||||
|  | ||||
|         self.inner.indexed_words.entry(token).or_insert_with(Vec::new).push(doc_index); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn register_ranked_attribute( | ||||
|         &mut self, | ||||
|         attr: SchemaAttr, | ||||
|         number: Number, | ||||
|     ) -> Result<(), SerializerError> | ||||
|     { | ||||
|         use serde::ser::Error; | ||||
|  | ||||
|         if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) { | ||||
|             return Err(SerializerError::custom( | ||||
|                 "This document has already been deleted, ranked attributes cannot be added in the same update" | ||||
|             )); | ||||
|         } | ||||
|  | ||||
|         self.inner.documents_ranked_fields.insert((self.document_id, attr), number); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| @@ -1,58 +0,0 @@ | ||||
| use std::error::Error; | ||||
|  | ||||
| use byteorder::{ReadBytesExt, WriteBytesExt}; | ||||
| use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor}; | ||||
| use meilidb_core::write_to_bytes::WriteToBytes; | ||||
| use meilidb_core::data::DocIds; | ||||
|  | ||||
| use crate::database::RankedMap; | ||||
|  | ||||
| pub enum WriteRankedMapEvent<'a> { | ||||
|     RemovedDocuments(&'a DocIds), | ||||
|     UpdatedDocuments(&'a RankedMap), | ||||
| } | ||||
|  | ||||
| impl<'a> WriteToBytes for WriteRankedMapEvent<'a> { | ||||
|     fn write_to_bytes(&self, bytes: &mut Vec<u8>) { | ||||
|         match self { | ||||
|             WriteRankedMapEvent::RemovedDocuments(doc_ids) => { | ||||
|                 let _ = bytes.write_u8(0); | ||||
|                 doc_ids.write_to_bytes(bytes); | ||||
|             }, | ||||
|             WriteRankedMapEvent::UpdatedDocuments(ranked_map) => { | ||||
|                 let _ = bytes.write_u8(1); | ||||
|                 bincode::serialize_into(bytes, ranked_map).unwrap() | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub enum ReadRankedMapEvent { | ||||
|     RemovedDocuments(DocIds), | ||||
|     UpdatedDocuments(RankedMap), | ||||
| } | ||||
|  | ||||
| impl ReadRankedMapEvent { | ||||
|     pub fn updated_documents(self) -> Option<RankedMap> { | ||||
|         use ReadRankedMapEvent::*; | ||||
|         match self { | ||||
|             RemovedDocuments(_) => None, | ||||
|             UpdatedDocuments(ranked_map) => Some(ranked_map), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl FromSharedDataCursor for ReadRankedMapEvent { | ||||
|     type Error = Box<Error>; | ||||
|  | ||||
|     fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> { | ||||
|         match cursor.read_u8()? { | ||||
|             0 => DocIds::from_shared_data_cursor(cursor).map(ReadRankedMapEvent::RemovedDocuments), | ||||
|             1 => { | ||||
|                 let ranked_map = bincode::deserialize_from(cursor)?; | ||||
|                 Ok(ReadRankedMapEvent::UpdatedDocuments(ranked_map)) | ||||
|             }, | ||||
|             _ => unreachable!(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,199 +0,0 @@ | ||||
| use std::error::Error; | ||||
| use std::path::Path; | ||||
| use std::ops::Deref; | ||||
| use std::{fmt, marker}; | ||||
|  | ||||
| use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions}; | ||||
| use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter}; | ||||
| use serde::de::DeserializeOwned; | ||||
| use meilidb_core::{Index, QueryBuilder, DocumentId}; | ||||
|  | ||||
| use crate::database::{retrieve_data_schema, retrieve_data_index, retrieve_data_ranked_map, retrieve_config}; | ||||
| use crate::database::serde::deserializer::Deserializer; | ||||
| use crate::database::{DocumentKey, DocumentKeyAttr}; | ||||
| use crate::database::schema::Schema; | ||||
| use crate::database::RankedMap; | ||||
| use crate::database::Config; | ||||
|  | ||||
| pub struct DatabaseView<D> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     snapshot: Snapshot<D>, | ||||
|     index: Index, | ||||
|     ranked_map: RankedMap, | ||||
|     schema: Schema, | ||||
|     config: Config, | ||||
| } | ||||
|  | ||||
| impl<D> DatabaseView<D> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     pub fn new(snapshot: Snapshot<D>) -> Result<DatabaseView<D>, Box<Error>> { | ||||
|         let schema = retrieve_data_schema(&snapshot)?; | ||||
|         let index = retrieve_data_index(&snapshot)?; | ||||
|         let ranked_map = retrieve_data_ranked_map(&snapshot)?; | ||||
|         let config = retrieve_config(&snapshot)?; | ||||
|         Ok(DatabaseView { snapshot, index, ranked_map, schema, config }) | ||||
|     } | ||||
|  | ||||
|     pub fn schema(&self) -> &Schema { | ||||
|         &self.schema | ||||
|     } | ||||
|  | ||||
|     pub fn index(&self) -> &Index { | ||||
|         &self.index | ||||
|     } | ||||
|  | ||||
|     pub fn ranked_map(&self) -> &RankedMap { | ||||
|         &self.ranked_map | ||||
|     } | ||||
|  | ||||
|     pub fn into_snapshot(self) -> Snapshot<D> { | ||||
|         self.snapshot | ||||
|     } | ||||
|  | ||||
|     pub fn snapshot(&self) -> &Snapshot<D> { | ||||
|         &self.snapshot | ||||
|     } | ||||
|  | ||||
|     pub fn config(&self) -> &Config { | ||||
|         &self.config | ||||
|     } | ||||
|  | ||||
|     pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> { | ||||
|         Ok(self.snapshot.get(key)?) | ||||
|     } | ||||
|  | ||||
|     pub fn dump_all<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<Error>> { | ||||
|         let path = path.as_ref().to_string_lossy(); | ||||
|  | ||||
|         let env_options = EnvOptions::new(); | ||||
|         let column_family_options = ColumnFamilyOptions::new(); | ||||
|         let mut file_writer = SstFileWriter::new(env_options, column_family_options); | ||||
|         file_writer.open(&path)?; | ||||
|  | ||||
|         let mut iter = self.snapshot.iter(); | ||||
|         iter.seek(SeekKey::Start); | ||||
|  | ||||
|         for (key, value) in &mut iter { | ||||
|             file_writer.put(&key, &value)?; | ||||
|         } | ||||
|  | ||||
|         file_writer.finish()?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn query_builder(&self) -> QueryBuilder { | ||||
|         QueryBuilder::new(self.index()) | ||||
|     } | ||||
|  | ||||
|     pub fn raw_field_by_document_id( | ||||
|         &self, | ||||
|         name: &str, | ||||
|         id: DocumentId | ||||
|     ) -> Result<Option<Vec<u8>>, Box<Error>> | ||||
|     { | ||||
|         let attr = self.schema.attribute(name).ok_or("field not found")?; | ||||
|         let key = DocumentKeyAttr::new(id, attr); | ||||
|         let vector = self.snapshot.get(key.as_ref())?; | ||||
|  | ||||
|         Ok(vector.map(|v| v.to_vec())) | ||||
|     } | ||||
|  | ||||
|     pub fn document_by_id<T>(&self, id: DocumentId) -> Result<T, Box<Error>> | ||||
|     where T: DeserializeOwned, | ||||
|     { | ||||
|         let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id); | ||||
|         Ok(T::deserialize(&mut deserializer)?) | ||||
|     } | ||||
|  | ||||
|     pub fn documents_by_id<T, I>(&self, ids: I) -> DocumentIter<D, T, I::IntoIter> | ||||
|     where T: DeserializeOwned, | ||||
|           I: IntoIterator<Item=DocumentId>, | ||||
|     { | ||||
|         DocumentIter { | ||||
|             database_view: self, | ||||
|             document_ids: ids.into_iter(), | ||||
|             _phantom: marker::PhantomData, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<D> fmt::Debug for DatabaseView<D> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         let mut options = ReadOptions::new(); | ||||
|         let lower = DocumentKey::new(DocumentId(0)); | ||||
|         options.set_iterate_lower_bound(lower.as_ref()); | ||||
|  | ||||
|         let mut iter = self.snapshot.iter_opt(options); | ||||
|         iter.seek(SeekKey::Start); | ||||
|         let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key)); | ||||
|  | ||||
|         if f.alternate() { | ||||
|             writeln!(f, "DatabaseView(")?; | ||||
|         } else { | ||||
|             write!(f, "DatabaseView(")?; | ||||
|         } | ||||
|  | ||||
|         self.schema.fmt(f)?; | ||||
|  | ||||
|         if f.alternate() { | ||||
|             writeln!(f, ",")?; | ||||
|         } else { | ||||
|             write!(f, ", ")?; | ||||
|         } | ||||
|  | ||||
|         f.debug_list().entries(iter).finish()?; | ||||
|  | ||||
|         write!(f, ")") | ||||
|     } | ||||
| } | ||||
|  | ||||
| // TODO this is just an iter::Map !!! | ||||
| pub struct DocumentIter<'a, D, T, I> | ||||
| where D: Deref<Target=DB> | ||||
| { | ||||
|     database_view: &'a DatabaseView<D>, | ||||
|     document_ids: I, | ||||
|     _phantom: marker::PhantomData<T>, | ||||
| } | ||||
|  | ||||
| impl<'a, D, T, I> Iterator for DocumentIter<'a, D, T, I> | ||||
| where D: Deref<Target=DB>, | ||||
|       T: DeserializeOwned, | ||||
|       I: Iterator<Item=DocumentId>, | ||||
| { | ||||
|     type Item = Result<T, Box<Error>>; | ||||
|  | ||||
|     fn size_hint(&self) -> (usize, Option<usize>) { | ||||
|         self.document_ids.size_hint() | ||||
|     } | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         match self.document_ids.next() { | ||||
|             Some(id) => Some(self.database_view.document_by_id(id)), | ||||
|             None => None | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a, D, T, I> ExactSizeIterator for DocumentIter<'a, D, T, I> | ||||
| where D: Deref<Target=DB>, | ||||
|       T: DeserializeOwned, | ||||
|       I: ExactSizeIterator + Iterator<Item=DocumentId>, | ||||
| { } | ||||
|  | ||||
| impl<'a, D, T, I> DoubleEndedIterator for DocumentIter<'a, D, T, I> | ||||
| where D: Deref<Target=DB>, | ||||
|       T: DeserializeOwned, | ||||
|       I: DoubleEndedIterator + Iterator<Item=DocumentId>, | ||||
| { | ||||
|     fn next_back(&mut self) -> Option<Self::Item> { | ||||
|         match self.document_ids.next_back() { | ||||
|             Some(id) => Some(self.database_view.document_by_id(id)), | ||||
|             None => None | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,10 +1,7 @@ | ||||
| #![cfg_attr(feature = "nightly", feature(test))] | ||||
|  | ||||
| pub mod database; | ||||
| mod common_words; | ||||
| mod sort_by_attr; | ||||
|  | ||||
| pub use rocksdb; | ||||
|  | ||||
| pub use self::sort_by_attr::SortByAttr; | ||||
| pub use self::common_words::CommonWords; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user