mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	feat: Introduce a working key-value based database
This commit is contained in:
		
							
								
								
									
										103
									
								
								src/blob/mod.rs
									
									
									
									
									
								
							
							
						
						
									
										103
									
								
								src/blob/mod.rs
									
									
									
									
									
								
							| @@ -9,7 +9,13 @@ pub use self::positive_blob::{PositiveBlob, PositiveBlobBuilder}; | ||||
| pub use self::negative_blob::{NegativeBlob, NegativeBlobBuilder}; | ||||
|  | ||||
| use std::error::Error; | ||||
| use std::io::{Write, Read}; | ||||
| use std::{io, fmt, mem}; | ||||
|  | ||||
| use fst::Map; | ||||
| use uuid::Uuid; | ||||
| use rocksdb::rocksdb::{DB, Snapshot}; | ||||
|  | ||||
| use crate::data::DocIndexes; | ||||
|  | ||||
| pub enum Blob { | ||||
| @@ -26,14 +32,14 @@ impl Blob { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] | ||||
| pub enum Sign { | ||||
|     Positive, | ||||
|     Negative, | ||||
| } | ||||
|  | ||||
| impl Sign { | ||||
|     pub fn alternate(self) -> Sign { | ||||
|     pub fn invert(self) -> Sign { | ||||
|         match self { | ||||
|             Sign::Positive => Sign::Negative, | ||||
|             Sign::Negative => Sign::Positive, | ||||
| @@ -41,6 +47,95 @@ impl Sign { | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn ordered_blobs_from_slice(slice: &[u8]) -> Result<Vec<Blob>, Box<Error>> { | ||||
|     unimplemented!() | ||||
| /// Unique identifier of a blob, wrapping a UUID. | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] | ||||
| pub struct BlobName(Uuid); | ||||
|  | ||||
| impl BlobName { | ||||
|     /// Creates a new name from a UUID freshly produced by `Uuid::new_v4`. | ||||
|     pub fn new() -> Self { | ||||
|         let uuid = Uuid::new_v4(); | ||||
|         BlobName(uuid) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for BlobName { | ||||
|     /// Writes the name as a debug-style tuple named `BlobName` whose single | ||||
|     /// field is the hyphenated string form of the inner UUID. | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         let hyphenated = self.0.to_hyphenated().to_string(); | ||||
|         let mut tuple = f.debug_tuple("BlobName"); | ||||
|         tuple.field(&hyphenated); | ||||
|         tuple.finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Describes one blob: whether it is positive or negative, and its name. | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] | ||||
| pub struct BlobInfo { | ||||
|     /// Sign of the blob. | ||||
|     pub sign: Sign, | ||||
|     /// Name of the blob. | ||||
|     pub name: BlobName, | ||||
| } | ||||
|  | ||||
| impl BlobInfo { | ||||
|     /// Creates the description of a positive blob with a freshly generated name. | ||||
|     pub fn new_positive() -> BlobInfo { | ||||
|         BlobInfo { | ||||
|             sign: Sign::Positive, | ||||
|             name: BlobName::new(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Creates the description of a negative blob with a freshly generated name. | ||||
|     pub fn new_negative() -> BlobInfo { | ||||
|         BlobInfo { | ||||
|             sign: Sign::Negative, | ||||
|             name: BlobName::new(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Deserializes a single `BlobInfo` from `reader` using bincode. | ||||
|     pub fn read_from<R: Read>(reader: R) -> bincode::Result<BlobInfo> { | ||||
|         bincode::deserialize_from(reader) | ||||
|     } | ||||
|  | ||||
|     /// Deserializes every `BlobInfo` stored back to back in `slice`. | ||||
|     /// | ||||
|     /// An empty slice yields an empty vector. | ||||
|     /// | ||||
|     /// # Errors | ||||
|     /// | ||||
|     /// Returns the bincode error of the first entry that cannot be decoded, | ||||
|     /// which includes a truncated entry at the end of `slice`. | ||||
|     pub fn read_from_slice(slice: &[u8]) -> bincode::Result<Vec<BlobInfo>> { | ||||
|         let mut blob_infos = Vec::new(); | ||||
|         let mut cursor = io::Cursor::new(slice); | ||||
|  | ||||
|         // The number of entries cannot be derived from `mem::size_of::<BlobInfo>()`: | ||||
|         // the bincode-encoded size of an entry is not guaranteed to equal its | ||||
|         // in-memory size. Read entries until the whole slice has been consumed. | ||||
|         while cursor.position() < slice.len() as u64 { | ||||
|             let blob_info = BlobInfo::read_from(&mut cursor)?; | ||||
|             blob_infos.push(blob_info); | ||||
|         } | ||||
|  | ||||
|         Ok(blob_infos) | ||||
|     } | ||||
|  | ||||
|     /// Serializes this `BlobInfo` into `writer` using bincode. | ||||
|     pub fn write_into<W: Write>(&self, writer: W) -> bincode::Result<()> { | ||||
|         bincode::serialize_into(writer, self) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Loads the blob described by each entry of `infos` from `snapshot`. | ||||
| /// | ||||
| /// A positive blob is built from its `blob-{name}-fst` and `blob-{name}-doc-idx` | ||||
| /// entries, a negative blob from its `blob-{name}-doc-ids` entry. The returned | ||||
| /// blobs are in the same order as `infos`. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Fails when a snapshot read fails, when an expected entry is missing, or | ||||
| /// when the blob cannot be built from the stored bytes. | ||||
| pub fn blobs_from_blob_infos(infos: &[BlobInfo], snapshot: &Snapshot<&DB>) -> Result<Vec<Blob>, Box<Error>> { | ||||
|     let mut blobs = Vec::with_capacity(infos.len()); | ||||
|  | ||||
|     for info in infos { | ||||
|         let name = info.name; | ||||
|         let blob = match info.sign { | ||||
|             Sign::Positive => { | ||||
|                 let fst_key = format!("blob-{}-fst", name); | ||||
|                 let fst_bytes = snapshot.get(fst_key.as_bytes())? | ||||
|                     .map(|value| value.to_vec()) | ||||
|                     .ok_or_else(|| format!("No fst entry found for blob {}", name))?; | ||||
|  | ||||
|                 let doc_idx_key = format!("blob-{}-doc-idx", name); | ||||
|                 let doc_idx_bytes = snapshot.get(doc_idx_key.as_bytes())? | ||||
|                     .map(|value| value.to_vec()) | ||||
|                     .ok_or_else(|| format!("No doc-idx entry found for blob {}", name))?; | ||||
|  | ||||
|                 Blob::Positive(PositiveBlob::from_bytes(fst_bytes, doc_idx_bytes)?) | ||||
|             }, | ||||
|             Sign::Negative => { | ||||
|                 let doc_ids_key = format!("blob-{}-doc-ids", name); | ||||
|                 let doc_ids_bytes = snapshot.get(doc_ids_key.as_bytes())? | ||||
|                     .map(|value| value.to_vec()) | ||||
|                     .ok_or_else(|| format!("No doc-ids entry found for blob {}", name))?; | ||||
|  | ||||
|                 Blob::Negative(NegativeBlob::from_bytes(doc_ids_bytes)?) | ||||
|             }, | ||||
|         }; | ||||
|         blobs.push(blob); | ||||
|     } | ||||
|  | ||||
|     Ok(blobs) | ||||
| } | ||||
|   | ||||
| @@ -1,16 +0,0 @@ | ||||
| use std::fmt; | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||
| pub struct BlobName; | ||||
|  | ||||
| impl BlobName { | ||||
|     pub fn new() -> BlobName { | ||||
|         unimplemented!() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for BlobName { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         unimplemented!() | ||||
|     } | ||||
| } | ||||
| @@ -1,4 +1,3 @@ | ||||
| pub mod blob_name; | ||||
| pub mod schema; | ||||
| pub mod update; | ||||
|  | ||||
| @@ -21,8 +20,7 @@ use crate::data::DocIdsBuilder; | ||||
| use crate::{DocIndex, DocumentId}; | ||||
| use crate::index::schema::Schema; | ||||
| use crate::index::update::Update; | ||||
| use crate::blob::{PositiveBlobBuilder, Blob, Sign}; | ||||
| use crate::blob::ordered_blobs_from_slice; | ||||
| use crate::blob::{PositiveBlobBuilder, BlobInfo, Sign, Blob, blobs_from_blob_infos}; | ||||
| use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer}; | ||||
| use crate::rank::{criterion, Config, RankedStream}; | ||||
| use crate::automaton; | ||||
| @@ -112,12 +110,14 @@ impl Index { | ||||
|     } | ||||
|  | ||||
|     pub fn search(&self, query: &str) -> Result<Vec<Document>, Box<Error>> { | ||||
|         // this snapshot will allow consistent operations on documents | ||||
|         // this snapshot will allow consistent reads for the whole search operation | ||||
|         let snapshot = self.database.snapshot(); | ||||
|  | ||||
|         // FIXME create a SNAPSHOT for the search ! | ||||
|         let blobs = match snapshot.get(DATA_BLOBS_ORDER.as_bytes())? { | ||||
|             Some(value) => ordered_blobs_from_slice(&value)?, | ||||
|             Some(value) => { | ||||
|                 let blob_infos = BlobInfo::read_from_slice(&value)?; | ||||
|                 blobs_from_blob_infos(&blob_infos, &snapshot)? | ||||
|             }, | ||||
|             None => Vec::new(), | ||||
|         }; | ||||
|  | ||||
| @@ -143,7 +143,7 @@ mod tests { | ||||
|     use tempfile::NamedTempFile; | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::index::schema::Schema; | ||||
|     use crate::index::schema::{Schema, SchemaBuilder, STORED, INDEXED}; | ||||
|     use crate::index::update::{PositiveUpdateBuilder, NegativeUpdateBuilder}; | ||||
|  | ||||
|     #[test] | ||||
| @@ -151,7 +151,8 @@ mod tests { | ||||
|         let path = NamedTempFile::new()?.into_temp_path(); | ||||
|         let mut builder = NegativeUpdateBuilder::new(&path); | ||||
|  | ||||
|         // you can insert documents in any order, it is sorted internally | ||||
|         // you can insert documents in any order, | ||||
|         // it is sorted internally | ||||
|         builder.remove(1); | ||||
|         builder.remove(5); | ||||
|         builder.remove(2); | ||||
| @@ -165,19 +166,26 @@ mod tests { | ||||
|  | ||||
|     #[test] | ||||
|     fn generate_positive_update() -> Result<(), Box<Error>> { | ||||
|         let title; | ||||
|         let description; | ||||
|         let schema = { | ||||
|             let mut builder = SchemaBuilder::new(); | ||||
|             title =       builder.new_attribute("title",       STORED | INDEXED); | ||||
|             description = builder.new_attribute("description", STORED | INDEXED); | ||||
|             builder.build() | ||||
|         }; | ||||
|  | ||||
|         let schema = Schema::open("/meili/default.sch")?; | ||||
|         let sst_path = NamedTempFile::new()?.into_temp_path(); | ||||
|         let tokenizer_builder = DefaultBuilder::new(); | ||||
|         let mut builder = PositiveUpdateBuilder::new("update-positive-0001.sst", schema.clone(), tokenizer_builder); | ||||
|         let mut builder = PositiveUpdateBuilder::new(&sst_path, schema.clone(), tokenizer_builder); | ||||
|  | ||||
|         // you can insert documents in any order, it is sorted internally | ||||
|         let title_field = schema.attribute("title").unwrap(); | ||||
|         builder.update_field(1, title_field, "hallo!".to_owned()); | ||||
|         builder.update_field(5, title_field, "hello!".to_owned()); | ||||
|         builder.update_field(2, title_field, "hi!".to_owned()); | ||||
|         // you can insert documents in any order, | ||||
|         // it is sorted internally | ||||
|         builder.update_field(1, title, "hallo!".to_owned()); | ||||
|         builder.update_field(5, title, "hello!".to_owned()); | ||||
|         builder.update_field(2, title, "hi!".to_owned()); | ||||
|  | ||||
|         let name_field = schema.attribute("name").unwrap(); | ||||
|         builder.remove_field(4, name_field); | ||||
|         builder.remove_field(4, description); | ||||
|  | ||||
|         let update = builder.build()?; | ||||
|  | ||||
|   | ||||
| @@ -46,9 +46,11 @@ impl SchemaBuilder { | ||||
|         SchemaBuilder { attrs: LinkedHashMap::new() } | ||||
|     } | ||||
|  | ||||
|     pub fn new_field<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr { | ||||
|     pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr { | ||||
|         let len = self.attrs.len(); | ||||
|         self.attrs.insert(name.into(), props); | ||||
|         if self.attrs.insert(name.into(), props).is_some() { | ||||
|             panic!("Field already inserted.") | ||||
|         } | ||||
|         SchemaAttr(len as u32) | ||||
|     } | ||||
|  | ||||
| @@ -119,7 +121,7 @@ impl SchemaAttr { | ||||
|  | ||||
| impl fmt::Display for SchemaAttr { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         write!(f, "{}", self.0) | ||||
|         self.0.fmt(f) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -130,9 +132,9 @@ mod tests { | ||||
|     #[test] | ||||
|     fn serialize_deserialize() -> bincode::Result<()> { | ||||
|         let mut builder = SchemaBuilder::new(); | ||||
|         builder.new_field("alphabet", STORED); | ||||
|         builder.new_field("beta", STORED | INDEXED); | ||||
|         builder.new_field("gamma", INDEXED); | ||||
|         builder.new_attribute("alphabet", STORED); | ||||
|         builder.new_attribute("beta", STORED | INDEXED); | ||||
|         builder.new_attribute("gamma", INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let mut buffer = Vec::new(); | ||||
|   | ||||
| @@ -3,8 +3,7 @@ use std::error::Error; | ||||
|  | ||||
| use ::rocksdb::rocksdb_options; | ||||
|  | ||||
| use crate::index::blob_name::BlobName; | ||||
| use crate::blob::Sign; | ||||
| use crate::blob::{BlobName, Sign}; | ||||
|  | ||||
| mod negative_update; | ||||
| mod positive_update; | ||||
| @@ -18,17 +17,7 @@ pub struct Update { | ||||
|  | ||||
| impl Update { | ||||
|     pub fn open<P: Into<PathBuf>>(path: P) -> Result<Update, Box<Error>> { | ||||
|         let path = path.into(); | ||||
|  | ||||
|         let env_options = rocksdb_options::EnvOptions::new(); | ||||
|         let column_family_options = rocksdb_options::ColumnFamilyOptions::new(); | ||||
|         let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options); | ||||
|         file_writer.open(&path.to_string_lossy())?; | ||||
|         let infos = file_writer.finish()?; | ||||
|  | ||||
|         // FIXME check if the update contains a blobs-order entry | ||||
|  | ||||
|         Ok(Update { path }) | ||||
|         Ok(Update { path: path.into() }) | ||||
|     } | ||||
|  | ||||
|     pub fn into_path_buf(self) -> PathBuf { | ||||
|   | ||||
| @@ -3,9 +3,9 @@ use std::error::Error; | ||||
|  | ||||
| use ::rocksdb::rocksdb_options; | ||||
|  | ||||
| use crate::blob::BlobInfo; | ||||
| use crate::index::DATA_BLOBS_ORDER; | ||||
| use crate::index::update::Update; | ||||
| use crate::index::blob_name::BlobName; | ||||
| use crate::data::DocIdsBuilder; | ||||
| use crate::DocumentId; | ||||
|  | ||||
| @@ -27,30 +27,28 @@ impl NegativeUpdateBuilder { | ||||
|     } | ||||
|  | ||||
|     pub fn build(self) -> Result<Update, Box<Error>> { | ||||
|         let blob_name = BlobName::new(); | ||||
|         let blob_info = BlobInfo::new_negative(); | ||||
|  | ||||
|         let env_options = rocksdb_options::EnvOptions::new(); | ||||
|         let column_family_options = rocksdb_options::ColumnFamilyOptions::new(); | ||||
|         let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options); | ||||
|  | ||||
|         file_writer.open(&self.path.to_string_lossy())?; | ||||
|  | ||||
|         // TODO the blob-name must be written in bytes (16 bytes) | ||||
|         //      along with the sign | ||||
|         unimplemented!("write the blob sign and name"); | ||||
|  | ||||
|         // write the blob name to be merged | ||||
|         let blob_name = blob_name.to_string(); | ||||
|         file_writer.merge(DATA_BLOBS_ORDER.as_bytes(), blob_name.as_bytes())?; | ||||
|  | ||||
|         // write the doc ids | ||||
|         let blob_key = format!("BLOB-{}-doc-ids", blob_name); | ||||
|         let blob_key = format!("blob-{}-doc-ids", blob_info.name); | ||||
|         let blob_doc_ids = self.doc_ids.into_inner()?; | ||||
|         file_writer.put(blob_key.as_bytes(), &blob_doc_ids)?; | ||||
|  | ||||
|         { | ||||
|             // write the blob name to be merged | ||||
|             let mut buffer = Vec::new(); | ||||
|             blob_info.write_into(&mut buffer); | ||||
|             file_writer.merge(DATA_BLOBS_ORDER.as_bytes(), &buffer)?; | ||||
|         } | ||||
|  | ||||
|         for id in blob_doc_ids { | ||||
|             let start = format!("DOCU-{}", id); | ||||
|             let end = format!("DOCU-{}", id + 1); | ||||
|             let start = format!("docu-{}", id); | ||||
|             let end = format!("docu-{}", id + 1); | ||||
|             file_writer.delete_range(start.as_bytes(), end.as_bytes())?; | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -7,10 +7,9 @@ use ::rocksdb::rocksdb_options; | ||||
|  | ||||
| use crate::index::DATA_BLOBS_ORDER; | ||||
| use crate::index::update::Update; | ||||
| use crate::index::blob_name::BlobName; | ||||
| use crate::index::schema::{SchemaProps, Schema, SchemaAttr}; | ||||
| use crate::tokenizer::TokenizerBuilder; | ||||
| use crate::blob::PositiveBlobBuilder; | ||||
| use crate::blob::{BlobInfo, PositiveBlobBuilder}; | ||||
| use crate::{DocIndex, DocumentId}; | ||||
|  | ||||
| pub enum NewState { | ||||
| @@ -53,7 +52,7 @@ impl<B> PositiveUpdateBuilder<B> | ||||
| where B: TokenizerBuilder | ||||
| { | ||||
|     pub fn build(self) -> Result<Update, Box<Error>> { | ||||
|         let blob_name = BlobName::new(); | ||||
|         let blob_info = BlobInfo::new_positive(); | ||||
|  | ||||
|         let env_options = rocksdb_options::EnvOptions::new(); | ||||
|         let column_family_options = rocksdb_options::ColumnFamilyOptions::new(); | ||||
| @@ -61,14 +60,6 @@ where B: TokenizerBuilder | ||||
|  | ||||
|         file_writer.open(&self.path.to_string_lossy())?; | ||||
|  | ||||
|         // TODO the blob-name must be written in bytes (16 bytes) | ||||
|         //      along with the sign | ||||
|         unimplemented!("write the blob sign and name"); | ||||
|  | ||||
|         // write the blob name to be merged | ||||
|         let blob_name = blob_name.to_string(); | ||||
|         file_writer.put(DATA_BLOBS_ORDER.as_bytes(), blob_name.as_bytes())?; | ||||
|  | ||||
|         let mut builder = PositiveBlobBuilder::new(Vec::new(), Vec::new()); | ||||
|         for ((document_id, field), state) in &self.new_states { | ||||
|             let value = match state { | ||||
| @@ -96,18 +87,27 @@ where B: TokenizerBuilder | ||||
|         } | ||||
|         let (blob_fst_map, blob_doc_idx) = builder.into_inner()?; | ||||
|  | ||||
|         // write the fst | ||||
|         let blob_key = format!("BLOB-{}-fst", blob_name); | ||||
|         file_writer.put(blob_key.as_bytes(), &blob_fst_map)?; | ||||
|  | ||||
|         // write the doc-idx | ||||
|         let blob_key = format!("BLOB-{}-doc-idx", blob_name); | ||||
|         let blob_key = format!("blob-{}-doc-idx", blob_info.name); | ||||
|         file_writer.put(blob_key.as_bytes(), &blob_doc_idx)?; | ||||
|  | ||||
|         // write the fst | ||||
|         let blob_key = format!("blob-{}-fst", blob_info.name); | ||||
|         file_writer.put(blob_key.as_bytes(), &blob_fst_map)?; | ||||
|  | ||||
|         { | ||||
|             // write the blob name to be merged | ||||
|             let mut buffer = Vec::new(); | ||||
|             blob_info.write_into(&mut buffer); | ||||
|             file_writer.merge(DATA_BLOBS_ORDER.as_bytes(), &buffer)?; | ||||
|         } | ||||
|  | ||||
|         // write all the documents fields updates | ||||
|         let mut key = String::from("DOCU-"); | ||||
|         let mut key = String::from("docu-"); | ||||
|         let prefix_len = key.len(); | ||||
|  | ||||
|         // FIXME write numbers in bytes not decimal representation | ||||
|  | ||||
|         for ((id, field), state) in self.new_states { | ||||
|             key.truncate(prefix_len); | ||||
|             write!(&mut key, "{}-{}", id, field)?; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user