mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	feat: Introduce typed keys constructors
This commit is contained in:
		| @@ -16,6 +16,7 @@ use fst::Map; | ||||
| use uuid::Uuid; | ||||
| use rocksdb::rocksdb::{DB, Snapshot}; | ||||
|  | ||||
| use crate::index::identifier::Identifier; | ||||
| use crate::data::DocIndexes; | ||||
|  | ||||
| pub enum Blob { | ||||
| @@ -54,6 +55,10 @@ impl BlobName { | ||||
|     pub fn new() -> BlobName { | ||||
|         BlobName(Uuid::new_v4()) | ||||
|     } | ||||
|  | ||||
|     pub fn as_bytes(&self) -> &[u8; 16] { | ||||
|         self.0.as_bytes() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for BlobName { | ||||
| @@ -113,21 +118,21 @@ pub fn blobs_from_blob_infos(infos: &[BlobInfo], snapshot: &Snapshot<&DB>) -> Re | ||||
|     for info in infos { | ||||
|         let blob = match info.sign { | ||||
|             Sign::Positive => { | ||||
|                 let key_map = format!("blob-{}-fst", info.name); | ||||
|                 let map = match snapshot.get(key_map.as_bytes())? { | ||||
|                 let blob_key = Identifier::blob(info.name).fst_map().build(); | ||||
|                 let map = match snapshot.get(&blob_key)? { | ||||
|                     Some(value) => value.to_vec(), | ||||
|                     None => return Err(format!("No fst entry found for blob {}", info.name).into()), | ||||
|                 }; | ||||
|                 let key_doc_idx = format!("blob-{}-doc-idx", info.name); | ||||
|                 let doc_idx = match snapshot.get(key_doc_idx.as_bytes())? { | ||||
|                 let blob_key = Identifier::blob(info.name).document_indexes().build(); | ||||
|                 let doc_idx = match snapshot.get(&blob_key)? { | ||||
|                     Some(value) => value.to_vec(), | ||||
|                     None => return Err(format!("No doc-idx entry found for blob {}", info.name).into()), | ||||
|                 }; | ||||
|                 PositiveBlob::from_bytes(map, doc_idx).map(Blob::Positive)? | ||||
|             }, | ||||
|             Sign::Negative => { | ||||
|                 let key_doc_ids = format!("blob-{}-doc-ids", info.name); | ||||
|                 let doc_ids = match snapshot.get(key_doc_ids.as_bytes())? { | ||||
|                 let blob_key = Identifier::blob(info.name).document_ids().build(); | ||||
|                 let doc_ids = match snapshot.get(&blob_key)? { | ||||
|                     Some(value) => value.to_vec(), | ||||
|                     None => return Err(format!("No doc-ids entry found for blob {}", info.name).into()), | ||||
|                 }; | ||||
|   | ||||
| @@ -23,7 +23,8 @@ impl DocIds { | ||||
|         Ok(DocIds { doc_ids }) | ||||
|     } | ||||
|  | ||||
|     pub fn from_bytes(vec: Vec<u8>) -> io::Result<Self> { | ||||
|     pub fn from_bytes(vec: Vec<u8>) -> Result<Self, Box<Error>> { | ||||
|         // FIXME check if modulo DocumentId | ||||
|         let len = vec.len(); | ||||
|         let doc_ids = Data::Shared { | ||||
|             vec: Arc::new(vec), | ||||
|   | ||||
							
								
								
									
										100
									
								
								src/index/identifier.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								src/index/identifier.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,100 @@ | ||||
| use std::io::Write; | ||||
|  | ||||
| use byteorder::{NetworkEndian, WriteBytesExt}; | ||||
|  | ||||
| use crate::index::schema::SchemaAttr; | ||||
| use crate::blob::BlobName; | ||||
| use crate::DocumentId; | ||||
|  | ||||
| pub struct Identifier { | ||||
|     inner: Vec<u8>, | ||||
| } | ||||
|  | ||||
| impl Identifier { | ||||
|     pub fn data() -> Data { | ||||
|         let mut inner = Vec::new(); | ||||
|         let _ = inner.write(b"data"); | ||||
|         Data { inner } | ||||
|     } | ||||
|  | ||||
|     pub fn blob(name: BlobName) -> Blob { | ||||
|         let mut inner = Vec::new(); | ||||
|         let _ = inner.write(b"blob"); | ||||
|         let _ = inner.write(name.as_bytes()); | ||||
|         Blob { inner } | ||||
|     } | ||||
|  | ||||
|     pub fn document(id: DocumentId) -> Document { | ||||
|         let mut inner = Vec::new(); | ||||
|         let _ = inner.write(b"docu"); | ||||
|         let _ = inner.write(b"-"); | ||||
|         let _ = inner.write_u64::<NetworkEndian>(id); | ||||
|         Document { inner } | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Builder for keys living in the `data` namespace.
///
/// Holds the key bytes accumulated so far; every refinement appends a `-`
/// separator followed by the entry name.
pub struct Data {
    inner: Vec<u8>,
}

impl Data {
    /// Appends `-blobs-order` to the key.
    pub fn blobs_order(mut self) -> Self {
        // `extend_from_slice` is infallible, unlike `io::Write::write` whose
        // result had to be discarded.
        self.inner.extend_from_slice(b"-");
        self.inner.extend_from_slice(b"blobs-order");
        self
    }

    /// Appends `-schema` to the key.
    pub fn schema(mut self) -> Self {
        self.inner.extend_from_slice(b"-");
        self.inner.extend_from_slice(b"schema");
        self
    }

    /// Consumes the builder and returns the raw key bytes.
    pub fn build(self) -> Vec<u8> {
        self.inner
    }
}
|  | ||||
/// Builder for keys addressing one part of a blob.
///
/// Holds the key bytes accumulated so far; every refinement appends a `-`
/// separator followed by the name of the blob part.
pub struct Blob {
    inner: Vec<u8>,
}

impl Blob {
    /// Appends `-doc-idx` to the key.
    pub fn document_indexes(mut self) -> Self {
        // `extend_from_slice` is infallible, unlike `io::Write::write` whose
        // result had to be discarded.
        self.inner.extend_from_slice(b"-");
        self.inner.extend_from_slice(b"doc-idx");
        self
    }

    /// Appends `-doc-ids` to the key.
    pub fn document_ids(mut self) -> Self {
        self.inner.extend_from_slice(b"-");
        self.inner.extend_from_slice(b"doc-ids");
        self
    }

    /// Appends `-fst` to the key.
    pub fn fst_map(mut self) -> Self {
        self.inner.extend_from_slice(b"-");
        self.inner.extend_from_slice(b"fst");
        self
    }

    /// Consumes the builder and returns the raw key bytes.
    pub fn build(self) -> Vec<u8> {
        self.inner
    }
}
|  | ||||
| pub struct Document { | ||||
|     inner: Vec<u8>, | ||||
| } | ||||
|  | ||||
| impl Document { | ||||
|     pub fn attribute(mut self, attr: SchemaAttr) -> Self { | ||||
|         let _ = self.inner.write(b"-"); | ||||
|         let _ = self.inner.write_u32::<NetworkEndian>(attr.as_u32()); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn build(self) -> Vec<u8> { | ||||
|         self.inner | ||||
|     } | ||||
| } | ||||
| @@ -1,3 +1,4 @@ | ||||
| pub mod identifier; | ||||
| pub mod schema; | ||||
| pub mod update; | ||||
|  | ||||
| @@ -20,18 +21,12 @@ use crate::data::DocIdsBuilder; | ||||
| use crate::{DocIndex, DocumentId}; | ||||
| use crate::index::schema::Schema; | ||||
| use crate::index::update::Update; | ||||
| use crate::index::identifier::Identifier; | ||||
| use crate::blob::{PositiveBlobBuilder, BlobInfo, Sign, Blob, blobs_from_blob_infos}; | ||||
| use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer}; | ||||
| use crate::rank::{criterion, Config, RankedStream}; | ||||
| use crate::automaton; | ||||
|  | ||||
| const DATA_PREFIX: &str = "data"; | ||||
| const BLOB_PREFIX: &str = "blob"; | ||||
| const DOCU_PREFIX: &str = "docu"; | ||||
|  | ||||
| const DATA_BLOBS_ORDER: &str = "data-blobs-order"; | ||||
| const DATA_SCHEMA:      &str = "data-schema"; | ||||
|  | ||||
| fn simple_vec_append(key: &[u8], value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> { | ||||
|     let mut output = Vec::new(); | ||||
|     for bytes in operands.chain(value) { | ||||
| @@ -67,7 +62,8 @@ impl Index { | ||||
|  | ||||
|         let mut schema_bytes = Vec::new(); | ||||
|         schema.write_to(&mut schema_bytes)?; | ||||
|         database.put(DATA_SCHEMA.as_bytes(), &schema_bytes)?; | ||||
|         let data_key = Identifier::data().schema().build(); | ||||
|         database.put(&data_key, &schema_bytes)?; | ||||
|  | ||||
|         Ok(Self { database }) | ||||
|     } | ||||
| @@ -83,7 +79,8 @@ impl Index { | ||||
|  | ||||
|         let database = rocksdb::DB::open_cf(opts, &path, vec![("default", cf_opts)])?; | ||||
|  | ||||
|         let _schema = match database.get(DATA_SCHEMA.as_bytes())? { | ||||
|         let data_key = Identifier::data().schema().build(); | ||||
|         let _schema = match database.get(&data_key)? { | ||||
|             Some(value) => Schema::read_from(&*value)?, | ||||
|             None => return Err(String::from("Database does not contain a schema").into()), | ||||
|         }; | ||||
| @@ -105,7 +102,8 @@ impl Index { | ||||
|     } | ||||
|  | ||||
|     pub fn schema(&self) -> Result<Schema, Box<Error>> { | ||||
|         let bytes = self.database.get(DATA_SCHEMA.as_bytes())?.expect("data-schema entry not found"); | ||||
|         let data_key = Identifier::data().schema().build(); | ||||
|         let bytes = self.database.get(&data_key)?.expect("data-schema entry not found"); | ||||
|         Ok(Schema::read_from(&*bytes).expect("Invalid schema")) | ||||
|     } | ||||
|  | ||||
| @@ -113,7 +111,8 @@ impl Index { | ||||
|         // this snapshot will allow consistent reads for the whole search operation | ||||
|         let snapshot = self.database.snapshot(); | ||||
|  | ||||
|         let blobs = match snapshot.get(DATA_BLOBS_ORDER.as_bytes())? { | ||||
|         let data_key = Identifier::data().blobs_order().build(); | ||||
|         let blobs = match snapshot.get(&data_key)? { | ||||
|             Some(value) => { | ||||
|                 let blob_infos = BlobInfo::read_from_slice(&value)?; | ||||
|                 blobs_from_blob_infos(&blob_infos, &snapshot)? | ||||
|   | ||||
| @@ -4,9 +4,9 @@ use std::error::Error; | ||||
| use ::rocksdb::rocksdb_options; | ||||
|  | ||||
| use crate::blob::BlobInfo; | ||||
| use crate::index::DATA_BLOBS_ORDER; | ||||
| use crate::index::update::Update; | ||||
| use crate::data::DocIdsBuilder; | ||||
| use crate::index::identifier::Identifier; | ||||
| use crate::data::{DocIds, DocIdsBuilder}; | ||||
| use crate::DocumentId; | ||||
|  | ||||
| pub struct NegativeUpdateBuilder { | ||||
| @@ -35,21 +35,23 @@ impl NegativeUpdateBuilder { | ||||
|         file_writer.open(&self.path.to_string_lossy())?; | ||||
|  | ||||
|         // write the doc ids | ||||
|         let blob_key = format!("blob-{}-doc-ids", blob_info.name); | ||||
|         let blob_key = Identifier::blob(blob_info.name).document_ids().build(); | ||||
|         let blob_doc_ids = self.doc_ids.into_inner()?; | ||||
|         file_writer.put(blob_key.as_bytes(), &blob_doc_ids)?; | ||||
|         file_writer.put(&blob_key, &blob_doc_ids)?; | ||||
|  | ||||
|         { | ||||
|             // write the blob name to be merged | ||||
|             let mut buffer = Vec::new(); | ||||
|             blob_info.write_into(&mut buffer); | ||||
|             file_writer.merge(DATA_BLOBS_ORDER.as_bytes(), &buffer)?; | ||||
|             let data_key = Identifier::data().blobs_order().build(); | ||||
|             file_writer.merge(&data_key, &buffer)?; | ||||
|         } | ||||
|  | ||||
|         for id in blob_doc_ids { | ||||
|             let start = format!("docu-{}", id); | ||||
|             let end = format!("docu-{}", id + 1); | ||||
|             file_writer.delete_range(start.as_bytes(), end.as_bytes())?; | ||||
|         let blob_doc_ids = DocIds::from_bytes(blob_doc_ids)?; | ||||
|         for id in blob_doc_ids.doc_ids().iter().cloned() { | ||||
|             let start = Identifier::document(id).build(); | ||||
|             let end = Identifier::document(id + 1).build(); | ||||
|             file_writer.delete_range(&start, &end)?; | ||||
|         } | ||||
|  | ||||
|         file_writer.finish()?; | ||||
|   | ||||
| @@ -5,8 +5,8 @@ use std::fmt::Write; | ||||
|  | ||||
| use ::rocksdb::rocksdb_options; | ||||
|  | ||||
| use crate::index::DATA_BLOBS_ORDER; | ||||
| use crate::index::update::Update; | ||||
| use crate::index::identifier::Identifier; | ||||
| use crate::index::schema::{SchemaProps, Schema, SchemaAttr}; | ||||
| use crate::tokenizer::TokenizerBuilder; | ||||
| use crate::blob::{BlobInfo, PositiveBlobBuilder}; | ||||
| @@ -88,34 +88,29 @@ where B: TokenizerBuilder | ||||
|         let (blob_fst_map, blob_doc_idx) = builder.into_inner()?; | ||||
|  | ||||
|         // write the doc-idx | ||||
|         let blob_key = format!("blob-{}-doc-idx", blob_info.name); | ||||
|         file_writer.put(blob_key.as_bytes(), &blob_doc_idx)?; | ||||
|         let blob_key = Identifier::blob(blob_info.name).document_indexes().build(); | ||||
|         file_writer.put(&blob_key, &blob_doc_idx)?; | ||||
|  | ||||
|         // write the fst | ||||
|         let blob_key = format!("blob-{}-fst", blob_info.name); | ||||
|         file_writer.put(blob_key.as_bytes(), &blob_fst_map)?; | ||||
|         let blob_key = Identifier::blob(blob_info.name).fst_map().build(); | ||||
|         file_writer.put(&blob_key, &blob_fst_map)?; | ||||
|  | ||||
|         { | ||||
|             // write the blob name to be merged | ||||
|             let mut buffer = Vec::new(); | ||||
|             blob_info.write_into(&mut buffer); | ||||
|             file_writer.merge(DATA_BLOBS_ORDER.as_bytes(), &buffer)?; | ||||
|             let data_key = Identifier::data().blobs_order().build(); | ||||
|             file_writer.merge(&data_key, &buffer)?; | ||||
|         } | ||||
|  | ||||
|         // write all the documents fields updates | ||||
|         let mut key = String::from("docu-"); | ||||
|         let prefix_len = key.len(); | ||||
|  | ||||
|         // FIXME write numbers in bytes not decimal representation | ||||
|  | ||||
|         for ((id, field), state) in self.new_states { | ||||
|             key.truncate(prefix_len); | ||||
|             write!(&mut key, "{}-{}", id, field)?; | ||||
|         for ((id, attr), state) in self.new_states { | ||||
|             let key = Identifier::document(id).attribute(attr).build(); | ||||
|             match state { | ||||
|                 NewState::Updated { value, props } => if props.is_stored() { | ||||
|                     file_writer.put(key.as_bytes(), value.as_bytes())? | ||||
|                     file_writer.put(&key, value.as_bytes())? | ||||
|                 }, | ||||
|                 NewState::Removed => file_writer.delete(key.as_bytes())?, | ||||
|                 NewState::Removed => file_writer.delete(&key)?, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user