mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	feat: Introduce the Schema
This commit is contained in:
		| @@ -136,12 +136,12 @@ mod tests { | ||||
|         let mut builder = PositiveUpdateBuilder::new("update-positive-0001.sst", schema.clone(), tokenizer_builder); | ||||
|  | ||||
|         // you can insert documents in any order, it is sorted internally | ||||
|         let title_field = schema.field("title").unwrap(); | ||||
|         let title_field = schema.attribute("title").unwrap(); | ||||
|         builder.update_field(1, title_field, "hallo!".to_owned()); | ||||
|         builder.update_field(5, title_field, "hello!".to_owned()); | ||||
|         builder.update_field(2, title_field, "hi!".to_owned()); | ||||
|  | ||||
|         let name_field = schema.field("name").unwrap(); | ||||
|         let name_field = schema.attribute("name").unwrap(); | ||||
|         builder.remove_field(4, name_field); | ||||
|  | ||||
|         let update = builder.build()?; | ||||
|   | ||||
| @@ -1,14 +1,16 @@ | ||||
| use std::collections::{HashMap, BTreeMap}; | ||||
| use std::io::{Read, Write}; | ||||
| use std::error::Error; | ||||
| use std::path::Path; | ||||
| use std::ops::BitOr; | ||||
| use std::fs::File; | ||||
| use std::fmt; | ||||
|  | ||||
| use linked_hash_map::LinkedHashMap; | ||||
|  | ||||
| pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false }; | ||||
| pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true }; | ||||
|  | ||||
| #[derive(Copy, Clone)] | ||||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] | ||||
| pub struct SchemaProps { | ||||
|     stored: bool, | ||||
|     indexed: bool, | ||||
| @@ -36,66 +38,110 @@ impl BitOr for SchemaProps { | ||||
| } | ||||
|  | ||||
| pub struct SchemaBuilder { | ||||
|     fields: Vec<(String, SchemaProps)>, | ||||
|     attrs: LinkedHashMap<String, SchemaProps>, | ||||
| } | ||||
|  | ||||
| impl SchemaBuilder { | ||||
|     pub fn new() -> SchemaBuilder { | ||||
|         SchemaBuilder { fields: Vec::new() } | ||||
|         SchemaBuilder { attrs: LinkedHashMap::new() } | ||||
|     } | ||||
|  | ||||
|     pub fn field<N>(&mut self, name: N, props: SchemaProps) -> SchemaField | ||||
|     where N: Into<String>, | ||||
|     { | ||||
|         let len = self.fields.len(); | ||||
|         let name = name.into(); | ||||
|         self.fields.push((name, props)); | ||||
|  | ||||
|         SchemaField(len as u32) | ||||
|     pub fn new_field<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr { | ||||
|         let len = self.attrs.len(); | ||||
|         self.attrs.insert(name.into(), props); | ||||
|         SchemaAttr(len as u32) | ||||
|     } | ||||
|  | ||||
|     pub fn build(self) -> Schema { | ||||
|         unimplemented!() | ||||
|         let mut attrs = HashMap::new(); | ||||
|         let mut props = Vec::new(); | ||||
|  | ||||
|         for (i, (name, prop)) in self.attrs.into_iter().enumerate() { | ||||
|             attrs.insert(name, SchemaAttr(i as u32)); | ||||
|             props.push(prop); | ||||
|         } | ||||
|  | ||||
|         Schema { attrs, props } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct Schema; | ||||
| #[derive(Debug, Clone, PartialEq, Eq)] | ||||
| pub struct Schema { | ||||
|     attrs: HashMap<String, SchemaAttr>, | ||||
|     props: Vec<SchemaProps>, | ||||
| } | ||||
|  | ||||
| impl Schema { | ||||
|     pub fn open<P: AsRef<Path>>(path: P) -> Result<Schema, Box<Error>> { | ||||
|     pub fn open<P: AsRef<Path>>(path: P) -> bincode::Result<Schema> { | ||||
|         let file = File::open(path)?; | ||||
|         Schema::read_from(file) | ||||
|     } | ||||
|  | ||||
|     pub fn read_from<R: Read>(reader: R) -> Result<Schema, Box<Error>> { | ||||
|         unimplemented!() | ||||
|     pub fn read_from<R: Read>(reader: R) -> bincode::Result<Schema> { | ||||
|         let attrs = bincode::deserialize_from(reader)?; | ||||
|         let builder = SchemaBuilder { attrs }; | ||||
|         Ok(builder.build()) | ||||
|     } | ||||
|  | ||||
|     pub fn write_to<W: Write>(writer: W) -> Result<(), Box<Error>> { | ||||
|         unimplemented!() | ||||
|     pub fn write_to<W: Write>(&self, writer: W) -> bincode::Result<()> { | ||||
|         let mut ordered = BTreeMap::new(); | ||||
|         for (name, field) in &self.attrs { | ||||
|             let index = field.as_u32(); | ||||
|             let props = self.props[index as usize]; | ||||
|             ordered.insert(index, (name, props)); | ||||
|         } | ||||
|  | ||||
|         let mut attrs = LinkedHashMap::with_capacity(ordered.len()); | ||||
|         for (_, (name, props)) in ordered { | ||||
|             attrs.insert(name, props); | ||||
|         } | ||||
|  | ||||
|         bincode::serialize_into(writer, &attrs) | ||||
|     } | ||||
|  | ||||
|     pub fn props(&self, field: SchemaField) -> SchemaProps { | ||||
|         unimplemented!() | ||||
|     pub fn props(&self, attr: SchemaAttr) -> SchemaProps { | ||||
|         self.props[attr.as_u32() as usize] | ||||
|     } | ||||
|  | ||||
|     pub fn field(&self, name: &str) -> Option<SchemaField> { | ||||
|         unimplemented!() | ||||
|     pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> { | ||||
|         self.attrs.get(name.as_ref()).cloned() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq)] | ||||
| pub struct SchemaField(u32); | ||||
| #[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)] | ||||
| pub struct SchemaAttr(u32); | ||||
|  | ||||
| impl SchemaField { | ||||
| impl SchemaAttr { | ||||
|     pub fn as_u32(&self) -> u32 { | ||||
|         self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for SchemaField { | ||||
| impl fmt::Display for SchemaAttr { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         write!(f, "{}", self.0) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn serialize_deserialize() -> bincode::Result<()> { | ||||
|         let mut builder = SchemaBuilder::new(); | ||||
|         builder.new_field("alphabet", STORED); | ||||
|         builder.new_field("beta", STORED | INDEXED); | ||||
|         builder.new_field("gamma", INDEXED); | ||||
|         let schema = builder.build(); | ||||
|  | ||||
|         let mut buffer = Vec::new(); | ||||
|  | ||||
|         schema.write_to(&mut buffer)?; | ||||
|         let schema2 = Schema::read_from(buffer.as_slice())?; | ||||
|  | ||||
|         assert_eq!(schema, schema2); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use std::fmt::Write; | ||||
|  | ||||
| use ::rocksdb::rocksdb_options; | ||||
|  | ||||
| use crate::index::schema::{SchemaProps, Schema, SchemaField}; | ||||
| use crate::index::schema::{SchemaProps, Schema, SchemaAttr}; | ||||
| use crate::index::update::{FIELD_BLOBS_ORDER, Update}; | ||||
| use crate::tokenizer::TokenizerBuilder; | ||||
| use crate::index::blob_name::BlobName; | ||||
| @@ -24,7 +24,7 @@ pub struct PositiveUpdateBuilder<B> { | ||||
|     path: PathBuf, | ||||
|     schema: Schema, | ||||
|     tokenizer_builder: B, | ||||
|     new_states: BTreeMap<(DocumentId, SchemaField), NewState>, | ||||
|     new_states: BTreeMap<(DocumentId, SchemaAttr), NewState>, | ||||
| } | ||||
|  | ||||
| impl<B> PositiveUpdateBuilder<B> { | ||||
| @@ -38,12 +38,12 @@ impl<B> PositiveUpdateBuilder<B> { | ||||
|     } | ||||
|  | ||||
|     // TODO value must be a field that can be indexed | ||||
|     pub fn update_field(&mut self, id: DocumentId, field: SchemaField, value: String) { | ||||
|     pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) { | ||||
|         let state = NewState::Updated { value, props: self.schema.props(field) }; | ||||
|         self.new_states.insert((id, field), state); | ||||
|     } | ||||
|  | ||||
|     pub fn remove_field(&mut self, id: DocumentId, field: SchemaField) { | ||||
|     pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) { | ||||
|         self.new_states.insert((id, field), NewState::Removed); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| #![feature(range_contains)] | ||||
|  | ||||
| #[macro_use] extern crate lazy_static; | ||||
| #[macro_use] extern crate serde_derive; | ||||
|  | ||||
| pub mod index; | ||||
| pub mod blob; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user