mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Make it possible to create an index and add a schema later on
This commit is contained in:
		| @@ -16,6 +16,7 @@ pub enum Error { | |||||||
|     RmpEncode(rmp_serde::encode::Error), |     RmpEncode(rmp_serde::encode::Error), | ||||||
|     Bincode(bincode::Error), |     Bincode(bincode::Error), | ||||||
|     Serializer(SerializerError), |     Serializer(SerializerError), | ||||||
|  |     UnsupportedOperation(UnsupportedOperation), | ||||||
| } | } | ||||||
|  |  | ||||||
| impl From<io::Error> for Error { | impl From<io::Error> for Error { | ||||||
| @@ -60,6 +61,12 @@ impl From<SerializerError> for Error { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl From<UnsupportedOperation> for Error { | ||||||
|  |     fn from(op: UnsupportedOperation) -> Error { | ||||||
|  |         Error::UnsupportedOperation(op) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl fmt::Display for Error { | impl fmt::Display for Error { | ||||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|         use self::Error::*; |         use self::Error::*; | ||||||
| @@ -75,9 +82,23 @@ impl fmt::Display for Error { | |||||||
|             RmpEncode(e) => write!(f, "rmp encode error; {}", e), |             RmpEncode(e) => write!(f, "rmp encode error; {}", e), | ||||||
|             Bincode(e) => write!(f, "bincode error; {}", e), |             Bincode(e) => write!(f, "bincode error; {}", e), | ||||||
|             Serializer(e) => write!(f, "serializer error; {}", e), |             Serializer(e) => write!(f, "serializer error; {}", e), | ||||||
|  |             UnsupportedOperation(op) => write!(f, "unsupported operation; {}", op), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl error::Error for Error { } | impl error::Error for Error { } | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub enum UnsupportedOperation { | ||||||
|  |     SchemaAlreadyExists, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for UnsupportedOperation { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|  |         use self::UnsupportedOperation::*; | ||||||
|  |         match self { | ||||||
|  |             SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,6 +1,10 @@ | |||||||
| use rkv::{Manager, Rkv, SingleStore, Value, StoreOptions}; |  | ||||||
| use std::{fs, path::Path}; | use std::{fs, path::Path}; | ||||||
|  |  | ||||||
|  | use serde_json::json; | ||||||
|  | use rkv::{Manager, Rkv, SingleStore, Value, StoreOptions}; | ||||||
|  |  | ||||||
| use meilidb_core::{Database, MResult, QueryBuilder}; | use meilidb_core::{Database, MResult, QueryBuilder}; | ||||||
|  | use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED}; | ||||||
|  |  | ||||||
| fn main() -> MResult<()> { | fn main() -> MResult<()> { | ||||||
|     env_logger::init(); |     env_logger::init(); | ||||||
| @@ -13,8 +17,24 @@ fn main() -> MResult<()> { | |||||||
|     let hello1 = database.open_index("hello1")?; |     let hello1 = database.open_index("hello1")?; | ||||||
|     let hello2 = database.open_index("hello2")?; |     let hello2 = database.open_index("hello2")?; | ||||||
|  |  | ||||||
|  |     let mut builder = SchemaBuilder::with_identifier("id"); | ||||||
|  |     builder.new_attribute("alpha", DISPLAYED); | ||||||
|  |     builder.new_attribute("beta", DISPLAYED | INDEXED); | ||||||
|  |     builder.new_attribute("gamma", INDEXED); | ||||||
|  |     let schema = builder.build(); | ||||||
|  |  | ||||||
|  |     let rkv = database.rkv.read().unwrap(); | ||||||
|  |     let writer = rkv.write()?; | ||||||
|  |  | ||||||
|  |     hello.schema_update(writer, schema)?; | ||||||
|  |  | ||||||
|  |     let object = json!({ | ||||||
|  |         "id": 23, | ||||||
|  |         "alpha": "hello", | ||||||
|  |     }); | ||||||
|  |  | ||||||
|     let mut additions = hello.documents_addition(); |     let mut additions = hello.documents_addition(); | ||||||
|     additions.extend(vec![()]); |     additions.extend(vec![object]); | ||||||
|  |  | ||||||
|     let rkv = database.rkv.read().unwrap(); |     let rkv = database.rkv.read().unwrap(); | ||||||
|     let writer = rkv.write()?; |     let writer = rkv.write()?; | ||||||
| @@ -53,7 +73,7 @@ fn main() -> MResult<()> { | |||||||
|  |  | ||||||
|     // println!("{:?}", documents); |     // println!("{:?}", documents); | ||||||
|  |  | ||||||
|     std::thread::sleep(std::time::Duration::from_secs(10)); |     std::thread::sleep(std::time::Duration::from_secs(2)); | ||||||
|  |  | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -14,7 +14,8 @@ pub use self::synonyms::Synonyms; | |||||||
| pub use self::updates::Updates; | pub use self::updates::Updates; | ||||||
| pub use self::updates_results::UpdatesResults; | pub use self::updates_results::UpdatesResults; | ||||||
|  |  | ||||||
| use crate::update; | use meilidb_schema::Schema; | ||||||
|  | use crate::{update, MResult}; | ||||||
|  |  | ||||||
| fn aligned_to(bytes: &[u8], align: usize) -> bool { | fn aligned_to(bytes: &[u8], align: usize) -> bool { | ||||||
|     (bytes as *const _ as *const () as usize) % align == 0 |     (bytes as *const _ as *const () as usize) % align == 0 | ||||||
| @@ -62,6 +63,13 @@ pub struct Index { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl Index { | impl Index { | ||||||
|  |     pub fn schema_update(&self, mut writer: rkv::Writer, schema: Schema) -> MResult<()> { | ||||||
|  |         update::push_schema_update(&mut writer, self.updates, self.updates_results, schema)?; | ||||||
|  |         writer.commit()?; | ||||||
|  |         let _ = self.updates_notifier.send(()); | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn documents_addition<D>(&self) -> update::DocumentsAddition<D> { |     pub fn documents_addition<D>(&self) -> update::DocumentsAddition<D> { | ||||||
|         update::DocumentsAddition::new( |         update::DocumentsAddition::new( | ||||||
|             self.updates, |             self.updates, | ||||||
|   | |||||||
| @@ -9,7 +9,7 @@ use crate::raw_indexer::RawIndexer; | |||||||
| use crate::serde::{extract_document_id, Serializer, RamDocumentStore}; | use crate::serde::{extract_document_id, Serializer, RamDocumentStore}; | ||||||
| use crate::store; | use crate::store; | ||||||
| use crate::update::{push_documents_addition, apply_documents_deletion}; | use crate::update::{push_documents_addition, apply_documents_deletion}; | ||||||
| use crate::{Error, RankedMap}; | use crate::{MResult, Error, RankedMap}; | ||||||
|  |  | ||||||
| pub struct DocumentsAddition<D> { | pub struct DocumentsAddition<D> { | ||||||
|     updates_store: store::Updates, |     updates_store: store::Updates, | ||||||
| @@ -37,7 +37,7 @@ impl<D> DocumentsAddition<D> { | |||||||
|         self.documents.push(document); |         self.documents.push(document); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn finalize(self, mut writer: rkv::Writer) -> Result<u64, Error> |     pub fn finalize(self, mut writer: rkv::Writer) -> MResult<u64> | ||||||
|     where D: serde::Serialize |     where D: serde::Serialize | ||||||
|     { |     { | ||||||
|         let update_id = push_documents_addition( |         let update_id = push_documents_addition( | ||||||
| @@ -65,15 +65,19 @@ pub fn apply_documents_addition( | |||||||
|     documents_fields_store: store::DocumentsFields, |     documents_fields_store: store::DocumentsFields, | ||||||
|     postings_lists_store: store::PostingsLists, |     postings_lists_store: store::PostingsLists, | ||||||
|     docs_words_store: store::DocsWords, |     docs_words_store: store::DocsWords, | ||||||
|     schema: &Schema, |  | ||||||
|     mut ranked_map: RankedMap, |     mut ranked_map: RankedMap, | ||||||
|     addition: Vec<rmpv::Value>, |     addition: Vec<rmpv::Value>, | ||||||
| ) -> Result<(), Error> | ) -> MResult<()> | ||||||
| { | { | ||||||
|     let mut document_ids = HashSet::new(); |     let mut document_ids = HashSet::new(); | ||||||
|     let mut document_store = RamDocumentStore::new(); |     let mut document_store = RamDocumentStore::new(); | ||||||
|     let mut indexer = RawIndexer::new(); |     let mut indexer = RawIndexer::new(); | ||||||
|  |  | ||||||
|  |     let schema = match main_store.schema(writer)? { | ||||||
|  |         Some(schema) => schema, | ||||||
|  |         None => return Err(Error::SchemaMissing), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     let identifier = schema.identifier_name(); |     let identifier = schema.identifier_name(); | ||||||
|  |  | ||||||
|     for document in addition { |     for document in addition { | ||||||
| @@ -87,7 +91,7 @@ pub fn apply_documents_addition( | |||||||
|  |  | ||||||
|         // 2. index the document fields in ram stores |         // 2. index the document fields in ram stores | ||||||
|         let serializer = Serializer { |         let serializer = Serializer { | ||||||
|             schema, |             schema: &schema, | ||||||
|             document_store: &mut document_store, |             document_store: &mut document_store, | ||||||
|             indexer: &mut indexer, |             indexer: &mut indexer, | ||||||
|             ranked_map: &mut ranked_map, |             ranked_map: &mut ranked_map, | ||||||
| @@ -105,7 +109,6 @@ pub fn apply_documents_addition( | |||||||
|         documents_fields_store, |         documents_fields_store, | ||||||
|         postings_lists_store, |         postings_lists_store, | ||||||
|         docs_words_store, |         docs_words_store, | ||||||
|         schema, |  | ||||||
|         ranked_map.clone(), |         ranked_map.clone(), | ||||||
|         documents_to_insert, |         documents_to_insert, | ||||||
|     )?; |     )?; | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ use fst::{SetBuilder, Streamer}; | |||||||
| use meilidb_schema::Schema; | use meilidb_schema::Schema; | ||||||
| use sdset::{SetBuf, SetOperation, duo::DifferenceByKey}; | use sdset::{SetBuf, SetOperation, duo::DifferenceByKey}; | ||||||
|  |  | ||||||
| use crate::{DocumentId, RankedMap, Error}; | use crate::{DocumentId, RankedMap, MResult, Error}; | ||||||
| use crate::serde::extract_document_id; | use crate::serde::extract_document_id; | ||||||
| use crate::update::push_documents_deletion; | use crate::update::push_documents_deletion; | ||||||
| use crate::store; | use crate::store; | ||||||
| @@ -35,7 +35,7 @@ impl DocumentsDeletion { | |||||||
|         self.documents.push(document_id); |         self.documents.push(document_id); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn delete_document<D>(&mut self, schema: &Schema, document: D) -> Result<(), Error> |     pub fn delete_document<D>(&mut self, schema: &Schema, document: D) -> MResult<()> | ||||||
|     where D: serde::Serialize, |     where D: serde::Serialize, | ||||||
|     { |     { | ||||||
|         let identifier = schema.identifier_name(); |         let identifier = schema.identifier_name(); | ||||||
| @@ -49,7 +49,7 @@ impl DocumentsDeletion { | |||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn finalize(self, mut writer: rkv::Writer) -> Result<u64, Error> { |     pub fn finalize(self, mut writer: rkv::Writer) -> MResult<u64> { | ||||||
|         let update_id = push_documents_deletion( |         let update_id = push_documents_deletion( | ||||||
|             &mut writer, |             &mut writer, | ||||||
|             self.updates_store, |             self.updates_store, | ||||||
| @@ -75,13 +75,17 @@ pub fn apply_documents_deletion( | |||||||
|     documents_fields_store: store::DocumentsFields, |     documents_fields_store: store::DocumentsFields, | ||||||
|     postings_lists_store: store::PostingsLists, |     postings_lists_store: store::PostingsLists, | ||||||
|     docs_words_store: store::DocsWords, |     docs_words_store: store::DocsWords, | ||||||
|     schema: &Schema, |  | ||||||
|     mut ranked_map: RankedMap, |     mut ranked_map: RankedMap, | ||||||
|     deletion: Vec<DocumentId>, |     deletion: Vec<DocumentId>, | ||||||
| ) -> Result<(), Error> | ) -> MResult<()> | ||||||
| { | { | ||||||
|     let idset = SetBuf::from_dirty(deletion); |     let idset = SetBuf::from_dirty(deletion); | ||||||
|  |  | ||||||
|  |     let schema = match main_store.schema(writer)? { | ||||||
|  |         Some(schema) => schema, | ||||||
|  |         None => return Err(Error::SchemaMissing), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     // collect the ranked attributes according to the schema |     // collect the ranked attributes according to the schema | ||||||
|     let ranked_attrs: Vec<_> = schema.iter() |     let ranked_attrs: Vec<_> = schema.iter() | ||||||
|         .filter_map(|(_, attr, prop)| { |         .filter_map(|(_, attr, prop)| { | ||||||
|   | |||||||
| @@ -1,22 +1,30 @@ | |||||||
| mod documents_addition; | mod documents_addition; | ||||||
| mod documents_deletion; | mod documents_deletion; | ||||||
|  | mod schema_update; | ||||||
|  |  | ||||||
| pub use self::documents_addition::{DocumentsAddition, apply_documents_addition}; | pub use self::documents_addition::{DocumentsAddition, apply_documents_addition}; | ||||||
| pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion}; | pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion}; | ||||||
|  | pub use self::schema_update::apply_schema_update; | ||||||
|  |  | ||||||
| use std::time::{Duration, Instant}; | use std::time::{Duration, Instant}; | ||||||
|  |  | ||||||
| use log::debug; | use log::debug; | ||||||
| use serde::{Serialize, Deserialize}; | use serde::{Serialize, Deserialize}; | ||||||
|  |  | ||||||
| use crate::{store, Error, MResult, DocumentId, RankedMap}; | use crate::{store, Error, MResult, DocumentId, RankedMap}; | ||||||
|  | use crate::error::UnsupportedOperation; | ||||||
|  | use meilidb_schema::Schema; | ||||||
|  |  | ||||||
| #[derive(Debug, Serialize, Deserialize)] | #[derive(Debug, Serialize, Deserialize)] | ||||||
| pub enum Update { | pub enum Update { | ||||||
|  |     SchemaUpdate(Schema), | ||||||
|     DocumentsAddition(Vec<rmpv::Value>), |     DocumentsAddition(Vec<rmpv::Value>), | ||||||
|     DocumentsDeletion(Vec<DocumentId>), |     DocumentsDeletion(Vec<DocumentId>), | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | #[derive(Debug, Clone, Serialize, Deserialize)] | ||||||
| pub enum UpdateType { | pub enum UpdateType { | ||||||
|  |     SchemaUpdate { schema: Schema }, | ||||||
|     DocumentsAddition { number: usize }, |     DocumentsAddition { number: usize }, | ||||||
|     DocumentsDeletion { number: usize }, |     DocumentsDeletion { number: usize }, | ||||||
| } | } | ||||||
| @@ -77,6 +85,22 @@ pub fn biggest_update_id( | |||||||
|     Ok(max) |     Ok(max) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub fn push_schema_update( | ||||||
|  |     writer: &mut rkv::Writer, | ||||||
|  |     updates_store: store::Updates, | ||||||
|  |     updates_results_store: store::UpdatesResults, | ||||||
|  |     schema: Schema, | ||||||
|  | ) -> MResult<u64> | ||||||
|  | { | ||||||
|  |     let last_update_id = biggest_update_id(writer, updates_store, updates_results_store)?; | ||||||
|  |     let last_update_id = last_update_id.map_or(0, |n| n + 1); | ||||||
|  |  | ||||||
|  |     let update = Update::SchemaUpdate(schema); | ||||||
|  |     let update_id = updates_store.put_update(writer, last_update_id, &update)?; | ||||||
|  |  | ||||||
|  |     Ok(last_update_id) | ||||||
|  | } | ||||||
|  |  | ||||||
| pub fn push_documents_addition<D: serde::Serialize>( | pub fn push_documents_addition<D: serde::Serialize>( | ||||||
|     writer: &mut rkv::Writer, |     writer: &mut rkv::Writer, | ||||||
|     updates_store: store::Updates, |     updates_store: store::Updates, | ||||||
| @@ -127,9 +151,14 @@ pub fn update_task( | |||||||
|         None => return Ok(false), |         None => return Ok(false), | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     debug!("Processing update number {}", update_id); |  | ||||||
|  |  | ||||||
|     let (update_type, result, duration) = match update { |     let (update_type, result, duration) = match update { | ||||||
|  |         Update::SchemaUpdate(schema) => { | ||||||
|  |             let start = Instant::now(); | ||||||
|  |             let update_type = UpdateType::SchemaUpdate { schema: schema.clone() }; | ||||||
|  |             let result = apply_schema_update(writer, index.main, &schema); | ||||||
|  |  | ||||||
|  |             (update_type, result, start.elapsed()) | ||||||
|  |         }, | ||||||
|         Update::DocumentsAddition(documents) => { |         Update::DocumentsAddition(documents) => { | ||||||
|             let start = Instant::now(); |             let start = Instant::now(); | ||||||
|  |  | ||||||
| @@ -140,19 +169,15 @@ pub fn update_task( | |||||||
|  |  | ||||||
|             let update_type = UpdateType::DocumentsAddition { number: documents.len() }; |             let update_type = UpdateType::DocumentsAddition { number: documents.len() }; | ||||||
|  |  | ||||||
|             let result = match index.main.schema(writer)? { |             let result = apply_documents_addition( | ||||||
|                 Some(schema) => apply_documents_addition( |                 writer, | ||||||
|                     writer, |                 index.main, | ||||||
|                     index.main, |                 index.documents_fields, | ||||||
|                     index.documents_fields, |                 index.postings_lists, | ||||||
|                     index.postings_lists, |                 index.docs_words, | ||||||
|                     index.docs_words, |                 ranked_map, | ||||||
|                     &schema, |                 documents, | ||||||
|                     ranked_map, |             ); | ||||||
|                     documents, |  | ||||||
|                 ), |  | ||||||
|                 None => Err(Error::SchemaMissing), |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             (update_type, result, start.elapsed()) |             (update_type, result, start.elapsed()) | ||||||
|         }, |         }, | ||||||
| @@ -166,24 +191,22 @@ pub fn update_task( | |||||||
|  |  | ||||||
|             let update_type = UpdateType::DocumentsDeletion { number: documents.len() }; |             let update_type = UpdateType::DocumentsDeletion { number: documents.len() }; | ||||||
|  |  | ||||||
|             let result = match index.main.schema(writer)? { |             let result = apply_documents_deletion( | ||||||
|                 Some(schema) => apply_documents_deletion( |                 writer, | ||||||
|                     writer, |                 index.main, | ||||||
|                     index.main, |                 index.documents_fields, | ||||||
|                     index.documents_fields, |                 index.postings_lists, | ||||||
|                     index.postings_lists, |                 index.docs_words, | ||||||
|                     index.docs_words, |                 ranked_map, | ||||||
|                     &schema, |                 documents, | ||||||
|                     ranked_map, |             ); | ||||||
|                     documents, |  | ||||||
|                 ), |  | ||||||
|                 None => Err(Error::SchemaMissing), |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             (update_type, result, start.elapsed()) |             (update_type, result, start.elapsed()) | ||||||
|         }, |         }, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|  |     debug!("Processed update number {} {:?} {:?}", update_id, update_type, result); | ||||||
|  |  | ||||||
|     let detailed_duration = DetailedDuration { main: duration }; |     let detailed_duration = DetailedDuration { main: duration }; | ||||||
|     let status = UpdateResult { |     let status = UpdateResult { | ||||||
|         update_id, |         update_id, | ||||||
|   | |||||||
							
								
								
									
										15
								meilidb-core/src/update/schema_update.rs
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | |||||||
|  | use meilidb_schema::Schema; | ||||||
|  | use crate::{store, error::UnsupportedOperation, MResult}; | ||||||
|  |  | ||||||
|  | pub fn apply_schema_update( | ||||||
|  |     writer: &mut rkv::Writer, | ||||||
|  |     main_store: store::Main, | ||||||
|  |     new_schema: &Schema, | ||||||
|  | ) -> MResult<()> | ||||||
|  | { | ||||||
|  |     if let Some(_) = main_store.schema(writer)? { | ||||||
|  |         return Err(UnsupportedOperation::SchemaAlreadyExists.into()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     main_store.put_schema(writer, new_schema) | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user