mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	introduce a new schemaless way
This commit is contained in:
		| @@ -1,6 +1,6 @@ | ||||
| use std::cmp::{Ordering, Reverse}; | ||||
| use std::collections::hash_map::{HashMap, Entry}; | ||||
| use meilisearch_schema::SchemaAttr; | ||||
| use meilisearch_schema::IndexedPos; | ||||
| use slice_group_by::GroupBy; | ||||
| use crate::{RawDocument, MResult}; | ||||
| use crate::bucket_sort::BareMatch; | ||||
| @@ -32,7 +32,7 @@ impl Criterion for Exact { | ||||
|                     for bm in group { | ||||
|                         for di in ctx.postings_lists[bm.postings_list].as_ref() { | ||||
|  | ||||
|                             let attr = SchemaAttr(di.attribute); | ||||
|                             let attr = IndexedPos(di.attribute); | ||||
|                             let count = match fields_counts.entry(attr) { | ||||
|                                 Entry::Occupied(entry) => *entry.get(), | ||||
|                                 Entry::Vacant(entry) => { | ||||
|   | ||||
| @@ -69,7 +69,7 @@ impl<'a> SortByAttr<'a> { | ||||
|         reversed: bool, | ||||
|     ) -> Result<SortByAttr<'a>, SortByAttrError> { | ||||
|         let field_id = match schema.get_id(attr_name) { | ||||
|             Some(field_id) => *field_id, | ||||
|             Some(field_id) => field_id, | ||||
|             None => return Err(SortByAttrError::AttributeNotFound), | ||||
|         }; | ||||
|  | ||||
|   | ||||
| @@ -8,11 +8,12 @@ pub type MResult<T> = Result<T, Error>; | ||||
| pub enum Error { | ||||
|     Io(io::Error), | ||||
|     IndexAlreadyExists, | ||||
|     SchemaDiffer, | ||||
|     MissingSchemaIdentifier, | ||||
|     SchemaMissing, | ||||
|     WordIndexMissing, | ||||
|     MissingDocumentId, | ||||
|     MaxFieldsLimitExceeded, | ||||
|     Schema(meilisearch_schema::Error), | ||||
|     Zlmdb(heed::Error), | ||||
|     Fst(fst::Error), | ||||
|     SerdeJson(SerdeJsonError), | ||||
| @@ -28,6 +29,12 @@ impl From<io::Error> for Error { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<meilisearch_schema::Error> for Error { | ||||
|     fn from(error: meilisearch_schema::Error) -> Error { | ||||
|         Error::Schema(error) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<heed::Error> for Error { | ||||
|     fn from(error: heed::Error) -> Error { | ||||
|         Error::Zlmdb(error) | ||||
| @@ -76,10 +83,12 @@ impl fmt::Display for Error { | ||||
|         match self { | ||||
|             Io(e) => write!(f, "{}", e), | ||||
|             IndexAlreadyExists => write!(f, "index already exists"), | ||||
|             SchemaDiffer => write!(f, "schemas differ"), | ||||
|             MissingSchemaIdentifier => write!(f, "schema cannot be build without identifier"), | ||||
|             SchemaMissing => write!(f, "this index does not have a schema"), | ||||
|             WordIndexMissing => write!(f, "this index does not have a word index"), | ||||
|             MissingDocumentId => write!(f, "document id is missing"), | ||||
|             MaxFieldsLimitExceeded => write!(f, "maximum field in a document is exceeded"), | ||||
|             Schema(e) => write!(f, "schemas error; {}", e), | ||||
|             Zlmdb(e) => write!(f, "heed error; {}", e), | ||||
|             Fst(e) => write!(f, "fst error; {}", e), | ||||
|             SerdeJson(e) => write!(f, "serde json error; {}", e), | ||||
|   | ||||
| @@ -136,7 +136,7 @@ mod tests { | ||||
|     use std::iter::FromIterator; | ||||
|  | ||||
|     use fst::{IntoStreamer, Set}; | ||||
|     use meilisearch_schema::SchemaAttr; | ||||
|     use meilisearch_schema::IndexedPos; | ||||
|     use sdset::SetBuf; | ||||
|     use tempfile::TempDir; | ||||
|  | ||||
| @@ -295,14 +295,14 @@ mod tests { | ||||
|             for ((docid, attr, _), count) in fields_counts { | ||||
|                 let prev = index | ||||
|                     .documents_fields_counts | ||||
|                     .document_field_count(&mut writer, docid, SchemaAttr(attr)) | ||||
|                     .document_field_count(&mut writer, docid, IndexedPos(attr)) | ||||
|                     .unwrap(); | ||||
|  | ||||
|                 let prev = prev.unwrap_or(0); | ||||
|  | ||||
|                 index | ||||
|                     .documents_fields_counts | ||||
|                     .put_document_field_count(&mut writer, docid, SchemaAttr(attr), prev + count) | ||||
|                     .put_document_field_count(&mut writer, docid, IndexedPos(attr), prev + count) | ||||
|                     .unwrap(); | ||||
|             } | ||||
|  | ||||
|   | ||||
| @@ -180,16 +180,16 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O | ||||
| mod tests { | ||||
|  | ||||
|     use super::*; | ||||
|     use meilisearch_schema::SchemaAttr; | ||||
|     use meilisearch_schema::IndexedPos; | ||||
|  | ||||
|     #[test] | ||||
|     fn strange_apostrophe() { | ||||
|         let mut indexer = RawIndexer::new(fst::Set::default()); | ||||
|  | ||||
|         let docid = DocumentId(0); | ||||
|         let attr = SchemaAttr(0); | ||||
|         let indexed_pos = IndexedPos(0); | ||||
|         let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !"; | ||||
|         indexer.index_text(docid, attr, text); | ||||
|         indexer.index_text(docid, indexed_pos, text); | ||||
|  | ||||
|         let Indexed { | ||||
|             words_doc_indexes, .. | ||||
| @@ -209,9 +209,9 @@ mod tests { | ||||
|         let mut indexer = RawIndexer::new(fst::Set::default()); | ||||
|  | ||||
|         let docid = DocumentId(0); | ||||
|         let attr = SchemaAttr(0); | ||||
|         let indexed_pos = IndexedPos(0); | ||||
|         let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"]; | ||||
|         indexer.index_text_seq(docid, attr, text); | ||||
|         indexer.index_text_seq(docid, indexed_pos, text); | ||||
|  | ||||
|         let Indexed { | ||||
|             words_doc_indexes, .. | ||||
| @@ -234,9 +234,9 @@ mod tests { | ||||
|         let mut indexer = RawIndexer::new(stop_words); | ||||
|  | ||||
|         let docid = DocumentId(0); | ||||
|         let attr = SchemaAttr(0); | ||||
|         let indexed_pos = IndexedPos(0); | ||||
|         let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !"; | ||||
|         indexer.index_text(docid, attr, text); | ||||
|         indexer.index_text(docid, indexed_pos, text); | ||||
|  | ||||
|         let Indexed { | ||||
|             words_doc_indexes, .. | ||||
| @@ -258,9 +258,9 @@ mod tests { | ||||
|         let mut indexer = RawIndexer::new(fst::Set::default()); | ||||
|  | ||||
|         let docid = DocumentId(0); | ||||
|         let attr = SchemaAttr(0); | ||||
|         let indexed_pos = IndexedPos(0); | ||||
|         let text = "🇯🇵"; | ||||
|         indexer.index_text(docid, attr, text); | ||||
|         indexer.index_text(docid, indexed_pos, text); | ||||
|  | ||||
|         let Indexed { | ||||
|             words_doc_indexes, .. | ||||
|   | ||||
| @@ -99,7 +99,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> { | ||||
|                         let ioread = SerdeJsonIoRead::new(cursor); | ||||
|                         let value = Value(SerdeJsonDeserializer::new(ioread)); | ||||
|  | ||||
|                         Some((*attribute_name, value)) | ||||
|                         Some((attribute_name, value)) | ||||
|                     } else { | ||||
|                         None | ||||
|                     } | ||||
|   | ||||
| @@ -20,7 +20,7 @@ pub use self::convert_to_string::ConvertToString; | ||||
| pub use self::deserializer::{Deserializer, DeserializerError}; | ||||
| pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string}; | ||||
| pub use self::indexer::Indexer; | ||||
| pub use self::serializer::{serialize_value, Serializer}; | ||||
| pub use self::serializer::{serialize_value, serialize_value_with_id, Serializer}; | ||||
|  | ||||
| use std::{error::Error, fmt}; | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| use meilisearch_schema::{Schema, FieldsMap}; | ||||
| use meilisearch_schema::{Schema, FieldId}; | ||||
| use serde::ser; | ||||
|  | ||||
| use crate::database::MainT; | ||||
| @@ -10,12 +10,11 @@ use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError}; | ||||
|  | ||||
| pub struct Serializer<'a, 'b> { | ||||
|     pub txn: &'a mut heed::RwTxn<'b, MainT>, | ||||
|     pub schema: &'a Schema, | ||||
|     pub schema: &'a mut Schema, | ||||
|     pub document_store: DocumentsFields, | ||||
|     pub document_fields_counts: DocumentsFieldsCounts, | ||||
|     pub indexer: &'a mut RawIndexer, | ||||
|     pub ranked_map: &'a mut RankedMap, | ||||
|     pub fields_map: &'a mut FieldsMap, | ||||
|     pub document_id: DocumentId, | ||||
| } | ||||
|  | ||||
| @@ -159,7 +158,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { | ||||
|             document_fields_counts: self.document_fields_counts, | ||||
|             indexer: self.indexer, | ||||
|             ranked_map: self.ranked_map, | ||||
|             fields_map: self.fields_map, | ||||
|             current_key_name: None, | ||||
|         }) | ||||
|     } | ||||
| @@ -177,7 +175,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { | ||||
|             document_fields_counts: self.document_fields_counts, | ||||
|             indexer: self.indexer, | ||||
|             ranked_map: self.ranked_map, | ||||
|             fields_map: self.fields_map, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
| @@ -196,13 +193,12 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { | ||||
|  | ||||
| pub struct MapSerializer<'a, 'b> { | ||||
|     txn: &'a mut heed::RwTxn<'b, MainT>, | ||||
|     schema: &'a Schema, | ||||
|     schema: &'a mut Schema, | ||||
|     document_id: DocumentId, | ||||
|     document_store: DocumentsFields, | ||||
|     document_fields_counts: DocumentsFieldsCounts, | ||||
|     indexer: &'a mut RawIndexer, | ||||
|     ranked_map: &'a mut RankedMap, | ||||
|     fields_map: &'a mut FieldsMap, | ||||
|     current_key_name: Option<String>, | ||||
| } | ||||
|  | ||||
| @@ -237,21 +233,17 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> { | ||||
|         V: ser::Serialize, | ||||
|     { | ||||
|         let key = key.serialize(ConvertToString)?; | ||||
|         match self.schema.attribute(&key) { | ||||
|             Some(attribute) => serialize_value( | ||||
|                 self.txn, | ||||
|                 attribute, | ||||
|                 self.schema.props(attribute), | ||||
|                 self.document_id, | ||||
|                 self.document_store, | ||||
|                 self.document_fields_counts, | ||||
|                 self.indexer, | ||||
|                 self.ranked_map, | ||||
|                 self.fields_map, | ||||
|                 value, | ||||
|             ), | ||||
|             None => Ok(()), | ||||
|         } | ||||
|         serialize_value( | ||||
|             self.txn, | ||||
|             key, | ||||
|             self.schema, | ||||
|             self.document_id, | ||||
|             self.document_store, | ||||
|             self.document_fields_counts, | ||||
|             self.indexer, | ||||
|             self.ranked_map, | ||||
|             value, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     fn end(self) -> Result<Self::Ok, Self::Error> { | ||||
| @@ -261,13 +253,12 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> { | ||||
|  | ||||
| pub struct StructSerializer<'a, 'b> { | ||||
|     txn: &'a mut heed::RwTxn<'b, MainT>, | ||||
|     schema: &'a Schema, | ||||
|     schema: &'a mut Schema, | ||||
|     document_id: DocumentId, | ||||
|     document_store: DocumentsFields, | ||||
|     document_fields_counts: DocumentsFieldsCounts, | ||||
|     indexer: &'a mut RawIndexer, | ||||
|     ranked_map: &'a mut RankedMap, | ||||
|     fields_map: &'a mut FieldsMap, | ||||
| } | ||||
|  | ||||
| impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { | ||||
| @@ -282,19 +273,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { | ||||
|     where | ||||
|         T: ser::Serialize, | ||||
|     { | ||||
|         // let id = fields_map.insert(key)?; | ||||
|  | ||||
|         // let attribute = match self.schema.attribute(id) { | ||||
|         //     Some(attribute) => attribute, | ||||
|         //     None => { | ||||
|  | ||||
|         //     }, | ||||
|         // } | ||||
|  | ||||
|         serialize_value( | ||||
|             self.txn, | ||||
|             attribute, | ||||
|             self.schema.props(attribute), | ||||
|             key.to_string(), | ||||
|             self.schema, | ||||
|             self.document_id, | ||||
|             self.document_store, | ||||
|             self.document_fields_counts, | ||||
| @@ -311,7 +293,36 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { | ||||
|  | ||||
| pub fn serialize_value<'a, T: ?Sized>( | ||||
|     txn: &mut heed::RwTxn<MainT>, | ||||
|     attribute: &'static str, | ||||
|     attribute: String, | ||||
|     schema: &'a mut Schema, | ||||
|     document_id: DocumentId, | ||||
|     document_store: DocumentsFields, | ||||
|     documents_fields_counts: DocumentsFieldsCounts, | ||||
|     indexer: &mut RawIndexer, | ||||
|     ranked_map: &mut RankedMap, | ||||
|     value: &T, | ||||
| ) -> Result<(), SerializerError> | ||||
| where | ||||
|     T: ser::Serialize, | ||||
| { | ||||
|     let field_id = schema.get_or_create(attribute)?; | ||||
|  | ||||
|     serialize_value_with_id( | ||||
|         txn, | ||||
|         field_id, | ||||
|         schema, | ||||
|         document_id, | ||||
|         document_store, | ||||
|         documents_fields_counts, | ||||
|         indexer, | ||||
|         ranked_map, | ||||
|         value | ||||
|     ) | ||||
| } | ||||
|  | ||||
| pub fn serialize_value_with_id<'a, T: ?Sized>( | ||||
|     txn: &mut heed::RwTxn<MainT>, | ||||
|     field_id: FieldId, | ||||
|     schema: &'a Schema, | ||||
|     document_id: DocumentId, | ||||
|     document_store: DocumentsFields, | ||||
| @@ -324,12 +335,11 @@ where | ||||
|     T: ser::Serialize, | ||||
| { | ||||
|     let serialized = serde_json::to_vec(value)?; | ||||
|     let field_id = schema.get_or_create(attribute)?; | ||||
|     document_store.put_document_field(txn, document_id, field_id, &serialized)?; | ||||
|  | ||||
|     if let Some(indexed_pos) = schema.id_is_indexed(field_id) { | ||||
|         let indexer = Indexer { | ||||
|             field_id, | ||||
|             pos: *indexed_pos, | ||||
|             indexer, | ||||
|             document_id, | ||||
|         }; | ||||
| @@ -337,13 +347,13 @@ where | ||||
|             documents_fields_counts.put_document_field_count( | ||||
|                 txn, | ||||
|                 document_id, | ||||
|                 field_id, | ||||
|                 *indexed_pos, | ||||
|                 number_of_words as u16, | ||||
|             )?; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     if let Some(field_id) = schema.id_is_ranked(field_id) { | ||||
|     if schema.id_is_ranked(field_id) { | ||||
|         let number = value.serialize(ConvertToNumber)?; | ||||
|         ranked_map.insert(document_id, field_id, number); | ||||
|     } | ||||
|   | ||||
| @@ -1,14 +1,14 @@ | ||||
| use heed::types::{ByteSlice, OwnedType}; | ||||
| use crate::database::MainT; | ||||
| use heed::Result as ZResult; | ||||
| use meilisearch_schema::SchemaAttr; | ||||
| use meilisearch_schema::FieldId; | ||||
|  | ||||
| use super::DocumentAttrKey; | ||||
| use super::DocumentFieldStoredKey; | ||||
| use crate::DocumentId; | ||||
|  | ||||
| #[derive(Copy, Clone)] | ||||
| pub struct DocumentsFields { | ||||
|     pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>, | ||||
|     pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>, | ||||
| } | ||||
|  | ||||
| impl DocumentsFields { | ||||
| @@ -16,10 +16,10 @@ impl DocumentsFields { | ||||
|         self, | ||||
|         writer: &mut heed::RwTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|         attribute: SchemaAttr, | ||||
|         attribute: FieldId, | ||||
|         value: &[u8], | ||||
|     ) -> ZResult<()> { | ||||
|         let key = DocumentAttrKey::new(document_id, attribute); | ||||
|         let key = DocumentFieldStoredKey::new(document_id, attribute); | ||||
|         self.documents_fields.put(writer, &key, value) | ||||
|     } | ||||
|  | ||||
| @@ -28,8 +28,8 @@ impl DocumentsFields { | ||||
|         writer: &mut heed::RwTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|     ) -> ZResult<usize> { | ||||
|         let start = DocumentAttrKey::new(document_id, SchemaAttr::min()); | ||||
|         let end = DocumentAttrKey::new(document_id, SchemaAttr::max()); | ||||
|         let start = DocumentFieldStoredKey::new(document_id, FieldId::min()); | ||||
|         let end = DocumentFieldStoredKey::new(document_id, FieldId::max()); | ||||
|         self.documents_fields.delete_range(writer, &(start..=end)) | ||||
|     } | ||||
|  | ||||
| @@ -41,9 +41,9 @@ impl DocumentsFields { | ||||
|         self, | ||||
|         reader: &'txn heed::RoTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|         attribute: SchemaAttr, | ||||
|         attribute: FieldId, | ||||
|     ) -> ZResult<Option<&'txn [u8]>> { | ||||
|         let key = DocumentAttrKey::new(document_id, attribute); | ||||
|         let key = DocumentFieldStoredKey::new(document_id, attribute); | ||||
|         self.documents_fields.get(reader, &key) | ||||
|     } | ||||
|  | ||||
| @@ -52,25 +52,25 @@ impl DocumentsFields { | ||||
|         reader: &'txn heed::RoTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|     ) -> ZResult<DocumentFieldsIter<'txn>> { | ||||
|         let start = DocumentAttrKey::new(document_id, SchemaAttr::min()); | ||||
|         let end = DocumentAttrKey::new(document_id, SchemaAttr::max()); | ||||
|         let start = DocumentFieldStoredKey::new(document_id, FieldId::min()); | ||||
|         let end = DocumentFieldStoredKey::new(document_id, FieldId::max()); | ||||
|         let iter = self.documents_fields.range(reader, &(start..=end))?; | ||||
|         Ok(DocumentFieldsIter { iter }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct DocumentFieldsIter<'txn> { | ||||
|     iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>, | ||||
|     iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>, | ||||
| } | ||||
|  | ||||
| impl<'txn> Iterator for DocumentFieldsIter<'txn> { | ||||
|     type Item = ZResult<(SchemaAttr, &'txn [u8])>; | ||||
|     type Item = ZResult<(FieldId, &'txn [u8])>; | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         match self.iter.next() { | ||||
|             Some(Ok((key, bytes))) => { | ||||
|                 let attr = SchemaAttr(key.attr.get()); | ||||
|                 Some(Ok((attr, bytes))) | ||||
|                 let field_id = FieldId(key.field_id.get()); | ||||
|                 Some(Ok((field_id, bytes))) | ||||
|             } | ||||
|             Some(Err(e)) => Some(Err(e)), | ||||
|             None => None, | ||||
|   | ||||
| @@ -1,13 +1,13 @@ | ||||
| use super::DocumentAttrKey; | ||||
| use super::DocumentFieldIndexedKey; | ||||
| use crate::database::MainT; | ||||
| use crate::DocumentId; | ||||
| use heed::types::OwnedType; | ||||
| use heed::Result as ZResult; | ||||
| use meilisearch_schema::FieldId; | ||||
| use meilisearch_schema::IndexedPos; | ||||
|  | ||||
| #[derive(Copy, Clone)] | ||||
| pub struct DocumentsFieldsCounts { | ||||
|     pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u16>>, | ||||
|     pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>, | ||||
| } | ||||
|  | ||||
| impl DocumentsFieldsCounts { | ||||
| @@ -15,10 +15,10 @@ impl DocumentsFieldsCounts { | ||||
|         self, | ||||
|         writer: &mut heed::RwTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|         attribute: FieldId, | ||||
|         attribute: IndexedPos, | ||||
|         value: u16, | ||||
|     ) -> ZResult<()> { | ||||
|         let key = DocumentAttrKey::new(document_id, attribute); | ||||
|         let key = DocumentFieldIndexedKey::new(document_id, attribute); | ||||
|         self.documents_fields_counts.put(writer, &key, &value) | ||||
|     } | ||||
|  | ||||
| @@ -27,10 +27,9 @@ impl DocumentsFieldsCounts { | ||||
|         writer: &mut heed::RwTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|     ) -> ZResult<usize> { | ||||
|         let start = DocumentAttrKey::new(document_id, FieldId::min()); | ||||
|         let end = DocumentAttrKey::new(document_id, FieldId::max()); | ||||
|         self.documents_fields_counts | ||||
|             .delete_range(writer, &(start..=end)) | ||||
|         let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min()); | ||||
|         let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max()); | ||||
|         self.documents_fields_counts.delete_range(writer, &(start..=end)) | ||||
|     } | ||||
|  | ||||
|     pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> { | ||||
| @@ -41,9 +40,9 @@ impl DocumentsFieldsCounts { | ||||
|         self, | ||||
|         reader: &heed::RoTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|         attribute: FieldId, | ||||
|         attribute: IndexedPos, | ||||
|     ) -> ZResult<Option<u16>> { | ||||
|         let key = DocumentAttrKey::new(document_id, attribute); | ||||
|         let key = DocumentFieldIndexedKey::new(document_id, attribute); | ||||
|         match self.documents_fields_counts.get(reader, &key)? { | ||||
|             Some(count) => Ok(Some(count)), | ||||
|             None => Ok(None), | ||||
| @@ -55,8 +54,8 @@ impl DocumentsFieldsCounts { | ||||
|         reader: &'txn heed::RoTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|     ) -> ZResult<DocumentFieldsCountsIter<'txn>> { | ||||
|         let start = DocumentAttrKey::new(document_id, FieldId::min()); | ||||
|         let end = DocumentAttrKey::new(document_id, FieldId::max()); | ||||
|         let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min()); | ||||
|         let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max()); | ||||
|         let iter = self.documents_fields_counts.range(reader, &(start..=end))?; | ||||
|         Ok(DocumentFieldsCountsIter { iter }) | ||||
|     } | ||||
| @@ -79,17 +78,17 @@ impl DocumentsFieldsCounts { | ||||
| } | ||||
|  | ||||
| pub struct DocumentFieldsCountsIter<'txn> { | ||||
|     iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>, | ||||
|     iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>, | ||||
| } | ||||
|  | ||||
| impl Iterator for DocumentFieldsCountsIter<'_> { | ||||
|     type Item = ZResult<(FieldId, u16)>; | ||||
|     type Item = ZResult<(IndexedPos, u16)>; | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         match self.iter.next() { | ||||
|             Some(Ok((key, count))) => { | ||||
|                 let attr = FieldId(key.attr.get()); | ||||
|                 Some(Ok((attr, count))) | ||||
|                 let indexed_pos = IndexedPos(key.indexed_pos.get()); | ||||
|                 Some(Ok((indexed_pos, count))) | ||||
|             } | ||||
|             Some(Err(e)) => Some(Err(e)), | ||||
|             None => None, | ||||
| @@ -99,7 +98,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> { | ||||
|  | ||||
| pub struct DocumentsIdsIter<'txn> { | ||||
|     last_seen_id: Option<DocumentId>, | ||||
|     iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>, | ||||
|     iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>, | ||||
| } | ||||
|  | ||||
| impl Iterator for DocumentsIdsIter<'_> { | ||||
| @@ -123,18 +122,18 @@ impl Iterator for DocumentsIdsIter<'_> { | ||||
| } | ||||
|  | ||||
| pub struct AllDocumentsFieldsCountsIter<'txn> { | ||||
|     iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>, | ||||
|     iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>, | ||||
| } | ||||
|  | ||||
| impl Iterator for AllDocumentsFieldsCountsIter<'_> { | ||||
|     type Item = ZResult<(DocumentId, FieldId, u16)>; | ||||
|     type Item = ZResult<(DocumentId, IndexedPos, u16)>; | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         match self.iter.next() { | ||||
|             Some(Ok((key, count))) => { | ||||
|                 let docid = DocumentId(key.docid.get()); | ||||
|                 let attr = FieldId(key.attr.get()); | ||||
|                 Some(Ok((docid, attr, count))) | ||||
|                 let indexed_pos = IndexedPos(key.indexed_pos.get()); | ||||
|                 Some(Ok((docid, indexed_pos, count))) | ||||
|             } | ||||
|             Some(Err(e)) => Some(Err(e)), | ||||
|             None => None, | ||||
|   | ||||
| @@ -1,12 +1,13 @@ | ||||
| use crate::fields_map::FieldsMap; | ||||
| use crate::database::MainT; | ||||
| use crate::RankedMap; | ||||
| use std::sync::Arc; | ||||
| use std::collections::{HashMap, BTreeMap, BTreeSet}; | ||||
|  | ||||
| use chrono::{DateTime, Utc}; | ||||
| use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str}; | ||||
| use heed::Result as ZResult; | ||||
| use meilisearch_schema::Schema; | ||||
| use std::collections::{HashMap, BTreeMap, BTreeSet}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use crate::database::MainT; | ||||
| use crate::RankedMap; | ||||
|  | ||||
| const CREATED_AT_KEY: &str = "created-at"; | ||||
| const RANKING_RULES_KEY: &str = "ranking-rules-key"; | ||||
| @@ -18,7 +19,6 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency"; | ||||
| const NAME_KEY: &str = "name"; | ||||
| const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; | ||||
| const RANKED_MAP_KEY: &str = "ranked-map"; | ||||
| const FIELDS_MAP_KEY: &str = "fields-map"; | ||||
| const SCHEMA_KEY: &str = "schema"; | ||||
| const UPDATED_AT_KEY: &str = "updated-at"; | ||||
| const WORDS_KEY: &str = "words"; | ||||
| @@ -114,16 +114,6 @@ impl Main { | ||||
|             .get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn put_fields_map(self, writer: &mut heed::RwTxn<MainT>, fields_map: &FieldsMap) -> ZResult<()> { | ||||
|         self.main | ||||
|             .put::<_, Str, SerdeBincode<FieldsMap>>(writer, FIELDS_MAP_KEY, &fields_map) | ||||
|     } | ||||
|  | ||||
|     pub fn fields_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FieldsMap>> { | ||||
|         self.main | ||||
|             .get::<_, Str, SerdeBincode<FieldsMap>>(reader, FIELDS_MAP_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> { | ||||
|         let bytes = fst.as_fst().as_bytes(); | ||||
|         self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes) | ||||
|   | ||||
| @@ -43,18 +43,50 @@ use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MR | ||||
| type BEU64 = zerocopy::U64<byteorder::BigEndian>; | ||||
| type BEU16 = zerocopy::U16<byteorder::BigEndian>; | ||||
|  | ||||
| // #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] | ||||
| // #[repr(C)] | ||||
| // pub struct DocumentAttrKey { | ||||
| //     docid: BEU64, | ||||
| //     indexed_pos: BEU16, | ||||
| // } | ||||
|  | ||||
| // impl DocumentAttrKey { | ||||
| //     fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentAttrKey { | ||||
| //         DocumentAttrKey { | ||||
| //             docid: BEU64::new(docid.0), | ||||
| //             indexed_pos: BEU16::new(indexed_pos.0), | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] | ||||
| #[repr(C)] | ||||
| pub struct DocumentAttrKey { | ||||
| pub struct DocumentFieldIndexedKey { | ||||
|     docid: BEU64, | ||||
|     attr: BEU16, | ||||
|     indexed_pos: BEU16, | ||||
| } | ||||
|  | ||||
| impl DocumentAttrKey { | ||||
|     fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey { | ||||
|         DocumentAttrKey { | ||||
| impl DocumentFieldIndexedKey { | ||||
|     fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey { | ||||
|         DocumentFieldIndexedKey { | ||||
|             docid: BEU64::new(docid.0), | ||||
|             attr: BEU16::new(attr.0), | ||||
|             indexed_pos: BEU16::new(indexed_pos.0), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Copy, Clone, AsBytes, FromBytes)] | ||||
| #[repr(C)] | ||||
| pub struct DocumentFieldStoredKey { | ||||
|     docid: BEU64, | ||||
|     field_id: BEU16, | ||||
| } | ||||
|  | ||||
| impl DocumentFieldStoredKey { | ||||
|     fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey { | ||||
|         DocumentFieldStoredKey { | ||||
|             docid: BEU64::new(docid.0), | ||||
|             field_id: BEU16::new(field_id.0), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -228,7 +260,7 @@ impl Index { | ||||
|         &self, | ||||
|         reader: &heed::RoTxn<MainT>, | ||||
|         document_id: DocumentId, | ||||
|         attribute: SchemaAttr, | ||||
|         attribute: FieldId, | ||||
|     ) -> MResult<Option<T>> { | ||||
|         let bytes = self | ||||
|             .documents_fields | ||||
|   | ||||
| @@ -1,14 +1,13 @@ | ||||
| use std::collections::{HashMap, BTreeSet}; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use fst::{set::OpBuilder, SetBuilder}; | ||||
| use sdset::{duo::Union, SetOperation}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use meilisearch_schema::{Schema, DISPLAYED, INDEXED}; | ||||
|  | ||||
| use crate::database::{MainT, UpdateT}; | ||||
| use crate::database::{UpdateEvent, UpdateEventsEmitter}; | ||||
| use crate::raw_indexer::RawIndexer; | ||||
| use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer}; | ||||
| use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer}; | ||||
| use crate::store; | ||||
| use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; | ||||
| use crate::{Error, MResult, RankedMap}; | ||||
| @@ -115,16 +114,11 @@ pub fn apply_documents_addition<'a, 'b>( | ||||
|         None => return Err(Error::SchemaMissing), | ||||
|     }; | ||||
|  | ||||
|     if let Some(new_schema) = lazy_new_schema(&schema, &addition) { | ||||
|         main_store.put_schema(writer, &new_schema)?; | ||||
|         schema = new_schema; | ||||
|     } | ||||
|  | ||||
|     let identifier = schema.identifier_name(); | ||||
|     let identifier = schema.identifier(); | ||||
|  | ||||
|     // 1. store documents ids for future deletion | ||||
|     for document in addition { | ||||
|         let document_id = match extract_document_id(identifier, &document)? { | ||||
|         let document_id = match extract_document_id(&identifier, &document)? { | ||||
|             Some(id) => id, | ||||
|             None => return Err(Error::MissingDocumentId), | ||||
|         }; | ||||
| @@ -147,8 +141,6 @@ pub fn apply_documents_addition<'a, 'b>( | ||||
|         None => fst::Set::default(), | ||||
|     }; | ||||
|  | ||||
|     let mut fields_map =  main_store.fields_map(writer)?.unwrap_or_default(); | ||||
|  | ||||
|     // 3. index the documents fields in the stores | ||||
|     let mut indexer = RawIndexer::new(stop_words); | ||||
|  | ||||
| @@ -160,7 +152,6 @@ pub fn apply_documents_addition<'a, 'b>( | ||||
|             document_fields_counts: index.documents_fields_counts, | ||||
|             indexer: &mut indexer, | ||||
|             ranked_map: &mut ranked_map, | ||||
|             fields_map: &mut fields_map, | ||||
|             document_id, | ||||
|         }; | ||||
|  | ||||
| @@ -192,16 +183,11 @@ pub fn apply_documents_partial_addition<'a, 'b>( | ||||
|         None => return Err(Error::SchemaMissing), | ||||
|     }; | ||||
|  | ||||
|     if let Some(new_schema) = lazy_new_schema(&schema, &addition) { | ||||
|         main_store.put_schema(writer, &new_schema)?; | ||||
|         schema = new_schema; | ||||
|     } | ||||
|  | ||||
|     let identifier = schema.identifier_name(); | ||||
|     let identifier = schema.identifier(); | ||||
|  | ||||
|     // 1. store documents ids for future deletion | ||||
|     for mut document in addition { | ||||
|         let document_id = match extract_document_id(identifier, &document)? { | ||||
|         let document_id = match extract_document_id(&identifier, &document)? { | ||||
|             Some(id) => id, | ||||
|             None => return Err(Error::MissingDocumentId), | ||||
|         }; | ||||
| @@ -241,8 +227,6 @@ pub fn apply_documents_partial_addition<'a, 'b>( | ||||
|         None => fst::Set::default(), | ||||
|     }; | ||||
|  | ||||
|     let mut fields_map =  main_store.fields_map(writer)?.unwrap_or_default(); | ||||
|  | ||||
|     // 3. index the documents fields in the stores | ||||
|     let mut indexer = RawIndexer::new(stop_words); | ||||
|  | ||||
| @@ -254,7 +238,6 @@ pub fn apply_documents_partial_addition<'a, 'b>( | ||||
|             document_fields_counts: index.documents_fields_counts, | ||||
|             indexer: &mut indexer, | ||||
|             ranked_map: &mut ranked_map, | ||||
|             fields_map: &mut fields_map, | ||||
|             document_id, | ||||
|         }; | ||||
|  | ||||
| @@ -281,7 +264,6 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind | ||||
|     }; | ||||
|  | ||||
|     let mut ranked_map = RankedMap::default(); | ||||
|     let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default(); | ||||
|  | ||||
|     // 1. retrieve all documents ids | ||||
|     let mut documents_ids_to_reindex = Vec::new(); | ||||
| @@ -312,21 +294,20 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind | ||||
|             for result in index.documents_fields.document_fields(writer, *document_id)? { | ||||
|                 let (attr, bytes) = result?; | ||||
|                 let value: serde_json::Value = serde_json::from_slice(bytes)?; | ||||
|                 ram_store.insert((document_id, attr), value); | ||||
|                 ram_store.insert((document_id, field_id), value); | ||||
|             } | ||||
|  | ||||
|             for ((docid, attr), value) in ram_store.drain() { | ||||
|                 serialize_value( | ||||
|             for ((docid, field_id), value) in ram_store.drain() { | ||||
|                 serialize_value_with_id( | ||||
|                     writer, | ||||
|                     attr, | ||||
|                     schema.props(attr), | ||||
|                     field_id, | ||||
|                     &schema, | ||||
|                     *docid, | ||||
|                     index.documents_fields, | ||||
|                     index.documents_fields_counts, | ||||
|                     &mut indexer, | ||||
|                     &mut ranked_map, | ||||
|                     &mut fields_map, | ||||
|                     &value, | ||||
|                     &value | ||||
|                 )?; | ||||
|             } | ||||
|         } | ||||
| @@ -401,30 +382,3 @@ pub fn write_documents_addition_index( | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Returns a copy of `schema` extended with every document key it does not | ||||
| /// declare yet, or `None` when all keys of all `documents` are already known. | ||||
| /// | ||||
| /// Each newly discovered attribute is registered as `DISPLAYED | INDEXED`. | ||||
| pub fn lazy_new_schema( | ||||
|     schema: &Schema, | ||||
|     documents: &[HashMap<String, serde_json::Value>], | ||||
| ) -> Option<Schema> { | ||||
|     // The ordered set removes duplicates across documents and makes the | ||||
|     // order in which attributes are appended to the schema deterministic. | ||||
|     let missing: BTreeSet<&String> = documents | ||||
|         .iter() | ||||
|         .flat_map(|document| document.keys()) | ||||
|         .filter(|name| schema.attribute(*name).is_none()) | ||||
|         .collect(); | ||||
|  | ||||
|     // Nothing new: tell the caller the stored schema can be kept untouched. | ||||
|     if missing.is_empty() { | ||||
|         return None; | ||||
|     } | ||||
|  | ||||
|     let mut builder = schema.to_builder(); | ||||
|     for name in missing { | ||||
|         builder.new_attribute(name, DISPLAYED | INDEXED); | ||||
|     } | ||||
|  | ||||
|     Some(builder.build()) | ||||
| } | ||||
|   | ||||
| @@ -40,8 +40,8 @@ impl DocumentsDeletion { | ||||
|     where | ||||
|         D: serde::Serialize, | ||||
|     { | ||||
|         let identifier = schema.identifier_name(); | ||||
|         let document_id = match extract_document_id(identifier, &document)? { | ||||
|         let identifier = schema.identifier(); | ||||
|         let document_id = match extract_document_id(&identifier, &document)? { | ||||
|             Some(id) => id, | ||||
|             None => return Err(Error::MissingDocumentId), | ||||
|         }; | ||||
| @@ -101,18 +101,7 @@ pub fn apply_documents_deletion( | ||||
|     }; | ||||
|  | ||||
|     // collect the ranked attributes according to the schema | ||||
|     let ranked_attrs: Vec<_> = schema | ||||
|         .iter() | ||||
|         .filter_map( | ||||
|             |(_, attr, prop)| { | ||||
|                 if prop.is_ranked() { | ||||
|                     Some(attr) | ||||
|                 } else { | ||||
|                     None | ||||
|                 } | ||||
|             }, | ||||
|         ) | ||||
|         .collect(); | ||||
|     let ranked_attrs = schema.get_ranked(); | ||||
|  | ||||
|     let mut words_document_ids = HashMap::new(); | ||||
|     for id in idset { | ||||
|   | ||||
| @@ -1,16 +1,15 @@ | ||||
| use std::collections::{HashMap, BTreeMap, BTreeSet}; | ||||
| use std::collections::{BTreeMap, BTreeSet}; | ||||
|  | ||||
| use heed::Result as ZResult; | ||||
| use fst::{set::OpBuilder, SetBuilder}; | ||||
| use sdset::SetBuf; | ||||
|  | ||||
| use meilisearch_schema::{Schema, SchemaAttr, diff_transposition, generate_schema}; | ||||
| use meilisearch_schema::Schema; | ||||
|  | ||||
| use crate::database::{MainT, UpdateT}; | ||||
| use crate::settings::{UpdateState, SettingsUpdate}; | ||||
| use crate::update::documents_addition::reindex_all_documents; | ||||
| use crate::update::{next_update_id, Update}; | ||||
| use crate::{store, MResult}; | ||||
| use crate::{store, MResult, Error}; | ||||
|  | ||||
| pub fn push_settings_update( | ||||
|     writer: &mut heed::RwTxn<UpdateT>, | ||||
| @@ -35,7 +34,17 @@ pub fn apply_settings_update( | ||||
|  | ||||
|     let mut must_reindex = false; | ||||
|  | ||||
|     let old_schema = index.main.schema(writer)?; | ||||
|     let mut schema = match index.main.schema(writer)? { | ||||
|         Some(schema) => schema, | ||||
|         None => { | ||||
|             match settings.attribute_identifier.clone() { | ||||
|                 UpdateState::Update(id) => Schema::with_identifier(id), | ||||
|                 _ => return Err(Error::MissingSchemaIdentifier) | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     println!("settings: {:?}", settings); | ||||
|  | ||||
|     match settings.ranking_rules { | ||||
|         UpdateState::Update(v) => { | ||||
| @@ -55,157 +64,69 @@ pub fn apply_settings_update( | ||||
|         }, | ||||
|         _ => (), | ||||
|     } | ||||
|     let identifier = match settings.attribute_identifier.clone() { | ||||
|         UpdateState::Update(v) => v, | ||||
|         _ => { | ||||
|             old_schema.clone().unwrap().identifier_name().to_owned() | ||||
|         }, | ||||
|  | ||||
|     if let UpdateState::Update(id) = settings.attribute_identifier { | ||||
|          schema.set_identifier(id)?; | ||||
|     }; | ||||
|     let attributes_searchable: Vec<String> = match settings.attributes_searchable.clone() { | ||||
|         UpdateState::Update(v) => v, | ||||
|         UpdateState::Clear => Vec::new(), | ||||
|         UpdateState::Nothing => { | ||||
|             match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_indexed()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             } | ||||
|  | ||||
|     match settings.attributes_searchable.clone() { | ||||
|         UpdateState::Update(v) => schema.update_indexed(v)?, | ||||
|         UpdateState::Clear => { | ||||
|             let clear: Vec<String> = Vec::new(); | ||||
|             schema.update_indexed(clear)?; | ||||
|         }, | ||||
|         UpdateState::Nothing => (), | ||||
|         UpdateState::Add(attrs) => { | ||||
|             let mut old_attrs = match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_indexed()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             }; | ||||
|             for attr in attrs { | ||||
|                 if !old_attrs.contains(&attr) { | ||||
|                     old_attrs.push(attr); | ||||
|                 } | ||||
|                 schema.set_indexed(attr)?; | ||||
|             } | ||||
|             old_attrs | ||||
|         }, | ||||
|         UpdateState::Delete(attrs) => { | ||||
|             let mut old_attrs = match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_indexed()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             }; | ||||
|             for attr in attrs { | ||||
|                 old_attrs.retain(|x| *x == attr) | ||||
|                 schema.remove_indexed(attr); | ||||
|             } | ||||
|             old_attrs | ||||
|         } | ||||
|     }; | ||||
|     let attributes_displayed: Vec<String> = match settings.attributes_displayed.clone() { | ||||
|         UpdateState::Update(v) => v, | ||||
|         UpdateState::Clear => Vec::new(), | ||||
|         UpdateState::Nothing => { | ||||
|             match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_displayed()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             } | ||||
|     match settings.attributes_displayed.clone() { | ||||
|         UpdateState::Update(v) => schema.update_displayed(v)?, | ||||
|         UpdateState::Clear => { | ||||
|             let clear: Vec<String> = Vec::new(); | ||||
|             schema.update_displayed(clear)?; | ||||
|         }, | ||||
|         UpdateState::Nothing => (), | ||||
|         UpdateState::Add(attrs) => { | ||||
|             let mut old_attrs = match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_displayed()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             }; | ||||
|             for attr in attrs { | ||||
|                 if !old_attrs.contains(&attr) { | ||||
|                     old_attrs.push(attr); | ||||
|                 } | ||||
|                 schema.set_displayed(attr)?; | ||||
|             } | ||||
|             old_attrs | ||||
|         }, | ||||
|         UpdateState::Delete(attrs) => { | ||||
|             let mut old_attrs = match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_displayed()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             }; | ||||
|             for attr in attrs { | ||||
|                 old_attrs.retain(|x| *x == attr) | ||||
|                 schema.remove_displayed(attr); | ||||
|             } | ||||
|             old_attrs | ||||
|         } | ||||
|     }; | ||||
|     let attributes_ranked: Vec<String> = match settings.attributes_ranked.clone() { | ||||
|         UpdateState::Update(v) => v, | ||||
|         UpdateState::Clear => Vec::new(), | ||||
|         UpdateState::Nothing => { | ||||
|             match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_ranked()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             } | ||||
|     match settings.attributes_ranked.clone() { | ||||
|         UpdateState::Update(v) => schema.update_ranked(v)?, | ||||
|         UpdateState::Clear => { | ||||
|             let clear: Vec<String> = Vec::new(); | ||||
|             schema.update_ranked(clear)?; | ||||
|         }, | ||||
|         UpdateState::Nothing => (), | ||||
|         UpdateState::Add(attrs) => { | ||||
|             let mut old_attrs = match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_ranked()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             }; | ||||
|             for attr in attrs { | ||||
|                 if !old_attrs.contains(&attr) { | ||||
|                     old_attrs.push(attr); | ||||
|                 } | ||||
|                 schema.set_ranked(attr)?; | ||||
|             } | ||||
|             old_attrs | ||||
|         }, | ||||
|         UpdateState::Delete(attrs) => { | ||||
|             let mut old_attrs = match old_schema.clone() { | ||||
|                 Some(schema) => { | ||||
|                     schema.into_iter() | ||||
|                         .filter(|(_, props)| props.is_ranked()) | ||||
|                         .map(|(name, _)| name) | ||||
|                         .collect() | ||||
|                 }, | ||||
|                 None => Vec::new(), | ||||
|             }; | ||||
|             for attr in attrs { | ||||
|                 old_attrs.retain(|x| *x == attr) | ||||
|                 schema.remove_ranked(attr); | ||||
|             } | ||||
|             old_attrs | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     let new_schema = generate_schema(identifier, attributes_searchable, attributes_displayed, attributes_ranked); | ||||
|     index.main.put_schema(writer, &schema)?; | ||||
|  | ||||
|     index.main.put_schema(writer, &new_schema)?; | ||||
|     println!("schema: {:?}", schema); | ||||
|  | ||||
|     match settings.stop_words { | ||||
|         UpdateState::Update(stop_words) => { | ||||
| @@ -233,16 +154,6 @@ pub fn apply_settings_update( | ||||
|     let postings_lists_store = index.postings_lists; | ||||
|     let docs_words_store = index.docs_words; | ||||
|  | ||||
|     if settings.attribute_identifier.is_changed() || | ||||
|         settings.attributes_ranked.is_changed() || | ||||
|         settings.attributes_searchable.is_changed() || | ||||
|         settings.attributes_displayed.is_changed() | ||||
|     { | ||||
|         if let Some(old_schema) = old_schema { | ||||
|             rewrite_all_documents(writer, index, &old_schema, &new_schema)?; | ||||
|             must_reindex = true; | ||||
|         } | ||||
|     } | ||||
|     if must_reindex { | ||||
|         reindex_all_documents( | ||||
|             writer, | ||||
| @@ -438,46 +349,3 @@ pub fn apply_synonyms_update( | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Re-keys the stored fields of every document from `old_schema` attribute | ||||
| /// positions to `new_schema` attribute positions. | ||||
| /// | ||||
| /// The mapping comes from `diff_transposition(old_schema, new_schema)`: a field | ||||
| /// whose old position maps to `Some(pos)` is written back under | ||||
| /// `SchemaAttr::new(pos)`, a field mapping to `None` is dropped. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns the first error raised while listing document ids, reading a | ||||
| /// document field, deleting the old fields or writing the new ones. A field | ||||
| /// that cannot be read aborts the rewrite instead of being skipped, because | ||||
| /// skipping it would erase that field once the document is rewritten. | ||||
| pub fn rewrite_all_documents( | ||||
|     writer: &mut heed::RwTxn<MainT>, | ||||
|     index: &store::Index, | ||||
|     old_schema: &Schema, | ||||
|     new_schema: &Schema, | ||||
| ) -> MResult<()> { | ||||
|  | ||||
|     let mut documents_ids_to_reindex = Vec::new(); | ||||
|  | ||||
|     // Retrieve all documents present on the database. The ids are buffered | ||||
|     // first so the store is not iterated while it is being modified below. | ||||
|     for result in index.documents_fields_counts.documents_ids(writer)? { | ||||
|         let document_id = result?; | ||||
|         documents_ids_to_reindex.push(document_id); | ||||
|     } | ||||
|  | ||||
|     let transposition = diff_transposition(old_schema, new_schema); | ||||
|  | ||||
|     // Rewrite all documents one by one | ||||
|     for id in documents_ids_to_reindex { | ||||
|         let mut document: HashMap<SchemaAttr, Vec<u8>> = HashMap::new(); | ||||
|  | ||||
|         // Retrieve the old document | ||||
|         for item in index.documents_fields.document_fields(writer, id)? { | ||||
|             // Propagate read failures: the fields are deleted right after | ||||
|             // this loop, so a silently skipped field would be lost for good. | ||||
|             let (attr, bytes) = item?; | ||||
|  | ||||
|             // NOTE(review): assumes the transposition table has an entry for | ||||
|             // every attribute position stored under the old schema. | ||||
|             if let Some(pos) = transposition[attr.0 as usize] { | ||||
|                 // Save the current field under its new SchemaAttr | ||||
|                 document.insert(SchemaAttr::new(pos), bytes.to_vec()); | ||||
|             } | ||||
|         } | ||||
|         // Remove the current document | ||||
|         index.documents_fields.del_all_document_fields(writer, id)?; | ||||
|  | ||||
|         // Rewrite the new document | ||||
|         // TODO: use cursor to not do memory jump at each call | ||||
|         for (key, value) in document { | ||||
|             index.documents_fields.put_document_field(writer, id, key, &value)?; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user