mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	document update
This commit is contained in:
		
							
								
								
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -1685,6 +1685,7 @@ dependencies = [ | ||||
|  "serde", | ||||
|  "serde_json", | ||||
|  "toml", | ||||
|  "zerocopy", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
|   | ||||
| @@ -197,7 +197,7 @@ impl fmt::Display for FacetError { | ||||
|             InvalidFormat(found) => write!(f, "invalid facet: {}, facets should be \"facetName:facetValue\"", found), | ||||
|             AttributeNotFound(attr) => write!(f, "unknown {:?} attribute", attr), | ||||
|             AttributeNotSet { found, expected } => write!(f, "`{}` is not set as a faceted attribute. available facet attributes: {}", found, expected.join(", ")), | ||||
|             InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: string and [string]", attr), | ||||
|             InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: String and [String]", attr), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -70,7 +70,7 @@ impl FacetFilter { | ||||
|                 } | ||||
|                 return Ok(Self(filter)); | ||||
|             } | ||||
|             bad_value => Err(FacetError::unexpected_token(&["String"], bad_value)), | ||||
|             bad_value => Err(FacetError::unexpected_token(&["Array"], bad_value)), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::sync::Arc; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::database::{MainT, UpdateT}; | ||||
| use crate::database::{UpdateEvent, UpdateEventsEmitter}; | ||||
| use crate::facets; | ||||
| use crate::raw_indexer::RawIndexer; | ||||
| use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer}; | ||||
| use crate::store; | ||||
| @@ -103,10 +104,11 @@ pub fn push_documents_addition<D: serde::Serialize>( | ||||
|     Ok(last_update_id) | ||||
| } | ||||
|  | ||||
| pub fn apply_documents_addition<'a, 'b>( | ||||
| pub fn apply_addition<'a, 'b>( | ||||
|     writer: &'a mut heed::RwTxn<'b, MainT>, | ||||
|     index: &store::Index, | ||||
|     addition: Vec<IndexMap<String, serde_json::Value>>, | ||||
|     partial: bool | ||||
| ) -> MResult<()> { | ||||
|     let mut documents_additions = HashMap::new(); | ||||
|  | ||||
| @@ -118,12 +120,30 @@ pub fn apply_documents_addition<'a, 'b>( | ||||
|     let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; | ||||
|  | ||||
|     // 1. store documents ids for future deletion | ||||
|     for document in addition { | ||||
|     for mut document in addition { | ||||
|         let document_id = match extract_document_id(&primary_key, &document)? { | ||||
|             Some(id) => id, | ||||
|             None => return Err(Error::MissingDocumentId), | ||||
|         }; | ||||
|  | ||||
|         if partial { | ||||
|             let mut deserializer = Deserializer { | ||||
|                 document_id, | ||||
|                 reader: writer, | ||||
|                 documents_fields: index.documents_fields, | ||||
|                 schema: &schema, | ||||
|                 fields: None, | ||||
|             }; | ||||
|  | ||||
|             // retrieve the old document and | ||||
|             // update the new one with missing keys found in the old one | ||||
|             let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?; | ||||
|             if let Some(old_document) = result { | ||||
|                 for (key, value) in old_document { | ||||
|                     document.entry(key).or_insert(value); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         documents_additions.insert(document_id, document); | ||||
|     } | ||||
|  | ||||
| @@ -143,6 +163,11 @@ pub fn apply_documents_addition<'a, 'b>( | ||||
|     }; | ||||
|  | ||||
|     // 3. index the documents fields in the stores | ||||
|     if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { | ||||
|         let facet_map = facets::facet_map_from_docs(&schema, &documents_additions, attributes_for_facetting.as_ref())?; | ||||
|         index.facets.add(writer, facet_map)?; | ||||
|     } | ||||
|  | ||||
|     let mut indexer = RawIndexer::new(stop_words); | ||||
|  | ||||
|     for (document_id, document) in documents_additions { | ||||
| @@ -177,85 +202,15 @@ pub fn apply_documents_partial_addition<'a, 'b>( | ||||
|     index: &store::Index, | ||||
|     addition: Vec<IndexMap<String, serde_json::Value>>, | ||||
| ) -> MResult<()> { | ||||
|     let mut documents_additions = HashMap::new(); | ||||
|     apply_addition(writer, index, addition, true) | ||||
| } | ||||
|  | ||||
|     let mut schema = match index.main.schema(writer)? { | ||||
|         Some(schema) => schema, | ||||
|         None => return Err(Error::SchemaMissing), | ||||
|     }; | ||||
|  | ||||
|     let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; | ||||
|  | ||||
|     // 1. store documents ids for future deletion | ||||
|     for mut document in addition { | ||||
|         let document_id = match extract_document_id(&primary_key, &document)? { | ||||
|             Some(id) => id, | ||||
|             None => return Err(Error::MissingDocumentId), | ||||
|         }; | ||||
|  | ||||
|         let mut deserializer = Deserializer { | ||||
|             document_id, | ||||
|             reader: writer, | ||||
|             documents_fields: index.documents_fields, | ||||
|             schema: &schema, | ||||
|             fields: None, | ||||
|         }; | ||||
|  | ||||
|         // retrieve the old document and | ||||
|         // update the new one with missing keys found in the old one | ||||
|         let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?; | ||||
|         if let Some(old_document) = result { | ||||
|             for (key, value) in old_document { | ||||
|                 document.entry(key).or_insert(value); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         documents_additions.insert(document_id, document); | ||||
|     } | ||||
|  | ||||
|     // 2. remove the documents posting lists | ||||
|     let number_of_inserted_documents = documents_additions.len(); | ||||
|     let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect(); | ||||
|     apply_documents_deletion(writer, index, documents_ids)?; | ||||
|  | ||||
|     let mut ranked_map = match index.main.ranked_map(writer)? { | ||||
|         Some(ranked_map) => ranked_map, | ||||
|         None => RankedMap::default(), | ||||
|     }; | ||||
|  | ||||
|     let stop_words = match index.main.stop_words_fst(writer)? { | ||||
|         Some(stop_words) => stop_words, | ||||
|         None => fst::Set::default(), | ||||
|     }; | ||||
|  | ||||
|     // 3. index the documents fields in the stores | ||||
|     let mut indexer = RawIndexer::new(stop_words); | ||||
|  | ||||
|     for (document_id, document) in documents_additions { | ||||
|         let serializer = Serializer { | ||||
|             txn: writer, | ||||
|             schema: &mut schema, | ||||
|             document_store: index.documents_fields, | ||||
|             document_fields_counts: index.documents_fields_counts, | ||||
|             indexer: &mut indexer, | ||||
|             ranked_map: &mut ranked_map, | ||||
|             document_id, | ||||
|         }; | ||||
|  | ||||
|         document.serialize(serializer)?; | ||||
|     } | ||||
|  | ||||
|     write_documents_addition_index( | ||||
|         writer, | ||||
|         index, | ||||
|         &ranked_map, | ||||
|         number_of_inserted_documents, | ||||
|         indexer, | ||||
|     )?; | ||||
|  | ||||
|     index.main.put_schema(writer, &schema)?; | ||||
|  | ||||
|     Ok(()) | ||||
| pub fn apply_documents_addition<'a, 'b>( | ||||
|     writer: &'a mut heed::RwTxn<'b, MainT>, | ||||
|     index: &store::Index, | ||||
|     addition: Vec<IndexMap<String, serde_json::Value>>, | ||||
| ) -> MResult<()> { | ||||
|     apply_addition(writer, index, addition, false) | ||||
| } | ||||
|  | ||||
| pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> { | ||||
| @@ -277,6 +232,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind | ||||
|     index.main.put_words_fst(writer, &fst::Set::default())?; | ||||
|     index.main.put_ranked_map(writer, &ranked_map)?; | ||||
|     index.main.put_number_of_documents(writer, |_| 0)?; | ||||
|     index.facets.clear(writer)?; | ||||
|     index.postings_lists.clear(writer)?; | ||||
|     index.docs_words.clear(writer)?; | ||||
|  | ||||
| @@ -289,6 +245,11 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind | ||||
|     let mut indexer = RawIndexer::new(stop_words); | ||||
|     let mut ram_store = HashMap::new(); | ||||
|  | ||||
|     if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { | ||||
|         let facet_map = facets::facet_map_from_docids(writer, &index, &documents_ids_to_reindex, &attributes_for_facetting)?; | ||||
|         index.facets.add(writer, facet_map)?; | ||||
|     } | ||||
|     // ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v | ||||
|     for document_id in documents_ids_to_reindex { | ||||
|         for result in index.documents_fields.document_fields(writer, document_id)? { | ||||
|             let (field_id, bytes) = result?; | ||||
|   | ||||
| @@ -6,6 +6,7 @@ use sdset::{duo::DifferenceByKey, SetBuf, SetOperation}; | ||||
|  | ||||
| use crate::database::{MainT, UpdateT}; | ||||
| use crate::database::{UpdateEvent, UpdateEventsEmitter}; | ||||
| use crate::facets; | ||||
| use crate::serde::extract_document_id; | ||||
| use crate::store; | ||||
| use crate::update::{next_update_id, compute_short_prefixes, Update}; | ||||
| @@ -88,8 +89,6 @@ pub fn apply_documents_deletion( | ||||
|     index: &store::Index, | ||||
|     deletion: Vec<DocumentId>, | ||||
| ) -> MResult<()> { | ||||
|     let idset = SetBuf::from_dirty(deletion); | ||||
|  | ||||
|     let schema = match index.main.schema(writer)? { | ||||
|         Some(schema) => schema, | ||||
|         None => return Err(Error::SchemaMissing), | ||||
| @@ -100,9 +99,16 @@ pub fn apply_documents_deletion( | ||||
|         None => RankedMap::default(), | ||||
|     }; | ||||
|  | ||||
|     // facet filters deletion | ||||
|     if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { | ||||
|         let facet_map = facets::facet_map_from_docids(writer, &index, &deletion, &attributes_for_facetting)?; | ||||
|         index.facets.remove(writer, facet_map)?; | ||||
|     } | ||||
|  | ||||
|     // collect the ranked attributes according to the schema | ||||
|     let ranked_fields = schema.ranked(); | ||||
|  | ||||
|     let idset = SetBuf::from_dirty(deletion); | ||||
|     let mut words_document_ids = HashMap::new(); | ||||
|     for id in idset { | ||||
|         // remove all the ranked attributes from the ranked_map | ||||
|   | ||||
| @@ -11,3 +11,4 @@ indexmap = { version = "1.3.2", features = ["serde-1"] } | ||||
| serde = { version = "1.0.105", features = ["derive"] } | ||||
| serde_json = { version = "1.0.50", features = ["preserve_order"] } | ||||
| toml = { version = "0.5.6", features = ["preserve_order"] } | ||||
| zerocopy = "0.3.0" | ||||
|   | ||||
| @@ -6,6 +6,7 @@ pub use error::{Error, SResult}; | ||||
| pub use fields_map::FieldsMap; | ||||
| pub use schema::Schema; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use zerocopy::{AsBytes, FromBytes}; | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)] | ||||
| pub struct IndexedPos(pub u16); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user