mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	Make documents additions accept only the last duplicate document
This commit is contained in:
		| @@ -12,7 +12,6 @@ pub enum Error { | ||||
|     SchemaMissing, | ||||
|     WordIndexMissing, | ||||
|     MissingDocumentId, | ||||
|     DuplicateDocument, | ||||
|     Zlmdb(heed::Error), | ||||
|     Fst(fst::Error), | ||||
|     SerdeJson(SerdeJsonError), | ||||
| @@ -80,7 +79,6 @@ impl fmt::Display for Error { | ||||
|             SchemaMissing => write!(f, "this index does not have a schema"), | ||||
|             WordIndexMissing => write!(f, "this index does not have a word index"), | ||||
|             MissingDocumentId => write!(f, "document id is missing"), | ||||
|             DuplicateDocument => write!(f, "update contains documents with the same id"), | ||||
|             Zlmdb(e) => write!(f, "heed error; {}", e), | ||||
|             Fst(e) => write!(f, "fst error; {}", e), | ||||
|             SerdeJson(e) => write!(f, "serde json error; {}", e), | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| use std::collections::{HashMap, HashSet}; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use fst::{set::OpBuilder, SetBuilder}; | ||||
| use sdset::{duo::Union, SetOperation}; | ||||
| @@ -86,7 +86,7 @@ pub fn apply_documents_addition( | ||||
|     docs_words_store: store::DocsWords, | ||||
|     addition: Vec<serde_json::Value>, | ||||
| ) -> MResult<()> { | ||||
|     let mut documents_ids = HashSet::new(); | ||||
|     let mut documents_additions = HashMap::new(); | ||||
|     let mut indexer = RawIndexer::new(); | ||||
|  | ||||
|     let schema = match main_store.schema(writer)? { | ||||
| @@ -97,19 +97,18 @@ pub fn apply_documents_addition( | ||||
|     let identifier = schema.identifier_name(); | ||||
|  | ||||
|     // 1. store documents ids for future deletion | ||||
|     for document in addition.iter() { | ||||
|     for document in addition { | ||||
|         let document_id = match extract_document_id(identifier, &document)? { | ||||
|             Some(id) => id, | ||||
|             None => return Err(Error::MissingDocumentId), | ||||
|         }; | ||||
|  | ||||
|         if !documents_ids.insert(document_id) { | ||||
|             return Err(Error::DuplicateDocument); | ||||
|         } | ||||
|         documents_additions.insert(document_id, document); | ||||
|     } | ||||
|  | ||||
|     // 2. remove the documents posting lists | ||||
|     let number_of_inserted_documents = documents_ids.len(); | ||||
|     let number_of_inserted_documents = documents_additions.len(); | ||||
|     let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect(); | ||||
|     apply_documents_deletion( | ||||
|         writer, | ||||
|         main_store, | ||||
| @@ -117,7 +116,7 @@ pub fn apply_documents_addition( | ||||
|         documents_fields_counts_store, | ||||
|         postings_lists_store, | ||||
|         docs_words_store, | ||||
|         documents_ids.into_iter().collect(), | ||||
|         documents_ids, | ||||
|     )?; | ||||
|  | ||||
|     let mut ranked_map = match main_store.ranked_map(writer)? { | ||||
| @@ -126,12 +125,7 @@ pub fn apply_documents_addition( | ||||
|     }; | ||||
|  | ||||
|     // 3. index the documents fields in the stores | ||||
|     for document in addition { | ||||
|         let document_id = match extract_document_id(identifier, &document)? { | ||||
|             Some(id) => id, | ||||
|             None => return Err(Error::MissingDocumentId), | ||||
|         }; | ||||
|  | ||||
|     for (document_id, document) in documents_additions { | ||||
|         let serializer = Serializer { | ||||
|             txn: writer, | ||||
|             schema: &schema, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user