mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Make document additions keep only the last document when several share the same id
This commit is contained in:
		| @@ -12,7 +12,6 @@ pub enum Error { | |||||||
|     SchemaMissing, |     SchemaMissing, | ||||||
|     WordIndexMissing, |     WordIndexMissing, | ||||||
|     MissingDocumentId, |     MissingDocumentId, | ||||||
|     DuplicateDocument, |  | ||||||
|     Zlmdb(heed::Error), |     Zlmdb(heed::Error), | ||||||
|     Fst(fst::Error), |     Fst(fst::Error), | ||||||
|     SerdeJson(SerdeJsonError), |     SerdeJson(SerdeJsonError), | ||||||
| @@ -80,7 +79,6 @@ impl fmt::Display for Error { | |||||||
|             SchemaMissing => write!(f, "this index does not have a schema"), |             SchemaMissing => write!(f, "this index does not have a schema"), | ||||||
|             WordIndexMissing => write!(f, "this index does not have a word index"), |             WordIndexMissing => write!(f, "this index does not have a word index"), | ||||||
|             MissingDocumentId => write!(f, "document id is missing"), |             MissingDocumentId => write!(f, "document id is missing"), | ||||||
|             DuplicateDocument => write!(f, "update contains documents with the same id"), |  | ||||||
|             Zlmdb(e) => write!(f, "heed error; {}", e), |             Zlmdb(e) => write!(f, "heed error; {}", e), | ||||||
|             Fst(e) => write!(f, "fst error; {}", e), |             Fst(e) => write!(f, "fst error; {}", e), | ||||||
|             SerdeJson(e) => write!(f, "serde json error; {}", e), |             SerdeJson(e) => write!(f, "serde json error; {}", e), | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| use std::collections::{HashMap, HashSet}; | use std::collections::HashMap; | ||||||
|  |  | ||||||
| use fst::{set::OpBuilder, SetBuilder}; | use fst::{set::OpBuilder, SetBuilder}; | ||||||
| use sdset::{duo::Union, SetOperation}; | use sdset::{duo::Union, SetOperation}; | ||||||
| @@ -86,7 +86,7 @@ pub fn apply_documents_addition( | |||||||
|     docs_words_store: store::DocsWords, |     docs_words_store: store::DocsWords, | ||||||
|     addition: Vec<serde_json::Value>, |     addition: Vec<serde_json::Value>, | ||||||
| ) -> MResult<()> { | ) -> MResult<()> { | ||||||
|     let mut documents_ids = HashSet::new(); |     let mut documents_additions = HashMap::new(); | ||||||
|     let mut indexer = RawIndexer::new(); |     let mut indexer = RawIndexer::new(); | ||||||
|  |  | ||||||
|     let schema = match main_store.schema(writer)? { |     let schema = match main_store.schema(writer)? { | ||||||
| @@ -97,19 +97,18 @@ pub fn apply_documents_addition( | |||||||
|     let identifier = schema.identifier_name(); |     let identifier = schema.identifier_name(); | ||||||
|  |  | ||||||
|     // 1. store documents ids for future deletion |     // 1. store documents ids for future deletion | ||||||
|     for document in addition.iter() { |     for document in addition { | ||||||
|         let document_id = match extract_document_id(identifier, &document)? { |         let document_id = match extract_document_id(identifier, &document)? { | ||||||
|             Some(id) => id, |             Some(id) => id, | ||||||
|             None => return Err(Error::MissingDocumentId), |             None => return Err(Error::MissingDocumentId), | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         if !documents_ids.insert(document_id) { |         documents_additions.insert(document_id, document); | ||||||
|             return Err(Error::DuplicateDocument); |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // 2. remove the documents posting lists |     // 2. remove the documents posting lists | ||||||
|     let number_of_inserted_documents = documents_ids.len(); |     let number_of_inserted_documents = documents_additions.len(); | ||||||
|  |     let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect(); | ||||||
|     apply_documents_deletion( |     apply_documents_deletion( | ||||||
|         writer, |         writer, | ||||||
|         main_store, |         main_store, | ||||||
| @@ -117,7 +116,7 @@ pub fn apply_documents_addition( | |||||||
|         documents_fields_counts_store, |         documents_fields_counts_store, | ||||||
|         postings_lists_store, |         postings_lists_store, | ||||||
|         docs_words_store, |         docs_words_store, | ||||||
|         documents_ids.into_iter().collect(), |         documents_ids, | ||||||
|     )?; |     )?; | ||||||
|  |  | ||||||
|     let mut ranked_map = match main_store.ranked_map(writer)? { |     let mut ranked_map = match main_store.ranked_map(writer)? { | ||||||
| @@ -126,12 +125,7 @@ pub fn apply_documents_addition( | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // 3. index the documents fields in the stores |     // 3. index the documents fields in the stores | ||||||
|     for document in addition { |     for (document_id, document) in documents_additions { | ||||||
|         let document_id = match extract_document_id(identifier, &document)? { |  | ||||||
|             Some(id) => id, |  | ||||||
|             None => return Err(Error::MissingDocumentId), |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let serializer = Serializer { |         let serializer = Serializer { | ||||||
|             txn: writer, |             txn: writer, | ||||||
|             schema: &schema, |             schema: &schema, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user