mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 21:46:27 +00:00 
			
		
		
		
	fix the addition + deletion bug
This commit is contained in:
		
							
								
								
									
										22
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										22
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -359,6 +359,15 @@ dependencies = [ | |||||||
|  "backtrace", |  "backtrace", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "arbitrary" | ||||||
|  | version = "1.3.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" | ||||||
|  | dependencies = [ | ||||||
|  |  "derive_arbitrary", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "assert-json-diff" | name = "assert-json-diff" | ||||||
| version = "2.0.2" | version = "2.0.2" | ||||||
| @@ -1096,6 +1105,17 @@ dependencies = [ | |||||||
|  "syn 1.0.109", |  "syn 1.0.109", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "derive_arbitrary" | ||||||
|  | version = "1.3.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "f3cdeb9ec472d588e539a818b2dee436825730da08ad0017c4b1a17676bdc8b7" | ||||||
|  | dependencies = [ | ||||||
|  |  "proc-macro2", | ||||||
|  |  "quote", | ||||||
|  |  "syn 1.0.109", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "derive_builder" | name = "derive_builder" | ||||||
| version = "0.12.0" | version = "0.12.0" | ||||||
| @@ -2711,6 +2731,7 @@ dependencies = [ | |||||||
| name = "milli" | name = "milli" | ||||||
| version = "1.2.0" | version = "1.2.0" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  |  "arbitrary", | ||||||
|  "big_s", |  "big_s", | ||||||
|  "bimap", |  "bimap", | ||||||
|  "bincode", |  "bincode", | ||||||
| @@ -2722,6 +2743,7 @@ dependencies = [ | |||||||
|  "csv", |  "csv", | ||||||
|  "deserr", |  "deserr", | ||||||
|  "either", |  "either", | ||||||
|  |  "fastrand", | ||||||
|  "filter-parser", |  "filter-parser", | ||||||
|  "flatten-serde-json", |  "flatten-serde-json", | ||||||
|  "fst", |  "fst", | ||||||
|   | |||||||
| @@ -0,0 +1,43 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | ### Autobatching Enabled = true | ||||||
|  | ### Processing Tasks: | ||||||
|  | [] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### All Tasks: | ||||||
|  | 0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||||
|  | 1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Status: | ||||||
|  | enqueued [] | ||||||
|  | succeeded [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Kind: | ||||||
|  | "documentAdditionOrUpdate" [0,] | ||||||
|  | "documentDeletion" [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Tasks: | ||||||
|  | doggos [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Mapper: | ||||||
|  | doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Canceled By: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Enqueued At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | [timestamp] [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Started At: | ||||||
|  | [timestamp] [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Finished At: | ||||||
|  | [timestamp] [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### File Store: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  |  | ||||||
| @@ -0,0 +1,9 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | [ | ||||||
|  |   { | ||||||
|  |     "id": 3, | ||||||
|  |     "doggo": "bork" | ||||||
|  |   } | ||||||
|  | ] | ||||||
| @@ -0,0 +1,37 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | ### Autobatching Enabled = true | ||||||
|  | ### Processing Tasks: | ||||||
|  | [] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### All Tasks: | ||||||
|  | 0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Status: | ||||||
|  | enqueued [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Kind: | ||||||
|  | "documentAdditionOrUpdate" [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Tasks: | ||||||
|  | doggos [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Mapper: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Canceled By: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Enqueued At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Started At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Finished At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### File Store: | ||||||
|  | 00000000-0000-0000-0000-000000000000 | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  |  | ||||||
| @@ -0,0 +1,40 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | ### Autobatching Enabled = true | ||||||
|  | ### Processing Tasks: | ||||||
|  | [] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### All Tasks: | ||||||
|  | 0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||||
|  | 1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Status: | ||||||
|  | enqueued [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Kind: | ||||||
|  | "documentAdditionOrUpdate" [0,] | ||||||
|  | "documentDeletion" [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Tasks: | ||||||
|  | doggos [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Mapper: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Canceled By: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Enqueued At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | [timestamp] [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Started At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Finished At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### File Store: | ||||||
|  | 00000000-0000-0000-0000-000000000000 | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  |  | ||||||
| @@ -0,0 +1,43 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | ### Autobatching Enabled = true | ||||||
|  | ### Processing Tasks: | ||||||
|  | [] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### All Tasks: | ||||||
|  | 0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||||
|  | 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Status: | ||||||
|  | enqueued [1,] | ||||||
|  | failed [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Kind: | ||||||
|  | "documentAdditionOrUpdate" [1,] | ||||||
|  | "documentDeletion" [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Tasks: | ||||||
|  | doggos [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Mapper: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Canceled By: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Enqueued At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | [timestamp] [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Started At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Finished At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### File Store: | ||||||
|  | 00000000-0000-0000-0000-000000000000 | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  |  | ||||||
| @@ -0,0 +1,46 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | ### Autobatching Enabled = true | ||||||
|  | ### Processing Tasks: | ||||||
|  | [] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### All Tasks: | ||||||
|  | 0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||||
|  | 1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Status: | ||||||
|  | enqueued [] | ||||||
|  | succeeded [1,] | ||||||
|  | failed [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Kind: | ||||||
|  | "documentAdditionOrUpdate" [1,] | ||||||
|  | "documentDeletion" [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Tasks: | ||||||
|  | doggos [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Mapper: | ||||||
|  | doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} } | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Canceled By: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Enqueued At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | [timestamp] [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Started At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | [timestamp] [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Finished At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | [timestamp] [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### File Store: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  |  | ||||||
| @@ -0,0 +1,17 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | [ | ||||||
|  |   { | ||||||
|  |     "id": 1, | ||||||
|  |     "doggo": "jean bob" | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |     "id": 2, | ||||||
|  |     "catto": "jorts" | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |     "id": 3, | ||||||
|  |     "doggo": "bork" | ||||||
|  |   } | ||||||
|  | ] | ||||||
| @@ -0,0 +1,36 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | ### Autobatching Enabled = true | ||||||
|  | ### Processing Tasks: | ||||||
|  | [] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### All Tasks: | ||||||
|  | 0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Status: | ||||||
|  | enqueued [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Kind: | ||||||
|  | "documentDeletion" [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Tasks: | ||||||
|  | doggos [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Mapper: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Canceled By: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Enqueued At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Started At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Finished At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### File Store: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  |  | ||||||
| @@ -0,0 +1,40 @@ | |||||||
|  | --- | ||||||
|  | source: index-scheduler/src/lib.rs | ||||||
|  | --- | ||||||
|  | ### Autobatching Enabled = true | ||||||
|  | ### Processing Tasks: | ||||||
|  | [] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### All Tasks: | ||||||
|  | 0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||||
|  | 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Status: | ||||||
|  | enqueued [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Kind: | ||||||
|  | "documentAdditionOrUpdate" [1,] | ||||||
|  | "documentDeletion" [0,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Tasks: | ||||||
|  | doggos [0,1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Index Mapper: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Canceled By: | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Enqueued At: | ||||||
|  | [timestamp] [0,] | ||||||
|  | [timestamp] [1,] | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Started At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### Finished At: | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  | ### File Store: | ||||||
|  | 00000000-0000-0000-0000-000000000000 | ||||||
|  |  | ||||||
|  | ---------------------------------------------------------------------- | ||||||
|  |  | ||||||
| @@ -56,6 +56,7 @@ itertools = "0.10.5" | |||||||
| log = "0.4.17" | log = "0.4.17" | ||||||
| logging_timer = "1.1.0" | logging_timer = "1.1.0" | ||||||
| csv = "1.2.1" | csv = "1.2.1" | ||||||
|  | fastrand = "1.9.0" | ||||||
|  |  | ||||||
| [dev-dependencies] | [dev-dependencies] | ||||||
| mimalloc = { version = "0.1.29", default-features = false } | mimalloc = { version = "0.1.29", default-features = false } | ||||||
| @@ -64,12 +65,13 @@ insta = "1.29.0" | |||||||
| maplit = "1.0.2" | maplit = "1.0.2" | ||||||
| md5 = "0.7.0" | md5 = "0.7.0" | ||||||
| rand = {version = "0.8.5", features = ["small_rng"] } | rand = {version = "0.8.5", features = ["small_rng"] } | ||||||
|  | arbitrary = { version = "1.3.0", features = ["derive"] } | ||||||
|  |  | ||||||
| [target.'cfg(fuzzing)'.dev-dependencies] | [target.'cfg(fuzzing)'.dev-dependencies] | ||||||
| fuzzcheck = "0.12.1" | fuzzcheck = "0.12.1" | ||||||
|  |  | ||||||
| [features] | [features] | ||||||
| all-tokenizations = [ "charabia/default" ] | all-tokenizations = ["charabia/default"] | ||||||
|  |  | ||||||
| # Use POSIX semaphores instead of SysV semaphores in LMDB | # Use POSIX semaphores instead of SysV semaphores in LMDB | ||||||
| # For more information on this feature, see heed's Cargo.toml | # For more information on this feature, see heed's Cargo.toml | ||||||
|   | |||||||
| @@ -111,7 +111,6 @@ pub enum Error { | |||||||
|     Io(#[from] io::Error), |     Io(#[from] io::Error), | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] |  | ||||||
| pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> { | pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> { | ||||||
|     let documents = match json { |     let documents = match json { | ||||||
|         object @ serde_json::Value::Object(_) => vec![object], |         object @ serde_json::Value::Object(_) => vec![object], | ||||||
| @@ -141,7 +140,6 @@ macro_rules! documents { | |||||||
|     }}; |     }}; | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] |  | ||||||
| pub fn documents_batch_reader_from_objects( | pub fn documents_batch_reader_from_objects( | ||||||
|     objects: impl IntoIterator<Item = Object>, |     objects: impl IntoIterator<Item = Object>, | ||||||
| ) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> { | ) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> { | ||||||
|   | |||||||
| @@ -198,6 +198,7 @@ where | |||||||
|             let number_of_documents = self.index.number_of_documents(self.wtxn)?; |             let number_of_documents = self.index.number_of_documents(self.wtxn)?; | ||||||
|             return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents }); |             return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents }); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let output = self |         let output = self | ||||||
|             .transform |             .transform | ||||||
|             .take() |             .take() | ||||||
| @@ -220,6 +221,7 @@ where | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         let indexed_documents = output.documents_count as u64; |         let indexed_documents = output.documents_count as u64; | ||||||
|  |  | ||||||
|         let number_of_documents = self.execute_raw(output)?; |         let number_of_documents = self.execute_raw(output)?; | ||||||
|  |  | ||||||
|         Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) |         Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) | ||||||
| @@ -236,7 +238,7 @@ where | |||||||
|             primary_key, |             primary_key, | ||||||
|             fields_ids_map, |             fields_ids_map, | ||||||
|             field_distribution, |             field_distribution, | ||||||
|             mut external_documents_ids, |             new_external_documents_ids, | ||||||
|             new_documents_ids, |             new_documents_ids, | ||||||
|             replaced_documents_ids, |             replaced_documents_ids, | ||||||
|             documents_count, |             documents_count, | ||||||
| @@ -363,9 +365,6 @@ where | |||||||
|             deletion_builder.delete_documents(&replaced_documents_ids); |             deletion_builder.delete_documents(&replaced_documents_ids); | ||||||
|             let deleted_documents_result = deletion_builder.execute_inner()?; |             let deleted_documents_result = deletion_builder.execute_inner()?; | ||||||
|             debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); |             debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); | ||||||
|             if !deleted_documents_result.soft_deletion_used { |  | ||||||
|                 external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let index_documents_ids = self.index.documents_ids(self.wtxn)?; |         let index_documents_ids = self.index.documents_ids(self.wtxn)?; | ||||||
| @@ -445,6 +444,9 @@ where | |||||||
|         self.index.put_primary_key(self.wtxn, &primary_key)?; |         self.index.put_primary_key(self.wtxn, &primary_key)?; | ||||||
|  |  | ||||||
|         // We write the external documents ids into the main database. |         // We write the external documents ids into the main database. | ||||||
|  |         let mut external_documents_ids = self.index.external_documents_ids(self.wtxn)?; | ||||||
|  |         external_documents_ids.insert_ids(&new_external_documents_ids)?; | ||||||
|  |         let external_documents_ids = external_documents_ids.into_static(); | ||||||
|         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; |         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; | ||||||
|  |  | ||||||
|         let all_documents_ids = index_documents_ids | new_documents_ids; |         let all_documents_ids = index_documents_ids | new_documents_ids; | ||||||
| @@ -2515,4 +2517,170 @@ mod tests { | |||||||
|         db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f"); |         db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f"); | ||||||
|         db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1"); |         db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1"); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn reproduce_the_bug() { | ||||||
|  |         /* | ||||||
|  |             [milli/examples/fuzz.rs:69] &batches = [ | ||||||
|  |             Batch( | ||||||
|  |                 [ | ||||||
|  |                     AddDoc( | ||||||
|  |                         { "id": 1, "doggo": "bernese" }, => internal 0 | ||||||
|  |                     ), | ||||||
|  |                 ], | ||||||
|  |             ), | ||||||
|  |             Batch( | ||||||
|  |                 [ | ||||||
|  |                     DeleteDoc( | ||||||
|  |                         1, => delete internal 0 | ||||||
|  |                     ), | ||||||
|  |                     AddDoc( | ||||||
|  |                         { "id": 0, "catto": "jorts" }, => internal 1 | ||||||
|  |                     ), | ||||||
|  |                 ], | ||||||
|  |             ), | ||||||
|  |             Batch( | ||||||
|  |                 [ | ||||||
|  |                     AddDoc( | ||||||
|  |                         { "id": 1, "catto": "jorts" }, => internal 2 | ||||||
|  |                     ), | ||||||
|  |                 ], | ||||||
|  |             ), | ||||||
|  |         ] | ||||||
|  |         */ | ||||||
|  |         let mut index = TempIndex::new(); | ||||||
|  |         index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard; | ||||||
|  |  | ||||||
|  |         // START OF BATCH | ||||||
|  |  | ||||||
|  |         println!("--- ENTERING BATCH 1"); | ||||||
|  |  | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |  | ||||||
|  |         let builder = IndexDocuments::new( | ||||||
|  |             &mut wtxn, | ||||||
|  |             &index, | ||||||
|  |             &index.indexer_config, | ||||||
|  |             index.index_documents_config.clone(), | ||||||
|  |             |_| (), | ||||||
|  |             || false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |         // OP | ||||||
|  |  | ||||||
|  |         let documents = documents!([ | ||||||
|  |             { "id": 1, "doggo": "bernese" }, | ||||||
|  |         ]); | ||||||
|  |         let (builder, added) = builder.add_documents(documents).unwrap(); | ||||||
|  |         insta::assert_display_snapshot!(added.unwrap(), @"1"); | ||||||
|  |  | ||||||
|  |         // FINISHING | ||||||
|  |         let addition = builder.execute().unwrap(); | ||||||
|  |         insta::assert_debug_snapshot!(addition, @r###" | ||||||
|  |         DocumentAdditionResult { | ||||||
|  |             indexed_documents: 1, | ||||||
|  |             number_of_documents: 1, | ||||||
|  |         } | ||||||
|  |         "###); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         db_snap!(index, documents, @r###" | ||||||
|  |         {"id":1,"doggo":"bernese"} | ||||||
|  |         "###); | ||||||
|  |         db_snap!(index, external_documents_ids, @r###" | ||||||
|  |         soft: | ||||||
|  |         hard: | ||||||
|  |         1                        0 | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         // A first batch of documents has been inserted | ||||||
|  |  | ||||||
|  |         // BATCH 2 | ||||||
|  |  | ||||||
|  |         println!("--- ENTERING BATCH 2"); | ||||||
|  |  | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |  | ||||||
|  |         let builder = IndexDocuments::new( | ||||||
|  |             &mut wtxn, | ||||||
|  |             &index, | ||||||
|  |             &index.indexer_config, | ||||||
|  |             index.index_documents_config.clone(), | ||||||
|  |             |_| (), | ||||||
|  |             || false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |         let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap(); | ||||||
|  |         insta::assert_display_snapshot!(removed.unwrap(), @"1"); | ||||||
|  |  | ||||||
|  |         let documents = documents!([ | ||||||
|  |             { "id": 0, "catto": "jorts" }, | ||||||
|  |         ]); | ||||||
|  |         let (builder, added) = builder.add_documents(documents).unwrap(); | ||||||
|  |         insta::assert_display_snapshot!(added.unwrap(), @"1"); | ||||||
|  |  | ||||||
|  |         let addition = builder.execute().unwrap(); | ||||||
|  |         insta::assert_debug_snapshot!(addition, @r###" | ||||||
|  |         DocumentAdditionResult { | ||||||
|  |             indexed_documents: 1, | ||||||
|  |             number_of_documents: 1, | ||||||
|  |         } | ||||||
|  |         "###); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         db_snap!(index, documents, @r###" | ||||||
|  |         {"id":0,"catto":"jorts"} | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         db_snap!(index, external_documents_ids, @r###" | ||||||
|  |         soft: | ||||||
|  |         hard: | ||||||
|  |         0                        1 | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         db_snap!(index, soft_deleted_documents_ids, @"[]"); | ||||||
|  |  | ||||||
|  |         // BATCH 3 | ||||||
|  |  | ||||||
|  |         println!("--- ENTERING BATCH 3"); | ||||||
|  |  | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |  | ||||||
|  |         let builder = IndexDocuments::new( | ||||||
|  |             &mut wtxn, | ||||||
|  |             &index, | ||||||
|  |             &index.indexer_config, | ||||||
|  |             index.index_documents_config.clone(), | ||||||
|  |             |_| (), | ||||||
|  |             || false, | ||||||
|  |         ) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |         let documents = documents!([ | ||||||
|  |             { "id": 1, "catto": "jorts" }, | ||||||
|  |         ]); | ||||||
|  |         let (builder, added) = builder.add_documents(documents).unwrap(); | ||||||
|  |         insta::assert_display_snapshot!(added.unwrap(), @"1"); | ||||||
|  |  | ||||||
|  |         let addition = builder.execute().unwrap(); | ||||||
|  |         insta::assert_debug_snapshot!(addition, @r###" | ||||||
|  |         DocumentAdditionResult { | ||||||
|  |             indexed_documents: 1, | ||||||
|  |             number_of_documents: 2, | ||||||
|  |         } | ||||||
|  |         "###); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         db_snap!(index, documents, @r###" | ||||||
|  |         {"id":1,"catto":"jorts"} | ||||||
|  |         {"id":0,"catto":"jorts"} | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         // Ensuring all the returned IDs actually exist | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let res = index.search(&rtxn).execute().unwrap(); | ||||||
|  |         index.documents(&rtxn, res.documents_ids).unwrap(); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -21,15 +21,14 @@ use crate::error::{Error, InternalError, UserError}; | |||||||
| use crate::index::{db_name, main_key}; | use crate::index::{db_name, main_key}; | ||||||
| use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep}; | use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep}; | ||||||
| use crate::{ | use crate::{ | ||||||
|     ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, |     FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32, | ||||||
|     Result, BEU32, |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub struct TransformOutput { | pub struct TransformOutput { | ||||||
|     pub primary_key: String, |     pub primary_key: String, | ||||||
|     pub fields_ids_map: FieldsIdsMap, |     pub fields_ids_map: FieldsIdsMap, | ||||||
|     pub field_distribution: FieldDistribution, |     pub field_distribution: FieldDistribution, | ||||||
|     pub external_documents_ids: ExternalDocumentsIds<'static>, |     pub new_external_documents_ids: fst::Map<Cow<'static, [u8]>>, | ||||||
|     pub new_documents_ids: RoaringBitmap, |     pub new_documents_ids: RoaringBitmap, | ||||||
|     pub replaced_documents_ids: RoaringBitmap, |     pub replaced_documents_ids: RoaringBitmap, | ||||||
|     pub documents_count: usize, |     pub documents_count: usize, | ||||||
| @@ -58,8 +57,8 @@ pub struct Transform<'a, 'i> { | |||||||
|     original_sorter: grenad::Sorter<MergeFn>, |     original_sorter: grenad::Sorter<MergeFn>, | ||||||
|     flattened_sorter: grenad::Sorter<MergeFn>, |     flattened_sorter: grenad::Sorter<MergeFn>, | ||||||
|  |  | ||||||
|     replaced_documents_ids: RoaringBitmap, |     pub replaced_documents_ids: RoaringBitmap, | ||||||
|     new_documents_ids: RoaringBitmap, |     pub new_documents_ids: RoaringBitmap, | ||||||
|     // To increase the cache locality and decrease the heap usage we use compact smartstring. |     // To increase the cache locality and decrease the heap usage we use compact smartstring. | ||||||
|     new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>, |     new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>, | ||||||
|     documents_count: usize, |     documents_count: usize, | ||||||
| @@ -568,8 +567,6 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|             }))? |             }))? | ||||||
|             .to_string(); |             .to_string(); | ||||||
|  |  | ||||||
|         let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; |  | ||||||
|  |  | ||||||
|         // We create a final writer to write the new documents in order from the sorter. |         // We create a final writer to write the new documents in order from the sorter. | ||||||
|         let mut writer = create_writer( |         let mut writer = create_writer( | ||||||
|             self.indexer_settings.chunk_compression_type, |             self.indexer_settings.chunk_compression_type, | ||||||
| @@ -651,13 +648,14 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|             fst_new_external_documents_ids_builder.insert(key, value) |             fst_new_external_documents_ids_builder.insert(key, value) | ||||||
|         })?; |         })?; | ||||||
|         let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map(); |         let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map(); | ||||||
|         external_documents_ids.insert_ids(&new_external_documents_ids)?; |  | ||||||
|  |  | ||||||
|         Ok(TransformOutput { |         Ok(TransformOutput { | ||||||
|             primary_key, |             primary_key, | ||||||
|             fields_ids_map: self.fields_ids_map, |             fields_ids_map: self.fields_ids_map, | ||||||
|             field_distribution, |             field_distribution, | ||||||
|             external_documents_ids: external_documents_ids.into_static(), |             new_external_documents_ids: new_external_documents_ids | ||||||
|  |                 .map_data(|c| Cow::Owned(c)) | ||||||
|  |                 .unwrap(), | ||||||
|             new_documents_ids: self.new_documents_ids, |             new_documents_ids: self.new_documents_ids, | ||||||
|             replaced_documents_ids: self.replaced_documents_ids, |             replaced_documents_ids: self.replaced_documents_ids, | ||||||
|             documents_count: self.documents_count, |             documents_count: self.documents_count, | ||||||
| @@ -691,7 +689,8 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|         let new_external_documents_ids = { |         let new_external_documents_ids = { | ||||||
|             let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; |             let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; | ||||||
|             external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; |             external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; | ||||||
|             external_documents_ids |             // it is safe to get the hard document IDs | ||||||
|  |             external_documents_ids.into_static().hard | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let documents_ids = self.index.documents_ids(wtxn)?; |         let documents_ids = self.index.documents_ids(wtxn)?; | ||||||
| @@ -776,7 +775,7 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|             primary_key, |             primary_key, | ||||||
|             fields_ids_map: new_fields_ids_map, |             fields_ids_map: new_fields_ids_map, | ||||||
|             field_distribution, |             field_distribution, | ||||||
|             external_documents_ids: new_external_documents_ids.into_static(), |             new_external_documents_ids, | ||||||
|             new_documents_ids: documents_ids, |             new_documents_ids: documents_ids, | ||||||
|             replaced_documents_ids: RoaringBitmap::default(), |             replaced_documents_ids: RoaringBitmap::default(), | ||||||
|             documents_count, |             documents_count, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user