mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	fix the addition + deletion bug
This commit is contained in:
		
							
								
								
									
										22
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										22
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -359,6 +359,15 @@ dependencies = [ | ||||
|  "backtrace", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "arbitrary" | ||||
| version = "1.3.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" | ||||
| dependencies = [ | ||||
|  "derive_arbitrary", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "assert-json-diff" | ||||
| version = "2.0.2" | ||||
| @@ -1096,6 +1105,17 @@ dependencies = [ | ||||
|  "syn 1.0.109", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "derive_arbitrary" | ||||
| version = "1.3.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "f3cdeb9ec472d588e539a818b2dee436825730da08ad0017c4b1a17676bdc8b7" | ||||
| dependencies = [ | ||||
|  "proc-macro2", | ||||
|  "quote", | ||||
|  "syn 1.0.109", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "derive_builder" | ||||
| version = "0.12.0" | ||||
| @@ -2711,6 +2731,7 @@ dependencies = [ | ||||
| name = "milli" | ||||
| version = "1.2.0" | ||||
| dependencies = [ | ||||
|  "arbitrary", | ||||
|  "big_s", | ||||
|  "bimap", | ||||
|  "bincode", | ||||
| @@ -2722,6 +2743,7 @@ dependencies = [ | ||||
|  "csv", | ||||
|  "deserr", | ||||
|  "either", | ||||
|  "fastrand", | ||||
|  "filter-parser", | ||||
|  "flatten-serde-json", | ||||
|  "fst", | ||||
|   | ||||
| @@ -0,0 +1,43 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing Tasks: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||
| 1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [] | ||||
| succeeded [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Kind: | ||||
| "documentAdditionOrUpdate" [0,] | ||||
| "documentDeletion" [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Tasks: | ||||
| doggos [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Mapper: | ||||
| doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Canceled By: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Started At: | ||||
| [timestamp] [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Finished At: | ||||
| [timestamp] [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| @@ -0,0 +1,9 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| [ | ||||
|   { | ||||
|     "id": 3, | ||||
|     "doggo": "bork" | ||||
|   } | ||||
| ] | ||||
| @@ -0,0 +1,37 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing Tasks: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Kind: | ||||
| "documentAdditionOrUpdate" [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Tasks: | ||||
| doggos [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Mapper: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Canceled By: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Enqueued At: | ||||
| [timestamp] [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Started At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### Finished At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
| 00000000-0000-0000-0000-000000000000 | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| @@ -0,0 +1,40 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing Tasks: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||
| 1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Kind: | ||||
| "documentAdditionOrUpdate" [0,] | ||||
| "documentDeletion" [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Tasks: | ||||
| doggos [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Mapper: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Canceled By: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Started At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### Finished At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
| 00000000-0000-0000-0000-000000000000 | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| @@ -0,0 +1,43 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing Tasks: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||
| 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [1,] | ||||
| failed [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Kind: | ||||
| "documentAdditionOrUpdate" [1,] | ||||
| "documentDeletion" [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Tasks: | ||||
| doggos [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Mapper: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Canceled By: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Started At: | ||||
| [timestamp] [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Finished At: | ||||
| [timestamp] [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
| 00000000-0000-0000-0000-000000000000 | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| @@ -0,0 +1,46 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing Tasks: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||
| 1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [] | ||||
| succeeded [1,] | ||||
| failed [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Kind: | ||||
| "documentAdditionOrUpdate" [1,] | ||||
| "documentDeletion" [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Tasks: | ||||
| doggos [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Mapper: | ||||
| doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} } | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Canceled By: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Started At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Finished At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| @@ -0,0 +1,17 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| [ | ||||
|   { | ||||
|     "id": 1, | ||||
|     "doggo": "jean bob" | ||||
|   }, | ||||
|   { | ||||
|     "id": 2, | ||||
|     "catto": "jorts" | ||||
|   }, | ||||
|   { | ||||
|     "id": 3, | ||||
|     "doggo": "bork" | ||||
|   } | ||||
| ] | ||||
| @@ -0,0 +1,36 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing Tasks: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Kind: | ||||
| "documentDeletion" [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Tasks: | ||||
| doggos [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Mapper: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Canceled By: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Enqueued At: | ||||
| [timestamp] [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Started At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### Finished At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| @@ -0,0 +1,40 @@ | ||||
| --- | ||||
| source: index-scheduler/src/lib.rs | ||||
| --- | ||||
| ### Autobatching Enabled = true | ||||
| ### Processing Tasks: | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} | ||||
| 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Kind: | ||||
| "documentAdditionOrUpdate" [1,] | ||||
| "documentDeletion" [0,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Tasks: | ||||
| doggos [0,1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Index Mapper: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Canceled By: | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
| ### Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Started At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### Finished At: | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
| 00000000-0000-0000-0000-000000000000 | ||||
|  | ||||
| ---------------------------------------------------------------------- | ||||
|  | ||||
| @@ -56,6 +56,7 @@ itertools = "0.10.5" | ||||
| log = "0.4.17" | ||||
| logging_timer = "1.1.0" | ||||
| csv = "1.2.1" | ||||
| fastrand = "1.9.0" | ||||
|  | ||||
| [dev-dependencies] | ||||
| mimalloc = { version = "0.1.29", default-features = false } | ||||
| @@ -64,12 +65,13 @@ insta = "1.29.0" | ||||
| maplit = "1.0.2" | ||||
| md5 = "0.7.0" | ||||
| rand = {version = "0.8.5", features = ["small_rng"] } | ||||
| arbitrary = { version = "1.3.0", features = ["derive"] } | ||||
|  | ||||
| [target.'cfg(fuzzing)'.dev-dependencies] | ||||
| fuzzcheck = "0.12.1" | ||||
|  | ||||
| [features] | ||||
| all-tokenizations = [ "charabia/default" ] | ||||
| all-tokenizations = ["charabia/default"] | ||||
|  | ||||
| # Use POSIX semaphores instead of SysV semaphores in LMDB | ||||
| # For more information on this feature, see heed's Cargo.toml | ||||
|   | ||||
| @@ -111,7 +111,6 @@ pub enum Error { | ||||
|     Io(#[from] io::Error), | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> { | ||||
|     let documents = match json { | ||||
|         object @ serde_json::Value::Object(_) => vec![object], | ||||
| @@ -141,7 +140,6 @@ macro_rules! documents { | ||||
|     }}; | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub fn documents_batch_reader_from_objects( | ||||
|     objects: impl IntoIterator<Item = Object>, | ||||
| ) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> { | ||||
|   | ||||
| @@ -198,6 +198,7 @@ where | ||||
|             let number_of_documents = self.index.number_of_documents(self.wtxn)?; | ||||
|             return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents }); | ||||
|         } | ||||
|  | ||||
|         let output = self | ||||
|             .transform | ||||
|             .take() | ||||
| @@ -220,6 +221,7 @@ where | ||||
|         } | ||||
|  | ||||
|         let indexed_documents = output.documents_count as u64; | ||||
|  | ||||
|         let number_of_documents = self.execute_raw(output)?; | ||||
|  | ||||
|         Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) | ||||
| @@ -236,7 +238,7 @@ where | ||||
|             primary_key, | ||||
|             fields_ids_map, | ||||
|             field_distribution, | ||||
|             mut external_documents_ids, | ||||
|             new_external_documents_ids, | ||||
|             new_documents_ids, | ||||
|             replaced_documents_ids, | ||||
|             documents_count, | ||||
| @@ -363,9 +365,6 @@ where | ||||
|             deletion_builder.delete_documents(&replaced_documents_ids); | ||||
|             let deleted_documents_result = deletion_builder.execute_inner()?; | ||||
|             debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); | ||||
|             if !deleted_documents_result.soft_deletion_used { | ||||
|                 external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let index_documents_ids = self.index.documents_ids(self.wtxn)?; | ||||
| @@ -445,6 +444,9 @@ where | ||||
|         self.index.put_primary_key(self.wtxn, &primary_key)?; | ||||
|  | ||||
|         // We write the external documents ids into the main database. | ||||
|         let mut external_documents_ids = self.index.external_documents_ids(self.wtxn)?; | ||||
|         external_documents_ids.insert_ids(&new_external_documents_ids)?; | ||||
|         let external_documents_ids = external_documents_ids.into_static(); | ||||
|         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; | ||||
|  | ||||
|         let all_documents_ids = index_documents_ids | new_documents_ids; | ||||
| @@ -2515,4 +2517,170 @@ mod tests { | ||||
|         db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f"); | ||||
|         db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn reproduce_the_bug() { | ||||
|         /* | ||||
|             [milli/examples/fuzz.rs:69] &batches = [ | ||||
|             Batch( | ||||
|                 [ | ||||
|                     AddDoc( | ||||
|                         { "id": 1, "doggo": "bernese" }, => internal 0 | ||||
|                     ), | ||||
|                 ], | ||||
|             ), | ||||
|             Batch( | ||||
|                 [ | ||||
|                     DeleteDoc( | ||||
|                         1, => delete internal 0 | ||||
|                     ), | ||||
|                     AddDoc( | ||||
|                         { "id": 0, "catto": "jorts" }, => internal 1 | ||||
|                     ), | ||||
|                 ], | ||||
|             ), | ||||
|             Batch( | ||||
|                 [ | ||||
|                     AddDoc( | ||||
|                         { "id": 1, "catto": "jorts" }, => internal 2 | ||||
|                     ), | ||||
|                 ], | ||||
|             ), | ||||
|         ] | ||||
|         */ | ||||
|         let mut index = TempIndex::new(); | ||||
|         index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard; | ||||
|  | ||||
|         // START OF BATCH | ||||
|  | ||||
|         println!("--- ENTERING BATCH 1"); | ||||
|  | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|         let builder = IndexDocuments::new( | ||||
|             &mut wtxn, | ||||
|             &index, | ||||
|             &index.indexer_config, | ||||
|             index.index_documents_config.clone(), | ||||
|             |_| (), | ||||
|             || false, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
|         // OP | ||||
|  | ||||
|         let documents = documents!([ | ||||
|             { "id": 1, "doggo": "bernese" }, | ||||
|         ]); | ||||
|         let (builder, added) = builder.add_documents(documents).unwrap(); | ||||
|         insta::assert_display_snapshot!(added.unwrap(), @"1"); | ||||
|  | ||||
|         // FINISHING | ||||
|         let addition = builder.execute().unwrap(); | ||||
|         insta::assert_debug_snapshot!(addition, @r###" | ||||
|         DocumentAdditionResult { | ||||
|             indexed_documents: 1, | ||||
|             number_of_documents: 1, | ||||
|         } | ||||
|         "###); | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         db_snap!(index, documents, @r###" | ||||
|         {"id":1,"doggo":"bernese"} | ||||
|         "###); | ||||
|         db_snap!(index, external_documents_ids, @r###" | ||||
|         soft: | ||||
|         hard: | ||||
|         1                        0 | ||||
|         "###); | ||||
|  | ||||
|         // A first batch of documents has been inserted | ||||
|  | ||||
|         // BATCH 2 | ||||
|  | ||||
|         println!("--- ENTERING BATCH 2"); | ||||
|  | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|         let builder = IndexDocuments::new( | ||||
|             &mut wtxn, | ||||
|             &index, | ||||
|             &index.indexer_config, | ||||
|             index.index_documents_config.clone(), | ||||
|             |_| (), | ||||
|             || false, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
|         let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap(); | ||||
|         insta::assert_display_snapshot!(removed.unwrap(), @"1"); | ||||
|  | ||||
|         let documents = documents!([ | ||||
|             { "id": 0, "catto": "jorts" }, | ||||
|         ]); | ||||
|         let (builder, added) = builder.add_documents(documents).unwrap(); | ||||
|         insta::assert_display_snapshot!(added.unwrap(), @"1"); | ||||
|  | ||||
|         let addition = builder.execute().unwrap(); | ||||
|         insta::assert_debug_snapshot!(addition, @r###" | ||||
|         DocumentAdditionResult { | ||||
|             indexed_documents: 1, | ||||
|             number_of_documents: 1, | ||||
|         } | ||||
|         "###); | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         db_snap!(index, documents, @r###" | ||||
|         {"id":0,"catto":"jorts"} | ||||
|         "###); | ||||
|  | ||||
|         db_snap!(index, external_documents_ids, @r###" | ||||
|         soft: | ||||
|         hard: | ||||
|         0                        1 | ||||
|         "###); | ||||
|  | ||||
|         db_snap!(index, soft_deleted_documents_ids, @"[]"); | ||||
|  | ||||
|         // BATCH 3 | ||||
|  | ||||
|         println!("--- ENTERING BATCH 3"); | ||||
|  | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|         let builder = IndexDocuments::new( | ||||
|             &mut wtxn, | ||||
|             &index, | ||||
|             &index.indexer_config, | ||||
|             index.index_documents_config.clone(), | ||||
|             |_| (), | ||||
|             || false, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
|         let documents = documents!([ | ||||
|             { "id": 1, "catto": "jorts" }, | ||||
|         ]); | ||||
|         let (builder, added) = builder.add_documents(documents).unwrap(); | ||||
|         insta::assert_display_snapshot!(added.unwrap(), @"1"); | ||||
|  | ||||
|         let addition = builder.execute().unwrap(); | ||||
|         insta::assert_debug_snapshot!(addition, @r###" | ||||
|         DocumentAdditionResult { | ||||
|             indexed_documents: 1, | ||||
|             number_of_documents: 2, | ||||
|         } | ||||
|         "###); | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         db_snap!(index, documents, @r###" | ||||
|         {"id":1,"catto":"jorts"} | ||||
|         {"id":0,"catto":"jorts"} | ||||
|         "###); | ||||
|  | ||||
|         // Ensuring all the returned IDs actually exist | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let res = index.search(&rtxn).execute().unwrap(); | ||||
|         index.documents(&rtxn, res.documents_ids).unwrap(); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -21,15 +21,14 @@ use crate::error::{Error, InternalError, UserError}; | ||||
| use crate::index::{db_name, main_key}; | ||||
| use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep}; | ||||
| use crate::{ | ||||
|     ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, | ||||
|     Result, BEU32, | ||||
|     FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32, | ||||
| }; | ||||
|  | ||||
| pub struct TransformOutput { | ||||
|     pub primary_key: String, | ||||
|     pub fields_ids_map: FieldsIdsMap, | ||||
|     pub field_distribution: FieldDistribution, | ||||
|     pub external_documents_ids: ExternalDocumentsIds<'static>, | ||||
|     pub new_external_documents_ids: fst::Map<Cow<'static, [u8]>>, | ||||
|     pub new_documents_ids: RoaringBitmap, | ||||
|     pub replaced_documents_ids: RoaringBitmap, | ||||
|     pub documents_count: usize, | ||||
| @@ -58,8 +57,8 @@ pub struct Transform<'a, 'i> { | ||||
|     original_sorter: grenad::Sorter<MergeFn>, | ||||
|     flattened_sorter: grenad::Sorter<MergeFn>, | ||||
|  | ||||
|     replaced_documents_ids: RoaringBitmap, | ||||
|     new_documents_ids: RoaringBitmap, | ||||
|     pub replaced_documents_ids: RoaringBitmap, | ||||
|     pub new_documents_ids: RoaringBitmap, | ||||
|     // To increase the cache locality and decrease the heap usage we use compact smartstring. | ||||
|     new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>, | ||||
|     documents_count: usize, | ||||
| @@ -568,8 +567,6 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|             }))? | ||||
|             .to_string(); | ||||
|  | ||||
|         let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; | ||||
|  | ||||
|         // We create a final writer to write the new documents in order from the sorter. | ||||
|         let mut writer = create_writer( | ||||
|             self.indexer_settings.chunk_compression_type, | ||||
| @@ -651,13 +648,14 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|             fst_new_external_documents_ids_builder.insert(key, value) | ||||
|         })?; | ||||
|         let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map(); | ||||
|         external_documents_ids.insert_ids(&new_external_documents_ids)?; | ||||
|  | ||||
|         Ok(TransformOutput { | ||||
|             primary_key, | ||||
|             fields_ids_map: self.fields_ids_map, | ||||
|             field_distribution, | ||||
|             external_documents_ids: external_documents_ids.into_static(), | ||||
|             new_external_documents_ids: new_external_documents_ids | ||||
|                 .map_data(|c| Cow::Owned(c)) | ||||
|                 .unwrap(), | ||||
|             new_documents_ids: self.new_documents_ids, | ||||
|             replaced_documents_ids: self.replaced_documents_ids, | ||||
|             documents_count: self.documents_count, | ||||
| @@ -691,7 +689,8 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|         let new_external_documents_ids = { | ||||
|             let mut external_documents_ids = self.index.external_documents_ids(wtxn)?; | ||||
|             external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; | ||||
|             external_documents_ids | ||||
|             // it is safe to get the hard document IDs | ||||
|             external_documents_ids.into_static().hard | ||||
|         }; | ||||
|  | ||||
|         let documents_ids = self.index.documents_ids(wtxn)?; | ||||
| @@ -776,7 +775,7 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|             primary_key, | ||||
|             fields_ids_map: new_fields_ids_map, | ||||
|             field_distribution, | ||||
|             external_documents_ids: new_external_documents_ids.into_static(), | ||||
|             new_external_documents_ids, | ||||
|             new_documents_ids: documents_ids, | ||||
|             replaced_documents_ids: RoaringBitmap::default(), | ||||
|             documents_count, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user