mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Fix a documents indexing bug and add a test
This commit is contained in:
		| @@ -47,7 +47,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn execute(self) -> anyhow::Result<usize> { |     pub fn execute(self) -> anyhow::Result<usize> { | ||||||
|         // We retrieve remove the deleted documents ids and write them into the database. |         // We retrieve the current documents ids that are in the database. | ||||||
|         let mut documents_ids = self.index.documents_ids(self.wtxn)?; |         let mut documents_ids = self.index.documents_ids(self.wtxn)?; | ||||||
|  |  | ||||||
|         // We can and must stop removing documents in a database that is empty. |         // We can and must stop removing documents in a database that is empty. | ||||||
| @@ -55,8 +55,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|             return Ok(0); |             return Ok(0); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         // We remove the documents ids that we want to delete | ||||||
|  |         // from the documents in the database and write them back. | ||||||
|         let current_documents_ids_len = documents_ids.len(); |         let current_documents_ids_len = documents_ids.len(); | ||||||
|         documents_ids.intersect_with(&self.documents_ids); |         documents_ids.difference_with(&self.documents_ids); | ||||||
|         self.index.put_documents_ids(self.wtxn, &documents_ids)?; |         self.index.put_documents_ids(self.wtxn, &documents_ids)?; | ||||||
|  |  | ||||||
|         // We can execute a ClearDocuments operation when the number of documents |         // We can execute a ClearDocuments operation when the number of documents | ||||||
| @@ -80,7 +82,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|         // Retrieve the words and the users ids contained in the documents. |         // Retrieve the words and the users ids contained in the documents. | ||||||
|         let mut words = Vec::new(); |         let mut words = Vec::new(); | ||||||
|         let mut users_ids = Vec::new(); |         let mut users_ids = Vec::new(); | ||||||
|         for docid in &documents_ids { |         for docid in &self.documents_ids { | ||||||
|             // We create an iterator to be able to get the content and delete the document |             // We create an iterator to be able to get the content and delete the document | ||||||
|             // content itself. It's faster to acquire a cursor to get and delete, |             // content itself. It's faster to acquire a cursor to get and delete, | ||||||
|             // as we avoid traversing the LMDB B-Tree two times but only once. |             // as we avoid traversing the LMDB B-Tree two times but only once. | ||||||
| @@ -144,7 +146,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|             let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?; |             let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?; | ||||||
|             if let Some((key, mut docids)) = iter.next().transpose()? { |             if let Some((key, mut docids)) = iter.next().transpose()? { | ||||||
|                 if key == word.as_ref() { |                 if key == word.as_ref() { | ||||||
|                     docids.difference_with(&mut documents_ids); |                     docids.difference_with(&self.documents_ids); | ||||||
|                     if docids.is_empty() { |                     if docids.is_empty() { | ||||||
|                         iter.del_current()?; |                         iter.del_current()?; | ||||||
|                         *must_remove = true; |                         *must_remove = true; | ||||||
| @@ -181,7 +183,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|         let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?; |         let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?; | ||||||
|         while let Some(result) = iter.next() { |         while let Some(result) = iter.next() { | ||||||
|             let ((w1, w2, prox), mut docids) = result?; |             let ((w1, w2, prox), mut docids) = result?; | ||||||
|             docids.difference_with(&documents_ids); |             docids.difference_with(&self.documents_ids); | ||||||
|             if docids.is_empty() { |             if docids.is_empty() { | ||||||
|                 iter.del_current()?; |                 iter.del_current()?; | ||||||
|             } else { |             } else { | ||||||
| @@ -189,6 +191,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         Ok(documents_ids.len() as usize) |         Ok(self.documents_ids.len() as usize) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -484,3 +484,54 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> { | |||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[cfg(test)] | ||||||
|  | mod tests { | ||||||
|  |     use super::*; | ||||||
|  |     use heed::EnvOpenOptions; | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn simple_replacement() { | ||||||
|  |         let path = tempfile::tempdir().unwrap(); | ||||||
|  |         let mut options = EnvOpenOptions::new(); | ||||||
|  |         options.map_size(10 * 1024 * 1024); // 10 MB | ||||||
|  |  | ||||||
|  |         let index = Index::new(options, &path).unwrap(); | ||||||
|  |  | ||||||
|  |         // First we send 3 documents with ids from 1 to 3. | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let content = &b"id,name\n1,kevin\n2,kevina\n3,benoit\n"[..]; | ||||||
|  |         IndexDocuments::new(&mut wtxn, &index).execute(content, |_, _| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         // Check that there are 3 documents now. | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let count = index.number_of_documents(&rtxn).unwrap(); | ||||||
|  |         assert_eq!(count, 3); | ||||||
|  |         drop(rtxn); | ||||||
|  |  | ||||||
|  |         // Second we send 1 document with id 1, to replace the existing document with that id. | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let content = &b"id,name\n1,updated kevin\n"[..]; | ||||||
|  |         IndexDocuments::new(&mut wtxn, &index).execute(content, |_, _| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         // Check that there are **still** 3 documents. | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let count = index.number_of_documents(&rtxn).unwrap(); | ||||||
|  |         assert_eq!(count, 3); | ||||||
|  |         drop(rtxn); | ||||||
|  |  | ||||||
|  |         // Third we send 3 documents again to replace the existing ones. | ||||||
|  |         let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |         let content = &b"id,name\n1,updated second kevin\n2,updated kevina\n3,updated benoit\n"[..]; | ||||||
|  |         IndexDocuments::new(&mut wtxn, &index).execute(content, |_, _| ()).unwrap(); | ||||||
|  |         wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |         // Check that there are **still** 3 documents. | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let count = index.number_of_documents(&rtxn).unwrap(); | ||||||
|  |         assert_eq!(count, 3); | ||||||
|  |         drop(rtxn); | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user