mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Implement an ugly deletion of values in the HNSW
This commit is contained in:
		
				
					committed by
					
						 Clément Renault
						Clément Renault
					
				
			
			
				
	
			
			
			
						parent
						
							436a10bef4
						
					
				
				
					commit
					c2a402f3ae
				
			| @@ -4,8 +4,10 @@ use std::collections::{BTreeSet, HashMap, HashSet}; | |||||||
| use fst::IntoStreamer; | use fst::IntoStreamer; | ||||||
| use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice}; | use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice}; | ||||||
| use heed::{BytesDecode, BytesEncode, Database, RwIter}; | use heed::{BytesDecode, BytesEncode, Database, RwIter}; | ||||||
|  | use hnsw::Searcher; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  | use space::KnnPoints; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
|  |  | ||||||
| use super::facet::delete::FacetsDelete; | use super::facet::delete::FacetsDelete; | ||||||
| @@ -14,6 +16,7 @@ use crate::error::InternalError; | |||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::FieldDocIdFacetCodec; | use crate::heed_codec::facet::FieldDocIdFacetCodec; | ||||||
| use crate::heed_codec::CboRoaringBitmapCodec; | use crate::heed_codec::CboRoaringBitmapCodec; | ||||||
|  | use crate::index::Hnsw; | ||||||
| use crate::{ | use crate::{ | ||||||
|     ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32, |     ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32, | ||||||
| }; | }; | ||||||
| @@ -430,6 +433,30 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|             &self.to_delete_docids, |             &self.to_delete_docids, | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|  |         // An ugly and slow way to remove the vectors from the HNSW | ||||||
|  |         // It basically reconstructs the HNSW from scratch without editing the current one. | ||||||
|  |         let current_hnsw = self.index.vector_hnsw(self.wtxn)?.unwrap_or_default(); | ||||||
|  |         if !current_hnsw.is_empty() { | ||||||
|  |             let mut new_hnsw = Hnsw::default(); | ||||||
|  |             let mut searcher = Searcher::new(); | ||||||
|  |             let mut new_vector_id_docids = Vec::new(); | ||||||
|  |  | ||||||
|  |             for result in vector_id_docid.iter(self.wtxn)? { | ||||||
|  |                 let (vector_id, docid) = result?; | ||||||
|  |                 if !self.to_delete_docids.contains(docid.get()) { | ||||||
|  |                     let vector = current_hnsw.get_point(vector_id.get() as usize).clone(); | ||||||
|  |                     let vector_id = new_hnsw.insert(vector, &mut searcher); | ||||||
|  |                     new_vector_id_docids.push((vector_id as u32, docid)); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             vector_id_docid.clear(self.wtxn)?; | ||||||
|  |             for (vector_id, docid) in new_vector_id_docids { | ||||||
|  |                 vector_id_docid.put(self.wtxn, &BEU32::new(vector_id), &docid)?; | ||||||
|  |             } | ||||||
|  |             self.index.put_vector_hnsw(self.wtxn, &new_hnsw)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; |         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; | ||||||
|  |  | ||||||
|         Ok(DetailedDocumentDeletionResult { |         Ok(DetailedDocumentDeletionResult { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user