mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 15:36:28 +00:00 
			
		
		
		
	Simplify facet update after removing Index::faceted_documents_ids
				
					
				
			This commit is contained in:
		| @@ -1,7 +1,6 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | ||||
| use std::fs::File; | ||||
| use std::mem::size_of; | ||||
| use std::path::Path; | ||||
|  | ||||
| use charabia::{Language, Script}; | ||||
| @@ -14,7 +13,6 @@ use time::OffsetDateTime; | ||||
|  | ||||
| use crate::distance::NDotProductPoint; | ||||
| use crate::error::{InternalError, UserError}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::fields_ids_map::FieldsIdsMap; | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| use roaring::RoaringBitmap; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use crate::facet::FacetType; | ||||
| use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result}; | ||||
|  | ||||
| pub struct ClearDocuments<'t, 'u, 'i> { | ||||
| @@ -51,7 +50,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | ||||
|  | ||||
|         // We retrieve the number of documents ids that we are deleting. | ||||
|         let number_of_documents = self.index.number_of_documents(self.wtxn)?; | ||||
|         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||
|  | ||||
|         // We clean some of the main engine datastructures. | ||||
|         self.index.put_words_fst(self.wtxn, &fst::Set::default())?; | ||||
|   | ||||
| @@ -1,8 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::fs::File; | ||||
| use std::io::BufReader; | ||||
|  | ||||
| use grenad::{CompressionType, Reader}; | ||||
| use grenad::CompressionType; | ||||
| use heed::types::ByteSlice; | ||||
| use heed::{BytesEncode, Error, RoTxn, RwTxn}; | ||||
| use obkv::KvReader; | ||||
| @@ -82,10 +81,7 @@ impl<'i> FacetsUpdateBulk<'i> { | ||||
|  | ||||
|         let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size }; | ||||
|  | ||||
|         inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| { | ||||
|             // TODO: remove the lambda altogether | ||||
|             Ok(()) | ||||
|         })?; | ||||
|         inner.update(wtxn, &field_ids)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -99,21 +95,14 @@ pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> { | ||||
|     pub min_level_size: u8, | ||||
| } | ||||
| impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|     pub fn update( | ||||
|         mut self, | ||||
|         wtxn: &mut RwTxn, | ||||
|         field_ids: &[u16], | ||||
|         mut handle_all_docids: impl FnMut(&mut RwTxn, FieldId, RoaringBitmap) -> Result<()>, | ||||
|     ) -> Result<()> { | ||||
|     pub fn update(mut self, wtxn: &mut RwTxn, field_ids: &[u16]) -> Result<()> { | ||||
|         self.update_level0(wtxn)?; | ||||
|         for &field_id in field_ids.iter() { | ||||
|             self.clear_levels(wtxn, field_id)?; | ||||
|         } | ||||
|  | ||||
|         for &field_id in field_ids.iter() { | ||||
|             let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, wtxn)?; | ||||
|  | ||||
|             handle_all_docids(wtxn, field_id, all_docids)?; | ||||
|             let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?; | ||||
|  | ||||
|             for level_reader in level_readers { | ||||
|                 let mut cursor = level_reader.into_cursor()?; | ||||
| @@ -201,16 +190,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|         &self, | ||||
|         field_id: FieldId, | ||||
|         txn: &RoTxn, | ||||
|     ) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> { | ||||
|         let mut all_docids = RoaringBitmap::new(); | ||||
|         let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| { | ||||
|             for bitmap in bitmaps { | ||||
|                 all_docids |= bitmap; | ||||
|             } | ||||
|             Ok(()) | ||||
|         })?; | ||||
|     ) -> Result<Vec<grenad::Reader<BufReader<File>>>> { | ||||
|         let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |_, _| Ok(()))?; | ||||
|  | ||||
|         Ok((subwriters, all_docids)) | ||||
|         Ok(subwriters) | ||||
|     } | ||||
|     #[allow(clippy::type_complexity)] | ||||
|     fn read_level_0<'t>( | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::fs::File; | ||||
| use std::io::BufReader; | ||||
|  | ||||
| @@ -15,7 +14,7 @@ use crate::heed_codec::ByteSliceRefCodec; | ||||
| use crate::search::facet::get_highest_level; | ||||
| use crate::update::del_add::DelAdd; | ||||
| use crate::update::index_documents::valid_lmdb_key; | ||||
| use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; | ||||
| use crate::{CboRoaringBitmapCodec, Index, Result}; | ||||
|  | ||||
| enum InsertionResult { | ||||
|     InPlace, | ||||
| @@ -30,16 +29,14 @@ enum DeletionResult { | ||||
|  | ||||
| /// Algorithm to incrementally insert and delete elements into the | ||||
| /// `facet_id_(string/f64)_docids` databases. | ||||
| pub struct FacetsUpdateIncremental<'i> { | ||||
|     index: &'i Index, | ||||
| pub struct FacetsUpdateIncremental { | ||||
|     inner: FacetsUpdateIncrementalInner, | ||||
|     facet_type: FacetType, | ||||
|     delta_data: grenad::Reader<BufReader<File>>, | ||||
| } | ||||
|  | ||||
| impl<'i> FacetsUpdateIncremental<'i> { | ||||
| impl FacetsUpdateIncremental { | ||||
|     pub fn new( | ||||
|         index: &'i Index, | ||||
|         index: &Index, | ||||
|         facet_type: FacetType, | ||||
|         delta_data: grenad::Reader<BufReader<File>>, | ||||
|         group_size: u8, | ||||
| @@ -47,7 +44,6 @@ impl<'i> FacetsUpdateIncremental<'i> { | ||||
|         max_group_size: u8, | ||||
|     ) -> Self { | ||||
|         FacetsUpdateIncremental { | ||||
|             index, | ||||
|             inner: FacetsUpdateIncrementalInner { | ||||
|                 db: match facet_type { | ||||
|                     FacetType::String => index | ||||
| @@ -61,12 +57,11 @@ impl<'i> FacetsUpdateIncremental<'i> { | ||||
|                 max_group_size, | ||||
|                 min_level_size, | ||||
|             }, | ||||
|             facet_type, | ||||
|             delta_data, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> { | ||||
|     pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> { | ||||
|         let mut cursor = self.delta_data.into_cursor()?; | ||||
|         while let Some((key, value)) = cursor.move_on_next()? { | ||||
|             if !valid_lmdb_key(key) { | ||||
|   | ||||
| @@ -115,7 +115,6 @@ pub struct FacetsUpdate<'i> { | ||||
|     min_level_size: u8, | ||||
| } | ||||
| impl<'i> FacetsUpdate<'i> { | ||||
|     // TODO grenad::Reader<Key, Obkv<DelAdd, RoaringBitmap>> | ||||
|     pub fn new( | ||||
|         index: &'i Index, | ||||
|         facet_type: FacetType, | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::HashMap; | ||||
| use std::convert::TryInto; | ||||
| use std::fs::File; | ||||
| @@ -11,9 +10,7 @@ use heed::types::ByteSlice; | ||||
| use heed::RwTxn; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::helpers::{ | ||||
|     self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap, | ||||
| }; | ||||
| use super::helpers::{self, merge_ignore_values, valid_lmdb_key, CursorClonableMmap}; | ||||
| use super::{ClonableMmap, MergeFn}; | ||||
| use crate::distance::NDotProductPoint; | ||||
| use crate::error::UserError; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user