mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	Introduce the FacetRevRange Iterator struct
This commit is contained in:
		
							
								
								
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -673,6 +673,7 @@ dependencies = [ | |||||||
|  "criterion", |  "criterion", | ||||||
|  "crossbeam-channel", |  "crossbeam-channel", | ||||||
|  "csv", |  "csv", | ||||||
|  |  "either", | ||||||
|  "flate2", |  "flate2", | ||||||
|  "fst", |  "fst", | ||||||
|  "fxhash", |  "fxhash", | ||||||
|   | |||||||
| @@ -10,6 +10,7 @@ bstr = "0.2.13" | |||||||
| byteorder = "1.3.4" | byteorder = "1.3.4" | ||||||
| crossbeam-channel = "0.5.0" | crossbeam-channel = "0.5.0" | ||||||
| csv = "1.1.3" | csv = "1.1.3" | ||||||
|  | either = "1.6.1" | ||||||
| flate2 = "1.0.17" | flate2 = "1.0.17" | ||||||
| fst = "0.4.4" | fst = "0.4.4" | ||||||
| fxhash = "0.2.1" | fxhash = "0.2.1" | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -999,6 +999,7 @@ dependencies = [ | |||||||
|  "byteorder", |  "byteorder", | ||||||
|  "crossbeam-channel", |  "crossbeam-channel", | ||||||
|  "csv", |  "csv", | ||||||
|  |  "either", | ||||||
|  "flate2", |  "flate2", | ||||||
|  "fst", |  "fst", | ||||||
|  "fxhash", |  "fxhash", | ||||||
|   | |||||||
| @@ -1,9 +1,11 @@ | |||||||
| use std::fmt::Debug; | use std::fmt::Debug; | ||||||
| use std::ops::Bound::{self, Included, Excluded, Unbounded}; | use std::ops::Bound::{self, Included, Excluded, Unbounded}; | ||||||
|  |  | ||||||
| use heed::types::DecodeIgnore; | use either::Either::{self, Left, Right}; | ||||||
|  | use heed::types::{DecodeIgnore, ByteSlice}; | ||||||
| use heed::{BytesEncode, BytesDecode}; | use heed::{BytesEncode, BytesDecode}; | ||||||
| use heed::{Database, RoRange, LazyDecode}; | use heed::{Database, RoRange, RoRevRange, LazyDecode}; | ||||||
|  | use log::debug; | ||||||
| use num_traits::Bounded; | use num_traits::Bounded; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| @@ -76,16 +78,78 @@ where | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | struct FacetRevRange<'t, T: 't, KC> { | ||||||
|  |     iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, | ||||||
|  |     end: Bound<T>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t, T: 't, KC> FacetRevRange<'t, T, KC> | ||||||
|  | where | ||||||
|  |     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||||
|  |     T: PartialOrd + Copy + Bounded, | ||||||
|  | { | ||||||
|  |     fn new( | ||||||
|  |         rtxn: &'t heed::RoTxn, | ||||||
|  |         db: Database<KC, CboRoaringBitmapCodec>, | ||||||
|  |         field_id: FieldId, | ||||||
|  |         level: u8, | ||||||
|  |         left: Bound<T>, | ||||||
|  |         right: Bound<T>, | ||||||
|  |     ) -> heed::Result<FacetRevRange<'t, T, KC>> | ||||||
|  |     { | ||||||
|  |         let left_bound = match left { | ||||||
|  |             Included(left) => Included((field_id, level, left, T::min_value())), | ||||||
|  |             Excluded(left) => Excluded((field_id, level, left, T::min_value())), | ||||||
|  |             Unbounded => Included((field_id, level, T::min_value(), T::min_value())), | ||||||
|  |         }; | ||||||
|  |         let right_bound = Included((field_id, level, T::max_value(), T::max_value())); | ||||||
|  |         let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?; | ||||||
|  |         Ok(FacetRevRange { iter, end: right }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t, T, KC> Iterator for FacetRevRange<'t, T, KC> | ||||||
|  | where | ||||||
|  |     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||||
|  |     KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>, | ||||||
|  |     T: PartialOrd + Copy, | ||||||
|  | { | ||||||
|  |     type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>; | ||||||
|  |  | ||||||
|  |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|  |         loop { | ||||||
|  |             match self.iter.next() { | ||||||
|  |                 Some(Ok(((fid, level, left, right), docids))) => { | ||||||
|  |                     let must_be_returned = match self.end { | ||||||
|  |                         Included(end) => right <= end, | ||||||
|  |                         Excluded(end) => right < end, | ||||||
|  |                         Unbounded => true, | ||||||
|  |                     }; | ||||||
|  |                     if must_be_returned { | ||||||
|  |                         match docids.decode() { | ||||||
|  |                             Ok(docids) => return Some(Ok(((fid, level, left, right), docids))), | ||||||
|  |                             Err(e) => return Some(Err(e)), | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                     continue; | ||||||
|  |                 }, | ||||||
|  |                 Some(Err(e)) => return Some(Err(e)), | ||||||
|  |                 None => return None, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub struct FacetIter<'t, T: 't, KC> { | pub struct FacetIter<'t, T: 't, KC> { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: Database<KC, CboRoaringBitmapCodec>, |     db: Database<KC, CboRoaringBitmapCodec>, | ||||||
|     field_id: FieldId, |     field_id: FieldId, | ||||||
|     documents_ids: RoaringBitmap, |     level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>, | ||||||
|     level_iters: Vec<FacetRange<'t, T, KC>>, |  | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'t, T, KC> FacetIter<'t, T, KC> | impl<'t, T, KC> FacetIter<'t, T, KC> | ||||||
| where | where | ||||||
|  |     KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>, | ||||||
|     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, |     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||||
|     T: PartialOrd + Copy + Bounded, |     T: PartialOrd + Copy + Bounded, | ||||||
| { | { | ||||||
| @@ -97,8 +161,31 @@ where | |||||||
|     ) -> heed::Result<FacetIter<'t, T, KC>> |     ) -> heed::Result<FacetIter<'t, T, KC>> | ||||||
|     { |     { | ||||||
|         let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); |         let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); | ||||||
|         let level_0_iter = FacetRange::new(rtxn, db, field_id, 0, Unbounded, Unbounded)?; |         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||||
|         Ok(FacetIter { rtxn, db, field_id, documents_ids, level_iters: vec![level_0_iter] }) |         let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; | ||||||
|  |         Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Left(highest_iter))] }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn new_reverse( | ||||||
|  |         rtxn: &'t heed::RoTxn, | ||||||
|  |         index: &'t Index, | ||||||
|  |         field_id: FieldId, | ||||||
|  |         documents_ids: RoaringBitmap, | ||||||
|  |     ) -> heed::Result<FacetIter<'t, T, KC>> | ||||||
|  |     { | ||||||
|  |         let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); | ||||||
|  |         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||||
|  |         let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; | ||||||
|  |         Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Right(highest_iter))] }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> { | ||||||
|  |         let level = db.remap_types::<ByteSlice, DecodeIgnore>() | ||||||
|  |             .prefix_iter(rtxn, &[fid][..])? | ||||||
|  |             .remap_key_type::<KC>() | ||||||
|  |             .last().transpose()? | ||||||
|  |             .map(|((_, level, _, _), _)| level); | ||||||
|  |         Ok(level) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -106,35 +193,54 @@ impl<'t, T: 't, KC> Iterator for FacetIter<'t, T, KC> | |||||||
| where | where | ||||||
|     KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>, |     KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>, | ||||||
|     KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>, |     KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>, | ||||||
|     T: PartialOrd + Copy + Bounded, |     T: PartialOrd + Copy + Bounded + Debug, | ||||||
| { | { | ||||||
|     type Item = heed::Result<(T, RoaringBitmap)>; |     type Item = heed::Result<(T, RoaringBitmap)>; | ||||||
|  |  | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|         loop { |         'outer: loop { | ||||||
|             let last = self.level_iters.last_mut()?; |             let (documents_ids, last) = self.level_iters.last_mut()?; | ||||||
|  |             let is_ascending = last.is_left(); | ||||||
|             for result in last { |             for result in last { | ||||||
|  |                 // If the last iterator must find an empty set of documents it means | ||||||
|  |                 // that we found all the documents in the sub level iterations already, | ||||||
|  |                 // we can pop this level iterator. | ||||||
|  |                 if documents_ids.is_empty() { | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 match result { |                 match result { | ||||||
|                     Ok(((_fid, level, left, right), mut docids)) => { |                     Ok(((_fid, level, left, right), mut docids)) => { | ||||||
|                         if level == 0 { |  | ||||||
|                             docids.intersect_with(&self.documents_ids); |                         docids.intersect_with(&documents_ids); | ||||||
|                             if !docids.is_empty() { |                         if !docids.is_empty() { | ||||||
|                                 self.documents_ids.difference_with(&docids); |                             documents_ids.difference_with(&docids); | ||||||
|  |  | ||||||
|  |                             if level == 0 { | ||||||
|  |                                 debug!("found {:?} at {:?}",  docids, left); | ||||||
|                                 return Some(Ok((left, docids))); |                                 return Some(Ok((left, docids))); | ||||||
|                             } |                             } | ||||||
|                         } else if !docids.is_disjoint(&self.documents_ids) { |  | ||||||
|                             let result = FacetRange::new( |                             let rtxn = self.rtxn; | ||||||
|                                 self.rtxn, |                             let db = self.db; | ||||||
|                                 self.db, |                             let fid = self.field_id; | ||||||
|                                 self.field_id, |                             let left = Included(left); | ||||||
|                                 level - 1, |                             let right = Included(right); | ||||||
|                                 Included(left), |  | ||||||
|                                 Included(right), |                             debug!("calling with {:?} to {:?} (level {}) to find {:?}", | ||||||
|  |                                 left, right, level - 1, docids, | ||||||
|                             ); |                             ); | ||||||
|  |  | ||||||
|  |                             let result = if is_ascending { | ||||||
|  |                                 FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left) | ||||||
|  |                             } else { | ||||||
|  |                                 FacetRevRange::new(rtxn, db, fid, level - 1, left, right).map(Right) | ||||||
|  |                             }; | ||||||
|  |  | ||||||
|                             match result { |                             match result { | ||||||
|                                 Ok(iter) => { |                                 Ok(iter) => { | ||||||
|                                     self.level_iters.push(iter); |                                     self.level_iters.push((docids, iter)); | ||||||
|                                     break; |                                     continue 'outer; | ||||||
|                                 }, |                                 }, | ||||||
|                                 Err(e) => return Some(Err(e)), |                                 Err(e) => return Some(Err(e)), | ||||||
|                             } |                             } | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ use crate::query_tokens::{QueryTokens, QueryToken}; | |||||||
| use crate::{Index, FieldId, DocumentId, Criterion}; | use crate::{Index, FieldId, DocumentId, Criterion}; | ||||||
|  |  | ||||||
| pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator}; | pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator}; | ||||||
|  | pub use self::facet::{FacetIter}; | ||||||
|  |  | ||||||
| // Building these factories is not free. | // Building these factories is not free. | ||||||
| static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true)); | static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true)); | ||||||
| @@ -151,7 +152,7 @@ impl<'a> Search<'a> { | |||||||
|         &self, |         &self, | ||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|         facet_type: FacetType, |         facet_type: FacetType, | ||||||
|         order: Order, |         ascending: bool, | ||||||
|         documents_ids: RoaringBitmap, |         documents_ids: RoaringBitmap, | ||||||
|         limit: usize, |         limit: usize, | ||||||
|     ) -> anyhow::Result<Vec<DocumentId>> |     ) -> anyhow::Result<Vec<DocumentId>> | ||||||
| @@ -160,34 +161,30 @@ impl<'a> Search<'a> { | |||||||
|         let mut output = Vec::new(); |         let mut output = Vec::new(); | ||||||
|         match facet_type { |         match facet_type { | ||||||
|             FacetType::Float => { |             FacetType::Float => { | ||||||
|                 facet_number_recurse::<f64, FacetLevelValueF64Codec, _>( |                 let facet_fn = if ascending { | ||||||
|                     self.rtxn, |                     FacetIter::<f64, FacetLevelValueF64Codec>::new | ||||||
|                     self.index, |                 } else { | ||||||
|                     field_id, |                     FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse | ||||||
|                     order, |                 }; | ||||||
|                     documents_ids, |                 for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? { | ||||||
|                     |_val, docids| { |                     let (_val, docids) = result?; | ||||||
|                         limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); |                     limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); | ||||||
|                         debug!("Facet ordered iteration find {:?}", docids); |                     output.push(docids); | ||||||
|                         output.push(docids); |                     if limit_tmp == 0 { break } | ||||||
|                         limit_tmp != 0 // Returns `true` if we must continue iterating |                 } | ||||||
|                     } |  | ||||||
|                 )?; |  | ||||||
|             }, |             }, | ||||||
|             FacetType::Integer => { |             FacetType::Integer => { | ||||||
|                 facet_number_recurse::<i64, FacetLevelValueI64Codec, _>( |                 let facet_fn = if ascending { | ||||||
|                     self.rtxn, |                     FacetIter::<i64, FacetLevelValueI64Codec>::new | ||||||
|                     self.index, |                 } else { | ||||||
|                     field_id, |                     FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse | ||||||
|                     order, |                 }; | ||||||
|                     documents_ids, |                 for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? { | ||||||
|                     |_val, docids| { |                     let (_val, docids) = result?; | ||||||
|                         limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); |                     limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); | ||||||
|                         debug!("Facet ordered iteration find {:?}", docids); |                     output.push(docids); | ||||||
|                         output.push(docids); |                     if limit_tmp == 0 { break } | ||||||
|                         limit_tmp != 0 // Returns `true` if we must continue iterating |                 } | ||||||
|                     } |  | ||||||
|                 )?; |  | ||||||
|             }, |             }, | ||||||
|             FacetType::String => bail!("criteria facet type must be a number"), |             FacetType::String => bail!("criteria facet type must be a number"), | ||||||
|         } |         } | ||||||
| @@ -214,16 +211,16 @@ impl<'a> Search<'a> { | |||||||
|             let criteria = self.index.criteria(self.rtxn)?; |             let criteria = self.index.criteria(self.rtxn)?; | ||||||
|             let result = criteria.into_iter().flat_map(|criterion| { |             let result = criteria.into_iter().flat_map(|criterion| { | ||||||
|                 match criterion { |                 match criterion { | ||||||
|                     Criterion::Asc(fid) => Some((fid, Order::Asc)), |                     Criterion::Asc(fid) => Some((fid, true)), | ||||||
|                     Criterion::Desc(fid) => Some((fid, Order::Desc)), |                     Criterion::Desc(fid) => Some((fid, false)), | ||||||
|                     _ => None |                     _ => None | ||||||
|                 } |                 } | ||||||
|             }).next(); |             }).next(); | ||||||
|             match result { |             match result { | ||||||
|                 Some((fid, order)) => { |                 Some((fid, is_ascending)) => { | ||||||
|                     let faceted_fields = self.index.faceted_fields(self.rtxn)?; |                     let faceted_fields = self.index.faceted_fields(self.rtxn)?; | ||||||
|                     let ftype = *faceted_fields.get(&fid).context("unknown field id")?; |                     let ftype = *faceted_fields.get(&fid).context("unknown field id")?; | ||||||
|                     Some((fid, ftype, order)) |                     Some((fid, ftype, is_ascending)) | ||||||
|                 }, |                 }, | ||||||
|                 None => None, |                 None => None, | ||||||
|             } |             } | ||||||
| @@ -244,7 +241,9 @@ impl<'a> Search<'a> { | |||||||
|                 // If the query is not set or results in no DFAs but |                 // If the query is not set or results in no DFAs but | ||||||
|                 // there is some facet conditions we return a placeholder. |                 // there is some facet conditions we return a placeholder. | ||||||
|                 let documents_ids = match order_by_facet { |                 let documents_ids = match order_by_facet { | ||||||
|                     Some((fid, ftype, order)) => self.facet_ordered(fid, ftype, order, facet_candidates, limit)?, |                     Some((fid, ftype, is_ascending)) => { | ||||||
|  |                         self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)? | ||||||
|  |                     }, | ||||||
|                     None => facet_candidates.iter().take(limit).collect(), |                     None => facet_candidates.iter().take(limit).collect(), | ||||||
|                 }; |                 }; | ||||||
|                 return Ok(SearchResult { documents_ids, ..Default::default() }) |                 return Ok(SearchResult { documents_ids, ..Default::default() }) | ||||||
| @@ -253,7 +252,9 @@ impl<'a> Search<'a> { | |||||||
|                 // If the query is not set or results in no DFAs we return a placeholder. |                 // If the query is not set or results in no DFAs we return a placeholder. | ||||||
|                 let documents_ids = self.index.documents_ids(self.rtxn)?; |                 let documents_ids = self.index.documents_ids(self.rtxn)?; | ||||||
|                 let documents_ids = match order_by_facet { |                 let documents_ids = match order_by_facet { | ||||||
|                     Some((fid, ftype, order)) => self.facet_ordered(fid, ftype, order, documents_ids, limit)?, |                     Some((fid, ftype, is_ascending)) => { | ||||||
|  |                         self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)? | ||||||
|  |                     }, | ||||||
|                     None => documents_ids.iter().take(limit).collect(), |                     None => documents_ids.iter().take(limit).collect(), | ||||||
|                 }; |                 }; | ||||||
|                 return Ok(SearchResult { documents_ids, ..Default::default() }) |                 return Ok(SearchResult { documents_ids, ..Default::default() }) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user