mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge #4682
4682: Speed Up Filter ANDs operations r=Kerollmops a=Kerollmops This PR fixes #4659 and improves the way we do AND operations by using the latest [RoaringBitmap feature to do intersections with serialized bitmaps](https://github.com/RoaringBitmap/roaring-rs/pull/281). Doing so drastically reduces the time spent reading and copying bytes in memory just to use and keep a subset of the containers in the bitmap. ### Some Example Results With a 45M-document dataset running on a good NVMe, this example filter was taking 77ms and takes only 13ms with this PR (6x speedup): ```sql artist = 'The Beatles' AND (duration 150 TO 500 OR duration NOT EXISTS) AND genres IN [Rock, 'Rock and Roll'] AND rating > 4 AND released_year 1960 TO 1990 ``` By reordering the filter's AND clauses we can reach a constant 8ms execution time. However, note that this reordering is a manual operation. On the other hand, the previous filter pipeline stays at a constant 45ms execution time with this filter (6x speedup). ```sql artist = 'The Beatles' AND genres IN [Rock, 'Rock and Roll'] AND released_year 1960 TO 1990 AND (duration 150 TO 500 OR duration NOT EXISTS) ``` ### To Do - [x] Rebase on `release-v1.9.0`. - [ ] ~Skip branches of the facet/filter tree when nothing is in common with the universe~ slower this way. - [x] When the universe is required, use the universe given as a parameter if possible. Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
		
							
								
								
									
										11
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										11
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -4377,12 +4377,6 @@ dependencies = [ | |||||||
|  "winreg", |  "winreg", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] |  | ||||||
| name = "retain_mut" |  | ||||||
| version = "0.1.7" |  | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" |  | ||||||
| checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" |  | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "ring" | name = "ring" | ||||||
| version = "0.17.8" | version = "0.17.8" | ||||||
| @@ -4400,13 +4394,12 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "roaring" | name = "roaring" | ||||||
| version = "0.10.2" | version = "0.10.5" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" | checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bytemuck", |  "bytemuck", | ||||||
|  "byteorder", |  "byteorder", | ||||||
|  "retain_mut", |  | ||||||
|  "serde", |  "serde", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|   | |||||||
| @@ -47,6 +47,12 @@ pub struct FacetGroupValue { | |||||||
|     pub bitmap: RoaringBitmap, |     pub bitmap: RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub struct FacetGroupLazyValue<'b> { | ||||||
|  |     pub size: u8, | ||||||
|  |     pub bitmap_bytes: &'b [u8], | ||||||
|  | } | ||||||
|  |  | ||||||
| pub struct FacetGroupKeyCodec<T> { | pub struct FacetGroupKeyCodec<T> { | ||||||
|     _phantom: PhantomData<T>, |     _phantom: PhantomData<T>, | ||||||
| } | } | ||||||
| @@ -69,6 +75,7 @@ where | |||||||
|         Ok(Cow::Owned(v)) |         Ok(Cow::Owned(v)) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T> | impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T> | ||||||
| where | where | ||||||
|     T: BytesDecode<'a>, |     T: BytesDecode<'a>, | ||||||
| @@ -84,6 +91,7 @@ where | |||||||
| } | } | ||||||
|  |  | ||||||
| pub struct FacetGroupValueCodec; | pub struct FacetGroupValueCodec; | ||||||
|  |  | ||||||
| impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | ||||||
|     type EItem = FacetGroupValue; |     type EItem = FacetGroupValue; | ||||||
|  |  | ||||||
| @@ -93,11 +101,23 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | |||||||
|         Ok(Cow::Owned(v)) |         Ok(Cow::Owned(v)) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | ||||||
|     type DItem = FacetGroupValue; |     type DItem = FacetGroupValue; | ||||||
|  |  | ||||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { |     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||||
|         let size = bytes[0]; |         let size = bytes[0]; | ||||||
|         let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?; |         let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?; | ||||||
|         Ok(FacetGroupValue { size, bitmap }) |         Ok(FacetGroupValue { size, bitmap }) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub struct FacetGroupLazyValueCodec; | ||||||
|  |  | ||||||
|  | impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec { | ||||||
|  |     type DItem = FacetGroupLazyValue<'a>; | ||||||
|  |  | ||||||
|  |     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||||
|  |         Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
| use std::io; | use std::io::{self, Cursor}; | ||||||
| use std::mem::size_of; | use std::mem::size_of; | ||||||
|  |  | ||||||
| use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; | use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; | ||||||
| @@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn intersection_with_serialized( | ||||||
|  |         mut bytes: &[u8], | ||||||
|  |         other: &RoaringBitmap, | ||||||
|  |     ) -> io::Result<RoaringBitmap> { | ||||||
|  |         // See above `deserialize_from` method for implementation details. | ||||||
|  |         if bytes.len() <= THRESHOLD * size_of::<u32>() { | ||||||
|  |             let mut bitmap = RoaringBitmap::new(); | ||||||
|  |             while let Ok(integer) = bytes.read_u32::<NativeEndian>() { | ||||||
|  |                 if other.contains(integer) { | ||||||
|  |                     bitmap.insert(integer); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             Ok(bitmap) | ||||||
|  |         } else { | ||||||
|  |             other.intersection_with_serialized_unchecked(Cursor::new(bytes)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Merge serialized CboRoaringBitmaps in a buffer. |     /// Merge serialized CboRoaringBitmaps in a buffer. | ||||||
|     /// |     /// | ||||||
|     /// if the merged values length is under the threshold, values are directly |     /// if the merged values length is under the threshold, values are directly | ||||||
|   | |||||||
| @@ -38,7 +38,7 @@ where | |||||||
|         field_id, |         field_id, | ||||||
|     )?; |     )?; | ||||||
|  |  | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; |         fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } else { |     } else { | ||||||
| @@ -81,7 +81,7 @@ where | |||||||
|         field_id, |         field_id, | ||||||
|     )?; |     )?; | ||||||
|  |  | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         // We first fill the heap with values from the highest level |         // We first fill the heap with values from the highest level | ||||||
|         let starting_key = |         let starting_key = | ||||||
|             FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |             FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
|   | |||||||
| @@ -4,9 +4,11 @@ use heed::BytesEncode; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; | use crate::heed_codec::facet::{ | ||||||
|  |     FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec, | ||||||
|  | }; | ||||||
| use crate::heed_codec::BytesRefCodec; | use crate::heed_codec::BytesRefCodec; | ||||||
| use crate::Result; | use crate::{CboRoaringBitmapCodec, Result}; | ||||||
|  |  | ||||||
| /// Find all the document ids for which the given field contains a value contained within | /// Find all the document ids for which the given field contains a value contained within | ||||||
| /// the two bounds. | /// the two bounds. | ||||||
| @@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( | |||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, |     left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||||
|     right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, |     right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||||
|  |     universe: Option<&RoaringBitmap>, | ||||||
|     docids: &mut RoaringBitmap, |     docids: &mut RoaringBitmap, | ||||||
| ) -> Result<()> | ) -> Result<()> | ||||||
| where | where | ||||||
| @@ -46,13 +49,15 @@ where | |||||||
|         } |         } | ||||||
|         Bound::Unbounded => Bound::Unbounded, |         Bound::Unbounded => Bound::Unbounded, | ||||||
|     }; |     }; | ||||||
|     let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(); |     let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>(); | ||||||
|     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; |     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids }; | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|  |  | ||||||
|     if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(starting_left_bound) = | ||||||
|  |         get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? | ||||||
|  |     { | ||||||
|         let rightmost_bound = |         let rightmost_bound = | ||||||
|             Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded |             Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded | ||||||
|         let group_size = usize::MAX; |         let group_size = usize::MAX; | ||||||
|         f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; |         f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -64,12 +69,16 @@ where | |||||||
| /// Fetch the document ids that have a facet with a value between the two given bounds | /// Fetch the document ids that have a facet with a value between the two given bounds | ||||||
| struct FacetRangeSearch<'t, 'b, 'bitmap> { | struct FacetRangeSearch<'t, 'b, 'bitmap> { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     left: Bound<&'b [u8]>, |     left: Bound<&'b [u8]>, | ||||||
|     right: Bound<&'b [u8]>, |     right: Bound<&'b [u8]>, | ||||||
|  |     /// The subset of documents ids that are useful for this search. | ||||||
|  |     /// Great performance optimizations can be achieved by only fetching values matching this subset. | ||||||
|  |     universe: Option<&'bitmap RoaringBitmap>, | ||||||
|     docids: &'bitmap mut RoaringBitmap, |     docids: &'bitmap mut RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | ||||||
|     fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { |     fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { | ||||||
|         let left_key = |         let left_key = | ||||||
| @@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | |||||||
|             } |             } | ||||||
|  |  | ||||||
|             if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) { |             if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) { | ||||||
|                 *self.docids |= value.bitmap; |                 *self.docids |= match self.universe { | ||||||
|  |                     Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                         value.bitmap_bytes, | ||||||
|  |                         universe, | ||||||
|  |                     )?, | ||||||
|  |                     None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?, | ||||||
|  |                 }; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | |||||||
|                 left_condition && right_condition |                 left_condition && right_condition | ||||||
|             }; |             }; | ||||||
|             if should_take_whole_group { |             if should_take_whole_group { | ||||||
|                 *self.docids |= &previous_value.bitmap; |                 *self.docids |= match self.universe { | ||||||
|  |                     Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                         previous_value.bitmap_bytes, | ||||||
|  |                         universe, | ||||||
|  |                     )?, | ||||||
|  |                     None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?, | ||||||
|  |                 }; | ||||||
|                 previous_key = next_key; |                 previous_key = next_key; | ||||||
|                 previous_value = next_value; |                 previous_value = next_value; | ||||||
|                 continue; |                 continue; | ||||||
| @@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | |||||||
|             left_condition && right_condition |             left_condition && right_condition | ||||||
|         }; |         }; | ||||||
|         if should_take_whole_group { |         if should_take_whole_group { | ||||||
|             *self.docids |= &previous_value.bitmap; |             *self.docids |= match self.universe { | ||||||
|  |                 Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                     previous_value.bitmap_bytes, | ||||||
|  |                     universe, | ||||||
|  |                 )?, | ||||||
|  |                 None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?, | ||||||
|  |             }; | ||||||
|         } else { |         } else { | ||||||
|             let level = level - 1; |             let level = level - 1; | ||||||
|             let starting_left_bound = previous_key.left_bound; |             let starting_left_bound = previous_key.left_bound; | ||||||
| @@ -365,6 +392,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -384,6 +412,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -418,6 +447,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -439,6 +469,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -474,6 +505,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -499,6 +531,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -537,6 +570,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -556,6 +590,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -571,6 +606,7 @@ mod tests { | |||||||
|                 0, |                 0, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|  |                 None, | ||||||
|                 &mut docids, |                 &mut docids, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
| @@ -586,6 +622,7 @@ mod tests { | |||||||
|                 1, |                 1, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|  |                 None, | ||||||
|                 &mut docids, |                 &mut docids, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
| @@ -621,6 +658,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -634,6 +672,7 @@ mod tests { | |||||||
|                     1, |                     1, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
|   | |||||||
| @@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>( | |||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
| ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
|         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); |         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>( | |||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
| ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
|         let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap(); |         let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap(); | ||||||
|         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; |         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; | ||||||
|         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); |         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); | ||||||
|         Ok(itertools::Either::Left(DescendingFacetSort { |         Ok(itertools::Either::Left(DescendingFacetSort { | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included}; | |||||||
|  |  | ||||||
| use either::Either; | use either::Either; | ||||||
| pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; | pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; | ||||||
| use roaring::RoaringBitmap; | use roaring::{MultiOps, RoaringBitmap}; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
| use super::facet_range_search; | use super::facet_range_search; | ||||||
| @@ -224,14 +224,14 @@ impl<'a> Filter<'a> { | |||||||
|     pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> { |     pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> { | ||||||
|         // to avoid doing this for each recursive call we're going to do it ONCE ahead of time |         // to avoid doing this for each recursive call we're going to do it ONCE ahead of time | ||||||
|         let filterable_fields = index.filterable_fields(rtxn)?; |         let filterable_fields = index.filterable_fields(rtxn)?; | ||||||
|  |         self.inner_evaluate(rtxn, index, &filterable_fields, None) | ||||||
|         self.inner_evaluate(rtxn, index, &filterable_fields) |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn evaluate_operator( |     fn evaluate_operator( | ||||||
|         rtxn: &heed::RoTxn, |         rtxn: &heed::RoTxn, | ||||||
|         index: &Index, |         index: &Index, | ||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|  |         universe: Option<&RoaringBitmap>, | ||||||
|         operator: &Condition<'a>, |         operator: &Condition<'a>, | ||||||
|     ) -> Result<RoaringBitmap> { |     ) -> Result<RoaringBitmap> { | ||||||
|         let numbers_db = index.facet_id_f64_docids; |         let numbers_db = index.facet_id_f64_docids; | ||||||
| @@ -291,14 +291,22 @@ impl<'a> Filter<'a> { | |||||||
|             } |             } | ||||||
|             Condition::NotEqual(val) => { |             Condition::NotEqual(val) => { | ||||||
|                 let operator = Condition::Equal(val.clone()); |                 let operator = Condition::Equal(val.clone()); | ||||||
|                 let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?; |                 let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?; | ||||||
|                 let all_ids = index.documents_ids(rtxn)?; |                 let all_ids = index.documents_ids(rtxn)?; | ||||||
|                 return Ok(all_ids - docids); |                 return Ok(all_ids - docids); | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let mut output = RoaringBitmap::new(); |         let mut output = RoaringBitmap::new(); | ||||||
|         Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?; |         Self::explore_facet_number_levels( | ||||||
|  |             rtxn, | ||||||
|  |             numbers_db, | ||||||
|  |             field_id, | ||||||
|  |             left, | ||||||
|  |             right, | ||||||
|  |             universe, | ||||||
|  |             &mut output, | ||||||
|  |         )?; | ||||||
|         Ok(output) |         Ok(output) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -310,6 +318,7 @@ impl<'a> Filter<'a> { | |||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|         left: Bound<f64>, |         left: Bound<f64>, | ||||||
|         right: Bound<f64>, |         right: Bound<f64>, | ||||||
|  |         universe: Option<&RoaringBitmap>, | ||||||
|         output: &mut RoaringBitmap, |         output: &mut RoaringBitmap, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         match (left, right) { |         match (left, right) { | ||||||
| @@ -321,7 +330,7 @@ impl<'a> Filter<'a> { | |||||||
|             (_, _) => (), |             (_, _) => (), | ||||||
|         } |         } | ||||||
|         facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>( |         facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||||
|             rtxn, db, field_id, &left, &right, output, |             rtxn, db, field_id, &left, &right, universe, output, | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -332,31 +341,37 @@ impl<'a> Filter<'a> { | |||||||
|         rtxn: &heed::RoTxn, |         rtxn: &heed::RoTxn, | ||||||
|         index: &Index, |         index: &Index, | ||||||
|         filterable_fields: &HashSet<String>, |         filterable_fields: &HashSet<String>, | ||||||
|  |         universe: Option<&RoaringBitmap>, | ||||||
|     ) -> Result<RoaringBitmap> { |     ) -> Result<RoaringBitmap> { | ||||||
|  |         if universe.map_or(false, |u| u.is_empty()) { | ||||||
|  |             return Ok(RoaringBitmap::new()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         match &self.condition { |         match &self.condition { | ||||||
|             FilterCondition::Not(f) => { |             FilterCondition::Not(f) => { | ||||||
|                 let all_ids = index.documents_ids(rtxn)?; |  | ||||||
|                 let selected = Self::inner_evaluate( |                 let selected = Self::inner_evaluate( | ||||||
|                     &(f.as_ref().clone()).into(), |                     &(f.as_ref().clone()).into(), | ||||||
|                     rtxn, |                     rtxn, | ||||||
|                     index, |                     index, | ||||||
|                     filterable_fields, |                     filterable_fields, | ||||||
|  |                     universe, | ||||||
|                 )?; |                 )?; | ||||||
|                 Ok(all_ids - selected) |                 match universe { | ||||||
|  |                     Some(universe) => Ok(universe - selected), | ||||||
|  |                     None => { | ||||||
|  |                         let all_ids = index.documents_ids(rtxn)?; | ||||||
|  |                         Ok(all_ids - selected) | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|             } |             } | ||||||
|             FilterCondition::In { fid, els } => { |             FilterCondition::In { fid, els } => { | ||||||
|                 if crate::is_faceted(fid.value(), filterable_fields) { |                 if crate::is_faceted(fid.value(), filterable_fields) { | ||||||
|                     let field_ids_map = index.fields_ids_map(rtxn)?; |                     let field_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|  |  | ||||||
|                     if let Some(fid) = field_ids_map.id(fid.value()) { |                     if let Some(fid) = field_ids_map.id(fid.value()) { | ||||||
|                         let mut bitmap = RoaringBitmap::new(); |                         els.iter() | ||||||
|  |                             .map(|el| Condition::Equal(el.clone())) | ||||||
|                         for el in els { |                             .map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op)) | ||||||
|                             let op = Condition::Equal(el.clone()); |                             .union() | ||||||
|                             let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?; |  | ||||||
|                             bitmap |= el_bitmap; |  | ||||||
|                         } |  | ||||||
|                         Ok(bitmap) |  | ||||||
|                     } else { |                     } else { | ||||||
|                         Ok(RoaringBitmap::new()) |                         Ok(RoaringBitmap::new()) | ||||||
|                     } |                     } | ||||||
| @@ -371,7 +386,7 @@ impl<'a> Filter<'a> { | |||||||
|                 if crate::is_faceted(fid.value(), filterable_fields) { |                 if crate::is_faceted(fid.value(), filterable_fields) { | ||||||
|                     let field_ids_map = index.fields_ids_map(rtxn)?; |                     let field_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|                     if let Some(fid) = field_ids_map.id(fid.value()) { |                     if let Some(fid) = field_ids_map.id(fid.value()) { | ||||||
|                         Self::evaluate_operator(rtxn, index, fid, op) |                         Self::evaluate_operator(rtxn, index, fid, universe, op) | ||||||
|                     } else { |                     } else { | ||||||
|                         Ok(RoaringBitmap::new()) |                         Ok(RoaringBitmap::new()) | ||||||
|                     } |                     } | ||||||
| @@ -382,14 +397,11 @@ impl<'a> Filter<'a> { | |||||||
|                     }))? |                     }))? | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             FilterCondition::Or(subfilters) => { |             FilterCondition::Or(subfilters) => subfilters | ||||||
|                 let mut bitmap = RoaringBitmap::new(); |                 .iter() | ||||||
|                 for f in subfilters { |                 .cloned() | ||||||
|                     bitmap |= |                 .map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe)) | ||||||
|                         Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; |                 .union(), | ||||||
|                 } |  | ||||||
|                 Ok(bitmap) |  | ||||||
|             } |  | ||||||
|             FilterCondition::And(subfilters) => { |             FilterCondition::And(subfilters) => { | ||||||
|                 let mut subfilters_iter = subfilters.iter(); |                 let mut subfilters_iter = subfilters.iter(); | ||||||
|                 if let Some(first_subfilter) = subfilters_iter.next() { |                 if let Some(first_subfilter) = subfilters_iter.next() { | ||||||
| @@ -398,16 +410,21 @@ impl<'a> Filter<'a> { | |||||||
|                         rtxn, |                         rtxn, | ||||||
|                         index, |                         index, | ||||||
|                         filterable_fields, |                         filterable_fields, | ||||||
|  |                         universe, | ||||||
|                     )?; |                     )?; | ||||||
|                     for f in subfilters_iter { |                     for f in subfilters_iter { | ||||||
|                         if bitmap.is_empty() { |                         if bitmap.is_empty() { | ||||||
|                             return Ok(bitmap); |                             return Ok(bitmap); | ||||||
|                         } |                         } | ||||||
|  |                         // TODO We are doing the intersections two times, | ||||||
|  |                         //      it could be more efficient | ||||||
|  |                         //      Can't I just replace this `&=` by an `=`? | ||||||
|                         bitmap &= Self::inner_evaluate( |                         bitmap &= Self::inner_evaluate( | ||||||
|                             &(f.clone()).into(), |                             &(f.clone()).into(), | ||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             Some(&bitmap), | ||||||
|                         )?; |                         )?; | ||||||
|                     } |                     } | ||||||
|                     Ok(bitmap) |                     Ok(bitmap) | ||||||
| @@ -507,6 +524,7 @@ impl<'a> Filter<'a> { | |||||||
|                         rtxn, |                         rtxn, | ||||||
|                         index, |                         index, | ||||||
|                         filterable_fields, |                         filterable_fields, | ||||||
|  |                         universe, | ||||||
|                     )?; |                     )?; | ||||||
|  |  | ||||||
|                     let geo_lng_token = Token::new( |                     let geo_lng_token = Token::new( | ||||||
| @@ -539,6 +557,7 @@ impl<'a> Filter<'a> { | |||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             universe, | ||||||
|                         )?; |                         )?; | ||||||
|  |  | ||||||
|                         let condition_right = FilterCondition::Condition { |                         let condition_right = FilterCondition::Condition { | ||||||
| @@ -552,6 +571,7 @@ impl<'a> Filter<'a> { | |||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             universe, | ||||||
|                         )?; |                         )?; | ||||||
|  |  | ||||||
|                         left | right |                         left | right | ||||||
| @@ -567,6 +587,7 @@ impl<'a> Filter<'a> { | |||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             universe, | ||||||
|                         )? |                         )? | ||||||
|                     }; |                     }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ use roaring::RoaringBitmap; | |||||||
| pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; | pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; | ||||||
| pub use self::filter::{BadGeoError, Filter}; | pub use self::filter::{BadGeoError, Filter}; | ||||||
| pub use self::search::{FacetValueHit, SearchForFacetValues}; | pub use self::search::{FacetValueHit, SearchForFacetValues}; | ||||||
| use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; | use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; | ||||||
| use crate::heed_codec::BytesRefCodec; | use crate::heed_codec::BytesRefCodec; | ||||||
| use crate::{Index, Result}; | use crate::{Index, Result}; | ||||||
|  |  | ||||||
| @@ -54,9 +54,9 @@ pub fn facet_max_value<'t>( | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Get the first facet value in the facet database | /// Get the first facet value in the facet database | ||||||
| pub(crate) fn get_first_facet_value<'t, BoundCodec>( | pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>( | ||||||
|     txn: &'t RoTxn, |     txn: &'t RoTxn, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ) -> heed::Result<Option<BoundCodec::DItem>> | ||||||
| where | where | ||||||
| @@ -78,9 +78,9 @@ where | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Get the last facet value in the facet database | /// Get the last facet value in the facet database | ||||||
| pub(crate) fn get_last_facet_value<'t, BoundCodec>( | pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>( | ||||||
|     txn: &'t RoTxn, |     txn: &'t RoTxn, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ) -> heed::Result<Option<BoundCodec::DItem>> | ||||||
| where | where | ||||||
| @@ -102,9 +102,9 @@ where | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Get the height of the highest level in the facet database | /// Get the height of the highest level in the facet database | ||||||
| pub(crate) fn get_highest_level<'t>( | pub(crate) fn get_highest_level<'t, DC>( | ||||||
|     txn: &'t RoTxn<'t>, |     txn: &'t RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<u8> { | ) -> heed::Result<u8> { | ||||||
|     let field_id_prefix = &field_id.to_be_bytes(); |     let field_id_prefix = &field_id.to_be_bytes(); | ||||||
|   | |||||||
| @@ -548,6 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[tracing::instrument(level = "trace", skip_all, target = "search")] | ||||||
| pub fn filtered_universe( | pub fn filtered_universe( | ||||||
|     index: &Index, |     index: &Index, | ||||||
|     txn: &RoTxn<'_>, |     txn: &RoTxn<'_>, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user