mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Make the facet operator evaluation code generic
This commit is contained in:
		
							
								
								
									
										326
									
								
								src/search.rs
									
									
									
									
									
								
							
							
						
						
									
										326
									
								
								src/search.rs
									
									
									
									
									
								
							| @@ -7,7 +7,7 @@ use std::str::FromStr; | |||||||
|  |  | ||||||
| use anyhow::{bail, ensure, Context}; | use anyhow::{bail, ensure, Context}; | ||||||
| use fst::{IntoStreamer, Streamer}; | use fst::{IntoStreamer, Streamer}; | ||||||
| use heed::types::DecodeIgnore; | use heed::types::{ByteSlice, DecodeIgnore}; | ||||||
| use levenshtein_automata::DFA; | use levenshtein_automata::DFA; | ||||||
| use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; | use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; | ||||||
| use log::debug; | use log::debug; | ||||||
| @@ -17,6 +17,7 @@ use roaring::bitmap::RoaringBitmap; | |||||||
|  |  | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec}; | use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec}; | ||||||
|  | use crate::heed_codec::CboRoaringBitmapCodec; | ||||||
| use crate::mdfs::Mdfs; | use crate::mdfs::Mdfs; | ||||||
| use crate::query_tokens::{QueryTokens, QueryToken}; | use crate::query_tokens::{QueryTokens, QueryToken}; | ||||||
| use crate::{Index, DocumentId}; | use crate::{Index, DocumentId}; | ||||||
| @@ -80,6 +81,7 @@ impl FacetCondition { | |||||||
|     where T::Err: Send + Sync + StdError + 'static, |     where T::Err: Send + Sync + StdError + 'static, | ||||||
|     { |     { | ||||||
|         use FacetOperator::*; |         use FacetOperator::*; | ||||||
|  |  | ||||||
|         match iter.next() { |         match iter.next() { | ||||||
|             Some(">") => { |             Some(">") => { | ||||||
|                 let param = iter.next().context("missing parameter")?; |                 let param = iter.next().context("missing parameter")?; | ||||||
| @@ -117,6 +119,161 @@ impl FacetCondition { | |||||||
|             None => bail!("missing facet filter first parameter"), |             None => bail!("missing facet filter first parameter"), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Aggregates the documents ids that are part of the specified range automatically | ||||||
|  |     /// going deeper through the levels. | ||||||
|  |     fn explore_facet_levels<'t, T: 't, KC>( | ||||||
|  |         rtxn: &'t heed::RoTxn, | ||||||
|  |         db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, | ||||||
|  |         field_id: u8, | ||||||
|  |         level: u8, | ||||||
|  |         left: Bound<T>, | ||||||
|  |         right: Bound<T>, | ||||||
|  |         output: &mut RoaringBitmap, | ||||||
|  |     ) -> anyhow::Result<()> | ||||||
|  |     where | ||||||
|  |         T: Copy + PartialEq + PartialOrd + Bounded + Debug, | ||||||
|  |         KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>, | ||||||
|  |         KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, | ||||||
|  |     { | ||||||
|  |         match (left, right) { | ||||||
|  |             // If the request is an exact value we must go directly to the deepest level. | ||||||
|  |             (Included(l), Included(r)) if l == r && level > 0 => { | ||||||
|  |                 return Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, 0, left, right, output); | ||||||
|  |             }, | ||||||
|  |             // lower TO upper when lower > upper must return no result | ||||||
|  |             (Included(l), Included(r)) if l > r => return Ok(()), | ||||||
|  |             (Included(l), Excluded(r)) if l >= r => return Ok(()), | ||||||
|  |             (Excluded(l), Excluded(r)) if l >= r => return Ok(()), | ||||||
|  |             (Excluded(l), Included(r)) if l >= r => return Ok(()), | ||||||
|  |             (_, _) => (), | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let mut left_found = None; | ||||||
|  |         let mut right_found = None; | ||||||
|  |  | ||||||
|  |         // We must create a custom iterator to be able to iterate over the | ||||||
|  |         // requested range as the range iterator cannot express some conditions. | ||||||
|  |         let left_bound = match left { | ||||||
|  |             Included(left) => Included((field_id, level, left, T::min_value())), | ||||||
|  |             Excluded(left) => Excluded((field_id, level, left, T::min_value())), | ||||||
|  |             Unbounded => Unbounded, | ||||||
|  |         }; | ||||||
|  |         let right_bound = Included((field_id, level, T::max_value(), T::max_value())); | ||||||
|  |         // We also make sure that we don't decode the data before we are sure we must return it. | ||||||
|  |         let iter = db | ||||||
|  |             .remap_key_type::<KC>() | ||||||
|  |             .lazily_decode_data() | ||||||
|  |             .range(rtxn, &(left_bound, right_bound))? | ||||||
|  |             .take_while(|r| r.as_ref().map_or(true, |((.., r), _)| { | ||||||
|  |                 match right { | ||||||
|  |                     Included(right) => *r <= right, | ||||||
|  |                     Excluded(right) => *r < right, | ||||||
|  |                     Unbounded => true, | ||||||
|  |                 } | ||||||
|  |             })) | ||||||
|  |             .map(|r| r.and_then(|(key, lazy)| lazy.decode().map(|data| (key, data)))); | ||||||
|  |  | ||||||
|  |         debug!("Iterating between {:?} and {:?} (level {})", left, right, level); | ||||||
|  |  | ||||||
|  |         for (i, result) in iter.enumerate() { | ||||||
|  |             let ((_fid, level, l, r), docids) = result?; | ||||||
|  |             debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len()); | ||||||
|  |             output.union_with(&docids); | ||||||
|  |             // We save the leftmost and rightmost bounds we actually found at this level. | ||||||
|  |             if i == 0 { left_found = Some(l); } | ||||||
|  |             right_found = Some(r); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Can we go deeper? | ||||||
|  |         let deeper_level = match level.checked_sub(1) { | ||||||
|  |             Some(level) => level, | ||||||
|  |             None => return Ok(()), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // We must refine the left and right bounds of this range by retrieving the | ||||||
|  |         // missing part in a deeper level. | ||||||
|  |         match left_found.zip(right_found) { | ||||||
|  |             Some((left_found, right_found)) => { | ||||||
|  |                 // If the bound is satisfied we avoid calling this function again. | ||||||
|  |                 if !matches!(left, Included(l) if l == left_found) { | ||||||
|  |                     let sub_right = Excluded(left_found); | ||||||
|  |                     debug!("calling left with {:?} to {:?} (level {})",  left, sub_right, deeper_level); | ||||||
|  |                     Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, sub_right, output)?; | ||||||
|  |                 } | ||||||
|  |                 if !matches!(right, Included(r) if r == right_found) { | ||||||
|  |                     let sub_left = Excluded(right_found); | ||||||
|  |                     debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level); | ||||||
|  |                     Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, sub_left, right, output)?; | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             None => { | ||||||
|  |                 // If we found nothing at this level it means that we must find | ||||||
|  |                 // the same bounds but at a deeper, more precise level. | ||||||
|  |                 Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, right, output)?; | ||||||
|  |             }, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn evaluate_operator<'t, T: 't, KC>( | ||||||
|  |         rtxn: &'t heed::RoTxn, | ||||||
|  |         db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, | ||||||
|  |         field_id: u8, | ||||||
|  |         operator: FacetOperator<T>, | ||||||
|  |     ) -> anyhow::Result<RoaringBitmap> | ||||||
|  |     where | ||||||
|  |         T: Copy + PartialEq + PartialOrd + Bounded + Debug, | ||||||
|  |         KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>, | ||||||
|  |         KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, | ||||||
|  |     { | ||||||
|  |         use FacetOperator::*; | ||||||
|  |  | ||||||
|  |         // Make sure we always bound the ranges with the field id and the level, | ||||||
|  |         // as the facets values are all in the same database and prefixed by the | ||||||
|  |         // field id and the level. | ||||||
|  |         let (left, right) = match operator { | ||||||
|  |             GreaterThan(val)        => (Excluded(val),            Included(T::max_value())), | ||||||
|  |             GreaterThanOrEqual(val) => (Included(val),            Included(T::max_value())), | ||||||
|  |             LowerThan(val)          => (Included(T::min_value()), Excluded(val)), | ||||||
|  |             LowerThanOrEqual(val)   => (Included(T::min_value()), Included(val)), | ||||||
|  |             Equal(val)              => (Included(val),            Included(val)), | ||||||
|  |             Between(left, right)    => (Included(left),           Included(right)), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // Ask for the biggest value that can exist for this specific field: if it exists | ||||||
|  |         // that's fine; if it doesn't, the value just before will be returned instead. | ||||||
|  |         let biggest_level = db | ||||||
|  |             .remap_types::<KC, DecodeIgnore>() | ||||||
|  |             .get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, T::max_value(), T::max_value()))? | ||||||
|  |             .and_then(|((id, level, _, _), _)| if id == field_id { Some(level) } else { None }); | ||||||
|  |  | ||||||
|  |         match biggest_level { | ||||||
|  |             Some(level) => { | ||||||
|  |                 let mut output = RoaringBitmap::new(); | ||||||
|  |                 Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, level, left, right, &mut output)?; | ||||||
|  |                 Ok(output) | ||||||
|  |             }, | ||||||
|  |             None => Ok(RoaringBitmap::new()), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn evaluate( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &heed::RoTxn, | ||||||
|  |         db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, | ||||||
|  |     ) -> anyhow::Result<RoaringBitmap> | ||||||
|  |     { | ||||||
|  |         match *self { | ||||||
|  |             FacetCondition::OperatorI64(fid, operator) => { | ||||||
|  |                 Self::evaluate_operator::<i64, FacetLevelValueI64Codec>(rtxn, db, fid, operator) | ||||||
|  |             }, | ||||||
|  |             FacetCondition::OperatorF64(fid, operator) => { | ||||||
|  |                 Self::evaluate_operator::<f64, FacetLevelValueF64Codec>(rtxn, db, fid, operator) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct Search<'a> { | pub struct Search<'a> { | ||||||
| @@ -241,103 +398,6 @@ impl<'a> Search<'a> { | |||||||
|         candidates |         candidates | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Aggregates the documents ids that are part of the specified range automatically |  | ||||||
|     /// going deeper through the levels. |  | ||||||
|     fn explore_facet_levels<T: 'a, KC>( |  | ||||||
|         &self, |  | ||||||
|         field_id: u8, |  | ||||||
|         level: u8, |  | ||||||
|         left: Bound<T>, |  | ||||||
|         right: Bound<T>, |  | ||||||
|         output: &mut RoaringBitmap, |  | ||||||
|     ) -> anyhow::Result<()> |  | ||||||
|     where |  | ||||||
|         T: Copy + PartialEq + PartialOrd + Bounded + Debug, |  | ||||||
|         KC: heed::BytesDecode<'a, DItem = (u8, u8, T, T)>, |  | ||||||
|         KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, |  | ||||||
|     { |  | ||||||
|         match (left, right) { |  | ||||||
|             // If the request is an exact value we must go directly to the deepest level. |  | ||||||
|             (Included(l), Included(r)) if l == r && level > 0 => { |  | ||||||
|                 return self.explore_facet_levels::<T, KC>(field_id, 0, left, right, output); |  | ||||||
|             }, |  | ||||||
|             // lower TO upper when lower > upper must return no result |  | ||||||
|             (Included(l), Included(r)) if l > r => return Ok(()), |  | ||||||
|             (Included(l), Excluded(r)) if l >= r => return Ok(()), |  | ||||||
|             (Excluded(l), Excluded(r)) if l >= r => return Ok(()), |  | ||||||
|             (Excluded(l), Included(r)) if l >= r => return Ok(()), |  | ||||||
|             (_, _) => (), |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         let mut left_found = None; |  | ||||||
|         let mut right_found = None; |  | ||||||
|  |  | ||||||
|         // We must create a custom iterator to be able to iterate over the |  | ||||||
|         // requested range as the range iterator cannot express some conditions. |  | ||||||
|         let left_bound = match left { |  | ||||||
|             Included(left) => Included((field_id, level, left, T::min_value())), |  | ||||||
|             Excluded(left) => Excluded((field_id, level, left, T::min_value())), |  | ||||||
|             Unbounded => Unbounded, |  | ||||||
|         }; |  | ||||||
|         let right_bound = Included((field_id, level, T::max_value(), T::max_value())); |  | ||||||
|         // We also make sure that we don't decode the data before we are sure we must return it. |  | ||||||
|         let iter = self.index |  | ||||||
|             .facet_field_id_value_docids |  | ||||||
|             .remap_key_type::<KC>() |  | ||||||
|             .lazily_decode_data() |  | ||||||
|             .range(self.rtxn, &(left_bound, right_bound))? |  | ||||||
|             .take_while(|r| r.as_ref().map_or(true, |((.., r), _)| { |  | ||||||
|                 match right { |  | ||||||
|                     Included(right) => *r <= right, |  | ||||||
|                     Excluded(right) => *r < right, |  | ||||||
|                     Unbounded => true, |  | ||||||
|                 } |  | ||||||
|             })) |  | ||||||
|             .map(|r| r.and_then(|(key, lazy)| lazy.decode().map(|data| (key, data)))); |  | ||||||
|  |  | ||||||
|         debug!("Iterating between {:?} and {:?} (level {})", left, right, level); |  | ||||||
|  |  | ||||||
|         for (i, result) in iter.enumerate() { |  | ||||||
|             let ((_fid, level, l, r), docids) = result?; |  | ||||||
|             debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len()); |  | ||||||
|             output.union_with(&docids); |  | ||||||
|             // We save the leftmost and rightmost bounds we actually found at this level. |  | ||||||
|             if i == 0 { left_found = Some(l); } |  | ||||||
|             right_found = Some(r); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Can we go deeper? |  | ||||||
|         let deeper_level = match level.checked_sub(1) { |  | ||||||
|             Some(level) => level, |  | ||||||
|             None => return Ok(()), |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         // We must refine the left and right bounds of this range by retrieving the |  | ||||||
|         // missing part in a deeper level. |  | ||||||
|         match left_found.zip(right_found) { |  | ||||||
|             Some((left_found, right_found)) => { |  | ||||||
|                 // If the bound is satisfied we avoid calling this function again. |  | ||||||
|                 if !matches!(left, Included(l) if l == left_found) { |  | ||||||
|                     let sub_right = Excluded(left_found); |  | ||||||
|                     debug!("calling left with {:?} to {:?} (level {})",  left, sub_right, deeper_level); |  | ||||||
|                     self.explore_facet_levels::<T, KC>(field_id, deeper_level, left, sub_right, output)?; |  | ||||||
|                 } |  | ||||||
|                 if !matches!(right, Included(r) if r == right_found) { |  | ||||||
|                     let sub_left = Excluded(right_found); |  | ||||||
|                     debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level); |  | ||||||
|                     self.explore_facet_levels::<T, KC>(field_id, deeper_level, sub_left, right, output)?; |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             None => { |  | ||||||
|                 // If we found nothing at this level it means that we must find |  | ||||||
|                 // the same bounds but at a deeper, more precise level. |  | ||||||
|                 self.explore_facet_levels::<T, KC>(field_id, deeper_level, left, right, output)?; |  | ||||||
|             }, |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn execute(&self) -> anyhow::Result<SearchResult> { |     pub fn execute(&self) -> anyhow::Result<SearchResult> { | ||||||
|         let limit = self.limit; |         let limit = self.limit; | ||||||
|         let fst = self.index.words_fst(self.rtxn)?; |         let fst = self.index.words_fst(self.rtxn)?; | ||||||
| @@ -349,75 +409,9 @@ impl<'a> Search<'a> { | |||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         // We create the original candidates with the facet conditions results. |         // We create the original candidates with the facet conditions results. | ||||||
|         use FacetOperator::*; |         let facet_db = self.index.facet_field_id_value_docids; | ||||||
|         let facet_candidates = match self.facet_condition { |         let facet_candidates = match self.facet_condition { | ||||||
|             // TODO make that generic over floats and integers. |             Some(condition) => Some(condition.evaluate(self.rtxn, facet_db)?), | ||||||
|             Some(FacetCondition::OperatorI64(fid, operator)) => { |  | ||||||
|                 // Make sure we always bound the ranges with the field id and the level, |  | ||||||
|                 // as the facets values are all in the same database and prefixed by the |  | ||||||
|                 // field id and the level. |  | ||||||
|                 let (left, right) = match operator { |  | ||||||
|                     GreaterThan(val)        => (Excluded(val),      Included(i64::MAX)), |  | ||||||
|                     GreaterThanOrEqual(val) => (Included(val),      Included(i64::MAX)), |  | ||||||
|                     LowerThan(val)          => (Included(i64::MIN), Excluded(val)), |  | ||||||
|                     LowerThanOrEqual(val)   => (Included(i64::MIN), Included(val)), |  | ||||||
|                     Equal(val)              => (Included(val),      Included(val)), |  | ||||||
|                     Between(left, right)    => (Included(left),     Included(right)), |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 let db = self.index |  | ||||||
|                     .facet_field_id_value_docids |  | ||||||
|                     .remap_key_type::<FacetLevelValueI64Codec>(); |  | ||||||
|  |  | ||||||
|                 // Ask for the biggest value that can exist for this specific field: if it exists |  | ||||||
|                 // that's fine; if it doesn't, the value just before will be returned instead. |  | ||||||
|                 let biggest_level = db |  | ||||||
|                     .remap_data_type::<DecodeIgnore>() |  | ||||||
|                     .get_lower_than_or_equal_to(self.rtxn, &(fid, u8::MAX, i64::MAX, i64::MAX))? |  | ||||||
|                     .and_then(|((id, level, _, _), _)| if id == fid { Some(level) } else { None }); |  | ||||||
|  |  | ||||||
|                 match biggest_level { |  | ||||||
|                     Some(level) => { |  | ||||||
|                         let mut output = RoaringBitmap::new(); |  | ||||||
|                         self.explore_facet_levels::<i64, FacetLevelValueI64Codec>(fid, level, left, right, &mut output)?; |  | ||||||
|                         Some(output) |  | ||||||
|                     }, |  | ||||||
|                     None => None, |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             Some(FacetCondition::OperatorF64(fid, operator)) => { |  | ||||||
|                 // Make sure we always bound the ranges with the field id and the level, |  | ||||||
|                 // as the facets values are all in the same database and prefixed by the |  | ||||||
|                 // field id and the level. |  | ||||||
|                 let (left, right) = match operator { |  | ||||||
|                     GreaterThan(val)        => (Excluded(val),      Included(f64::MAX)), |  | ||||||
|                     GreaterThanOrEqual(val) => (Included(val),      Included(f64::MAX)), |  | ||||||
|                     LowerThan(val)          => (Included(f64::MIN), Excluded(val)), |  | ||||||
|                     LowerThanOrEqual(val)   => (Included(f64::MIN), Included(val)), |  | ||||||
|                     Equal(val)              => (Included(val),      Included(val)), |  | ||||||
|                     Between(left, right)    => (Included(left),     Included(right)), |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 let db = self.index |  | ||||||
|                     .facet_field_id_value_docids |  | ||||||
|                     .remap_key_type::<FacetLevelValueF64Codec>(); |  | ||||||
|  |  | ||||||
|                 // Ask for the biggest value that can exist for this specific field: if it exists |  | ||||||
|                 // that's fine; if it doesn't, the value just before will be returned instead. |  | ||||||
|                 let biggest_level = db |  | ||||||
|                     .remap_data_type::<DecodeIgnore>() |  | ||||||
|                     .get_lower_than_or_equal_to(self.rtxn, &(fid, u8::MAX, f64::MAX, f64::MAX))? |  | ||||||
|                     .and_then(|((id, level, _, _), _)| if id == fid { Some(level) } else { None }); |  | ||||||
|  |  | ||||||
|                 match biggest_level { |  | ||||||
|                     Some(level) => { |  | ||||||
|                         let mut output = RoaringBitmap::new(); |  | ||||||
|                         self.explore_facet_levels::<f64, FacetLevelValueF64Codec>(fid, level, left, right, &mut output)?; |  | ||||||
|                         Some(output) |  | ||||||
|                     }, |  | ||||||
|                     None => None, |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             None => None, |             None => None, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user