Reintroduce filter range search and facet extractors

This commit is contained in:
Loïc Lecrenier
2022-08-31 08:27:16 +02:00
committed by Loïc Lecrenier
parent 22d80eeaf9
commit 39a4a0a362
5 changed files with 92 additions and 231 deletions

View File

@ -15,7 +15,7 @@ use super::get_last_facet_value;
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
rtxn: &'t heed::RoTxn<'t>,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: &'t heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>,
field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
@ -48,13 +48,13 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
let mut docids = RoaringBitmap::new();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
let mut f = FacetRangeSearch { rtxn, db: &db, field_id, left, right, docids: &mut docids };
let highest_level = get_highest_level(rtxn, &db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, &db, field_id)? {
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, &db, field_id)?.unwrap();
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
Ok(docids)
} else {

View File

@ -1,22 +1,17 @@
use std::collections::HashSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
use std::ops::RangeBounds;
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
use heed::types::DecodeIgnore;
use heed::LazyDecode;
use roaring::RoaringBitmap;
use std::collections::HashSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
// use super::FacetNumberRange;
use crate::error::{Error, UserError};
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
// use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::{
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
};
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
use super::facet_range_search;
/// The maximum number of filters the filter AST can process.
const MAX_FILTER_DEPTH: usize = 2000;
@ -147,158 +142,15 @@ impl<'a> Filter<'a> {
}
}
fn explore_facet_number_levels(
rtxn: &heed::RoTxn,
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
field_id: FieldId,
) {
}
impl<'a> Filter<'a> {
/// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels.
fn explore_facet_number_levels(
rtxn: &heed::RoTxn,
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, CboRoaringBitmapCodec>,
field_id: FieldId,
level: u8,
left: Bound<f64>,
right: Bound<f64>,
output: &mut RoaringBitmap,
) -> Result<()> {
// level must be > 0, I'll create a separate function for level 0
// if level == 0 {
// call that function
//}
match (left, right) {
// If the request is an exact value we must go directly to the deepest level.
(Included(l), Included(r)) if l == r && level > 0 => {
return Self::explore_facet_number_levels(
rtxn, db, field_id, 0, left, right, output,
);
}
// lower TO upper when lower > upper must return no result
(Included(l), Included(r)) if l > r => return Ok(()),
(Included(l), Excluded(r)) if l >= r => return Ok(()),
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
(Excluded(l), Included(r)) if l >= r => return Ok(()),
(_, _) => (),
}
let range_start_key = FacetKey {
field_id,
level,
left_bound: match left {
Included(l) => l,
Excluded(l) => l,
Bound::Unbounded => f64::MIN,
},
};
let mut range_iter = db
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
.range(rtxn, &(range_start_key..))?;
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
let (mut previous_facet_key, mut previous_value) = range_iter.next().unwrap()?;
while let Some(el) = range_iter.next() {
let (facet_key, value) = el?;
let range = (Included(previous_facet_key.left_bound), Excluded(facet_key.left_bound));
// if the current range intersects with the query range, then go deeper
// what does it mean for two ranges to intersect?
let gte_left = match left {
Included(l) => previous_facet_key.left_bound >= l,
Excluded(l) => previous_facet_key.left_bound > l, // TODO: not true?
Bound::Unbounded => true,
};
let lte_right = match right {
Included(r) => facet_key.left_bound <= r,
Excluded(r) => facet_key.left_bound < r,
Bound::Unbounded => true,
};
}
// at this point, previous_facet_key and previous_value are the last groups in the level
// we must also check whether we should visit this group
todo!();
// let mut left_found = None;
// let mut right_found = None;
// // We must create a custom iterator to be able to iterate over the
// // requested range as the range iterator cannot express some conditions.
// let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?;
// debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
// for (i, result) in iter.enumerate() {
// let ((_fid, level, l, r), docids) = result?;
// debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
// *output |= docids;
// // We save the leftest and rightest bounds we actually found at this level.
// if i == 0 {
// left_found = Some(l);
// }
// right_found = Some(r);
// }
// // Can we go deeper?
// let deeper_level = match level.checked_sub(1) {
// Some(level) => level,
// None => return Ok(()),
// };
// // We must refine the left and right bounds of this range by retrieving the
// // missing part in a deeper level.
// match left_found.zip(right_found) {
// Some((left_found, right_found)) => {
// // If the bound is satisfied we avoid calling this function again.
// if !matches!(left, Included(l) if l == left_found) {
// let sub_right = Excluded(left_found);
// debug!(
// "calling left with {:?} to {:?} (level {})",
// left, sub_right, deeper_level
// );
// Self::explore_facet_number_levels(
// rtxn,
// db,
// field_id,
// deeper_level,
// left,
// sub_right,
// output,
// )?;
// }
// if !matches!(right, Included(r) if r == right_found) {
// let sub_left = Excluded(right_found);
// debug!(
// "calling right with {:?} to {:?} (level {})",
// sub_left, right, deeper_level
// );
// Self::explore_facet_number_levels(
// rtxn,
// db,
// field_id,
// deeper_level,
// sub_left,
// right,
// output,
// )?;
// }
// }
// None => {
// // If we found nothing at this level it means that we must find
// // the same bounds but at a deeper, more precise level.
// Self::explore_facet_number_levels(
// rtxn,
// db,
// field_id,
// deeper_level,
// left,
// right,
// output,
// )?;
// }
// }
// Ok(())
// and finally we delete all the soft_deleted_documents, again, only once at the very end
self.inner_evaluate(rtxn, index, &filterable_fields)
.map(|result| result - soft_deleted_documents)
}
fn evaluate_operator(
@ -337,15 +189,15 @@ impl<'a> Filter<'a> {
Some(n) => {
let n = Included(n);
let mut output = RoaringBitmap::new();
// Self::explore_facet_number_levels(
// rtxn,
// numbers_db,
// field_id,
// 0,
// n,
// n,
// &mut output,
// )?;
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
0,
n,
n,
&mut output,
)?;
output
}
None => RoaringBitmap::new(),
@ -381,29 +233,53 @@ impl<'a> Filter<'a> {
match biggest_level {
Some(level) => {
let mut output = RoaringBitmap::new();
// Self::explore_facet_number_levels(
// rtxn,
// numbers_db,
// field_id,
// level,
// left,
// right,
// &mut output,
// )?;
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
level,
left,
right,
&mut output,
)?;
Ok(output)
}
None => Ok(RoaringBitmap::new()),
}
}
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
/// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels.
fn explore_facet_number_levels(
rtxn: &heed::RoTxn,
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
field_id: FieldId,
level: u8,
left: Bound<f64>,
right: Bound<f64>,
output: &mut RoaringBitmap,
) -> Result<()> {
match (left, right) {
// If the request is an exact value we must go directly to the deepest level.
(Included(l), Included(r)) if l == r && level > 0 => {
return Self::explore_facet_number_levels(
rtxn, db, field_id, 0, left, right, output,
);
}
// lower TO upper when lower > upper must return no result
(Included(l), Included(r)) if l > r => return Ok(()),
(Included(l), Excluded(r)) if l >= r => return Ok(()),
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
(Excluded(l), Included(r)) if l >= r => return Ok(()),
(_, _) => (),
}
let x = facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, &db, field_id, &left, &right,
)?;
// TODO: the facet range search should take a mutable roaring bitmap as argument
*output = x;
// and finally we delete all the soft_deleted_documents, again, only once at the very end
self.inner_evaluate(rtxn, index, &filterable_fields)
.map(|result| result - soft_deleted_documents)
Ok(())
}
fn inner_evaluate(