Choose implementation strategy of criterion at runtime

This commit is contained in:
Loïc Lecrenier
2022-12-12 16:54:31 +01:00
parent 97fb64e40e
commit 229405aeb9
7 changed files with 156 additions and 50 deletions

View File

@ -12,6 +12,7 @@ use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::query_tree::Operation;
use crate::search::CriterionImplementationStrategy;
use crate::{FieldId, Index, Result};
/// Threshold on the number of candidates that will make
@ -29,6 +30,7 @@ pub struct AscDesc<'t> {
allowed_candidates: RoaringBitmap,
initial_candidates: InitialCandidates,
faceted_candidates: RoaringBitmap,
implementation_strategy: CriterionImplementationStrategy,
parent: Box<dyn Criterion + 't>,
}
@ -38,8 +40,9 @@ impl<'t> AscDesc<'t> {
rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>,
field_name: String,
implementation_strategy: CriterionImplementationStrategy,
) -> Result<Self> {
Self::new(index, rtxn, parent, field_name, true)
Self::new(index, rtxn, parent, field_name, true, implementation_strategy)
}
pub fn desc(
@ -47,8 +50,9 @@ impl<'t> AscDesc<'t> {
rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>,
field_name: String,
implementation_strategy: CriterionImplementationStrategy,
) -> Result<Self> {
Self::new(index, rtxn, parent, field_name, false)
Self::new(index, rtxn, parent, field_name, false, implementation_strategy)
}
fn new(
@ -57,6 +61,7 @@ impl<'t> AscDesc<'t> {
parent: Box<dyn Criterion + 't>,
field_name: String,
is_ascending: bool,
implementation_strategy: CriterionImplementationStrategy,
) -> Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = fields_ids_map.id(&field_name);
@ -82,6 +87,7 @@ impl<'t> AscDesc<'t> {
allowed_candidates: RoaringBitmap::new(),
faceted_candidates,
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
implementation_strategy,
parent,
})
}
@ -149,6 +155,7 @@ impl<'t> Criterion for AscDesc<'t> {
field_id,
self.is_ascending,
candidates & &self.faceted_candidates,
self.implementation_strategy,
)?,
None => Box::new(std::iter::empty()),
};
@ -170,6 +177,51 @@ impl<'t> Criterion for AscDesc<'t> {
}
}
/// Builds the ordered groups of `candidates` for `field_id` with the iterative facet iterators.
///
/// The groups produced by `iterative_facet_number_ordered_iter` come first, followed by the
/// groups produced by `iterative_facet_string_ordered_iter`. Both are called with the same
/// `index`, `rtxn`, `field_id` and `is_ascending` arguments and with their own copy of
/// `candidates`. Every yielded bitmap is wrapped in `Ok` so that the items match the
/// `heed::Result<RoaringBitmap>` item type of the returned boxed iterator.
///
/// An error returned while building either of the two iterators is propagated to the caller.
fn facet_ordered_iterative<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    // The number iterator receives a clone so that the string iterator can take ownership.
    let numbers = iterative_facet_number_ordered_iter(
        index,
        rtxn,
        field_id,
        is_ascending,
        candidates.clone(),
    )?;
    let strings =
        iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;

    // Numbers first, then strings; the infallible items are lifted into `heed::Result`.
    let groups: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't> =
        Box::new(numbers.chain(strings).map(Ok));
    Ok(groups)
}
/// Builds the ordered groups of `candidates` for `field_id` with the facet sort functions.
///
/// `ascending_facet_sort` is used when `is_ascending` is `true`, `descending_facet_sort`
/// otherwise. The selected function is run first on `index.facet_id_f64_docids` and then on
/// `index.facet_id_string_docids`, both remapped to the
/// `FacetGroupKeyCodec<ByteSliceRefCodec>` key codec; the two resulting iterators are chained
/// in that order and returned as a single boxed iterator.
///
/// An error returned while building either of the two iterators is propagated to the caller.
fn facet_ordered_set_based<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    // Pick the sort direction once and reuse it for both facet databases.
    let sort_facet = match is_ascending {
        true => ascending_facet_sort,
        false => descending_facet_sort,
    };

    // The number database receives a clone so that the string pass can take ownership.
    let numbers = sort_facet(
        rtxn,
        index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
        field_id,
        candidates.clone(),
    )?;
    let strings = sort_facet(
        rtxn,
        index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
        field_id,
        candidates,
    )?;

    // Numbers first, then strings.
    let ordered = numbers.chain(strings);
    Ok(Box::new(ordered))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order.
///
/// It will either use an iterative or a recursive method on the whole facet database depending
@ -180,36 +232,22 @@ fn facet_ordered<'t>(
field_id: FieldId,
is_ascending: bool,
candidates: RoaringBitmap,
implementation_strategy: CriterionImplementationStrategy,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
if candidates.len() <= CANDIDATES_THRESHOLD {
let number_iter = iterative_facet_number_ordered_iter(
index,
rtxn,
field_id,
is_ascending,
candidates.clone(),
)?;
let string_iter =
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
} else {
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
let number_iter = make_iter(
rtxn,
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates.clone(),
)?;
let string_iter = make_iter(
rtxn,
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
)?;
Ok(Box::new(number_iter.chain(string_iter)))
match implementation_strategy {
CriterionImplementationStrategy::OnlyIterative => {
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
}
CriterionImplementationStrategy::OnlySetBased => {
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
}
CriterionImplementationStrategy::Dynamic => {
if candidates.len() <= CANDIDATES_THRESHOLD {
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
} else {
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
}
}
}
}

View File

@ -9,7 +9,9 @@ use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{InitialCandidates, Query};
use crate::search::query_tree::{Operation, QueryKind};
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
use crate::search::{
build_dfa, word_derivations, CriterionImplementationStrategy, WordDerivationsCache,
};
use crate::Result;
/// To be able to divide integers by the number of words in the query
@ -30,10 +32,15 @@ pub struct Attribute<'t> {
parent: Box<dyn Criterion + 't>,
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
set_buckets: Option<BinaryHeap<Branch<'t>>>,
implementation_strategy: CriterionImplementationStrategy,
}
impl<'t> Attribute<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
pub fn new(
ctx: &'t dyn Context<'t>,
parent: Box<dyn Criterion + 't>,
implementation_strategy: CriterionImplementationStrategy,
) -> Self {
Attribute {
ctx,
state: None,
@ -41,6 +48,7 @@ impl<'t> Attribute<'t> {
parent,
linear_buckets: None,
set_buckets: None,
implementation_strategy,
}
}
}
@ -64,7 +72,15 @@ impl<'t> Criterion for Attribute<'t> {
}));
}
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
let found_candidates = if matches!(
self.implementation_strategy,
CriterionImplementationStrategy::OnlyIterative
) || (matches!(
self.implementation_strategy,
CriterionImplementationStrategy::Dynamic
) && allowed_candidates.len()
< CANDIDATES_THRESHOLD)
{
let linear_buckets = match self.linear_buckets.as_mut() {
Some(linear_buckets) => linear_buckets,
None => {

View File

@ -14,6 +14,7 @@ use self::r#final::Final;
use self::typo::Typo;
use self::words::Words;
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use super::CriterionImplementationStrategy;
use crate::search::criteria::geo::Geo;
use crate::search::{word_derivations, Distinct, WordDerivationsCache};
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
@ -377,6 +378,7 @@ impl<'t> CriteriaBuilder<'t> {
sort_criteria: Option<Vec<AscDescName>>,
exhaustive_number_hits: bool,
distinct: Option<D>,
implementation_strategy: CriterionImplementationStrategy,
) -> Result<Final<'t>> {
use crate::criterion::Criterion as Name;
@ -402,12 +404,14 @@ impl<'t> CriteriaBuilder<'t> {
self.rtxn,
criterion,
field.to_string(),
implementation_strategy,
)?),
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
self.index,
self.rtxn,
criterion,
field.to_string(),
implementation_strategy,
)?),
AscDescName::Asc(Member::Geo(point)) => {
Box::new(Geo::asc(self.index, self.rtxn, criterion, *point)?)
@ -421,15 +425,27 @@ impl<'t> CriteriaBuilder<'t> {
}
None => criterion,
},
Name::Proximity => Box::new(Proximity::new(self, criterion)),
Name::Attribute => Box::new(Attribute::new(self, criterion)),
Name::Proximity => {
Box::new(Proximity::new(self, criterion, implementation_strategy))
}
Name::Attribute => {
Box::new(Attribute::new(self, criterion, implementation_strategy))
}
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
Name::Asc(field) => {
Box::new(AscDesc::asc(self.index, self.rtxn, criterion, field)?)
}
Name::Desc(field) => {
Box::new(AscDesc::desc(self.index, self.rtxn, criterion, field)?)
}
Name::Asc(field) => Box::new(AscDesc::asc(
self.index,
self.rtxn,
criterion,
field,
implementation_strategy,
)?),
Name::Desc(field) => Box::new(AscDesc::desc(
self.index,
self.rtxn,
criterion,
field,
implementation_strategy,
)?),
};
}

View File

@ -11,7 +11,7 @@ use super::{
};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
use crate::search::{build_dfa, WordDerivationsCache};
use crate::search::{build_dfa, CriterionImplementationStrategy, WordDerivationsCache};
use crate::{Position, Result};
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
@ -33,10 +33,15 @@ pub struct Proximity<'t> {
parent: Box<dyn Criterion + 't>,
candidates_cache: Cache,
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
implementation_strategy: CriterionImplementationStrategy,
}
impl<'t> Proximity<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
pub fn new(
ctx: &'t dyn Context<'t>,
parent: Box<dyn Criterion + 't>,
implementation_strategy: CriterionImplementationStrategy,
) -> Self {
Proximity {
ctx,
state: None,
@ -45,6 +50,7 @@ impl<'t> Proximity<'t> {
parent,
candidates_cache: Cache::new(),
plane_sweep_cache: None,
implementation_strategy,
}
}
}
@ -72,8 +78,15 @@ impl<'t> Criterion for Proximity<'t> {
self.state = None; // reset state
}
Some((_, query_tree, allowed_candidates)) => {
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD
&& self.proximity > PROXIMITY_THRESHOLD
let mut new_candidates = if matches!(
self.implementation_strategy,
CriterionImplementationStrategy::OnlyIterative
) || (matches!(
self.implementation_strategy,
CriterionImplementationStrategy::Dynamic
) && allowed_candidates.len()
<= CANDIDATES_THRESHOLD
&& self.proximity > PROXIMITY_THRESHOLD)
{
if let Some(cache) = self.plane_sweep_cache.as_mut() {
match cache.next() {