Merge remote-tracking branch 'origin/main' into tmp-release-v1.5.1

This commit is contained in:
Clément Renault
2023-12-13 11:41:31 +01:00
258 changed files with 5774 additions and 8050 deletions

View File

@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use std::ops::ControlFlow;
use std::{fmt, mem};
use heed::types::ByteSlice;
use heed::types::Bytes;
use heed::BytesDecode;
use indexmap::IndexMap;
use roaring::RoaringBitmap;
@ -13,7 +13,7 @@ use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
};
@ -105,7 +105,7 @@ impl<'a> FacetDistribution<'a> {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetF64Codec>();
@ -129,7 +129,7 @@ impl<'a> FacetDistribution<'a> {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetStringCodec>();
@ -172,9 +172,7 @@ impl<'a> FacetDistribution<'a> {
search_function(
self.rtxn,
self.index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, _| {
@ -203,9 +201,7 @@ impl<'a> FacetDistribution<'a> {
search_function(
self.rtxn,
self.index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, any_docid| {

View File

@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::DocumentId;
/// Call the given closure on the facet distribution of the candidate documents.
@ -23,7 +23,7 @@ use crate::DocumentId;
/// keep iterating over the different facet values or stop.
pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
callback: CB,
@ -34,11 +34,11 @@ where
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
@ -48,7 +48,7 @@ where
pub fn count_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
mut callback: CB,
@ -77,11 +77,11 @@ where
let mut heap = BinaryHeap::new();
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
@ -146,7 +146,7 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
callback: CB,
}

View File

@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::Result;
/// Find all the document ids for which the given field contains a value contained within
@ -25,11 +25,11 @@ where
let inner;
let left = match left {
Bound::Included(left) => {
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref())
}
Bound::Excluded(left) => {
inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref())
}
Bound::Unbounded => Bound::Unbounded,
@ -37,25 +37,22 @@ where
let inner;
let right = match right {
Bound::Included(right) => {
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref())
}
Bound::Excluded(right) => {
inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?;
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref())
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) =
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
{
let rightmost_bound = Bound::Included(
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
); // will not fail because get_first_facet_value succeeded
if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let rightmost_bound =
Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@ -67,7 +64,7 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,

View File

@ -5,7 +5,7 @@ use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
/// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id.
@ -13,7 +13,7 @@ use crate::heed_codec::ByteSliceRefCodec;
/// The documents returned by the iterator are grouped by the facet values that
/// determined their rank. For example, given the documents:
///
/// ```ignore
/// ```text
/// 0: { "colour": ["blue", "green"] }
/// 1: { "colour": ["blue", "red"] }
/// 2: { "colour": ["orange", "red"] }
@ -22,7 +22,7 @@ use crate::heed_codec::ByteSliceRefCodec;
/// ```
/// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
/// over the following elements:
/// ```ignore
/// ```text
/// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
/// [3] // same for "green"
/// [2] // same for "orange"
@ -31,12 +31,12 @@ use crate::heed_codec::ByteSliceRefCodec;
/// Note that once a document id is returned by the iterator, it is never returned again.
pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
@ -53,14 +53,12 @@ pub fn ascending_facet_sort<'t>(
struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
#[allow(clippy::type_complexity)]
stack: Vec<(
RoaringBitmap,
std::iter::Take<
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
>,
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>>,
)>,
}

View File

@ -7,21 +7,21 @@ use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
/// See the documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
///
/// This function does the same thing, but in the opposite order.
pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {
@ -37,13 +37,13 @@ pub fn descending_facet_sort<'t>(
struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
#[allow(clippy::type_complexity)]
stack: Vec<(
RoaringBitmap,
std::iter::Take<
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
heed::RoRevRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
>,
Bound<&'t [u8]>,
)>,
@ -100,7 +100,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*right_bound = Bound::Excluded(left_bound);
let iter = match self
.db
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
.rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow))
{
Ok(iter) => iter,
@ -123,7 +123,7 @@ mod tests {
use roaring::RoaringBitmap;
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{
@ -144,7 +144,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
@ -167,7 +167,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();

View File

@ -223,12 +223,9 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
// and finally we delete all the soft_deleted_documents, again, only once at the very end
self.inner_evaluate(rtxn, index, &filterable_fields)
.map(|result| result - soft_deleted_documents)
}
fn evaluate_operator(

View File

@ -1,13 +1,13 @@
pub use facet_sort_ascending::ascending_facet_sort;
pub use facet_sort_descending::descending_facet_sort;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode, RoTxn};
use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result};
mod facet_distribution;
mod facet_distribution_iter;
@ -22,8 +22,10 @@ fn facet_extreme_value<'t>(
let extreme_value =
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
let (_, extreme_value) = extreme_value?;
Ok(OrderedF64Codec::bytes_decode(extreme_value))
OrderedF64Codec::bytes_decode(extreme_value)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into)
}
pub fn facet_min_value<'t>(
@ -32,7 +34,7 @@ pub fn facet_min_value<'t>(
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
@ -43,7 +45,7 @@ pub fn facet_max_value<'t>(
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
@ -51,7 +53,7 @@ pub fn facet_max_value<'t>(
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -60,13 +62,12 @@ where
let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0);
let mut level0_iter_forward = db
.as_polymorph()
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
let mut level0_iter_forward =
db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
if let Some(first) = level0_iter_forward.next() {
let (first_key, _) = first?;
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
.ok_or(heed::Error::Encoding)?;
.map_err(heed::Error::Decoding)?;
Ok(Some(first_key.left_bound))
} else {
Ok(None)
@ -76,7 +77,7 @@ where
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -85,13 +86,12 @@ where
let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0);
let mut level0_iter_backward = db
.as_polymorph()
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
let mut level0_iter_backward =
db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
if let Some(last) = level0_iter_backward.next() {
let (last_key, _) = last?;
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
.ok_or(heed::Error::Encoding)?;
.map_err(heed::Error::Decoding)?;
Ok(Some(last_key.left_bound))
} else {
Ok(None)
@ -101,17 +101,17 @@ where
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();
Ok(db
.as_polymorph()
.rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, field_id_prefix)?
.remap_types::<Bytes, DecodeIgnore>()
.rev_prefix_iter(txn, field_id_prefix)?
.next()
.map(|el| {
let (key, _) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
key.level
})
.unwrap_or(0))

View File

@ -17,8 +17,7 @@ use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result,
SearchContext, BEU16,
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, SearchContext,
};
// Building these factories is not free.
@ -299,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> {
None => return Ok(Vec::new()),
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
Some(fst) => fst,
None => return Ok(vec![]),
};

View File

@ -3,16 +3,17 @@ use std::collections::hash_map::Entry;
use std::hash::Hash;
use fxhash::FxHashMap;
use heed::types::ByteSlice;
use heed::types::Bytes;
use heed::{BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap;
use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::proximity::ProximityPrecision;
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
};
/// A cache storing pointers to values in the LMDB databases.
@ -25,7 +26,7 @@ pub struct DatabaseCache<'ctx> {
pub word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
pub word_prefix_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
FxHashMap<(u8, Interned<String>, Interned<String>), Option<RoaringBitmap>>,
pub prefix_word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
@ -50,7 +51,7 @@ impl<'ctx> DatabaseCache<'ctx> {
cache_key: K1,
db_key: &'v KC::EItem,
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, ByteSlice>,
db: Database<KC, Bytes>,
) -> Result<Option<DC::DItem>>
where
K1: Copy + Eq + Hash,
@ -63,12 +64,14 @@ impl<'ctx> DatabaseCache<'ctx> {
}
match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Owned(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
None => Ok(None),
}
}
@ -78,7 +81,7 @@ impl<'ctx> DatabaseCache<'ctx> {
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, ByteSlice>,
db: Database<KC, Bytes>,
merger: MergeFn,
) -> Result<Option<DC::DItem>>
where
@ -110,12 +113,14 @@ impl<'ctx> DatabaseCache<'ctx> {
}
match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Owned(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
None => Ok(None),
}
}
@ -165,16 +170,16 @@ impl<'ctx> SearchContext<'ctx> {
word,
&keys[..],
&mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.word_docids,
self.index.word_docids.remap_data_type::<ByteSlice>(),
self.index.word_docids.remap_data_type::<Bytes>(),
),
}
}
@ -194,16 +199,16 @@ impl<'ctx> SearchContext<'ctx> {
word,
&keys[..],
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
self.index.exact_word_docids.remap_data_type::<Bytes>(),
),
}
}
@ -244,16 +249,16 @@ impl<'ctx> SearchContext<'ctx> {
prefix,
&keys[..],
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_docids.remap_data_type::<Bytes>(),
),
}
}
@ -273,16 +278,16 @@ impl<'ctx> SearchContext<'ctx> {
prefix,
&keys[..],
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
),
}
}
@ -293,17 +298,67 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => {
// Force proximity to 0 because:
// in AttributeScale, there are only 2 possible distances:
// 1. words in the same attribute: in that case the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words.
let proximity = 0;
let docids = if let Some(docids) =
self.db_cache.word_pair_proximity_docids.get(&(proximity, word1, word2))
{
docids
.as_ref()
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
.transpose()
.map_err(heed::Error::Decoding)?
} else {
// Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids
} else {
self.index.fields_ids_map(self.txn)?.ids().collect()
};
let mut docids = RoaringBitmap::new();
for fid in fids {
// for each field, intersect left word bitmap and right word bitmap,
// then merge the result in a global bitmap before storing it in the cache.
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
let word2_docids = self.get_db_word_fid_docids(word2, fid)?;
if let (Some(word1_docids), Some(word2_docids)) =
(word1_docids, word2_docids)
{
docids |= word1_docids & word2_docids;
}
}
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
.map(Cow::into_owned)
.map(Cow::Owned)
.map(Some)
.map_err(heed::Error::Decoding)?;
self.db_cache
.word_pair_proximity_docids
.insert((proximity, word1, word2), encoded);
Some(docids)
};
Ok(docids)
}
ProximityPrecision::WordScale => {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
)
}
}
}
pub fn get_db_word_pair_proximity_docids_len(
@ -312,54 +367,107 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<u64>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => Ok(self
.get_db_word_pair_proximity_docids(word1, word2, proximity)?
.map(|d| d.len())),
ProximityPrecision::WordScale => {
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
)
}
}
}
pub fn get_db_word_prefix_pair_proximity_docids(
&mut self,
word1: Interned<String>,
prefix2: Interned<String>,
proximity: u8,
mut proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, prefix2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(prefix2).as_str(),
),
&mut self.db_cache.word_prefix_pair_proximity_docids,
self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::AttributeScale {
// Force proximity to 0 because:
// in AttributeScale, there are only 2 possible distances:
// 1. words in the same attribute: in that case the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words.
proximity = 0;
}
let docids = if let Some(docids) =
self.db_cache.word_prefix_pair_proximity_docids.get(&(proximity, word1, prefix2))
{
docids.clone()
} else {
let prefix_docids = match proximity_precision {
ProximityPrecision::AttributeScale => {
// Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids
} else {
self.index.fields_ids_map(self.txn)?.ids().collect()
};
let mut prefix_docids = RoaringBitmap::new();
// for each field, intersect left word bitmap and right word bitmap,
// then merge the result in a global bitmap before storing it in the cache.
for fid in fids {
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
let prefix2_docids = self.get_db_word_prefix_fid_docids(prefix2, fid)?;
if let (Some(word1_docids), Some(prefix2_docids)) =
(word1_docids, prefix2_docids)
{
prefix_docids |= word1_docids & prefix2_docids;
}
}
prefix_docids
}
ProximityPrecision::WordScale => {
// compute docids using prefix iter and store the result in the cache.
let key = U8StrStrCodec::bytes_encode(&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(prefix2).as_str(),
))
.unwrap()
.into_owned();
let mut prefix_docids = RoaringBitmap::new();
let remap_key_type = self
.index
.word_pair_proximity_docids
.remap_key_type::<Bytes>()
.prefix_iter(self.txn, &key)?;
for result in remap_key_type {
let (_, docids) = result?;
prefix_docids |= docids;
}
prefix_docids
}
};
self.db_cache
.word_prefix_pair_proximity_docids
.insert((proximity, word1, prefix2), Some(prefix_docids.clone()));
Some(prefix_docids)
};
Ok(docids)
}
pub fn get_db_prefix_word_pair_proximity_docids(
&mut self,
left_prefix: Interned<String>,
right: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, left_prefix, right),
&(
proximity,
self.word_interner.get(left_prefix).as_str(),
self.word_interner.get(right).as_str(),
),
&mut self.db_cache.prefix_word_pair_proximity_docids,
self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
// only accept exact matches on reverted positions
self.get_db_word_pair_proximity_docids(left_prefix, right, proximity)
}
pub fn get_db_word_fid_docids(
@ -377,7 +485,7 @@ impl<'ctx> SearchContext<'ctx> {
(word, fid),
&(self.word_interner.get(word).as_str(), fid),
&mut self.db_cache.word_fid_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_fid_docids.remap_data_type::<Bytes>(),
)
}
@ -396,7 +504,7 @@ impl<'ctx> SearchContext<'ctx> {
(word_prefix, fid),
&(self.word_interner.get(word_prefix).as_str(), fid),
&mut self.db_cache.word_prefix_fid_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
)
}
@ -410,7 +518,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_fid_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
@ -436,7 +544,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_prefix_fid_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
@ -464,7 +572,7 @@ impl<'ctx> SearchContext<'ctx> {
(word, position),
&(self.word_interner.get(word).as_str(), position),
&mut self.db_cache.word_position_docids,
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
self.index.word_position_docids.remap_data_type::<Bytes>(),
)
}
@ -478,7 +586,7 @@ impl<'ctx> SearchContext<'ctx> {
(word_prefix, position),
&(self.word_interner.get(word_prefix).as_str(), position),
&mut self.db_cache.word_prefix_position_docids,
self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
)
}
@ -492,7 +600,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_position_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
@ -523,7 +631,7 @@ impl<'ctx> SearchContext<'ctx> {
let remap_key_type = self
.index
.word_prefix_position_docids
.remap_types::<ByteSlice, ByteSlice>()
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {

View File

@ -1,4 +1,4 @@
use heed::types::{ByteSlice, Str, Unit};
use heed::types::{Bytes, Str, Unit};
use heed::{Database, RoPrefix, RoTxn};
use roaring::RoaringBitmap;
@ -8,7 +8,7 @@ const DOCID_SIZE: usize = 4;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result, SearchContext};
pub struct DistinctOutput {
@ -71,7 +71,7 @@ pub fn distinct_single_docid(
/// Return all the docids containing the given value in the given field
fn facet_value_docids(
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
txn: &RoTxn,
field_id: u16,
facet_value: &[u8],
@ -87,12 +87,12 @@ fn facet_number_values<'a>(
field_id: u16,
index: &Index,
txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
@ -105,12 +105,12 @@ pub fn facet_string_values<'a>(
field_id: u16,
index: &Index,
txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_strings
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_types();

View File

@ -1,7 +1,7 @@
use std::collections::VecDeque;
use std::iter::FromIterator;
use heed::types::{ByteSlice, Unit};
use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
@ -34,7 +34,7 @@ fn facet_number_values<'a>(
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<ByteSlice>()
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
@ -163,7 +163,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
// computing the distance between two points is expensive thus we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents.into_iter());
self.cached_sorted_docids.extend(documents);
};
Ok(())

View File

@ -228,7 +228,7 @@ impl<T> Ord for Interned<T> {
impl<T> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.idx.partial_cmp(&other.idx)
Some(self.cmp(other))
}
}
@ -241,7 +241,7 @@ impl<T> PartialEq for Interned<T> {
}
impl<T> Clone for Interned<T> {
fn clone(&self) -> Self {
Self { idx: self.idx, _phantom: PhantomData }
*self
}
}

View File

@ -52,7 +52,6 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
BEU32,
};
/// A structure used throughout the execution of a search query.
@ -469,8 +468,8 @@ pub fn execute_search(
let mut docids = Vec::new();
let mut uniq_docids = RoaringBitmap::new();
for instant_distance::Item { distance: _, pid, point: _ } in neighbors {
let index = BEU32::new(pid.into_inner());
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
let index = pid.into_inner();
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap();
if universe.contains(docid) && uniq_docids.insert(docid) {
docids.push(docid);
if docids.len() == (from + length) {
@ -627,7 +626,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
field: field.to_string(),
valid_fields,
hidden_fields,
})?;
}
.into());
}
Member::Geo(_) if !sortable_fields.contains("_geo") => {
let (valid_fields, hidden_fields) =
@ -637,7 +637,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
field: "_geo".to_string(),
valid_fields,
hidden_fields,
})?;
}
.into());
}
_ => (),
}

View File

@ -175,7 +175,7 @@ impl QueryTermSubset {
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
let original = ctx.term_interner.get(self.original);
let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None };
let use_prefix_db = original.zero_typo.use_prefix_db?;
let word = match &self.zero_typo_subset {
NTypoTermSubset::All => Some(use_prefix_db),
NTypoTermSubset::Subset { words, phrases: _ } => {

View File

@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::score_details::{self, ScoreDetails};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::{FieldId, Index, Result};
@ -100,11 +100,11 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
let number_db = ctx
.index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let string_db = ctx
.index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let (number_iter, string_iter) = if self.is_ascending {
let number_iter = ascending_facet_sort(

View File

@ -124,8 +124,7 @@ fn test_attribute_fid_simple() {
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
@ -142,7 +141,6 @@ fn test_attribute_fid_ngrams() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}

View File

@ -141,8 +141,7 @@ fn test_attribute_position_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
#[test]
@ -158,8 +157,7 @@ fn test_attribute_position_repeated() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
@ -176,8 +174,7 @@ fn test_attribute_position_different_fields() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}
@ -194,7 +191,6 @@ fn test_attribute_position_ngrams() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
}

View File

@ -478,8 +478,7 @@ fn test_exactness_simple_ordered() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -511,8 +510,7 @@ fn test_exactness_simple_reversed() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -535,8 +533,7 @@ fn test_exactness_simple_reversed() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -566,8 +563,7 @@ fn test_exactness_simple_random() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -596,8 +592,7 @@ fn test_exactness_attribute_starts_with_simple() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -623,8 +618,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -644,8 +638,7 @@ fn test_exactness_attribute_starts_with_phrase() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -674,8 +667,7 @@ fn test_exactness_all_candidates_with_typo() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -711,8 +703,7 @@ fn test_exactness_after_words() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -760,8 +751,7 @@ fn test_words_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -809,8 +799,7 @@ fn test_proximity_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]");
@ -847,8 +836,7 @@ fn test_proximity_after_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -881,8 +869,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -917,8 +904,7 @@ fn test_typo_followed_by_exactness() {
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);

View File

@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 9, 6, 7, 8, 11, 12, 13, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -379,13 +379,13 @@ fn test_proximity_prefix_db() {
insta::assert_debug_snapshot!(texts, @r###"
[
"\"this is the best summer meal\"",
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"summer x best\"",
"\"summer best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@ -423,17 +423,17 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"this is the best winter meal\"",
"\"this is the best meal of winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"this is the best meal of winter\"",
"\"this is the best winter meal\"",
"\"winter x y best\"",
"\"winter x best\"",
"\"winter best\"",
@ -471,20 +471,20 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"this is the best winter meal\"",
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
"\"winter x best\"",
"\"winter best\"",
]
"###);
}

View File

@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 2,
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),

View File

@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 2,
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),

View File

@ -6,7 +6,7 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 1,
rank: 4,
max_rank: 4,
},
),
@ -14,7 +14,7 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 1,
rank: 2,
max_rank: 4,
},
),

View File

@ -13,6 +13,7 @@ This module tests the `sort` ranking rule:
use big_s::S;
use maplit::hashset;
use meili_snap::insta;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;