mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-29 01:31:00 +00:00
Merge branch 'main' into tmp-release-v1.5.0
This commit is contained in:
@ -13,7 +13,7 @@ use crate::heed_codec::ByteSliceRefCodec;
|
||||
/// The documents returned by the iterator are grouped by the facet values that
|
||||
/// determined their rank. For example, given the documents:
|
||||
///
|
||||
/// ```ignore
|
||||
/// ```text
|
||||
/// 0: { "colour": ["blue", "green"] }
|
||||
/// 1: { "colour": ["blue", "red"] }
|
||||
/// 2: { "colour": ["orange", "red"] }
|
||||
@ -22,7 +22,7 @@ use crate::heed_codec::ByteSliceRefCodec;
|
||||
/// ```
|
||||
/// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
|
||||
/// over the following elements:
|
||||
/// ```ignore
|
||||
/// ```text
|
||||
/// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
|
||||
/// [3] // same for "green"
|
||||
/// [2] // same for "orange"
|
||||
|
@ -223,12 +223,9 @@ impl<'a> Filter<'a> {
|
||||
impl<'a> Filter<'a> {
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
|
||||
// and finally we delete all the soft_deleted_documents, again, only once at the very end
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields)
|
||||
.map(|result| result - soft_deleted_documents)
|
||||
}
|
||||
|
||||
fn evaluate_operator(
|
||||
|
@ -12,7 +12,7 @@ use super::Word;
|
||||
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||
use crate::{
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
|
||||
};
|
||||
|
||||
/// A cache storing pointers to values in the LMDB databases.
|
||||
@ -25,7 +25,7 @@ pub struct DatabaseCache<'ctx> {
|
||||
pub word_pair_proximity_docids:
|
||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
|
||||
pub word_prefix_pair_proximity_docids:
|
||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
|
||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<RoaringBitmap>>,
|
||||
pub prefix_word_pair_proximity_docids:
|
||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
|
||||
pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
|
||||
@ -168,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
@ -182,7 +182,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
&mut self,
|
||||
word: Interned<String>,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
@ -230,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
self.word_interner.get(prefix).as_str(),
|
||||
@ -244,7 +244,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
&mut self,
|
||||
prefix: Interned<String>,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
self.word_interner.get(prefix).as_str(),
|
||||
@ -297,35 +297,47 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
prefix2: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, word1, prefix2),
|
||||
&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(prefix2).as_str(),
|
||||
),
|
||||
&mut self.db_cache.word_prefix_pair_proximity_docids,
|
||||
self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||
)
|
||||
let docids = match self
|
||||
.db_cache
|
||||
.word_prefix_pair_proximity_docids
|
||||
.entry((proximity, word1, prefix2))
|
||||
{
|
||||
Entry::Occupied(docids) => docids.get().clone(),
|
||||
Entry::Vacant(entry) => {
|
||||
// compute docids using prefix iter and store the result in the cache.
|
||||
let key = U8StrStrCodec::bytes_encode(&(
|
||||
proximity,
|
||||
self.word_interner.get(word1).as_str(),
|
||||
self.word_interner.get(prefix2).as_str(),
|
||||
))
|
||||
.unwrap()
|
||||
.into_owned();
|
||||
let mut prefix_docids = RoaringBitmap::new();
|
||||
let remap_key_type = self
|
||||
.index
|
||||
.word_pair_proximity_docids
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter(self.txn, &key)?;
|
||||
for result in remap_key_type {
|
||||
let (_, docids) = result?;
|
||||
|
||||
prefix_docids |= docids;
|
||||
}
|
||||
entry.insert(Some(prefix_docids.clone()));
|
||||
Some(prefix_docids)
|
||||
}
|
||||
};
|
||||
Ok(docids)
|
||||
}
|
||||
|
||||
pub fn get_db_prefix_word_pair_proximity_docids(
|
||||
&mut self,
|
||||
left_prefix: Interned<String>,
|
||||
right: Interned<String>,
|
||||
proximity: u8,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
(proximity, left_prefix, right),
|
||||
&(
|
||||
proximity,
|
||||
self.word_interner.get(left_prefix).as_str(),
|
||||
self.word_interner.get(right).as_str(),
|
||||
),
|
||||
&mut self.db_cache.prefix_word_pair_proximity_docids,
|
||||
self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
|
||||
)
|
||||
// only accept exact matches on reverted positions
|
||||
self.get_db_word_pair_proximity_docids(left_prefix, right, proximity)
|
||||
}
|
||||
|
||||
pub fn get_db_word_fid_docids(
|
||||
|
@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.query("best s");
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 9, 6, 7, 8, 11, 12, 13, 15]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
@ -379,13 +379,13 @@ fn test_proximity_prefix_db() {
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"this is the best summer meal\"",
|
||||
"\"summer best\"",
|
||||
"\"this is the best meal of summer\"",
|
||||
"\"summer x best\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful summer day\"",
|
||||
"\"this is the best cooked meal of the summer\"",
|
||||
"\"this is the best meal of the summer\"",
|
||||
"\"summer x y best\"",
|
||||
"\"summer x best\"",
|
||||
"\"summer best\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
]
|
||||
"###);
|
||||
@ -423,17 +423,17 @@ fn test_proximity_prefix_db() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.query("best win");
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"this is the best winter meal\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"this is the best winter meal\"",
|
||||
"\"winter x y best\"",
|
||||
"\"winter x best\"",
|
||||
"\"winter best\"",
|
||||
@ -471,20 +471,20 @@ fn test_proximity_prefix_db() {
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.query("best wi");
|
||||
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
|
||||
insta::assert_snapshot!(format!("{document_scores:#?}"));
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"this is the best winter meal\"",
|
||||
"\"winter best\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"winter x best\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"winter x y best\"",
|
||||
"\"winter x best\"",
|
||||
"\"winter best\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 2,
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
|
@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 3,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 2,
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
|
@ -6,7 +6,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
rank: 4,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
@ -14,7 +14,7 @@ expression: "format!(\"{document_scores:#?}\")"
|
||||
[
|
||||
Proximity(
|
||||
Rank {
|
||||
rank: 1,
|
||||
rank: 2,
|
||||
max_rank: 4,
|
||||
},
|
||||
),
|
||||
|
@ -13,6 +13,7 @@ This module tests the `sort` ranking rule:
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
use meili_snap::insta;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::new::tests::collect_field_values;
|
||||
|
Reference in New Issue
Block a user