mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-11 07:06:30 +00:00
Merge #4888
4888: bring back v1.10.0 into main r=Kerollmops a=ManyTheFish Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
@ -339,10 +339,18 @@ impl ValuesCollection {
|
||||
fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
|
||||
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||
let mut detection = StrDetection::new(facet_string, locales);
|
||||
|
||||
// Detect the language of the facet string only if several locales are explicitly provided.
|
||||
let language = match locales {
|
||||
Some(&[language]) => Some(language),
|
||||
Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let token = Token {
|
||||
lemma: std::borrow::Cow::Borrowed(facet_string),
|
||||
script: detection.script(),
|
||||
language: detection.language(),
|
||||
language,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
@ -360,6 +360,7 @@ mod test {
|
||||
use super::*;
|
||||
|
||||
#[cfg(feature = "japanese")]
|
||||
#[cfg(not(feature = "chinese-pinyin"))]
|
||||
#[test]
|
||||
fn test_kanji_language_detection() {
|
||||
use crate::index::tests::TempIndex;
|
||||
|
@ -110,18 +110,18 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
fn get_value_from_keys<'v, K1, KC, DC>(
|
||||
fn get_value_from_keys<'v, K1, KC>(
|
||||
txn: &'ctx RoTxn<'_>,
|
||||
cache_key: K1,
|
||||
db_keys: &'v [KC::EItem],
|
||||
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
|
||||
db: Database<KC, Bytes>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
merger: MergeFn,
|
||||
) -> Result<Option<DC::DItem>>
|
||||
) -> Result<Option<RoaringBitmap>>
|
||||
where
|
||||
K1: Copy + Eq + Hash,
|
||||
KC: BytesEncode<'v>,
|
||||
DC: BytesDecodeOwned,
|
||||
KC::EItem: Sized,
|
||||
{
|
||||
if let Entry::Vacant(entry) = cache.entry(cache_key) {
|
||||
@ -146,16 +146,22 @@ impl<'ctx> DatabaseCache<'ctx> {
|
||||
entry.insert(bitmap_ptr);
|
||||
}
|
||||
|
||||
match cache.get(&cache_key).unwrap() {
|
||||
Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes)
|
||||
let bitmap_bytes = match cache.get(&cache_key).unwrap() {
|
||||
Some(Cow::Borrowed(bytes)) => bytes,
|
||||
Some(Cow::Owned(bytes)) => bytes.as_slice(),
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
match (bitmap_bytes, universe) {
|
||||
(bytes, Some(universe)) => {
|
||||
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
|
||||
.map(Some)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -207,12 +213,13 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
DatabaseCache::get_value_from_keys::<_, _>(
|
||||
self.txn,
|
||||
word,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
universe,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
@ -238,12 +245,13 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
DatabaseCache::get_value_from_keys::<_, _>(
|
||||
self.txn,
|
||||
word,
|
||||
&keys[..],
|
||||
&mut self.db_cache.exact_word_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
universe,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
@ -294,12 +302,13 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
DatabaseCache::get_value_from_keys::<_, _>(
|
||||
self.txn,
|
||||
prefix,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_prefix_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
universe,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
@ -325,12 +334,13 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
DatabaseCache::get_value_from_keys::<_, _>(
|
||||
self.txn,
|
||||
prefix,
|
||||
&keys[..],
|
||||
&mut self.db_cache.exact_word_prefix_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
universe,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
use self::vector_sort::VectorSort;
|
||||
use crate::localized_attributes_rules::LocalizedFieldIds;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::search::new::distinct::apply_distinct_rule;
|
||||
use crate::vector::Embedder;
|
||||
@ -671,9 +672,44 @@ pub fn execute_search(
|
||||
tokbuilder.words_dict(dictionary);
|
||||
}
|
||||
|
||||
if let Some(locales) = locales {
|
||||
tokbuilder.allow_list(locales);
|
||||
}
|
||||
let db_locales;
|
||||
match locales {
|
||||
Some(locales) => {
|
||||
if !locales.is_empty() {
|
||||
tokbuilder.allow_list(locales);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// If no locales are specified, we use the locales specified in the localized attributes rules
|
||||
let localized_attributes_rules = ctx.index.localized_attributes_rules(ctx.txn)?;
|
||||
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
|
||||
let searchable_fields = ctx.index.searchable_fields_ids(ctx.txn)?;
|
||||
|
||||
let localized_fields = match &ctx.restricted_fids {
|
||||
// If `attributesToSearchOn` is set, use the restricted list of field ids
|
||||
Some(restricted_fids) => {
|
||||
let iter = restricted_fids
|
||||
.exact
|
||||
.iter()
|
||||
.chain(restricted_fids.tolerant.iter())
|
||||
.map(|(fid, _)| *fid);
|
||||
|
||||
LocalizedFieldIds::new(&localized_attributes_rules, &fields_ids_map, iter)
|
||||
}
|
||||
// Otherwise use the full list of ids coming from the index searchable fields
|
||||
None => LocalizedFieldIds::new(
|
||||
&localized_attributes_rules,
|
||||
&fields_ids_map,
|
||||
searchable_fields.into_iter(),
|
||||
),
|
||||
};
|
||||
|
||||
db_locales = localized_fields.all_locales();
|
||||
if !db_locales.is_empty() {
|
||||
tokbuilder.allow_list(&db_locales);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let tokenizer = tokbuilder.build();
|
||||
drop(entered);
|
||||
|
@ -6,6 +6,7 @@ pub mod exactness;
|
||||
pub mod geo_sort;
|
||||
pub mod integration;
|
||||
#[cfg(feature = "all-tokenizations")]
|
||||
#[cfg(not(feature = "chinese-pinyin"))]
|
||||
pub mod language;
|
||||
pub mod ngram_split_words;
|
||||
pub mod proximity;
|
||||
|
Reference in New Issue
Block a user