mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-06 04:36:32 +00:00
Merge branch 'main' into retrieve-query-vectors
This commit is contained in:
@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::ops::Bound::{self, Excluded, Included, Unbounded};
|
||||
@ -14,10 +15,9 @@ use super::facet_range_search;
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
|
||||
use crate::search::facet::facet_range_search::find_docids_of_facet_within_bounds;
|
||||
use crate::{
|
||||
distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap,
|
||||
FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result,
|
||||
@ -416,20 +416,56 @@ impl<'a> Filter<'a> {
|
||||
return Ok(docids);
|
||||
}
|
||||
Condition::StartsWith { keyword: _, word } => {
|
||||
// The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
|
||||
// We just incremented the last letter to find the upper bound.
|
||||
// The upper bound may not be valid utf8, but lmdb doesn't care as it works over bytes.
|
||||
|
||||
let value = crate::normalize_facet(word.value());
|
||||
let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() };
|
||||
let docids = strings_db
|
||||
.prefix_iter(rtxn, &base)?
|
||||
.map(|result| -> Result<RoaringBitmap> {
|
||||
match result {
|
||||
Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap),
|
||||
Err(_e) => Err(InternalError::from(SerializationError::Decoding {
|
||||
db_name: Some(FACET_ID_STRING_DOCIDS),
|
||||
})
|
||||
.into()),
|
||||
}
|
||||
})
|
||||
.union()?;
|
||||
let mut value2 = value.as_bytes().to_owned();
|
||||
|
||||
let last = match value2.last_mut() {
|
||||
Some(last) => last,
|
||||
None => {
|
||||
// The prefix is empty, so all documents that have the field will match.
|
||||
return index
|
||||
.exists_faceted_documents_ids(rtxn, field_id)
|
||||
.map_err(|e| e.into());
|
||||
}
|
||||
};
|
||||
|
||||
if *last == u8::MAX {
|
||||
// u8::MAX (0xFF) is never a valid UTF-8 byte, so it should not be able to reach meilisearch through a filter. Just in case it does, log a warning and return an empty result rather than overflowing on the increment below.
|
||||
tracing::warn!(
|
||||
"Found non utf-8 character in filter. That shouldn't be possible"
|
||||
);
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
*last += 1;
|
||||
|
||||
// This is very similar to `heed::Bytes` but its `EItem` is `&[u8]` instead of `[u8]`
|
||||
struct BytesRef;
|
||||
impl<'a> BytesEncode<'a> for BytesRef {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(
|
||||
item: &'a Self::EItem,
|
||||
) -> std::result::Result<Cow<'a, [u8]>, heed::BoxedError> {
|
||||
Ok(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
let mut docids = RoaringBitmap::new();
|
||||
let bytes_db =
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRef>>();
|
||||
find_docids_of_facet_within_bounds::<BytesRef>(
|
||||
rtxn,
|
||||
bytes_db,
|
||||
field_id,
|
||||
&Included(value.as_bytes()),
|
||||
&Excluded(value2.as_slice()),
|
||||
universe,
|
||||
&mut docids,
|
||||
)?;
|
||||
|
||||
return Ok(docids);
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let options = EnvOpenOptions::new();
|
||||
let mut options = options.read_txn_without_tls();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
options.map_size(10 * 1024 * 1024); // 10 MiB
|
||||
let index = Index::new(options, &path, true).unwrap();
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
@ -8,6 +8,7 @@ use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
|
||||
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
|
||||
use v1_14::Latest_V1_13_To_Latest_V1_14;
|
||||
use v1_15::Latest_V1_14_To_Latest_V1_15;
|
||||
use v1_16::Latest_V1_16_To_V1_17_0;
|
||||
|
||||
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use crate::progress::{Progress, VariableNameStep};
|
||||
@ -34,6 +35,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
|
||||
&Latest_V1_13_To_Latest_V1_14 {},
|
||||
&Latest_V1_14_To_Latest_V1_15 {},
|
||||
&Latest_V1_15_To_V1_16_0 {},
|
||||
&Latest_V1_16_To_V1_17_0 {},
|
||||
// This is the last upgrade function; it will be called when the index is up to date.
|
||||
// Any other upgrade function should be added before this one.
|
||||
&ToCurrentNoOp {},
|
||||
@ -62,6 +64,7 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
|
||||
// We must handle the current version in the match because, in case of a failure, some indexes may have been upgraded but not others.
|
||||
(1, 15, _) => function_index!(6),
|
||||
(1, 16, _) => function_index!(7),
|
||||
(1, 17, _) => function_index!(8),
|
||||
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here, to force us to manually
|
||||
// consider the dumpless upgrade.
|
||||
(_major, _minor, _patch) => return None,
|
||||
|
@ -46,3 +46,22 @@ impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {
|
||||
(1, 16, 0)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub(super) struct Latest_V1_16_To_V1_17_0();
|
||||
|
||||
impl UpgradeIndex for Latest_V1_16_To_V1_17_0 {
|
||||
fn upgrade(
|
||||
&self,
|
||||
_wtxn: &mut RwTxn,
|
||||
_index: &Index,
|
||||
_original: (u32, u32, u32),
|
||||
_progress: Progress,
|
||||
) -> Result<bool> {
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
fn target_version(&self) -> (u32, u32, u32) {
|
||||
(1, 17, 0)
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user