Apply review suggestion

This commit is contained in:
Mubelotix
2025-08-05 10:25:14 +02:00
parent 48a5f4db2d
commit fc814b7537

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fmt::{Debug, Display}; use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included, Unbounded}; use std::ops::Bound::{self, Excluded, Included, Unbounded};
@ -14,9 +15,7 @@ use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError}; use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::search::facet::facet_range_search::find_docids_of_facet_within_bounds; use crate::search::facet::facet_range_search::find_docids_of_facet_within_bounds;
use crate::{ use crate::{
@ -427,44 +426,51 @@ impl<'a> Filter<'a> {
// It's used as a fallback. // It's used as a fallback.
let value = crate::normalize_facet(word.value()); let value = crate::normalize_facet(word.value());
let mut value2 = value.as_bytes().to_owned(); let mut value2 = value.as_bytes().to_owned();
if let Some(last) = value2.last_mut() {
if *last != 255 {
*last += 1;
if let Ok(value2) = String::from_utf8(value2) {
// The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
// We just increase the last letter to find the upper bound.
// The result could be invalid utf8, so it can fallback.
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds(
rtxn,
strings_db,
field_id,
&Included(&value),
&Excluded(&value2),
universe,
&mut docids,
)?;
return Ok(docids); let last = match value2.last_mut() {
} Some(last) => last,
None => {
// The prefix is empty, so all documents that have the field will match.
return index
.exists_faceted_documents_ids(rtxn, field_id)
.map_err(|e| e.into());
}
};
if *last == 255 {
// A 0xFF byte never occurs in valid utf8, so the prefix is invalid and no documents will match anyway
return Ok(RoaringBitmap::new());
}
*last += 1;
// This is very similar to `heed::Bytes` but its `EItem` is `&[u8]` instead of `[u8]`
struct BytesRef;
impl<'a> BytesEncode<'a> for BytesRef {
type EItem = &'a [u8];
fn bytes_encode(
item: &'a Self::EItem,
) -> std::result::Result<Cow<'a, [u8]>, heed::BoxedError> {
Ok(Cow::Borrowed(item))
} }
} }
let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() }; // The idea here is that "STARTS WITH baba" is the same as "baba <= value < babb".
let docids = strings_db // We just incremented the last letter to find the upper bound.
.prefix_iter(rtxn, &base)? // The upper bound may not be valid utf8, but lmdb doesn't care as it works over bytes.
.map(|result| -> Result<RoaringBitmap> { let mut docids = RoaringBitmap::new();
match result { let bytes_db =
Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap), index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRef>>();
Err(_e) => Err(InternalError::from(SerializationError::Decoding { find_docids_of_facet_within_bounds::<BytesRef>(
db_name: Some(FACET_ID_STRING_DOCIDS), rtxn,
}) bytes_db,
.into()), field_id,
} &Included(value.as_bytes()),
}) &Excluded(value2.as_slice()),
.union()?; universe,
&mut docids,
)?;
return Ok(docids); return Ok(docids);
} }