Merge branch 'main' into release-v1.14.0-tmp

This commit is contained in:
Tamo
2025-04-14 12:35:47 +02:00
committed by GitHub
113 changed files with 1268 additions and 852 deletions

View File

@@ -121,7 +121,7 @@ impl<'extractor> BalancedCaches<'extractor> {
}
pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> Result<()> {
- if self.max_memory.map_or(false, |mm| self.alloc.allocated_bytes() >= mm) {
+ if self.max_memory.is_some_and(|mm| self.alloc.allocated_bytes() >= mm) {
self.start_spilling()?;
}
@@ -138,7 +138,7 @@ impl<'extractor> BalancedCaches<'extractor> {
}
pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> Result<()> {
- if self.max_memory.map_or(false, |mm| self.alloc.allocated_bytes() >= mm) {
+ if self.max_memory.is_some_and(|mm| self.alloc.allocated_bytes() >= mm) {
self.start_spilling()?;
}
@@ -623,7 +623,7 @@ pub struct FrozenDelAddBbbul<'bump, B> {
pub add: Option<FrozenBbbul<'bump, B>>,
}
- impl<'bump, B> FrozenDelAddBbbul<'bump, B> {
+ impl<B> FrozenDelAddBbbul<'_, B> {
fn is_empty(&self) -> bool {
self.del.is_none() && self.add.is_none()
}

View File

@@ -31,7 +31,7 @@ pub struct DocumentExtractorData {
pub field_distribution_delta: HashMap<String, i64>,
}
- impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
+ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
type Data = FullySend<RefCell<DocumentExtractorData>>;
fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {

View File

@@ -37,7 +37,7 @@ pub struct FacetedExtractorData<'a, 'b> {
is_geo_enabled: bool,
}
- impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
+ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'_, '_> {
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {

View File

@@ -92,7 +92,7 @@ pub struct FrozenGeoExtractorData<'extractor> {
pub spilled_inserted: Option<BufReader<File>>,
}
- impl<'extractor> FrozenGeoExtractorData<'extractor> {
+ impl FrozenGeoExtractorData<'_> {
pub fn iter_and_clear_removed(
&mut self,
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
@@ -160,7 +160,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
for change in changes {
if data_ref.spilled_removed.is_none()
- && max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm)
+ && max_memory.is_some_and(|mm| context.extractor_alloc.allocated_bytes() >= mm)
{
// We must spill as we allocated too much memory
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
@@ -258,9 +258,11 @@ pub fn extract_geo_coordinates(
Value::Null => return Ok(None),
Value::Object(map) => map,
value => {
- return Err(
- GeoError::NotAnObject { document_id: Value::from(external_id), value }.into()
- )
+ return Err(Box::new(GeoError::NotAnObject {
+ document_id: Value::from(external_id),
+ value,
+ })
+ .into())
}
};
@@ -269,23 +271,29 @@ pub fn extract_geo_coordinates(
if geo.is_empty() {
[lat, lng]
} else {
- return Err(GeoError::UnexpectedExtraFields {
+ return Err(Box::new(GeoError::UnexpectedExtraFields {
document_id: Value::from(external_id),
value: Value::from(geo),
- }
+ })
.into());
}
}
(Some(_), None) => {
- return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into())
+ return Err(Box::new(GeoError::MissingLongitude {
+ document_id: Value::from(external_id),
+ })
+ .into())
}
(None, Some(_)) => {
- return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into())
+ return Err(Box::new(GeoError::MissingLatitude {
+ document_id: Value::from(external_id),
+ })
+ .into())
}
(None, None) => {
- return Err(GeoError::MissingLatitudeAndLongitude {
+ return Err(Box::new(GeoError::MissingLatitudeAndLongitude {
document_id: Value::from(external_id),
- }
+ })
.into())
}
};
@@ -293,16 +301,18 @@ pub fn extract_geo_coordinates(
match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
(Ok(lat), Ok(lng)) => Ok(Some([lat, lng])),
(Ok(_), Err(value)) => {
- Err(GeoError::BadLongitude { document_id: Value::from(external_id), value }.into())
+ Err(Box::new(GeoError::BadLongitude { document_id: Value::from(external_id), value })
+ .into())
}
(Err(value), Ok(_)) => {
- Err(GeoError::BadLatitude { document_id: Value::from(external_id), value }.into())
+ Err(Box::new(GeoError::BadLatitude { document_id: Value::from(external_id), value })
+ .into())
}
- (Err(lat), Err(lng)) => Err(GeoError::BadLatitudeAndLongitude {
+ (Err(lat), Err(lng)) => Err(Box::new(GeoError::BadLatitudeAndLongitude {
document_id: Value::from(external_id),
lat,
lng,
- }
+ })
.into()),
}
}

View File

@@ -31,7 +31,7 @@ pub struct WordDocidsBalancedCaches<'extractor> {
current_docid: Option<DocumentId>,
}
- unsafe impl<'extractor> MostlySend for WordDocidsBalancedCaches<'extractor> {}
+ unsafe impl MostlySend for WordDocidsBalancedCaches<'_> {}
impl<'extractor> WordDocidsBalancedCaches<'extractor> {
pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
@@ -78,7 +78,7 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
buffer.extend_from_slice(&position.to_be_bytes());
self.word_position_docids.insert_add_u32(&buffer, docid)?;
- if self.current_docid.map_or(false, |id| docid != id) {
+ if self.current_docid.is_some_and(|id| docid != id) {
self.flush_fid_word_count(&mut buffer)?;
}
@@ -123,7 +123,7 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
buffer.extend_from_slice(&position.to_be_bytes());
self.word_position_docids.insert_del_u32(&buffer, docid)?;
- if self.current_docid.map_or(false, |id| docid != id) {
+ if self.current_docid.is_some_and(|id| docid != id) {
self.flush_fid_word_count(&mut buffer)?;
}
@@ -212,7 +212,7 @@ pub struct WordDocidsExtractorData<'a> {
searchable_attributes: Option<Vec<&'a str>>,
}
- impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
+ impl<'extractor> Extractor<'extractor> for WordDocidsExtractorData<'_> {
type Data = RefCell<Option<WordDocidsBalancedCaches<'extractor>>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {

View File

@@ -25,7 +25,7 @@ pub struct WordPairProximityDocidsExtractorData<'a> {
buckets: usize,
}
- impl<'a, 'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractorData<'a> {
+ impl<'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractorData<'_> {
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
@@ -270,7 +270,7 @@ fn process_document_tokens<'doc>(
// drain the proximity window until the head word is considered close to the word we are inserting.
while word_positions
.front()
- .map_or(false, |(_w, p)| index_proximity(*p as u32, pos as u32) >= MAX_DISTANCE)
+ .is_some_and(|(_w, p)| index_proximity(*p as u32, pos as u32) >= MAX_DISTANCE)
{
word_positions_into_word_pair_proximity(word_positions, word_pair_proximity);
}

View File

@@ -22,7 +22,7 @@ pub struct DocumentTokenizer<'a> {
pub max_positions_per_attributes: u32,
}
- impl<'a> DocumentTokenizer<'a> {
+ impl DocumentTokenizer<'_> {
pub fn tokenize_document<'doc>(
&self,
document: impl Document<'doc>,

View File

@@ -43,7 +43,7 @@ pub struct EmbeddingExtractorData<'extractor>(
unsafe impl MostlySend for EmbeddingExtractorData<'_> {}
- impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
+ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
type Data = RefCell<EmbeddingExtractorData<'extractor>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {