Merge branch 'main' into release-v1.14.0-tmp

Author: Tamo
Date: 2025-04-14 12:35:47 +02:00
Committed by: GitHub

113 changed files with 1268 additions and 852 deletions

@@ -115,7 +115,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
         if let Some(geo_value) = geo_field_id.and_then(|fid| document.get(fid)) {
             if let Err(user_error) = validate_geo_from_json(&document_id, geo_value)? {
-                return Ok(Err(UserError::from(user_error)));
+                return Ok(Err(UserError::from(Box::new(user_error))));
             }
         }

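Boxing the error changes how big the Err side of the Result is: a minimal sketch of the effect, using a hypothetical BigError in place of the real geo error type.

    // Hypothetical stand-in; only the size relationship is the point.
    struct BigError {
        document_id: String,
        context: [u8; 128],
    }

    fn main() {
        // The unboxed Result must reserve space for the whole error inline,
        // while the boxed one only stores a pointer on the Err side.
        assert!(
            std::mem::size_of::<Result<u32, Box<BigError>>>()
                < std::mem::size_of::<Result<u32, BigError>>()
        );
    }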

@@ -160,11 +160,11 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
             let del_geo_support = settings_diff
                 .old
                 .geo_fields_ids
-                .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+                .is_some_and(|(lat, lng)| field_id == lat || field_id == lng);
             let add_geo_support = settings_diff
                 .new
                 .geo_fields_ids
-                .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+                .is_some_and(|(lat, lng)| field_id == lat || field_id == lng);
             let del_filterable_values =
                 del_value.map(|value| extract_facet_values(&value, del_geo_support));
             let add_filterable_values =

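Behaviorally, opt.map_or(false, f) and opt.is_some_and(f) are identical; the rewrite just names the intent (is_some_and has been stable since Rust 1.70, and clippy suggests it via the unnecessary_map_or lint). A quick equivalence check in the shape used above:

    fn main() {
        let geo_fields_ids: Option<(u16, u16)> = Some((1, 2));
        let field_id: u16 = 2;
        // Both return false for None and apply the predicate otherwise.
        assert_eq!(
            geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
            geo_fields_ids.is_some_and(|(lat, lng)| field_id == lat || field_id == lng),
        );
    }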

@@ -80,22 +80,28 @@ fn extract_lat_lng(
             let (lat, lng) = match (lat, lng) {
                 (Some(lat), Some(lng)) => (lat, lng),
                 (Some(_), None) => {
-                    return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+                    return Err(
+                        Box::new(GeoError::MissingLatitude { document_id: document_id() }).into()
+                    )
                 }
                 (None, Some(_)) => {
-                    return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+                    return Err(
+                        Box::new(GeoError::MissingLongitude { document_id: document_id() }).into()
+                    )
                 }
                 (None, None) => return Ok(None),
             };
             let lat = extract_finite_float_from_value(
                 serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
             )
-            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })
+            .map_err(Box::new)?;
             let lng = extract_finite_float_from_value(
                 serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
             )
-            .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
+            .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })
+            .map_err(Box::new)?;
             Ok(Some([lat, lng]))
         }
         None => Ok(None),

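The Box::new(...).into() and .map_err(Box::new)? conversions above only compile if the surrounding error enum has a From impl for the boxed type. A minimal sketch of that wiring, with hypothetical GeoError and Error enums mirroring the shape of this hunk:

    #[derive(Debug)]
    enum GeoError {
        MissingLatitude { document_id: String },
    }

    #[derive(Debug)]
    enum Error {
        // Storing the Box keeps this variant pointer-sized inside Error.
        Geo(Box<GeoError>),
    }

    impl From<Box<GeoError>> for Error {
        fn from(e: Box<GeoError>) -> Self {
            Error::Geo(e)
        }
    }

    fn require_lat(document_id: String, lat: Option<f64>) -> Result<f64, Error> {
        // Same shape as the diff: build the error, box it, convert via From.
        lat.ok_or_else(|| Box::new(GeoError::MissingLatitude { document_id }).into())
    }

    fn main() {
        assert!(require_lat("doc-1".into(), Some(48.85)).is_ok());
        assert!(require_lat("doc-1".into(), None).is_err());
    }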

@@ -69,7 +69,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         let document_id = u32::from_be_bytes(document_id_bytes);
         // if we change document, we fill the sorter
-        if current_document_id.map_or(false, |id| id != document_id) {
+        if current_document_id.is_some_and(|id| id != document_id) {
             // FIXME: span inside of a hot loop might degrade performance and create big reports
             let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
             let _entered = span.enter();
@@ -96,7 +96,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         if let Some(deletion) = KvReaderDelAdd::from_slice(value).get(DelAdd::Deletion) {
             for (position, word) in KvReaderU16::from_slice(deletion).iter() {
                 // drain the proximity window until the head word is considered close to the word we are inserting.
-                while del_word_positions.front().map_or(false, |(_w, p)| {
+                while del_word_positions.front().is_some_and(|(_w, p)| {
                     index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
                 }) {
                     word_positions_into_word_pair_proximity(
@@ -129,7 +129,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         if let Some(addition) = KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
             for (position, word) in KvReaderU16::from_slice(addition).iter() {
                 // drain the proximity window until the head word is considered close to the word we are inserting.
-                while add_word_positions.front().map_or(false, |(_w, p)| {
+                while add_word_positions.front().is_some_and(|(_w, p)| {
                     index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
                 }) {
                     word_positions_into_word_pair_proximity(

@@ -46,7 +46,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
             .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
         let document_id = DocumentId::from_be_bytes(document_id_bytes);
-        if current_document_id.map_or(false, |id| document_id != id) {
+        if current_document_id.is_some_and(|id| document_id != id) {
             words_position_into_sorter(
                 current_document_id.unwrap(),
                 &mut key_buffer,

@@ -281,7 +281,7 @@ fn send_original_documents_data(
         };
         if !(remove_vectors.is_empty()
             && manual_vectors.is_empty()
-            && embeddings.as_ref().map_or(true, |e| e.is_empty()))
+            && embeddings.as_ref().is_none_or(|e| e.is_empty()))
         {
             let _ = lmdb_writer_sx.send(Ok(TypedChunk::VectorPoints {
                 remove_vectors,

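map_or(true, ...) is the dual case: the condition is vacuously true when there are no embeddings at all, and is_none_or (stable since Rust 1.82) says exactly that. An equivalence check in miniature:

    fn main() {
        for embeddings in [None, Some(vec![]), Some(vec![1.0f32])] {
            // Both are true when the Option is None or the inner Vec is empty.
            assert_eq!(
                embeddings.as_ref().map_or(true, |e| e.is_empty()),
                embeddings.as_ref().is_none_or(|e| e.is_empty()),
            );
        }
    }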

@@ -514,12 +514,9 @@ where
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
             let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
-            let was_quantized = settings_diff
-                .old
-                .embedding_configs
-                .get(&embedder_name)
-                .map_or(false, |conf| conf.2);
-            let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
+            let was_quantized =
+                settings_diff.old.embedding_configs.get(&embedder_name).is_some_and(|conf| conf.2);
+            let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
             pool.install(|| {
                 let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);

@@ -197,7 +197,7 @@ impl<'a, 'i> Transform<'a, 'i> {
             // drop_and_reuse is called instead of .clear() to communicate to the compiler that field_buffer
             // does not keep references from the cursor between loop iterations
             let mut field_buffer_cache = drop_and_reuse(field_buffer);
-            if self.indexer_settings.log_every_n.map_or(false, |len| documents_count % len == 0) {
+            if self.indexer_settings.log_every_n.is_some_and(|len| documents_count % len == 0) {
                 progress_callback(UpdateIndexingStep::RemapDocumentAddition {
                     documents_seen: documents_count,
                 });

@@ -55,7 +55,7 @@ impl ChunkAccumulator {
         match self
             .inner
             .iter()
-            .position(|right| right.first().map_or(false, |right| chunk.mergeable_with(right)))
+            .position(|right| right.first().is_some_and(|right| chunk.mergeable_with(right)))
         {
             Some(position) => {
                 let v = self.inner.get_mut(position).unwrap();
@@ -664,11 +664,8 @@ pub(crate) fn write_typed_chunk_into_index(
             let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
-            let binary_quantized = settings_diff
-                .old
-                .embedding_configs
-                .get(&embedder_name)
-                .map_or(false, |conf| conf.2);
+            let binary_quantized =
+                settings_diff.old.embedding_configs.get(&embedder_name).is_some_and(|conf| conf.2);
             // FIXME: allow customizing distance
             let writer = ArroyWrapper::new(index.vector_arroy, embedder_index, binary_quantized);