Cellulite is almost integrated into the new indexer. We still need to add the document ID to the GeoJSON pipeline.

This commit is contained in:
Tamo
2025-07-15 23:48:14 +02:00
parent b00a1dcc00
commit a921ee31ce
6 changed files with 99 additions and 1 deletions

View File

@ -16,8 +16,10 @@ use super::settings_changes::settings_change_extract;
use crate::documents::{FieldIdMapper, PrimaryKey};
use crate::progress::{EmbedderStats, MergingWordCache};
use crate::proximity::ProximityPrecision;
use crate::update::new::extract::cellulite::GeoJsonExtractor;
use crate::update::new::extract::EmbeddingExtractor;
use crate::update::new::indexer::settings_changes::DocumentsIndentifiers;
use crate::update::new::merger::merge_and_send_cellulite;
use crate::update::new::merger::merge_and_send_rtree;
use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases};
use crate::update::settings::SettingsDelta;
@ -317,6 +319,37 @@ where
&indexing_context.must_stop_processing,
)?;
}
// GeoJSON ("cellulite") extraction step. It is wrapped in a labeled block so
// the whole step can be skipped with `break 'cellulite` when
// `GeoJsonExtractor::new` returns `None`.
'cellulite: {
let Some(extractor) = GeoJsonExtractor::new(&rtxn, index, *indexing_context.grenad_parameters)?
else {
// No extractor available: leave the block without extracting or merging.
break 'cellulite;
};
// Thread-local datastore, pre-sized for the current number of rayon threads;
// it is lent to `extract` and then moved into `merge_and_send_cellulite`.
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
// Inner scope so the tracing span is only entered for the `extract` call.
let span = tracing::trace_span!(target: "indexing::documents::extract", "cellulite");
let _entered = span.enter();
extract(
document_changes,
&extractor,
indexing_context,
extractor_allocs,
&datastore,
IndexingStep::WritingGeoJson,
)?;
}
// Pass the datastore, together with the GeoJSON sender obtained from
// `extractor_sender.geojson()`, to the merge step. Errors are propagated with `?`.
merge_and_send_cellulite(
datastore,
&rtxn,
index,
extractor_sender.geojson(),
&indexing_context.must_stop_processing,
)?;
}
indexing_context.progress.update_progress(IndexingStep::WaitingForDatabaseWrites);
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);

View File

@ -72,6 +72,10 @@ pub fn write_to_db(
let embedding = large_vector.read_embedding(*dimensions);
writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
}
// Handles the `GeoJson` action: builds a `cellulite::Writer` from
// `index.cellulite` and adds the received payload through it.
ReceiverAction::GeoJson(geojson) => {
let cellulite = cellulite::Writer::new(index.cellulite);
// NOTE(review): `doc_id` is not bound by this arm's pattern (only `geojson`
// is), and the arm above uses the name `docid` — confirm where the document
// id is supposed to come from before relying on this call.
cellulite.add_item(wtxn, doc_id, &geojson)?;
}
}
// Every time there is a message in the channel we search