Add tracing to milli

This commit is contained in:
Louis Dureuil
2024-01-23 09:42:48 +01:00
parent 02e6c8a440
commit 5d7061682e
24 changed files with 150 additions and 29 deletions

View File

@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R
///
/// Returns the generated internal documents ids and a grenad reader
/// with the list of extracted words from the given chunk of documents.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -16,7 +16,7 @@ use crate::Result;
///
/// Returns a grenad reader with the list of extracted facet numbers and
/// documents ids from the given chunk of docid facet number positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
fid_docid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -15,7 +15,7 @@ use crate::{FieldId, Result};
///
/// Returns a grenad reader with the list of extracted facet strings and
/// documents ids from the given chunk of docid facet string positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -39,7 +39,7 @@ pub struct ExtractedFacetValues {
/// Returns the generated grenad reader containing the docid, the fid and the original value as key
/// and the normalized value as value extracted from the given chunk of documents.
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30;
///
/// Returns a grenad reader with the list of extracted field id word counts
/// and documents ids from the given chunk of docid word positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -67,7 +67,7 @@ impl VectorStateDelta {
/// Extracts the embedding vector contained in each document under the `_vectors` field.
///
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_vector_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,

View File

@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result};
///
/// The first returned reader is the one for normal word_docids, and the second one is for
/// exact_word_docids
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@ -135,6 +135,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
))
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_into_sorter(
document_id: DocumentId,
fid: FieldId,

View File

@ -19,7 +19,7 @@ use crate::{DocumentId, Result};
///
/// Returns a grenad reader with the list of extracted word pairs proximities and
/// documents ids from the given chunk of docid word positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if current_document_id.map_or(false, |id| id != document_id) {
puffin::profile_scope!("Document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter(
current_document_id.unwrap(),
&del_word_pair_proximity,
@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if let Some(document_id) = current_document_id {
puffin::profile_scope!("Final document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter(
document_id,
&del_word_pair_proximity,
@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
{
puffin::profile_scope!("sorter_into_reader");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
let _entered = span.enter();
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result};
///
/// Returns a grenad reader with the list of extracted words at positions and
/// documents ids from the given chunk of docid word positions.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
@ -94,6 +94,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
Ok(word_position_docids_reader)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_position_into_sorter(
document_id: DocumentId,
key_buffer: &mut Vec<u8>,

View File

@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result};
/// Extract data for each database from obkv documents in parallel.
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub(crate) fn data_from_obkv_documents(
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
@ -257,12 +258,20 @@ fn spawn_extraction_task<FE, FS, M>(
M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
M::Output: Send,
{
let current_span = tracing::Span::current();
rayon::spawn(move || {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_multiple_chunks");
let _entered = child_span.enter();
puffin::profile_scope!("extract_multiple_chunks", name);
let chunks: Result<M> =
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
let current_span = tracing::Span::current();
rayon::spawn(move || match chunks {
Ok(chunks) => {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "merge_multiple_chunks");
let _entered = child_span.enter();
debug!("merge {} database", name);
puffin::profile_scope!("merge_multiple_chunks", name);
let reader = chunks.merge(merge_fn, &indexer);