Merge #5351
Some checks failed
Run the indexing fuzzer / Setup the action (push) Failing after 2m50s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Look for flaky tests / flaky (push) Failing after 19s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 2s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 1m51s
Test suite / Tests almost all features (push) Failing after 7m7s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled

5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops

This PR brings back the changes made in v1.13 into the main branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clémentine <clementine@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot]
2025-02-18 08:05:02 +00:00
committed by GitHub
101 changed files with 8351 additions and 1518 deletions

View File

@ -234,7 +234,7 @@ where
);
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
extract(
@ -247,7 +247,7 @@ where
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter();
for config in &mut index_embeddings {

View File

@ -1,5 +1,5 @@
use std::sync::atomic::AtomicBool;
use std::sync::RwLock;
use std::sync::{Once, RwLock};
use std::thread::{self, Builder};
use big_s::S;
@ -33,6 +33,8 @@ mod post_processing;
mod update_by_function;
mod write;
static LOG_MEMORY_METRICS_ONCE: Once = Once::new();
/// This is the main function of this crate.
///
/// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`].
@ -93,6 +95,15 @@ where
},
);
LOG_MEMORY_METRICS_ONCE.call_once(|| {
tracing::debug!(
"Indexation allocated memory metrics - \
Total BBQueue size: {total_bbbuffer_capacity}, \
Total extractor memory: {:?}",
grenad_parameters.max_memory,
);
});
let (extractor_sender, writer_receiver) = pool
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
.unwrap();
@ -179,13 +190,16 @@ where
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
build_vectors(
index,
wtxn,
index_embeddings,
&mut arroy_writers,
&indexing_context.must_stop_processing,
)?;
pool.install(|| {
build_vectors(
index,
wtxn,
index_embeddings,
&mut arroy_writers,
&indexing_context.must_stop_processing,
)
})
.unwrap()?;
post_processing::post_process(
indexing_context,

View File

@ -72,11 +72,23 @@ pub(super) fn write_to_db(
&mut aligned_embedding,
)?;
}
write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;
let direct_attempts = writer_receiver.sent_messages_attempts();
let blocking_attempts = writer_receiver.blocking_sent_messages_attempts();
let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0;
tracing::debug!(
"Channel congestion metrics - \
Attempts: {direct_attempts}, \
Blocked attempts: {blocking_attempts} \
({congestion_pct:.1}% congestion)"
);
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::vectors")]
#[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")]
pub(super) fn build_vectors<MSP>(
index: &Index,
wtxn: &mut RwTxn<'_>,