update cellulite to the latest version

This commit is contained in:
Tamo
2025-09-17 10:46:27 +02:00
parent 75b64003f4
commit f9183eb517
13 changed files with 34 additions and 37 deletions

View File

@@ -191,6 +191,7 @@ impl<'a> FilterCondition<'a> {
FilterCondition::VectorExists { .. }
| FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
| FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None,
}
}

View File

@@ -45,6 +45,7 @@ impl From<DateField> for Code {
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("{1}")]

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
pub mod batch_view;
pub mod batches;
pub mod compression;

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::{BufWriter, Write as _};
use std::path::PathBuf;

View File

@@ -48,6 +48,7 @@ pub enum PrimaryKey<'a> {
Nested { name: &'a str },
}
#[allow(clippy::large_enum_variant)]
pub enum DocumentIdExtractionError {
InvalidDocumentId(UserError),
MissingDocumentId,

View File

@@ -21,6 +21,7 @@ pub fn is_reserved_keyword(keyword: &str) -> bool {
.contains(&keyword)
}
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("internal: {0}.")]
@@ -115,6 +116,7 @@ pub enum FieldIdMapMissingEntry {
FieldName { field_name: String, process: &'static str },
}
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum UserError {
#[error(transparent)]

View File

@@ -244,7 +244,7 @@ impl Index {
let embedder_category_id =
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;
let cellulite = cellulite::Cellulite::create_from_env(&env, &mut wtxn)?;
let cellulite = cellulite::Cellulite::create_from_env(&env, &mut wtxn, "cellulite")?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
@@ -1972,13 +1972,17 @@ impl Index {
// Cellulite
const _CELLULITE_DB_CHECK: () = {
if Cellulite::nb_dbs() != 3 {
if Cellulite::nb_dbs() != 4 {
panic!("Cellulite database count has changed, please update the code accordingly.")
}
};
sizes.insert("cellulite_item", self.cellulite.item_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_cell", self.cellulite.cell_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_update", self.cellulite.update_db_stats(rtxn).map(compute_size)?);
sizes.insert(
"cellulite_metadata",
self.cellulite.metadata_db_stats(rtxn).map(compute_size)?,
);
Ok(sizes)
}

View File

@@ -278,30 +278,6 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
}
}
// Integration with steppe
impl steppe::Progress for Progress {
fn update(&self, sub_progress: impl steppe::Step) {
self.update_progress(Compat(sub_progress));
}
}
struct Compat<T: steppe::Step>(T);
impl<T: steppe::Step> Step for Compat<T> {
fn name(&self) -> Cow<'static, str> {
self.0.name()
}
fn current(&self) -> u32 {
self.0.current().try_into().unwrap_or(u32::MAX)
}
fn total(&self) -> u32 {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
impl Step for arroy::MainStep {
fn name(&self) -> Cow<'static, str> {
match self {

View File

@@ -872,10 +872,10 @@ impl<'a> Filter<'a> {
let result = index
.cellulite
.in_shape(rtxn, &polygon, &mut |_| ())
.map_err(InternalError::CelluliteError)?;
.in_shape(rtxn, &polygon)
.map_err(InternalError::CelluliteError)?; // TODO: error code in invalid
r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring
r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch
}
match (r1, r2) {
@@ -924,10 +924,10 @@ impl<'a> Filter<'a> {
let polygon = geo_types::Polygon::new(geo_types::LineString(coords), Vec::new());
let result = index
.cellulite
.in_shape(rtxn, &polygon, &mut |_| ())
.map_err(InternalError::CelluliteError)?;
.in_shape(rtxn, &polygon)
.map_err(InternalError::CelluliteError)?; // TODO: update error code
let result = roaring::RoaringBitmap::from_iter(result); // TODO: Remove once we update roaring
let result = roaring::RoaringBitmap::from_iter(result); // TODO: Remove once we update roaring in meilisearch
Ok(result)
}

View File

@@ -230,6 +230,7 @@ pub fn request_threads() -> &'static ThreadPoolNoAbort {
/// Extract chunked data and send it into lmdb_writer_sx sender:
/// - documents
#[allow(clippy::too_many_arguments)]
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
indexer: GrenadParameters,

View File

@@ -523,7 +523,7 @@ where
.is_some_and(|conf| conf.is_quantized);
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
pool.install(|| {
pool.install(|| -> Result<_> {
let mut writer =
VectorStore::new(backend, vector_store, embedder_index, was_quantized);
writer.build_and_quantize(
@@ -541,7 +541,7 @@ where
.map_err(InternalError::from)??;
}
self.index.cellulite.build(self.wtxn, &Progress::default())?;
self.index.cellulite.build(self.wtxn, &self.should_abort, &Progress::default())?;
self.execute_prefix_databases(
word_docids.map(MergerBuilder::build),

View File

@@ -189,7 +189,11 @@ where
println!("Building geojson");
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
index.cellulite.build(wtxn, indexing_context.progress)?;
index.cellulite.build(
wtxn,
&indexing_context.must_stop_processing,
indexing_context.progress,
)?;
indexing_context.progress.update_progress(IndexingStep::Finalizing);
@@ -322,7 +326,11 @@ where
.unwrap()?;
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
index.cellulite.build(wtxn, indexing_context.progress)?;
index.cellulite.build(
wtxn,
&indexing_context.must_stop_processing,
indexing_context.progress,
)?;
indexing_context.progress.update_progress(IndexingStep::Finalizing);

View File

@@ -17,7 +17,6 @@ use crate::vector::db::IndexEmbeddingConfig;
use crate::vector::settings::EmbedderAction;
use crate::vector::{Embedder, Embeddings, RuntimeEmbedders, VectorStore};
use crate::{DocumentId, Error, Index, InternalError, Result, UserError};
use crate::{Error, Index, InternalError, Result, UserError};
pub fn write_to_db(
mut writer_receiver: WriterBbqueueReceiver<'_>,