From f9183eb5177aac26e89d9b5a7430e59ca55aa6b0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 17 Sep 2025 10:46:27 +0200 Subject: [PATCH] update cellulite to the latest version --- crates/filter-parser/src/lib.rs | 1 + crates/index-scheduler/src/error.rs | 1 + crates/meilisearch-types/src/lib.rs | 2 ++ crates/meilitool/src/main.rs | 2 ++ crates/milli/src/documents/primary_key.rs | 1 + crates/milli/src/error.rs | 2 ++ crates/milli/src/index.rs | 8 +++++-- crates/milli/src/progress.rs | 24 ------------------- crates/milli/src/search/facet/filter.rs | 12 +++++----- .../src/update/index_documents/extract/mod.rs | 1 + .../milli/src/update/index_documents/mod.rs | 4 ++-- crates/milli/src/update/new/indexer/mod.rs | 12 ++++++++-- crates/milli/src/update/new/indexer/write.rs | 1 - 13 files changed, 34 insertions(+), 37 deletions(-) diff --git a/crates/filter-parser/src/lib.rs b/crates/filter-parser/src/lib.rs index c2cc8b991..e9b2b090b 100644 --- a/crates/filter-parser/src/lib.rs +++ b/crates/filter-parser/src/lib.rs @@ -191,6 +191,7 @@ impl<'a> FilterCondition<'a> { FilterCondition::VectorExists { .. } | FilterCondition::GeoLowerThan { .. } | FilterCondition::GeoBoundingBox { .. } + | FilterCondition::GeoPolygon { .. } | FilterCondition::In { .. } => None, } } diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index e67219808..332b7e040 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -45,6 +45,7 @@ impl From for Code { } } +#[allow(clippy::large_enum_variant)] #[derive(Error, Debug)] pub enum Error { #[error("{1}")] diff --git a/crates/meilisearch-types/src/lib.rs b/crates/meilisearch-types/src/lib.rs index 6c013f3bb..31fc1e215 100644 --- a/crates/meilisearch-types/src/lib.rs +++ b/crates/meilisearch-types/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(clippy::result_large_err)] + pub mod batch_view; pub mod batches; pub mod compression; diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 831bcf209..e4f23a7c4 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -1,3 +1,5 @@ +#![allow(clippy::result_large_err)] + use std::fs::{read_dir, read_to_string, remove_file, File}; use std::io::{BufWriter, Write as _}; use std::path::PathBuf; diff --git a/crates/milli/src/documents/primary_key.rs b/crates/milli/src/documents/primary_key.rs index 415453349..7da6a968d 100644 --- a/crates/milli/src/documents/primary_key.rs +++ b/crates/milli/src/documents/primary_key.rs @@ -48,6 +48,7 @@ pub enum PrimaryKey<'a> { Nested { name: &'a str }, } +#[allow(clippy::large_enum_variant)] pub enum DocumentIdExtractionError { InvalidDocumentId(UserError), MissingDocumentId, diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 4d4346a98..be4f98922 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -21,6 +21,7 @@ pub fn is_reserved_keyword(keyword: &str) -> bool { .contains(&keyword) } +#[allow(clippy::large_enum_variant)] #[derive(Error, Debug)] pub enum Error { #[error("internal: {0}.")] @@ -115,6 +116,7 @@ pub enum FieldIdMapMissingEntry { FieldName { field_name: String, process: &'static str }, } +#[allow(clippy::large_enum_variant)] #[derive(Error, Debug)] pub enum UserError { #[error(transparent)] diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index ce72076e6..6b59bab47 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -244,7 +244,7 @@ impl Index { let embedder_category_id = env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?; let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?; - let cellulite = cellulite::Cellulite::create_from_env(&env, &mut wtxn)?; + let cellulite = cellulite::Cellulite::create_from_env(&env, &mut wtxn, "cellulite")?; let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; @@ -1972,13 +1972,17 @@ impl Index { // Cellulite const _CELLULITE_DB_CHECK: () = { - if Cellulite::nb_dbs() != 3 { + if Cellulite::nb_dbs() != 4 { panic!("Cellulite database count has changed, please update the code accordingly.") } }; sizes.insert("cellulite_item", self.cellulite.item_db_stats(rtxn).map(compute_size)?); sizes.insert("cellulite_cell", self.cellulite.cell_db_stats(rtxn).map(compute_size)?); sizes.insert("cellulite_update", self.cellulite.update_db_stats(rtxn).map(compute_size)?); + sizes.insert( + "cellulite_metadata", + self.cellulite.metadata_db_stats(rtxn).map(compute_size)?, + ); Ok(sizes) } diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index b7f7ae887..1aa34bf95 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -278,30 +278,6 @@ impl Step for VariableNameStep { } } -// Integration with steppe - -impl steppe::Progress for Progress { - fn update(&self, sub_progress: impl steppe::Step) { - self.update_progress(Compat(sub_progress)); - } -} - -struct Compat(T); - -impl Step for Compat { - fn name(&self) -> Cow<'static, str> { - self.0.name() - } - - fn current(&self) -> u32 { - self.0.current().try_into().unwrap_or(u32::MAX) - } - - fn total(&self) -> u32 { - self.0.total().try_into().unwrap_or(u32::MAX) - } -} - impl Step for arroy::MainStep { fn name(&self) -> Cow<'static, str> { match self { diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 1d6265b0a..a99ee92da 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -872,10 +872,10 @@ impl<'a> Filter<'a> { let result = index .cellulite - .in_shape(rtxn, &polygon, &mut |_| ()) - .map_err(InternalError::CelluliteError)?; + .in_shape(rtxn, &polygon) + .map_err(InternalError::CelluliteError)?; // TODO: error code in invalid - r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring + r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch } match (r1, r2) { @@ -924,10 +924,10 @@ impl<'a> Filter<'a> { let polygon = geo_types::Polygon::new(geo_types::LineString(coords), Vec::new()); let result = index .cellulite - .in_shape(rtxn, &polygon, &mut |_| ()) - .map_err(InternalError::CelluliteError)?; + .in_shape(rtxn, &polygon) + .map_err(InternalError::CelluliteError)?; // TODO: update error code - let result = roaring::RoaringBitmap::from_iter(result); // TODO: Remove once we update roaring + let result = roaring::RoaringBitmap::from_iter(result); // TODO: Remove once we update roaring in meilisearch Ok(result) } diff --git a/crates/milli/src/update/index_documents/extract/mod.rs b/crates/milli/src/update/index_documents/extract/mod.rs index 1ff9f4b95..886801696 100644 --- a/crates/milli/src/update/index_documents/extract/mod.rs +++ b/crates/milli/src/update/index_documents/extract/mod.rs @@ -230,6 +230,7 @@ pub fn request_threads() -> &'static ThreadPoolNoAbort { /// Extract chunked data and send it into lmdb_writer_sx sender: /// - documents +#[allow(clippy::too_many_arguments)] fn send_original_documents_data( original_documents_chunk: Result>>, indexer: GrenadParameters, diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index ed9c50108..857afdee5 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -523,7 +523,7 @@ where .is_some_and(|conf| conf.is_quantized); let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized); - pool.install(|| { + pool.install(|| -> Result<_> { let mut writer = VectorStore::new(backend, vector_store, embedder_index, was_quantized); writer.build_and_quantize( @@ -541,7 +541,7 @@ where .map_err(InternalError::from)??; } - self.index.cellulite.build(self.wtxn, &Progress::default())?; + self.index.cellulite.build(self.wtxn, &self.should_abort, &Progress::default())?; self.execute_prefix_databases( word_docids.map(MergerBuilder::build), diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index cd7bd50bb..792ae2c0d 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -189,7 +189,11 @@ where println!("Building geojson"); indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson); - index.cellulite.build(wtxn, indexing_context.progress)?; + index.cellulite.build( + wtxn, + &indexing_context.must_stop_processing, + indexing_context.progress, + )?; indexing_context.progress.update_progress(IndexingStep::Finalizing); @@ -322,7 +326,11 @@ where .unwrap()?; indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson); - index.cellulite.build(wtxn, indexing_context.progress)?; + index.cellulite.build( + wtxn, + &indexing_context.must_stop_processing, + indexing_context.progress, + )?; indexing_context.progress.update_progress(IndexingStep::Finalizing); diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index 5b1c74f7f..5fbbbbc76 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -17,7 +17,6 @@ use crate::vector::db::IndexEmbeddingConfig; use crate::vector::settings::EmbedderAction; use crate::vector::{Embedder, Embeddings, RuntimeEmbedders, VectorStore}; use crate::{DocumentId, Error, Index, InternalError, Result, UserError}; -use crate::{Error, Index, InternalError, Result, UserError}; pub fn write_to_db( mut writer_receiver: WriterBbqueueReceiver<'_>,