add cellulite to the index

This commit is contained in:
Tamo
2025-07-15 23:14:06 +02:00
parent 134237d1eb
commit 56ae029fa5
3 changed files with 17 additions and 1 deletions

View File

@ -19,6 +19,7 @@ bstr = "1.12.0"
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] } bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.9.7", default-features = false } charabia = { version = "0.9.7", default-features = false }
cellulite = { git = "https://github.com/irevoire/cellulite", branch = "main"}
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
convert_case = "0.8.0" convert_case = "0.8.0"
crossbeam-channel = "0.5.15" crossbeam-channel = "0.5.15"
@ -27,6 +28,7 @@ either = { version = "1.15.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" } flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geojson = "0.24.2"
geoutils = "0.5.1" geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = [ grenad = { version = "0.5.0", default-features = false, features = [
"rayon", "rayon",

View File

@ -115,6 +115,7 @@ pub mod db_name {
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id"; pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
pub const VECTOR_STORE: &str = "vector-arroy"; pub const VECTOR_STORE: &str = "vector-arroy";
pub const CELLULITE: &str = "cellulite";
pub const DOCUMENTS: &str = "documents"; pub const DOCUMENTS: &str = "documents";
} }
const NUMBER_OF_DBS: u32 = 25; const NUMBER_OF_DBS: u32 = 25;
@ -183,6 +184,9 @@ pub struct Index {
/// Vector store based on hannoy™. /// Vector store based on hannoy™.
pub vector_store: hannoy::Database<Unspecified>, pub vector_store: hannoy::Database<Unspecified>,
/// Geo store based on cellulite™.
pub cellulite: cellulite::Database,
/// Maps the document id to the document as an obkv store. /// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>, pub(crate) documents: Database<BEU32, ObkvCodec>,
} }
@ -239,6 +243,7 @@ impl Index {
let embedder_category_id = let embedder_category_id =
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?; env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?; let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;
let cellulite = env.create_database(&mut wtxn, Some(CELLULITE))?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
@ -267,6 +272,7 @@ impl Index {
field_id_docid_facet_strings, field_id_docid_facet_strings,
vector_store, vector_store,
embedder_category_id, embedder_category_id,
cellulite,
documents, documents,
}; };
if this.get_version(&wtxn)?.is_none() && creation { if this.get_version(&wtxn)?.is_none() && creation {
@ -1052,6 +1058,13 @@ impl Index {
Ok(geo_filter) Ok(geo_filter)
} }
/// Returns `true` if at least one of the filterable attributes rules
/// reports GeoJSON support (i.e. its `has_geojson()` returns `true`).
///
/// # Errors
///
/// Propagates any error returned by `filterable_attributes_rules` while
/// reading the rules through `rtxn`.
pub fn is_geojson_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
    // Fetch the rules first so the fallible lookup is separated from the
    // pure predicate evaluated over them.
    let rules = self.filterable_attributes_rules(rtxn)?;
    Ok(rules.iter().any(|rule| rule.has_geojson()))
}
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> { pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
let asc_desc_fields = self let asc_desc_fields = self
.criteria(rtxn)? .criteria(rtxn)?
@ -1882,6 +1895,7 @@ impl Index {
field_id_docid_facet_strings, field_id_docid_facet_strings,
vector_store, vector_store,
embedder_category_id, embedder_category_id,
cellulite: _,
documents, documents,
} = self; } = self;

View File

@ -87,7 +87,7 @@ pub use self::search::{
}; };
pub use self::update::ChannelCongestion; pub use self::update::ChannelCongestion;
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T, E = error::Error> = std::result::Result<T, E>;
pub type Attribute = u32; pub type Attribute = u32;
pub type BEU16 = heed::types::U16<heed::byteorder::BE>; pub type BEU16 = heed::types::U16<heed::byteorder::BE>;