continue previous commit

This commit is contained in:
Tamo
2025-09-18 16:38:55 +02:00
parent d639155fe5
commit 35e4e38961
4 changed files with 52 additions and 13 deletions

View File

@@ -10,14 +10,22 @@ use rhai::EvalAltResult;
use serde_json::Value; use serde_json::Value;
use thiserror::Error; use thiserror::Error;
use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::documents::{self, DocumentsBatchCursorError}; use crate::documents::{self, DocumentsBatchCursorError};
use crate::thread_pool_no_abort::PanicCatched; use crate::thread_pool_no_abort::PanicCatched;
use crate::vector::settings::EmbeddingSettings; use crate::vector::settings::EmbeddingSettings;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError}; use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool { pub fn is_reserved_keyword(keyword: &str) -> bool {
[RESERVED_GEO_FIELD_NAME, "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"] [
RESERVED_GEO_FIELD_NAME,
RESERVED_GEOJSON_FIELD_NAME,
"_geoDistance",
"_geoPoint",
"_geoRadius",
"_geoBoundingBox",
"_geoPolygon",
]
.contains(&keyword) .contains(&keyword)
} }

View File

@@ -552,7 +552,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
Err(error) => return Some(Err(error.into())), Err(error) => return Some(Err(error.into())),
}; };
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME { if name == RESERVED_VECTORS_FIELD_NAME
|| name == RESERVED_GEO_FIELD_NAME
|| name == RESERVED_GEOJSON_FIELD_NAME
{
continue; continue;
} }
@@ -585,7 +588,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
Err(_) => return Some(()), Err(_) => return Some(()),
}; };
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME { if name == RESERVED_VECTORS_FIELD_NAME
|| name == RESERVED_GEO_FIELD_NAME
|| name == RESERVED_GEOJSON_FIELD_NAME
{
continue; continue;
} }
@@ -595,7 +601,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
} }
fn top_level_field(&self, k: &str) -> Result<Option<&'a RawValue>> { fn top_level_field(&self, k: &str) -> Result<Option<&'a RawValue>> {
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME { if k == RESERVED_VECTORS_FIELD_NAME
|| k == RESERVED_GEO_FIELD_NAME
|| k == RESERVED_GEOJSON_FIELD_NAME
{
return Ok(None); return Ok(None);
} }
self.get(k) self.get(k)

View File

@@ -5,7 +5,7 @@ use bumpalo::Bump;
use hashbrown::HashMap; use hashbrown::HashMap;
use super::DelAddRoaringBitmap; use super::DelAddRoaringBitmap;
use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::update::new::channel::{DocumentsSender, ExtractorBbqueueSender}; use crate::update::new::channel::{DocumentsSender, ExtractorBbqueueSender};
use crate::update::new::document::{write_to_obkv, Document, DocumentContext, DocumentIdentifiers}; use crate::update::new::document::{write_to_obkv, Document, DocumentContext, DocumentIdentifiers};
use crate::update::new::indexer::document_changes::{Extractor, IndexingContext}; use crate::update::new::indexer::document_changes::{Extractor, IndexingContext};
@@ -75,7 +75,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field() .geo_field()
.transpose() .transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) { let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?; let (f, _) = res?;
let entry = document_extractor_data let entry = document_extractor_data
.field_distribution_delta .field_distribution_delta
@@ -94,7 +98,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field() .geo_field()
.transpose() .transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) { let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?; let (f, _) = res?;
let entry = document_extractor_data let entry = document_extractor_data
.field_distribution_delta .field_distribution_delta
@@ -108,7 +116,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field() .geo_field()
.transpose() .transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) { let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?; let (f, _) = res?;
let entry = document_extractor_data let entry = document_extractor_data
.field_distribution_delta .field_distribution_delta
@@ -143,7 +155,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field() .geo_field()
.transpose() .transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv))); .map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) { let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?; let (f, _) = res?;
let entry = document_extractor_data let entry = document_extractor_data
.field_distribution_delta .field_distribution_delta

View File

@@ -1,6 +1,7 @@
use heed::RwTxn; use heed::RwTxn;
use super::document::{Document, DocumentFromDb}; use super::document::{Document, DocumentFromDb};
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::progress::{self, AtomicSubStep, Progress}; use crate::progress::{self, AtomicSubStep, Progress};
use crate::{FieldDistribution, Index, Result}; use crate::{FieldDistribution, Index, Result};
@@ -22,8 +23,13 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre
let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map)? else { let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map)? else {
continue; continue;
}; };
let geo_iter = document.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); let geo_iter =
for res in document.iter_top_level_fields().chain(geo_iter) { document.geo_field().transpose().map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
let geojson_iter = document
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in document.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (field_name, _) = res?; let (field_name, _) = res?;
if let Some(count) = distribution.get_mut(field_name) { if let Some(count) = distribution.get_mut(field_name) {
*count += 1; *count += 1;