Continue previous commit: handle RESERVED_GEOJSON_FIELD_NAME alongside RESERVED_GEO_FIELD_NAME in reserved-keyword checks, document field iteration, and field distribution

This commit is contained in:
Tamo
2025-09-18 16:38:55 +02:00
parent d639155fe5
commit 35e4e38961
4 changed files with 52 additions and 13 deletions

View File

@ -10,15 +10,23 @@ use rhai::EvalAltResult;
use serde_json::Value;
use thiserror::Error;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::documents::{self, DocumentsBatchCursorError};
use crate::thread_pool_no_abort::PanicCatched;
use crate::vector::settings::EmbeddingSettings;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
[RESERVED_GEO_FIELD_NAME, "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"]
.contains(&keyword)
[
RESERVED_GEO_FIELD_NAME,
RESERVED_GEOJSON_FIELD_NAME,
"_geoDistance",
"_geoPoint",
"_geoRadius",
"_geoBoundingBox",
"_geoPolygon",
]
.contains(&keyword)
}
#[allow(clippy::large_enum_variant)]

View File

@ -552,7 +552,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
Err(error) => return Some(Err(error.into())),
};
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
if name == RESERVED_VECTORS_FIELD_NAME
|| name == RESERVED_GEO_FIELD_NAME
|| name == RESERVED_GEOJSON_FIELD_NAME
{
continue;
}
@ -585,7 +588,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
Err(_) => return Some(()),
};
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
if name == RESERVED_VECTORS_FIELD_NAME
|| name == RESERVED_GEO_FIELD_NAME
|| name == RESERVED_GEOJSON_FIELD_NAME
{
continue;
}
@ -595,7 +601,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
}
fn top_level_field(&self, k: &str) -> Result<Option<&'a RawValue>> {
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
if k == RESERVED_VECTORS_FIELD_NAME
|| k == RESERVED_GEO_FIELD_NAME
|| k == RESERVED_GEOJSON_FIELD_NAME
{
return Ok(None);
}
self.get(k)

View File

@ -5,7 +5,7 @@ use bumpalo::Bump;
use hashbrown::HashMap;
use super::DelAddRoaringBitmap;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::update::new::channel::{DocumentsSender, ExtractorBbqueueSender};
use crate::update::new::document::{write_to_obkv, Document, DocumentContext, DocumentIdentifiers};
use crate::update::new::indexer::document_changes::{Extractor, IndexingContext};
@ -75,7 +75,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@ -94,7 +98,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@ -108,7 +116,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@ -143,7 +155,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta

View File

@ -1,6 +1,7 @@
use heed::RwTxn;
use super::document::{Document, DocumentFromDb};
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::progress::{self, AtomicSubStep, Progress};
use crate::{FieldDistribution, Index, Result};
@ -22,8 +23,13 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre
let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map)? else {
continue;
};
let geo_iter = document.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
for res in document.iter_top_level_fields().chain(geo_iter) {
let geo_iter =
document.geo_field().transpose().map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
let geojson_iter = document
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in document.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (field_name, _) = res?;
if let Some(count) = distribution.get_mut(field_name) {
*count += 1;