mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 01:01:00 +00:00
Refactor Document indexing process (Facets)
**Changes:** The Documents changes now take a selector closure instead of a list of fields to match the fields to extract. The `seek_leaf_values_in_object` function now uses a selector closure instead of a list of fields to match the fields to extract. The facet database extraction now relies on the FilterableAttributesRule to match the fields to extract. The facet-search database extraction now relies on the FieldIdMapWithMetadata to select the fields to index. The facet level database extraction now relies on the FieldIdMapWithMetadata to select the fields to index. **Important:** Because the filterable attributes are now patterns, the fieldIdMap will only register the fields that exist in at least one document. If a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields. **Impact:** - Document addition/modification facet indexing - Document deletion facet indexing
This commit is contained in:
@ -5,7 +5,6 @@ mod geo;
|
||||
mod searchable;
|
||||
mod vectors;
|
||||
|
||||
use bumpalo::Bump;
|
||||
pub use cache::{
|
||||
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
|
||||
};
|
||||
@ -15,27 +14,11 @@ pub use geo::*;
|
||||
pub use searchable::*;
|
||||
pub use vectors::EmbeddingExtractor;
|
||||
|
||||
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
|
||||
use super::steps::IndexingStep;
|
||||
use super::thread_local::{FullySend, ThreadLocal};
|
||||
use crate::Result;
|
||||
|
||||
pub trait DocidsExtractor {
|
||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
|
||||
document_changes: &DC,
|
||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||
step: IndexingStep,
|
||||
) -> Result<Vec<BalancedCaches<'extractor>>>
|
||||
where
|
||||
MSP: Fn() -> bool + Sync;
|
||||
}
|
||||
|
||||
/// TODO: move this into permissive json pointer
|
||||
pub mod perm_json_p {
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
use crate::Result;
|
||||
use crate::{attribute_patterns::PatternMatch, Result};
|
||||
const SPLIT_SYMBOL: char = '.';
|
||||
|
||||
/// Returns `true` if the `selector` matches the `key`.
|
||||
@ -68,11 +51,9 @@ pub mod perm_json_p {
|
||||
|
||||
pub fn seek_leaf_values_in_object(
|
||||
value: &Map<String, Value>,
|
||||
selectors: Option<&[&str]>,
|
||||
skip_selectors: &[&str],
|
||||
base_key: &str,
|
||||
base_depth: Depth,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
|
||||
) -> Result<()> {
|
||||
if value.is_empty() {
|
||||
seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?;
|
||||
@ -85,40 +66,16 @@ pub mod perm_json_p {
|
||||
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
|
||||
};
|
||||
|
||||
// Here, if the user only specified `doggo`, we need to iterate over all the fields of `doggo`,
|
||||
// so we check `contained_in` on both sides.
|
||||
let selection = select_field(&base_key, selectors, skip_selectors);
|
||||
if selection != Selection::Skip {
|
||||
let selection = seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
if selection != PatternMatch::NoMatch {
|
||||
match value {
|
||||
Value::Object(object) => {
|
||||
if selection == Selection::Select {
|
||||
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
}
|
||||
|
||||
seek_leaf_values_in_object(
|
||||
object,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
)
|
||||
seek_leaf_values_in_object(object, &base_key, Depth::OnBaseKey, seeker)
|
||||
}
|
||||
Value::Array(array) => {
|
||||
if selection == Selection::Select {
|
||||
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
}
|
||||
|
||||
seek_leaf_values_in_array(
|
||||
array,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
)
|
||||
seek_leaf_values_in_array(array, &base_key, Depth::OnBaseKey, seeker)
|
||||
}
|
||||
value => seeker(&base_key, Depth::OnBaseKey, value),
|
||||
_ => Ok(()),
|
||||
}?;
|
||||
}
|
||||
}
|
||||
@ -128,11 +85,9 @@ pub mod perm_json_p {
|
||||
|
||||
pub fn seek_leaf_values_in_array(
|
||||
values: &[Value],
|
||||
selectors: Option<&[&str]>,
|
||||
skip_selectors: &[&str],
|
||||
base_key: &str,
|
||||
base_depth: Depth,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
|
||||
) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
seeker(base_key, base_depth, &Value::Array(vec![]))?;
|
||||
@ -140,61 +95,16 @@ pub mod perm_json_p {
|
||||
|
||||
for value in values {
|
||||
match value {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
object,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
base_key,
|
||||
Depth::InsideArray,
|
||||
seeker,
|
||||
),
|
||||
Value::Array(array) => seek_leaf_values_in_array(
|
||||
array,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
base_key,
|
||||
Depth::InsideArray,
|
||||
seeker,
|
||||
),
|
||||
value => seeker(base_key, Depth::InsideArray, value),
|
||||
Value::Object(object) => {
|
||||
seek_leaf_values_in_object(object, base_key, Depth::InsideArray, seeker)
|
||||
}
|
||||
Value::Array(array) => {
|
||||
seek_leaf_values_in_array(array, base_key, Depth::InsideArray, seeker)
|
||||
}
|
||||
value => seeker(base_key, Depth::InsideArray, value).map(|_| ()),
|
||||
}?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn select_field(
|
||||
field_name: &str,
|
||||
selectors: Option<&[&str]>,
|
||||
skip_selectors: &[&str],
|
||||
) -> Selection {
|
||||
if skip_selectors.iter().any(|skip_selector| {
|
||||
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
|
||||
}) {
|
||||
Selection::Skip
|
||||
} else if let Some(selectors) = selectors {
|
||||
let mut selection = Selection::Skip;
|
||||
for selector in selectors {
|
||||
if contained_in(field_name, selector) {
|
||||
selection = Selection::Select;
|
||||
break;
|
||||
} else if contained_in(selector, field_name) {
|
||||
selection = Selection::Parent;
|
||||
}
|
||||
}
|
||||
selection
|
||||
} else {
|
||||
Selection::Select
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Selection {
|
||||
/// The field is a parent of a nested field that must be selected
|
||||
Parent,
|
||||
/// The field must be selected
|
||||
Select,
|
||||
/// The field must be skipped
|
||||
Skip,
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user