mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-29 09:39:58 +00:00
Merge branch 'search-refactor-exactness' into search-refactor-tests-doc
This commit is contained in:
101
milli/src/search/new/ranking_rule_graph/exactness/mod.rs
Normal file
101
milli/src/search/new/ranking_rule_graph/exactness/mod.rs
Normal file
@ -0,0 +1,101 @@
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::{QueryGraph, QueryNode};
|
||||
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
|
||||
use crate::{Result, RoaringBitmapCodec, SearchContext, SearchLogger};
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ExactnessCondition {
|
||||
ExactInAttribute(LocatedQueryTermSubset),
|
||||
Skip(LocatedQueryTermSubset),
|
||||
}
|
||||
|
||||
/// Type-level marker for the `exactness` ranking rule graph.
///
/// The enum has no variants, so no value of it can be constructed; it only
/// serves as the type on which `RankingRuleGraphTrait` is implemented.
pub enum ExactnessGraph {}
|
||||
|
||||
fn compute_docids(
|
||||
ctx: &mut SearchContext,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let exact_term = if let Some(exact_term) = dest_node.term_subset.exact_term(ctx) {
|
||||
exact_term
|
||||
} else {
|
||||
return Ok(Default::default());
|
||||
};
|
||||
let mut candidates = match exact_term {
|
||||
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(),
|
||||
ExactTerm::Word(word) => {
|
||||
if let Some(word_candidates) = ctx.get_db_word_docids(word)? {
|
||||
RoaringBitmapCodec::bytes_decode(word_candidates).ok_or(heed::Error::Decoding)?
|
||||
} else {
|
||||
return Ok(Default::default());
|
||||
}
|
||||
}
|
||||
};
|
||||
// TODO: synonyms?
|
||||
candidates &= universe;
|
||||
Ok(candidates)
|
||||
}
|
||||
|
||||
impl RankingRuleGraphTrait for ExactnessGraph {
|
||||
type Condition = ExactnessCondition;
|
||||
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let (docids, dest_node) = match condition {
|
||||
ExactnessCondition::ExactInAttribute(dest_node) => {
|
||||
(compute_docids(ctx, dest_node, universe)?, dest_node)
|
||||
}
|
||||
ExactnessCondition::Skip(dest_node) => (universe.clone(), dest_node),
|
||||
};
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset: dest_node.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn build_edges(
|
||||
_ctx: &mut SearchContext,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_source_node: Option<&LocatedQueryTermSubset>,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
let exact_condition = ExactnessCondition::ExactInAttribute(dest_node.clone());
|
||||
let exact_condition = conditions_interner.insert(exact_condition);
|
||||
|
||||
let skip_condition = ExactnessCondition::Skip(dest_node.clone());
|
||||
let skip_condition = conditions_interner.insert(skip_condition);
|
||||
|
||||
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
_graph: &RankingRuleGraph<Self>,
|
||||
_paths: &[Vec<Interned<Self::Condition>>],
|
||||
_dead_ends_cache: &DeadEndsCache<Self::Condition>,
|
||||
_niverse: &RoaringBitmap,
|
||||
_costs: &MappedInterner<QueryNode, Vec<u64>>,
|
||||
_cost: u64,
|
||||
_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
}
|
||||
|
||||
fn label_for_condition(
|
||||
_ctx: &mut SearchContext,
|
||||
condition: &Self::Condition,
|
||||
) -> Result<String> {
|
||||
Ok(match condition {
|
||||
ExactnessCondition::ExactInAttribute(_) => "exact",
|
||||
ExactnessCondition::Skip(_) => "skip",
|
||||
}
|
||||
.to_owned())
|
||||
}
|
||||
}
|
@ -10,6 +10,8 @@ mod cheapest_paths;
|
||||
mod condition_docids_cache;
|
||||
mod dead_ends_cache;
|
||||
|
||||
/// Implementation of the `exactness` ranking rule
|
||||
mod exactness;
|
||||
/// Implementation of the `proximity` ranking rule
|
||||
mod proximity;
|
||||
/// Implementation of the `typo` ranking rule
|
||||
@ -20,6 +22,7 @@ use std::hash::Hash;
|
||||
pub use cheapest_paths::PathVisitor;
|
||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||
pub use dead_ends_cache::DeadEndsCache;
|
||||
pub use exactness::{ExactnessCondition, ExactnessGraph};
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
use roaring::RoaringBitmap;
|
||||
pub use typo::{TypoCondition, TypoGraph};
|
||||
|
@ -1,14 +1,17 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::ProximityCondition;
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::query_term::{Phrase, QueryTermSubset};
|
||||
use crate::search::new::ranking_rule_graph::ComputedCondition;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::collections::BTreeSet;
|
||||
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
||||
|
||||
pub fn compute_docids(
|
||||
ctx: &mut SearchContext,
|
||||
@ -90,7 +93,8 @@ pub fn compute_docids(
|
||||
continue;
|
||||
}
|
||||
} else if let Some(lw_bytes) = ctx.get_db_word_docids(left_word)? {
|
||||
let left_word_docids = CboRoaringBitmapCodec::deserialize_from(lw_bytes)?;
|
||||
let left_word_docids =
|
||||
RoaringBitmapCodec::bytes_decode(lw_bytes).ok_or(heed::Error::Decoding)?;
|
||||
if universe.is_disjoint(&left_word_docids) {
|
||||
continue;
|
||||
}
|
||||
|
Reference in New Issue
Block a user