mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-15 01:06:24 +00:00
Move crates under a sub folder to clean up the code
This commit is contained in:
92
crates/milli/src/search/new/ranking_rule_graph/build.rs
Normal file
92
crates/milli/src/search/new/ranking_rule_graph/build.rs
Normal file
@ -0,0 +1,92 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, MappedInterner};
|
||||
use crate::search::new::query_graph::{QueryNode, QueryNodeData};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
/// Build the ranking rule graph from the given query graph
|
||||
pub fn build(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
query_graph: QueryGraph,
|
||||
cost_of_ignoring_node: MappedInterner<QueryNode, Option<(u32, SmallBitmap<QueryNode>)>>,
|
||||
) -> Result<Self> {
|
||||
let QueryGraph { nodes: graph_nodes, .. } = &query_graph;
|
||||
|
||||
let mut conditions_interner = DedupInterner::default();
|
||||
|
||||
let mut edges_store = DedupInterner::default();
|
||||
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
|
||||
|
||||
for (source_id, source_node) in graph_nodes.iter() {
|
||||
let new_edges = edges_of_node.get_mut(source_id);
|
||||
|
||||
for dest_idx in source_node.successors.iter() {
|
||||
let src_term = match &source_node.data {
|
||||
QueryNodeData::Term(t) => Some(t),
|
||||
QueryNodeData::Start => None,
|
||||
QueryNodeData::Deleted | QueryNodeData::End => panic!(),
|
||||
};
|
||||
let dest_node = graph_nodes.get(dest_idx);
|
||||
let dest_term = match &dest_node.data {
|
||||
QueryNodeData::Term(t) => t,
|
||||
QueryNodeData::End => {
|
||||
let new_edge_id = edges_store.insert(Some(Edge {
|
||||
source_node: source_id,
|
||||
dest_node: dest_idx,
|
||||
cost: 0,
|
||||
condition: None,
|
||||
nodes_to_skip: SmallBitmap::for_interned_values_in(graph_nodes),
|
||||
}));
|
||||
new_edges.insert(new_edge_id);
|
||||
continue;
|
||||
}
|
||||
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
|
||||
};
|
||||
if let Some((cost_of_ignoring, forbidden_nodes)) =
|
||||
cost_of_ignoring_node.get(dest_idx)
|
||||
{
|
||||
let dest = graph_nodes.get(dest_idx);
|
||||
let dest_size = match &dest.data {
|
||||
QueryNodeData::Term(term) => term.term_ids.len(),
|
||||
_ => panic!(),
|
||||
};
|
||||
let new_edge_id = edges_store.insert(Some(Edge {
|
||||
source_node: source_id,
|
||||
dest_node: dest_idx,
|
||||
cost: *cost_of_ignoring * dest_size as u32,
|
||||
condition: None,
|
||||
nodes_to_skip: forbidden_nodes.clone(),
|
||||
}));
|
||||
new_edges.insert(new_edge_id);
|
||||
}
|
||||
|
||||
let edges = G::build_edges(ctx, &mut conditions_interner, src_term, dest_term)?;
|
||||
if edges.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (cost, condition) in edges {
|
||||
let new_edge_id = edges_store.insert(Some(Edge {
|
||||
source_node: source_id,
|
||||
dest_node: dest_idx,
|
||||
cost,
|
||||
condition: Some(condition),
|
||||
nodes_to_skip: SmallBitmap::for_interned_values_in(graph_nodes),
|
||||
}));
|
||||
new_edges.insert(new_edge_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
let edges_store = edges_store.freeze();
|
||||
let edges_of_node =
|
||||
edges_of_node.map(|edges| SmallBitmap::from_iter(edges.iter().copied(), &edges_store));
|
||||
|
||||
let conditions_interner = conditions_interner.freeze();
|
||||
|
||||
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node, conditions_interner })
|
||||
}
|
||||
}
|
400
crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs
Normal file
400
crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs
Normal file
@ -0,0 +1,400 @@
|
||||
/** Implements a "PathVisitor" which finds all paths of a certain cost
|
||||
from the START to END node of a ranking rule graph.
|
||||
|
||||
A path is a list of conditions. A condition is the data associated with
|
||||
an edge, given by the ranking rule. Some edges don't have a condition associated
|
||||
with them, they are "unconditional". These kinds of edges are used to "skip" a node.
|
||||
|
||||
The algorithm uses a depth-first search. It benefits from two main optimisations:
|
||||
- The list of all possible costs to go from any node to the END node is precomputed
|
||||
- The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges
|
||||
untraversable depending on what other edges were selected.
|
||||
|
||||
These two optimisations are meant to avoid traversing edges that wouldn't lead
|
||||
to a valid path. In practically all cases, we avoid the exponential complexity
|
||||
that is inherent to depth-first search in a large ranking rule graph.
|
||||
|
||||
The DeadEndsCache is a sort of prefix tree which associates a list of forbidden
|
||||
conditions to a list of traversed conditions.
|
||||
For example, the DeadEndsCache could say the following:
|
||||
- Immediately, from the start, the conditions `[a,b]` are forbidden
|
||||
- if we take the condition `c`, then the conditions `[e]` are also forbidden
|
||||
- and if after that, we take `f`, then `[h,i]` are also forbidden
|
||||
- etc.
|
||||
- if we take `g`, then `[f]` is also forbidden
|
||||
- etc.
|
||||
- etc.
|
||||
As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden
|
||||
conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden.
|
||||
|
||||
When a path is found from START to END, we give it to the `visit` closure.
|
||||
This closure takes a mutable reference to the `DeadEndsCache`. This means that
|
||||
the caller can update this cache. Therefore, we must handle the case where the
|
||||
DeadEndsCache has been updated. This means potentially backtracking up to the point
|
||||
where the traversed conditions are all allowed by the new DeadEndsCache.
|
||||
|
||||
The algorithm also implements the `TermsMatchingStrategy` logic.
|
||||
Some edges are augmented with a list of "nodes_to_skip". Skipping
|
||||
a node means "reaching this node through an unconditional edge". If we have
|
||||
already traversed (ie. not skipped) a node that is in this list, then we know that we
|
||||
can't traverse this edge. Otherwise, we traverse the edge but make sure to skip any
|
||||
future node that was present in the "nodes_to_skip" list.
|
||||
|
||||
The caller can decide to stop the path finding algorithm
|
||||
by returning a `ControlFlow::Break` from the `visit` closure.
|
||||
*/
|
||||
use std::collections::{BTreeSet, VecDeque};
|
||||
use std::iter::FromIterator;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use fxhash::FxHashSet;
|
||||
|
||||
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::QueryNode;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::Result;
|
||||
|
||||
/// Closure which processes a path found by the `PathVisitor`
|
||||
type VisitFn<'f, G> = &'f mut dyn FnMut(
|
||||
// the path as a list of conditions
|
||||
&[Interned<<G as RankingRuleGraphTrait>::Condition>],
|
||||
&mut RankingRuleGraph<G>,
|
||||
// a mutable reference to the DeadEndsCache, to update it in case the given
|
||||
// path doesn't resolve to any valid document ids
|
||||
&mut DeadEndsCache<<G as RankingRuleGraphTrait>::Condition>,
|
||||
) -> Result<ControlFlow<()>>;
|
||||
|
||||
/// A structure which is kept but not updated during the traversal of the graph.
|
||||
/// It can however be updated by the `visit` closure once a valid path has been found.
|
||||
struct VisitorContext<'a, G: RankingRuleGraphTrait> {
|
||||
graph: &'a mut RankingRuleGraph<G>,
|
||||
all_costs_from_node: &'a MappedInterner<QueryNode, Vec<u64>>,
|
||||
dead_ends_cache: &'a mut DeadEndsCache<G::Condition>,
|
||||
}
|
||||
|
||||
/// The internal state of the traversal algorithm
|
||||
struct VisitorState<G: RankingRuleGraphTrait> {
|
||||
/// Budget from the current node to the end node
|
||||
remaining_cost: u64,
|
||||
/// Previously visited conditions, in order.
|
||||
path: Vec<Interned<G::Condition>>,
|
||||
/// Previously visited conditions, as an efficient and compact set.
|
||||
visited_conditions: SmallBitmap<G::Condition>,
|
||||
/// Previously visited (ie not skipped) nodes, as an efficient and compact set.
|
||||
visited_nodes: SmallBitmap<QueryNode>,
|
||||
/// The conditions that cannot be visited anymore
|
||||
forbidden_conditions: SmallBitmap<G::Condition>,
|
||||
/// The nodes that cannot be visited anymore (they must be skipped)
|
||||
nodes_to_skip: SmallBitmap<QueryNode>,
|
||||
}
|
||||
|
||||
/// See module documentation
|
||||
pub struct PathVisitor<'a, G: RankingRuleGraphTrait> {
|
||||
state: VisitorState<G>,
|
||||
ctx: VisitorContext<'a, G>,
|
||||
}
|
||||
impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> {
|
||||
pub fn new(
|
||||
cost: u64,
|
||||
graph: &'a mut RankingRuleGraph<G>,
|
||||
all_costs_from_node: &'a MappedInterner<QueryNode, Vec<u64>>,
|
||||
dead_ends_cache: &'a mut DeadEndsCache<G::Condition>,
|
||||
) -> Self {
|
||||
Self {
|
||||
state: VisitorState {
|
||||
remaining_cost: cost,
|
||||
path: vec![],
|
||||
visited_conditions: SmallBitmap::for_interned_values_in(&graph.conditions_interner),
|
||||
visited_nodes: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes),
|
||||
forbidden_conditions: SmallBitmap::for_interned_values_in(
|
||||
&graph.conditions_interner,
|
||||
),
|
||||
nodes_to_skip: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes),
|
||||
},
|
||||
ctx: VisitorContext { graph, all_costs_from_node, dead_ends_cache },
|
||||
}
|
||||
}
|
||||
|
||||
/// See module documentation
|
||||
pub fn visit_paths(mut self, visit: VisitFn<'_, G>) -> Result<()> {
|
||||
let _ =
|
||||
self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
||||
/// Visits a node: traverse all its valid conditional and unconditional edges.
|
||||
///
|
||||
/// Returns ControlFlow::Break if the path finding algorithm should stop.
|
||||
/// Returns whether a valid path was found from this node otherwise.
|
||||
fn visit_node(
|
||||
&mut self,
|
||||
from_node: Interned<QueryNode>,
|
||||
visit: VisitFn<'_, G>,
|
||||
ctx: &mut VisitorContext<'_, G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
// any valid path will be found from this point
|
||||
// if a valid path was found, then we know that the DeadEndsCache may have been updated,
|
||||
// and we will need to do more work to potentially backtrack
|
||||
let mut any_valid = false;
|
||||
|
||||
let edges = ctx.graph.edges_of_node.get(from_node).clone();
|
||||
for edge_idx in edges.iter() {
|
||||
// could be none if the edge was deleted
|
||||
let Some(edge) = ctx.graph.edges_store.get(edge_idx).clone() else { continue };
|
||||
|
||||
if self.remaining_cost < edge.cost as u64 {
|
||||
continue;
|
||||
}
|
||||
self.remaining_cost -= edge.cost as u64;
|
||||
|
||||
let cf = match edge.condition {
|
||||
Some(condition) => self.visit_condition(
|
||||
condition,
|
||||
edge.dest_node,
|
||||
&edge.nodes_to_skip,
|
||||
visit,
|
||||
ctx,
|
||||
)?,
|
||||
None => self.visit_no_condition(edge.dest_node, &edge.nodes_to_skip, visit, ctx)?,
|
||||
};
|
||||
self.remaining_cost += edge.cost as u64;
|
||||
|
||||
let ControlFlow::Continue(next_any_valid) = cf else {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
};
|
||||
any_valid |= next_any_valid;
|
||||
if next_any_valid {
|
||||
// backtrack as much as possible if a valid path was found and the dead_ends_cache
|
||||
// was updated such that the current prefix is now invalid
|
||||
self.forbidden_conditions = ctx
|
||||
.dead_ends_cache
|
||||
.forbidden_conditions_for_all_prefixes_up_to(self.path.iter().copied());
|
||||
if self.visited_conditions.intersects(&self.forbidden_conditions) {
|
||||
return Ok(ControlFlow::Continue(true));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ControlFlow::Continue(any_valid))
|
||||
}
|
||||
|
||||
/// Visits an unconditional edge.
|
||||
///
|
||||
/// Returns ControlFlow::Break if the path finding algorithm should stop.
|
||||
/// Returns whether a valid path was found from this node otherwise.
|
||||
fn visit_no_condition(
|
||||
&mut self,
|
||||
dest_node: Interned<QueryNode>,
|
||||
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||
visit: VisitFn<'_, G>,
|
||||
ctx: &mut VisitorContext<'_, G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
if !ctx
|
||||
.all_costs_from_node
|
||||
.get(dest_node)
|
||||
.iter()
|
||||
.any(|next_cost| *next_cost == self.remaining_cost)
|
||||
{
|
||||
return Ok(ControlFlow::Continue(false));
|
||||
}
|
||||
// We've reached the END node!
|
||||
if dest_node == ctx.graph.query_graph.end_node {
|
||||
let control_flow = visit(&self.path, ctx.graph, ctx.dead_ends_cache)?;
|
||||
// We could change the return type of the visit closure such that the caller
|
||||
// tells us whether the dead ends cache was updated or not.
|
||||
// Alternatively, maybe the DeadEndsCache should have a generation number
|
||||
// to it, so that we don't need to play with these booleans at all.
|
||||
match control_flow {
|
||||
ControlFlow::Continue(_) => Ok(ControlFlow::Continue(true)),
|
||||
ControlFlow::Break(_) => Ok(ControlFlow::Break(())),
|
||||
}
|
||||
} else {
|
||||
let old_fbct = self.nodes_to_skip.clone();
|
||||
self.nodes_to_skip.union(edge_new_nodes_to_skip);
|
||||
let cf = self.visit_node(dest_node, visit, ctx)?;
|
||||
self.nodes_to_skip = old_fbct;
|
||||
Ok(cf)
|
||||
}
|
||||
}
|
||||
/// Visits a conditional edge.
|
||||
///
|
||||
/// Returns ControlFlow::Break if the path finding algorithm should stop.
|
||||
/// Returns whether a valid path was found from this node otherwise.
|
||||
fn visit_condition(
|
||||
&mut self,
|
||||
condition: Interned<G::Condition>,
|
||||
dest_node: Interned<QueryNode>,
|
||||
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||
visit: VisitFn<'_, G>,
|
||||
ctx: &mut VisitorContext<'_, G>,
|
||||
) -> Result<ControlFlow<(), bool>> {
|
||||
assert!(dest_node != ctx.graph.query_graph.end_node);
|
||||
|
||||
if self.forbidden_conditions.contains(condition)
|
||||
|| self.nodes_to_skip.contains(dest_node)
|
||||
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
|
||||
{
|
||||
return Ok(ControlFlow::Continue(false));
|
||||
}
|
||||
|
||||
// Checking that from the destination node, there is at least
|
||||
// one cost that we can visit that corresponds to our remaining budget.
|
||||
if !ctx
|
||||
.all_costs_from_node
|
||||
.get(dest_node)
|
||||
.iter()
|
||||
.any(|next_cost| *next_cost == self.remaining_cost)
|
||||
{
|
||||
return Ok(ControlFlow::Continue(false));
|
||||
}
|
||||
|
||||
self.path.push(condition);
|
||||
self.visited_nodes.insert(dest_node);
|
||||
self.visited_conditions.insert(condition);
|
||||
|
||||
let old_forb_cond = self.forbidden_conditions.clone();
|
||||
if let Some(next_forbidden) =
|
||||
ctx.dead_ends_cache.forbidden_conditions_after_prefix(self.path.iter().copied())
|
||||
{
|
||||
self.forbidden_conditions.union(&next_forbidden);
|
||||
}
|
||||
let old_nodes_to_skip = self.nodes_to_skip.clone();
|
||||
self.nodes_to_skip.union(edge_new_nodes_to_skip);
|
||||
|
||||
let cf = self.visit_node(dest_node, visit, ctx)?;
|
||||
|
||||
self.nodes_to_skip = old_nodes_to_skip;
|
||||
self.forbidden_conditions = old_forb_cond;
|
||||
|
||||
self.visited_conditions.remove(condition);
|
||||
self.visited_nodes.remove(dest_node);
|
||||
self.path.pop();
|
||||
|
||||
Ok(cf)
|
||||
}
|
||||
}
|
||||
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
pub fn find_all_costs_to_end(&self) -> MappedInterner<QueryNode, Vec<u64>> {
|
||||
let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]);
|
||||
|
||||
self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| {
|
||||
if cur_node == self.query_graph.end_node {
|
||||
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
|
||||
return;
|
||||
}
|
||||
let mut self_costs = Vec::<u64>::new();
|
||||
|
||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||
for edge_idx in cur_node_edges.iter() {
|
||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||
let succ_node = edge.dest_node;
|
||||
let succ_costs = costs_to_end.get(succ_node);
|
||||
for succ_cost in succ_costs {
|
||||
self_costs.push(edge.cost as u64 + succ_cost);
|
||||
}
|
||||
}
|
||||
self_costs.sort_unstable();
|
||||
self_costs.dedup();
|
||||
|
||||
*costs_to_end.get_mut(cur_node) = self_costs;
|
||||
});
|
||||
costs_to_end
|
||||
}
|
||||
|
||||
pub fn update_all_costs_before_node(
|
||||
&self,
|
||||
node_with_removed_outgoing_conditions: Interned<QueryNode>,
|
||||
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
|
||||
) {
|
||||
// Traverse the graph backward from the target node, recomputing the cost for each of its predecessors.
|
||||
// We first check that no other node is contributing the same total cost to a predecessor before removing
|
||||
// the cost from the predecessor.
|
||||
self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| {
|
||||
let mut costs_to_remove = FxHashSet::default();
|
||||
costs_to_remove.extend(costs.get(cur_node).iter().copied());
|
||||
|
||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||
for edge_idx in cur_node_edges.iter() {
|
||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||
for cost in costs.get(edge.dest_node).iter() {
|
||||
costs_to_remove.remove(&(*cost + edge.cost as u64));
|
||||
if costs_to_remove.is_empty() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if costs_to_remove.is_empty() {
|
||||
return;
|
||||
}
|
||||
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
|
||||
for c in costs_to_remove {
|
||||
new_costs.remove(&c);
|
||||
}
|
||||
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
|
||||
});
|
||||
}
|
||||
|
||||
/// Traverse the graph backwards from the given node such that every time
|
||||
/// a node is visited, we are guaranteed that all its successors either:
|
||||
/// 1. have already been visited; OR
|
||||
/// 2. were not reachable from the given node
|
||||
pub fn traverse_breadth_first_backward(
|
||||
&self,
|
||||
from: Interned<QueryNode>,
|
||||
mut visit: impl FnMut(Interned<QueryNode>),
|
||||
) {
|
||||
let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
|
||||
{
|
||||
// go backward to get the set of all reachable nodes from the given node
|
||||
// the nodes that are not reachable will be set as `visited`
|
||||
let mut stack = VecDeque::new();
|
||||
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
|
||||
enqueued.insert(from);
|
||||
stack.push_back(from);
|
||||
while let Some(n) = stack.pop_front() {
|
||||
if reachable.contains(n) {
|
||||
continue;
|
||||
}
|
||||
reachable.insert(n);
|
||||
for prev_node in self.query_graph.nodes.get(n).predecessors.iter() {
|
||||
if !enqueued.contains(prev_node) && !reachable.contains(prev_node) {
|
||||
stack.push_back(prev_node);
|
||||
enqueued.insert(prev_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let mut unreachable_or_visited =
|
||||
SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
|
||||
for (n, _) in self.query_graph.nodes.iter() {
|
||||
if !reachable.contains(n) {
|
||||
unreachable_or_visited.insert(n);
|
||||
}
|
||||
}
|
||||
|
||||
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
|
||||
let mut stack = VecDeque::new();
|
||||
|
||||
enqueued.insert(from);
|
||||
stack.push_back(from);
|
||||
|
||||
while let Some(cur_node) = stack.pop_front() {
|
||||
if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) {
|
||||
stack.push_back(cur_node);
|
||||
continue;
|
||||
}
|
||||
unreachable_or_visited.insert(cur_node);
|
||||
visit(cur_node);
|
||||
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||
if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) {
|
||||
stack.push_back(prev_node);
|
||||
enqueued.insert(prev_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use fxhash::FxHashMap;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
/// A cache storing the document ids associated with each ranking rule edge
|
||||
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||
pub cache: FxHashMap<Interned<G::Condition>, ComputedCondition>,
|
||||
_phantom: PhantomData<G>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
|
||||
fn default() -> Self {
|
||||
Self { cache: Default::default(), _phantom: Default::default() }
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||
pub fn get_subsets_used_by_condition(
|
||||
&mut self,
|
||||
interned_condition: Interned<G::Condition>,
|
||||
) -> (&Option<LocatedQueryTermSubset>, &LocatedQueryTermSubset) {
|
||||
let c = &self.cache[&interned_condition];
|
||||
(&c.start_term_subset, &c.end_term_subset)
|
||||
}
|
||||
/// Retrieve the document ids for the given edge condition.
|
||||
///
|
||||
/// If the cache does not yet contain these docids, they are computed
|
||||
/// and inserted in the cache.
|
||||
pub fn get_computed_condition<'s>(
|
||||
&'s mut self,
|
||||
ctx: &mut SearchContext<'_>,
|
||||
interned_condition: Interned<G::Condition>,
|
||||
graph: &mut RankingRuleGraph<G>,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<&'s ComputedCondition> {
|
||||
if self.cache.contains_key(&interned_condition) {
|
||||
let computed = self.cache.get_mut(&interned_condition).unwrap();
|
||||
if computed.universe_len == universe.len() {
|
||||
return Ok(computed);
|
||||
} else {
|
||||
computed.docids &= universe;
|
||||
computed.universe_len = universe.len();
|
||||
return Ok(computed);
|
||||
}
|
||||
}
|
||||
let condition = graph.conditions_interner.get_mut(interned_condition);
|
||||
let computed = G::resolve_condition(ctx, condition, universe)?;
|
||||
// Can we put an assert here for computed.universe_len == universe.len() ?
|
||||
let _ = self.cache.insert(interned_condition, computed);
|
||||
let computed = &self.cache[&interned_condition];
|
||||
Ok(computed)
|
||||
}
|
||||
}
|
@ -0,0 +1,100 @@
|
||||
use crate::search::new::interner::{FixedSizeInterner, Interned};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
|
||||
pub struct DeadEndsCache<T> {
|
||||
// conditions and next could/should be part of the same vector
|
||||
conditions: Vec<Interned<T>>,
|
||||
next: Vec<Self>,
|
||||
pub forbidden: SmallBitmap<T>,
|
||||
}
|
||||
impl<T> Clone for DeadEndsCache<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
conditions: self.conditions.clone(),
|
||||
next: self.next.clone(),
|
||||
forbidden: self.forbidden.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<T> DeadEndsCache<T> {
|
||||
pub fn new(for_interner: &FixedSizeInterner<T>) -> Self {
|
||||
Self {
|
||||
conditions: vec![],
|
||||
next: vec![],
|
||||
forbidden: SmallBitmap::for_interned_values_in(for_interner),
|
||||
}
|
||||
}
|
||||
pub fn forbid_condition(&mut self, condition: Interned<T>) {
|
||||
self.forbidden.insert(condition);
|
||||
}
|
||||
|
||||
fn advance(&mut self, condition: Interned<T>) -> Option<&mut Self> {
|
||||
if let Some(idx) = self.conditions.iter().position(|c| *c == condition) {
|
||||
Some(&mut self.next[idx])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
pub fn forbidden_conditions_for_all_prefixes_up_to(
|
||||
&mut self,
|
||||
prefix: impl Iterator<Item = Interned<T>>,
|
||||
) -> SmallBitmap<T> {
|
||||
let mut forbidden = self.forbidden.clone();
|
||||
let mut cursor = self;
|
||||
for c in prefix {
|
||||
if let Some(next) = cursor.advance(c) {
|
||||
cursor = next;
|
||||
forbidden.union(&cursor.forbidden);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
forbidden
|
||||
}
|
||||
pub fn forbidden_conditions_after_prefix(
|
||||
&mut self,
|
||||
prefix: impl Iterator<Item = Interned<T>>,
|
||||
) -> Option<SmallBitmap<T>> {
|
||||
let mut cursor = self;
|
||||
for c in prefix {
|
||||
if let Some(next) = cursor.advance(c) {
|
||||
cursor = next;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(cursor.forbidden.clone())
|
||||
}
|
||||
pub fn forbid_condition_after_prefix(
|
||||
&mut self,
|
||||
mut prefix: impl Iterator<Item = Interned<T>>,
|
||||
forbidden: Interned<T>,
|
||||
) {
|
||||
match prefix.next() {
|
||||
None => {
|
||||
self.forbidden.insert(forbidden);
|
||||
}
|
||||
Some(first_condition) => {
|
||||
if let Some(idx) = self.conditions.iter().position(|c| *c == first_condition) {
|
||||
return self.next[idx].forbid_condition_after_prefix(prefix, forbidden);
|
||||
}
|
||||
let mut rest = DeadEndsCache {
|
||||
conditions: vec![],
|
||||
next: vec![],
|
||||
forbidden: SmallBitmap::new(self.forbidden.universe_length()),
|
||||
};
|
||||
rest.forbid_condition_after_prefix(prefix, forbidden);
|
||||
self.conditions.push(first_condition);
|
||||
self.next.push(rest);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pub fn debug_print(&self, indent: usize) {
|
||||
// println!("{} {:?}", " ".repeat(indent), self.forbidden.iter().collect::<Vec<_>>());
|
||||
// for (condition, next) in self.conditions.iter().zip(self.next.iter()) {
|
||||
// println!("{} {condition}:", " ".repeat(indent));
|
||||
// next.debug_print(indent + 2);
|
||||
// }
|
||||
// }
|
||||
}
|
@ -0,0 +1,92 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||
use crate::score_details::{self, Rank, ScoreDetails};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||
use crate::search::new::Word;
|
||||
use crate::{Result, SearchContext};
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ExactnessCondition {
|
||||
ExactInAttribute(LocatedQueryTermSubset),
|
||||
Any(LocatedQueryTermSubset),
|
||||
}
|
||||
|
||||
/// Variant-less type carrying the `RankingRuleGraphTrait` implementation that
/// uses `ExactnessCondition` as its condition type.
pub enum ExactnessGraph {}
|
||||
|
||||
fn compute_docids(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let exact_term = if let Some(exact_term) = dest_node.term_subset.exact_term(ctx) {
|
||||
exact_term
|
||||
} else {
|
||||
return Ok(Default::default());
|
||||
};
|
||||
|
||||
let candidates = match exact_term {
|
||||
// TODO I move the intersection here
|
||||
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)? & universe,
|
||||
ExactTerm::Word(word) => {
|
||||
ctx.word_docids(Some(universe), Word::Original(word))?.unwrap_or_default()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(candidates)
|
||||
}
|
||||
|
||||
impl RankingRuleGraphTrait for ExactnessGraph {
|
||||
type Condition = ExactnessCondition;
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let (docids, end_term_subset) = match condition {
|
||||
ExactnessCondition::ExactInAttribute(dest_node) => {
|
||||
let mut end_term_subset = dest_node.clone();
|
||||
end_term_subset.term_subset.keep_only_exact_term(ctx);
|
||||
end_term_subset.term_subset.make_mandatory();
|
||||
(compute_docids(ctx, dest_node, universe)?, end_term_subset)
|
||||
}
|
||||
ExactnessCondition::Any(dest_node) => {
|
||||
let docids =
|
||||
compute_query_term_subset_docids(ctx, Some(universe), &dest_node.term_subset)?;
|
||||
(docids, dest_node.clone())
|
||||
}
|
||||
};
|
||||
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset,
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
|
||||
fn build_edges(
|
||||
_ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_source_node: Option<&LocatedQueryTermSubset>,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
let exact_condition = ExactnessCondition::ExactInAttribute(dest_node.clone());
|
||||
let exact_condition = conditions_interner.insert(exact_condition);
|
||||
|
||||
let skip_condition = ExactnessCondition::Any(dest_node.clone());
|
||||
let skip_condition = conditions_interner.insert(skip_condition);
|
||||
|
||||
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||
ScoreDetails::ExactWords(score_details::ExactWords::from_rank(rank))
|
||||
}
|
||||
}
|
112
crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs
Normal file
112
crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use fxhash::FxHashSet;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||
use crate::score_details::{Rank, ScoreDetails};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::{FieldId, InternalError, Result};
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct FidCondition {
|
||||
term: LocatedQueryTermSubset,
|
||||
fid: Option<FieldId>,
|
||||
}
|
||||
|
||||
/// Variant-less type carrying the `RankingRuleGraphTrait` implementation that
/// uses `FidCondition` as its condition type.
pub enum FidGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for FidGraph {
|
||||
type Condition = FidCondition;
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let FidCondition { term, .. } = condition;
|
||||
|
||||
let docids = if let Some(fid) = condition.fid {
|
||||
compute_query_term_subset_docids_within_field_id(
|
||||
ctx,
|
||||
Some(universe),
|
||||
&term.term_subset,
|
||||
fid,
|
||||
)?
|
||||
} else {
|
||||
RoaringBitmap::new()
|
||||
};
|
||||
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset: term.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_from: Option<&LocatedQueryTermSubset>,
|
||||
to_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
let term = to_term;
|
||||
|
||||
let mut all_fields = FxHashSet::default();
|
||||
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
|
||||
let fields = ctx.get_db_word_fids(word.interned())?;
|
||||
all_fields.extend(fields);
|
||||
}
|
||||
|
||||
for phrase in term.term_subset.all_phrases(ctx)? {
|
||||
for &word in phrase.words(ctx).iter().flatten() {
|
||||
let fields = ctx.get_db_word_fids(word)?;
|
||||
all_fields.extend(fields);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(word_prefix) = term.term_subset.use_prefix_db(ctx) {
|
||||
let fields = ctx.get_db_word_prefix_fids(word_prefix.interned())?;
|
||||
all_fields.extend(fields);
|
||||
}
|
||||
|
||||
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
|
||||
|
||||
let mut edges = vec![];
|
||||
for fid in all_fields.iter().copied() {
|
||||
let weight = weights_map
|
||||
.weight(fid)
|
||||
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
|
||||
edges.push((
|
||||
weight as u32 * term.term_ids.len() as u32,
|
||||
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
|
||||
));
|
||||
}
|
||||
|
||||
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
|
||||
let max_weight: Option<u16> = weights_map.max_weight();
|
||||
|
||||
if let Some(max_weight) = max_weight {
|
||||
if !all_fields.contains(&max_weight) {
|
||||
edges.push((
|
||||
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
||||
conditions_interner.insert(FidCondition {
|
||||
term: term.clone(), // TODO remove this ugly clone
|
||||
fid: None,
|
||||
}),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(edges)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||
ScoreDetails::Fid(rank)
|
||||
}
|
||||
}
|
160
crates/milli/src/search/new/ranking_rule_graph/mod.rs
Normal file
160
crates/milli/src/search/new/ranking_rule_graph/mod.rs
Normal file
@ -0,0 +1,160 @@
|
||||
/*! Module implementing the graph used for the graph-based ranking rules
|
||||
and its related algorithms.
|
||||
|
||||
A ranking rule graph is built on top of the [`QueryGraph`]: the nodes stay
|
||||
the same but the edges are replaced.
|
||||
*/
|
||||
|
||||
mod build;
|
||||
mod cheapest_paths;
|
||||
mod condition_docids_cache;
|
||||
mod dead_ends_cache;
|
||||
|
||||
/// Implementation of the `exactness` ranking rule
|
||||
mod exactness;
|
||||
/// Implementation of the `attribute` ranking rule
|
||||
mod fid;
|
||||
/// Implementation of the `position` ranking rule
|
||||
mod position;
|
||||
/// Implementation of the `proximity` ranking rule
|
||||
mod proximity;
|
||||
/// Implementation of the `typo` ranking rule
|
||||
mod typo;
|
||||
/// Implementation of the `words` ranking rule
|
||||
mod words;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub use cheapest_paths::PathVisitor;
|
||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||
pub use dead_ends_cache::DeadEndsCache;
|
||||
pub use exactness::ExactnessGraph;
|
||||
pub use fid::{FidCondition, FidGraph};
|
||||
pub use position::{PositionCondition, PositionGraph};
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
use roaring::RoaringBitmap;
|
||||
pub use typo::{TypoCondition, TypoGraph};
|
||||
pub use words::{WordsCondition, WordsGraph};
|
||||
|
||||
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
|
||||
use super::query_term::LocatedQueryTermSubset;
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::score_details::{Rank, ScoreDetails};
|
||||
use crate::Result;
|
||||
|
||||
pub struct ComputedCondition {
|
||||
pub docids: RoaringBitmap,
|
||||
pub universe_len: u64,
|
||||
pub start_term_subset: Option<LocatedQueryTermSubset>,
|
||||
pub end_term_subset: LocatedQueryTermSubset,
|
||||
}
|
||||
|
||||
/// An edge in the ranking rule graph.
|
||||
///
|
||||
/// It contains:
|
||||
/// 1. The source and destination nodes
|
||||
/// 2. The cost of traversing this edge
|
||||
/// 3. The condition associated with it
|
||||
/// 4. The list of nodes that have to be skipped
|
||||
/// if this edge is traversed.
|
||||
#[derive(Clone)]
|
||||
pub struct Edge<E> {
|
||||
pub source_node: Interned<QueryNode>,
|
||||
pub dest_node: Interned<QueryNode>,
|
||||
pub cost: u32,
|
||||
pub condition: Option<Interned<E>>,
|
||||
pub nodes_to_skip: SmallBitmap<QueryNode>,
|
||||
}
|
||||
|
||||
impl<E> Hash for Edge<E> {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.source_node.hash(state);
|
||||
self.dest_node.hash(state);
|
||||
self.cost.hash(state);
|
||||
self.condition.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: declares the hand-written `PartialEq` of `Edge` to be a full equivalence relation.
impl<E> Eq for Edge<E> {}
|
||||
|
||||
impl<E> PartialEq for Edge<E> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.source_node == other.source_node
|
||||
&& self.dest_node == other.dest_node
|
||||
&& self.cost == other.cost
|
||||
&& self.condition == other.condition
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
|
||||
///
|
||||
/// It mostly describes how to:
|
||||
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
|
||||
/// 2. Compute the document ids satisfying a condition
|
||||
pub trait RankingRuleGraphTrait: Sized + 'static {
|
||||
type Condition: Sized + Clone + PartialEq + Eq + Hash;
|
||||
|
||||
/// Compute the document ids associated with the given edge condition,
|
||||
/// restricted to the given universe.
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition>;
|
||||
|
||||
/// Return the costs and conditions of the edges going from the source node to the destination node
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
source_node: Option<&LocatedQueryTermSubset>,
|
||||
dest_node: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
|
||||
|
||||
/// Convert the rank of a path to its corresponding score for the ranking rule
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails;
|
||||
}
|
||||
|
||||
/// The graph used by graph-based ranking rules.
|
||||
///
|
||||
/// It is built on top of a [`QueryGraph`], keeping the same nodes
|
||||
/// but replacing the edges.
|
||||
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
||||
pub query_graph: QueryGraph,
|
||||
pub edges_store: FixedSizeInterner<Option<Edge<G::Condition>>>,
|
||||
pub edges_of_node: MappedInterner<QueryNode, SmallBitmap<Option<Edge<G::Condition>>>>,
|
||||
pub conditions_interner: FixedSizeInterner<G::Condition>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
    /// Clone the graph field by field.
    fn clone(&self) -> Self {
        let Self { query_graph, edges_store, edges_of_node, conditions_interner } = self;
        Self {
            query_graph: query_graph.clone(),
            edges_store: edges_store.clone(),
            edges_of_node: edges_of_node.clone(),
            conditions_interner: conditions_interner.clone(),
        }
    }
}
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
/// Remove all edges with the given condition
|
||||
/// Return a set of all the source nodes of the removed edges
|
||||
pub fn remove_edges_with_condition(
|
||||
&mut self,
|
||||
condition_to_remove: Interned<G::Condition>,
|
||||
) -> BTreeSet<Interned<QueryNode>> {
|
||||
let mut source_nodes = BTreeSet::new();
|
||||
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
||||
let Some(edge) = edge_opt.as_mut() else { continue };
|
||||
let Some(condition) = edge.condition else { continue };
|
||||
|
||||
if condition == condition_to_remove {
|
||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||
*edge_opt = None;
|
||||
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||
source_nodes.insert(source_node);
|
||||
}
|
||||
}
|
||||
source_nodes
|
||||
}
|
||||
}
|
143
crates/milli/src/search/new/ranking_rule_graph/position/mod.rs
Normal file
143
crates/milli/src/search/new/ranking_rule_graph/position/mod.rs
Normal file
@ -0,0 +1,143 @@
|
||||
use fxhash::{FxHashMap, FxHashSet};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||
use crate::score_details::{Rank, ScoreDetails};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct PositionCondition {
|
||||
term: LocatedQueryTermSubset,
|
||||
positions: Vec<u16>,
|
||||
}
|
||||
|
||||
/// Uninhabited marker type whose [`RankingRuleGraphTrait`] implementation
/// uses [`PositionCondition`] as its edge condition.
pub enum PositionGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for PositionGraph {
|
||||
type Condition = PositionCondition;
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let PositionCondition { term, positions } = condition;
|
||||
let mut docids = RoaringBitmap::new();
|
||||
// TODO use MultiOps to do the big union
|
||||
for position in positions {
|
||||
// maybe compute_query_term_subset_docids_within_position should accept a universe as argument
|
||||
docids |= compute_query_term_subset_docids_within_position(
|
||||
ctx,
|
||||
Some(universe),
|
||||
&term.term_subset,
|
||||
*position,
|
||||
)?;
|
||||
}
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset: term.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_from: Option<&LocatedQueryTermSubset>,
|
||||
to_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
let term = to_term;
|
||||
|
||||
let mut all_positions = FxHashSet::default();
|
||||
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
|
||||
let positions = ctx.get_db_word_positions(word.interned())?;
|
||||
all_positions.extend(positions);
|
||||
}
|
||||
|
||||
for phrase in term.term_subset.all_phrases(ctx)? {
|
||||
// Only check the position of the first word in the phrase
|
||||
// this is not correct, but it is the best we can do, since
|
||||
// it is difficult/impossible to know the expected position
|
||||
// of a word in a phrase.
|
||||
// There is probably a more correct way to do it though.
|
||||
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
|
||||
let positions = ctx.get_db_word_positions(*word)?;
|
||||
all_positions.extend(positions);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(word_prefix) = term.term_subset.use_prefix_db(ctx) {
|
||||
let positions = ctx.get_db_word_prefix_positions(word_prefix.interned())?;
|
||||
all_positions.extend(positions);
|
||||
}
|
||||
|
||||
let mut positions_for_costs = FxHashMap::<u32, Vec<u16>>::default();
|
||||
|
||||
for position in all_positions {
|
||||
// FIXME: bucketed position???
|
||||
let distance = position.abs_diff(*term.positions.start());
|
||||
let cost = {
|
||||
let mut cost = 0;
|
||||
for i in 0..term.term_ids.len() {
|
||||
// This is actually not fully correct and slightly penalises ngrams unfairly.
|
||||
// Because if two words are in the same bucketed position (e.g. 32) and consecutive,
|
||||
// then their position cost will be 32+32=64, but an ngram of these two words at the
|
||||
// same position will have a cost of 32+32+1=65
|
||||
cost += cost_from_distance(distance as u32 + i as u32);
|
||||
}
|
||||
cost
|
||||
};
|
||||
positions_for_costs.entry(cost).or_default().push(position);
|
||||
}
|
||||
|
||||
let max_cost = term.term_ids.len() as u32 * 10;
|
||||
let max_cost_exists = positions_for_costs.contains_key(&max_cost);
|
||||
|
||||
let mut edges = vec![];
|
||||
for (cost, positions) in positions_for_costs {
|
||||
edges.push((
|
||||
cost,
|
||||
conditions_interner.insert(PositionCondition { term: term.clone(), positions }),
|
||||
));
|
||||
}
|
||||
|
||||
if !max_cost_exists {
|
||||
// artificial empty condition for computing max cost
|
||||
edges.push((
|
||||
max_cost,
|
||||
conditions_interner
|
||||
.insert(PositionCondition { term: term.clone(), positions: Vec::default() }),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(edges)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||
ScoreDetails::Position(rank)
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a distance to a cost between 0 and 10.
///
/// Distances are bucketed: 0 and 1 cost themselves, then the buckets grow
/// wider (2..=4, 5..=7, 8..=11, 12..=16, 17..=24, 25..=64, 65..=256,
/// 257..=1024), and anything above 1024 costs 10.
fn cost_from_distance(distance: u32) -> u32 {
    // Inclusive upper bound of the bucket of each cost, from cost 0 to cost 9.
    const BUCKET_UPPER_BOUNDS: [u32; 10] = [0, 1, 4, 7, 11, 16, 24, 64, 256, 1024];

    // The cost is the number of buckets lying entirely below `distance`.
    BUCKET_UPPER_BOUNDS.partition_point(|&upper_bound| upper_bound < distance) as u32
}
|
@ -0,0 +1,56 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use super::ProximityCondition;
|
||||
use crate::proximity::MAX_DISTANCE;
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
pub fn build_edges(
|
||||
_ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<ProximityCondition>,
|
||||
left_term: Option<&LocatedQueryTermSubset>,
|
||||
right_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<ProximityCondition>)>> {
|
||||
let right_ngram_max = right_term.term_ids.len().saturating_sub(1);
|
||||
|
||||
let Some(left_term) = left_term else {
|
||||
return Ok(vec![(
|
||||
right_ngram_max as u32,
|
||||
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
|
||||
)]);
|
||||
};
|
||||
|
||||
if left_term.positions.end() + 1 != *right_term.positions.start() {
|
||||
// We want to ignore this pair of terms
|
||||
// Unconditionally walk through the edge without computing the docids
|
||||
// This can happen when, in a query like `the sun flowers are beautiful`, the term
|
||||
// `flowers` is removed by the `words` ranking rule.
|
||||
// The remaining query graph represents `the sun .. are beautiful`
|
||||
// but `sun` and `are` have no proximity condition between them
|
||||
return Ok(vec![(
|
||||
right_ngram_max as u32,
|
||||
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
|
||||
)]);
|
||||
}
|
||||
|
||||
let mut conditions = vec![];
|
||||
for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
|
||||
conditions.push((
|
||||
cost as u32,
|
||||
conditions_interner.insert(ProximityCondition::Uninit {
|
||||
left_term: left_term.clone(),
|
||||
right_term: right_term.clone(),
|
||||
cost: (cost + 1) as u8,
|
||||
}),
|
||||
))
|
||||
}
|
||||
|
||||
conditions.push((
|
||||
((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
|
||||
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
|
||||
));
|
||||
|
||||
Ok(conditions)
|
||||
}
|
@ -0,0 +1,251 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::ProximityCondition;
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::query_term::{Phrase, QueryTermSubset};
|
||||
use crate::search::new::ranking_rule_graph::ComputedCondition;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||
use crate::search::new::{SearchContext, Word};
|
||||
use crate::Result;
|
||||
|
||||
pub fn compute_docids(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &ProximityCondition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let (left_term, right_term, cost) = match condition {
|
||||
ProximityCondition::Uninit { left_term, right_term, cost } => {
|
||||
(left_term, right_term, *cost)
|
||||
}
|
||||
ProximityCondition::Term { term } => {
|
||||
return Ok(ComputedCondition {
|
||||
docids: compute_query_term_subset_docids(ctx, Some(universe), &term.term_subset)?,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset: term.clone(),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let right_term_ngram_len = right_term.term_ids.len() as u8;
|
||||
|
||||
// e.g. for the simple words `sun .. flower`
|
||||
// the cost is 5
|
||||
// the forward proximity is 5
|
||||
// the backward proximity is 4
|
||||
//
|
||||
// for the 2gram `the sunflower`
|
||||
// the cost is 5
|
||||
// the forward proximity is 4
|
||||
// the backward proximity is 3
|
||||
let forward_proximity = 1 + cost - right_term_ngram_len;
|
||||
let backward_proximity = cost - right_term_ngram_len;
|
||||
|
||||
let mut docids = RoaringBitmap::new();
|
||||
|
||||
if let Some(right_prefix) = right_term.term_subset.use_prefix_db(ctx) {
|
||||
for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)?
|
||||
{
|
||||
compute_prefix_edges(
|
||||
ctx,
|
||||
left_word.interned(),
|
||||
right_prefix.interned(),
|
||||
left_phrase,
|
||||
forward_proximity,
|
||||
backward_proximity,
|
||||
&mut docids,
|
||||
universe,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)? {
|
||||
// Before computing the edges, check that the left word and left phrase
|
||||
// aren't disjoint with the universe, but only do it if there is more than
|
||||
// one word derivation to the right.
|
||||
//
|
||||
// This is an optimisation to avoid checking for an excessive number of
|
||||
// pairs.
|
||||
let right_derivs = first_word_of_term_iter(ctx, &right_term.term_subset)?;
|
||||
if right_derivs.len() > 1 {
|
||||
let universe = &universe;
|
||||
if let Some(left_phrase) = left_phrase {
|
||||
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
|
||||
continue;
|
||||
}
|
||||
} else if let Some(left_word_docids) = ctx.word_docids(Some(universe), left_word)? {
|
||||
if left_word_docids.is_empty() {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (right_word, right_phrase) in right_derivs {
|
||||
compute_non_prefix_edges(
|
||||
ctx,
|
||||
left_word.interned(),
|
||||
right_word,
|
||||
left_phrase,
|
||||
right_phrase,
|
||||
forward_proximity,
|
||||
backward_proximity,
|
||||
&mut docids,
|
||||
universe,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: Some(left_term.clone()),
|
||||
end_term_subset: right_term.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn compute_prefix_edges(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
left_word: Interned<String>,
|
||||
right_prefix: Interned<String>,
|
||||
left_phrase: Option<Interned<Phrase>>,
|
||||
forward_proximity: u8,
|
||||
backward_proximity: u8,
|
||||
docids: &mut RoaringBitmap,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
let mut used_left_words = BTreeSet::new();
|
||||
let mut used_left_phrases = BTreeSet::new();
|
||||
let mut used_right_prefix = BTreeSet::new();
|
||||
|
||||
let mut universe = universe.clone();
|
||||
if let Some(phrase) = left_phrase {
|
||||
// TODO we can clearly give the universe to this method
|
||||
// Unfortunately, it is deserializing/computing stuff and
|
||||
// keeping the result as a materialized bitmap.
|
||||
let phrase_docids = ctx.get_phrase_docids(phrase)?;
|
||||
if !phrase_docids.is_empty() {
|
||||
used_left_phrases.insert(phrase);
|
||||
}
|
||||
universe &= phrase_docids;
|
||||
if universe.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(new_docids) = ctx.get_db_word_prefix_pair_proximity_docids(
|
||||
Some(&universe),
|
||||
left_word,
|
||||
right_prefix,
|
||||
forward_proximity,
|
||||
)? {
|
||||
if !new_docids.is_empty() {
|
||||
used_left_words.insert(left_word);
|
||||
used_right_prefix.insert(right_prefix);
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
|
||||
// No swapping when computing the proximity between a phrase and a word
|
||||
if left_phrase.is_none() {
|
||||
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
|
||||
Some(&universe),
|
||||
right_prefix,
|
||||
left_word,
|
||||
backward_proximity,
|
||||
)? {
|
||||
if !new_docids.is_empty() {
|
||||
used_left_words.insert(left_word);
|
||||
used_right_prefix.insert(right_prefix);
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_non_prefix_edges(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
word1: Interned<String>,
|
||||
word2: Interned<String>,
|
||||
left_phrase: Option<Interned<Phrase>>,
|
||||
right_phrase: Option<Interned<Phrase>>,
|
||||
forward_proximity: u8,
|
||||
backward_proximity: u8,
|
||||
docids: &mut RoaringBitmap,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
let mut universe = universe.clone();
|
||||
|
||||
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
|
||||
universe &= ctx.get_phrase_docids(phrase)?;
|
||||
if universe.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(new_docids) =
|
||||
ctx.get_db_word_pair_proximity_docids(Some(&universe), word1, word2, forward_proximity)?
|
||||
{
|
||||
if !new_docids.is_empty() {
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() {
|
||||
if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(
|
||||
Some(&universe),
|
||||
word2,
|
||||
word1,
|
||||
backward_proximity,
|
||||
)? {
|
||||
if !new_docids.is_empty() {
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn last_words_of_term_derivations(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
t: &QueryTermSubset,
|
||||
) -> Result<BTreeSet<(Option<Interned<Phrase>>, Word)>> {
|
||||
let mut result = BTreeSet::new();
|
||||
|
||||
for w in t.all_single_words_except_prefix_db(ctx)? {
|
||||
result.insert((None, w));
|
||||
}
|
||||
for p in t.all_phrases(ctx)? {
|
||||
let phrase = ctx.phrase_interner.get(p);
|
||||
let last_term_of_phrase = phrase.words.last().unwrap();
|
||||
if let Some(last_word) = last_term_of_phrase {
|
||||
result.insert((Some(p), Word::Original(*last_word)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
fn first_word_of_term_iter(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
t: &QueryTermSubset,
|
||||
) -> Result<BTreeSet<(Interned<String>, Option<Interned<Phrase>>)>> {
|
||||
let mut result = BTreeSet::new();
|
||||
let all_words = t.all_single_words_except_prefix_db(ctx)?;
|
||||
for w in all_words {
|
||||
result.insert((w.interned(), None));
|
||||
}
|
||||
for p in t.all_phrases(ctx)? {
|
||||
let phrase = ctx.phrase_interner.get(p);
|
||||
let first_term_of_phrase = phrase.words.first().unwrap();
|
||||
if let Some(first_word) = first_term_of_phrase {
|
||||
result.insert((*first_word, Some(p)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
pub mod build;
|
||||
pub mod compute_docids;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||
use crate::score_details::{Rank, ScoreDetails};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ProximityCondition {
|
||||
Uninit { left_term: LocatedQueryTermSubset, right_term: LocatedQueryTermSubset, cost: u8 },
|
||||
Term { term: LocatedQueryTermSubset },
|
||||
}
|
||||
|
||||
/// Uninhabited marker type whose [`RankingRuleGraphTrait`] implementation
/// uses [`ProximityCondition`] as its edge condition.
pub enum ProximityGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for ProximityGraph {
|
||||
type Condition = ProximityCondition;
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
compute_docids::compute_docids(ctx, condition, universe)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
source_term: Option<&LocatedQueryTermSubset>,
|
||||
dest_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
build::build_edges(ctx, conditions_interner, source_term, dest_term)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||
ScoreDetails::Proximity(rank)
|
||||
}
|
||||
}
|
85
crates/milli/src/search/new/ranking_rule_graph/typo/mod.rs
Normal file
85
crates/milli/src/search/new/ranking_rule_graph/typo/mod.rs
Normal file
@ -0,0 +1,85 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||
use crate::score_details::{self, Rank, ScoreDetails};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct TypoCondition {
|
||||
term: LocatedQueryTermSubset,
|
||||
nbr_typos: u8,
|
||||
}
|
||||
|
||||
/// Uninhabited marker type whose [`RankingRuleGraphTrait`] implementation
/// uses [`TypoCondition`] as its edge condition.
pub enum TypoGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for TypoGraph {
|
||||
type Condition = TypoCondition;
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let TypoCondition { term, .. } = condition;
|
||||
// maybe compute_query_term_subset_docids should accept a universe as argument
|
||||
let docids = compute_query_term_subset_docids(ctx, Some(universe), &term.term_subset)?;
|
||||
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset: term.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_from: Option<&LocatedQueryTermSubset>,
|
||||
to_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
let term = to_term;
|
||||
|
||||
let mut edges = vec![];
|
||||
// Ngrams have a base typo cost
|
||||
// 2-gram -> equivalent to 1 typo
|
||||
// 3-gram -> equivalent to 2 typos
|
||||
let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 };
|
||||
|
||||
for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) {
|
||||
let mut term = term.clone();
|
||||
match nbr_typos {
|
||||
0 => {
|
||||
term.term_subset.clear_one_typo_subset();
|
||||
term.term_subset.clear_two_typo_subset();
|
||||
}
|
||||
1 => {
|
||||
term.term_subset.clear_zero_typo_subset();
|
||||
term.term_subset.clear_two_typo_subset();
|
||||
}
|
||||
2 => {
|
||||
term.term_subset.clear_zero_typo_subset();
|
||||
term.term_subset.clear_one_typo_subset();
|
||||
}
|
||||
_ => panic!(),
|
||||
};
|
||||
|
||||
edges.push((
|
||||
nbr_typos as u32 + base_cost,
|
||||
conditions_interner.insert(TypoCondition { term, nbr_typos }),
|
||||
));
|
||||
}
|
||||
Ok(edges)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
|
||||
}
|
||||
}
|
53
crates/milli/src/search/new/ranking_rule_graph/words/mod.rs
Normal file
53
crates/milli/src/search/new/ranking_rule_graph/words/mod.rs
Normal file
@ -0,0 +1,53 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||
use crate::score_details::{self, Rank, ScoreDetails};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct WordsCondition {
|
||||
term: LocatedQueryTermSubset,
|
||||
}
|
||||
|
||||
/// Uninhabited marker type whose [`RankingRuleGraphTrait`] implementation
/// uses [`WordsCondition`] as its edge condition.
pub enum WordsGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for WordsGraph {
|
||||
type Condition = WordsCondition;
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
|
||||
fn resolve_condition(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<ComputedCondition> {
|
||||
let WordsCondition { term, .. } = condition;
|
||||
// maybe compute_query_term_subset_docids should accept a universe as argument
|
||||
let docids = compute_query_term_subset_docids(ctx, Some(universe), &term.term_subset)?;
|
||||
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset: term.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
|
||||
fn build_edges(
|
||||
_ctx: &mut SearchContext<'_>,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
_from: Option<&LocatedQueryTermSubset>,
|
||||
to_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
|
||||
fn rank_to_score(rank: Rank) -> ScoreDetails {
|
||||
ScoreDetails::Words(score_details::Words::from_rank(rank))
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user