Move crates under a sub folder to clean up the code

Clément Renault
2024-10-21 08:18:43 +02:00
parent 30f3c30389
commit 9c1e54a2c8
1062 changed files with 19 additions and 20 deletions


@@ -0,0 +1,92 @@
use std::collections::HashSet;
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, MappedInterner};
use crate::search::new::query_graph::{QueryNode, QueryNodeData};
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, SearchContext};
use crate::Result;
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
/// Build the ranking rule graph from the given query graph
pub fn build(
ctx: &mut SearchContext<'_>,
query_graph: QueryGraph,
cost_of_ignoring_node: MappedInterner<QueryNode, Option<(u32, SmallBitmap<QueryNode>)>>,
) -> Result<Self> {
let QueryGraph { nodes: graph_nodes, .. } = &query_graph;
let mut conditions_interner = DedupInterner::default();
let mut edges_store = DedupInterner::default();
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
for (source_id, source_node) in graph_nodes.iter() {
let new_edges = edges_of_node.get_mut(source_id);
for dest_idx in source_node.successors.iter() {
let src_term = match &source_node.data {
QueryNodeData::Term(t) => Some(t),
QueryNodeData::Start => None,
QueryNodeData::Deleted | QueryNodeData::End => panic!(),
};
let dest_node = graph_nodes.get(dest_idx);
let dest_term = match &dest_node.data {
QueryNodeData::Term(t) => t,
QueryNodeData::End => {
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost: 0,
condition: None,
nodes_to_skip: SmallBitmap::for_interned_values_in(graph_nodes),
}));
new_edges.insert(new_edge_id);
continue;
}
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
};
if let Some((cost_of_ignoring, forbidden_nodes)) =
cost_of_ignoring_node.get(dest_idx)
{
let dest = graph_nodes.get(dest_idx);
let dest_size = match &dest.data {
QueryNodeData::Term(term) => term.term_ids.len(),
_ => panic!(),
};
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost: *cost_of_ignoring * dest_size as u32,
condition: None,
nodes_to_skip: forbidden_nodes.clone(),
}));
new_edges.insert(new_edge_id);
}
let edges = G::build_edges(ctx, &mut conditions_interner, src_term, dest_term)?;
if edges.is_empty() {
continue;
}
for (cost, condition) in edges {
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost,
condition: Some(condition),
nodes_to_skip: SmallBitmap::for_interned_values_in(graph_nodes),
}));
new_edges.insert(new_edge_id);
}
}
}
let edges_store = edges_store.freeze();
let edges_of_node =
edges_of_node.map(|edges| SmallBitmap::from_iter(edges.iter().copied(), &edges_store));
let conditions_interner = conditions_interner.freeze();
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node, conditions_interner })
}
}


@@ -0,0 +1,400 @@
/** Implements a "PathVisitor" which finds all paths of a certain cost
from the START node to the END node of a ranking rule graph.
A path is a list of conditions. A condition is the data associated with
an edge, given by the ranking rule. Some edges don't have a condition associated
with them; they are "unconditional". These kinds of edges are used to "skip" a node.
The algorithm uses a depth-first search. It benefits from two main optimisations:
- The list of all possible costs to go from any node to the END node is precomputed
- The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges
untraversable depending on what other edges were selected.
These two optimisations are meant to avoid traversing edges that wouldn't lead
to a valid path. In practically all cases, we avoid the exponential complexity
that is inherent to depth-first search in a large ranking rule graph.
The DeadEndsCache is a sort of prefix tree which associates a list of forbidden
conditions with a list of traversed conditions.
For example, the DeadEndsCache could say the following:
- Immediately, from the start, the conditions `[a,b]` are forbidden
- if we take the condition `c`, then the conditions `[e]` are also forbidden
- and if after that, we take `f`, then `[h,i]` are also forbidden
- etc.
- if we take `g`, then `[f]` is also forbidden
- etc.
- etc.
As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden
conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden.
When a path is found from START to END, we give it to the `visit` closure.
This closure takes a mutable reference to the `DeadEndsCache`. This means that
the caller can update this cache. Therefore, we must handle the case where the
DeadEndsCache has been updated. This means potentially backtracking up to the point
where the traversed conditions are all allowed by the new DeadEndsCache.
The algorithm also implements the `TermsMatchingStrategy` logic.
Some edges are augmented with a list of "nodes_to_skip". Skipping
a node means "reaching this node through an unconditional edge". If we have
already traversed (i.e. not skipped) a node that is in this list, then we know that we
can't traverse this edge. Otherwise, we traverse the edge but make sure to skip any
future node that was present in the "nodes_to_skip" list.
The caller can decide to stop the path finding algorithm
by returning a `ControlFlow::Break` from the `visit` closure.
*/
use std::collections::{BTreeSet, VecDeque};
use std::iter::FromIterator;
use std::ops::ControlFlow;
use fxhash::FxHashSet;
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{Interned, MappedInterner};
use crate::search::new::query_graph::QueryNode;
use crate::search::new::small_bitmap::SmallBitmap;
use crate::Result;
/// Closure which processes a path found by the `PathVisitor`
type VisitFn<'f, G> = &'f mut dyn FnMut(
// the path as a list of conditions
&[Interned<<G as RankingRuleGraphTrait>::Condition>],
&mut RankingRuleGraph<G>,
// a mutable reference to the DeadEndsCache, to update it in case the given
// path doesn't resolve to any valid document ids
&mut DeadEndsCache<<G as RankingRuleGraphTrait>::Condition>,
) -> Result<ControlFlow<()>>;
/// A structure which is not updated by the traversal algorithm itself.
/// It can, however, be updated by the `visit` closure once a valid path has been found.
struct VisitorContext<'a, G: RankingRuleGraphTrait> {
graph: &'a mut RankingRuleGraph<G>,
all_costs_from_node: &'a MappedInterner<QueryNode, Vec<u64>>,
dead_ends_cache: &'a mut DeadEndsCache<G::Condition>,
}
/// The internal state of the traversal algorithm
struct VisitorState<G: RankingRuleGraphTrait> {
/// Budget from the current node to the end node
remaining_cost: u64,
/// Previously visited conditions, in order.
path: Vec<Interned<G::Condition>>,
/// Previously visited conditions, as an efficient and compact set.
visited_conditions: SmallBitmap<G::Condition>,
/// Previously visited (i.e. not skipped) nodes, as an efficient and compact set.
visited_nodes: SmallBitmap<QueryNode>,
/// The conditions that cannot be visited anymore
forbidden_conditions: SmallBitmap<G::Condition>,
/// The nodes that cannot be visited anymore (they must be skipped)
nodes_to_skip: SmallBitmap<QueryNode>,
}
/// See module documentation
pub struct PathVisitor<'a, G: RankingRuleGraphTrait> {
state: VisitorState<G>,
ctx: VisitorContext<'a, G>,
}
impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> {
pub fn new(
cost: u64,
graph: &'a mut RankingRuleGraph<G>,
all_costs_from_node: &'a MappedInterner<QueryNode, Vec<u64>>,
dead_ends_cache: &'a mut DeadEndsCache<G::Condition>,
) -> Self {
Self {
state: VisitorState {
remaining_cost: cost,
path: vec![],
visited_conditions: SmallBitmap::for_interned_values_in(&graph.conditions_interner),
visited_nodes: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes),
forbidden_conditions: SmallBitmap::for_interned_values_in(
&graph.conditions_interner,
),
nodes_to_skip: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes),
},
ctx: VisitorContext { graph, all_costs_from_node, dead_ends_cache },
}
}
/// See module documentation
pub fn visit_paths(mut self, visit: VisitFn<'_, G>) -> Result<()> {
let _ =
self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?;
Ok(())
}
}
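// A minimal, std-only sketch of the pruning idea described in the module documentation
// (hypothetical names, not used by the rest of this module): an edge is only followed when
// its cost fits in the remaining budget *and* the destination can still reach END with
// exactly that budget, using precomputed cost sets, which is the role played by
// `all_costs_from_node` in the real visitor.
#[cfg(test)]
mod budgeted_dfs_sketch {
    /// `edges[n]` lists `(dest, cost)` pairs; `costs_to_end[n]` is the sorted list of
    /// possible costs from `n` to END.
    fn visit(
        node: usize,
        remaining: u64,
        end: usize,
        edges: &[Vec<(usize, u64)>],
        costs_to_end: &[Vec<u64>],
        path: &mut Vec<usize>,
        out: &mut Vec<Vec<usize>>,
    ) {
        if node == end {
            out.push(path.clone());
            return;
        }
        for &(dest, cost) in &edges[node] {
            // Prune: skip the edge if no path of cost `remaining - cost` exists from `dest`.
            if cost > remaining || costs_to_end[dest].binary_search(&(remaining - cost)).is_err()
            {
                continue;
            }
            path.push(dest);
            visit(dest, remaining - cost, end, edges, costs_to_end, path, out);
            path.pop();
        }
    }

    #[test]
    fn only_paths_of_the_requested_cost_are_visited() {
        // Nodes 0 (START), 1, 2 (END); edges: 0 -> 1 (cost 1), 1 -> 2 (cost 1), 0 -> 2 (cost 3).
        let edges = vec![vec![(1, 1), (2, 3)], vec![(2, 1)], vec![]];
        let costs_to_end = vec![vec![2, 3], vec![1], vec![0]];
        let mut paths = vec![];
        visit(0, 2, 2, &edges, &costs_to_end, &mut vec![], &mut paths);
        assert_eq!(paths, vec![vec![1, 2]]);
    }
}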
impl<G: RankingRuleGraphTrait> VisitorState<G> {
/// Visits a node: traverse all its valid conditional and unconditional edges.
///
/// Returns ControlFlow::Break if the path finding algorithm should stop.
/// Returns whether a valid path was found from this node otherwise.
fn visit_node(
&mut self,
from_node: Interned<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
) -> Result<ControlFlow<(), bool>> {
// Whether any valid path was found from this point.
// If a valid path was found, then the DeadEndsCache may have been updated,
// and we will need to do more work to potentially backtrack.
let mut any_valid = false;
let edges = ctx.graph.edges_of_node.get(from_node).clone();
for edge_idx in edges.iter() {
// Could be `None` if the edge was deleted.
let Some(edge) = ctx.graph.edges_store.get(edge_idx).clone() else { continue };
if self.remaining_cost < edge.cost as u64 {
continue;
}
self.remaining_cost -= edge.cost as u64;
let cf = match edge.condition {
Some(condition) => self.visit_condition(
condition,
edge.dest_node,
&edge.nodes_to_skip,
visit,
ctx,
)?,
None => self.visit_no_condition(edge.dest_node, &edge.nodes_to_skip, visit, ctx)?,
};
self.remaining_cost += edge.cost as u64;
let ControlFlow::Continue(next_any_valid) = cf else {
return Ok(ControlFlow::Break(()));
};
any_valid |= next_any_valid;
if next_any_valid {
// backtrack as much as possible if a valid path was found and the dead_ends_cache
// was updated such that the current prefix is now invalid
self.forbidden_conditions = ctx
.dead_ends_cache
.forbidden_conditions_for_all_prefixes_up_to(self.path.iter().copied());
if self.visited_conditions.intersects(&self.forbidden_conditions) {
return Ok(ControlFlow::Continue(true));
}
}
}
Ok(ControlFlow::Continue(any_valid))
}
/// Visits an unconditional edge.
///
/// Returns ControlFlow::Break if the path finding algorithm should stop.
/// Returns whether a valid path was found from this node otherwise.
fn visit_no_condition(
&mut self,
dest_node: Interned<QueryNode>,
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
) -> Result<ControlFlow<(), bool>> {
if !ctx
.all_costs_from_node
.get(dest_node)
.iter()
.any(|next_cost| *next_cost == self.remaining_cost)
{
return Ok(ControlFlow::Continue(false));
}
// We've reached the END node!
if dest_node == ctx.graph.query_graph.end_node {
let control_flow = visit(&self.path, ctx.graph, ctx.dead_ends_cache)?;
// We could change the return type of the visit closure such that the caller
// tells us whether the dead ends cache was updated or not.
// Alternatively, maybe the DeadEndsCache should have a generation number
// attached to it, so that we don't need to play with these booleans at all.
match control_flow {
ControlFlow::Continue(_) => Ok(ControlFlow::Continue(true)),
ControlFlow::Break(_) => Ok(ControlFlow::Break(())),
}
} else {
let old_fbct = self.nodes_to_skip.clone();
self.nodes_to_skip.union(edge_new_nodes_to_skip);
let cf = self.visit_node(dest_node, visit, ctx)?;
self.nodes_to_skip = old_fbct;
Ok(cf)
}
}
/// Visits a conditional edge.
///
/// Returns ControlFlow::Break if the path finding algorithm should stop.
/// Returns whether a valid path was found from this node otherwise.
fn visit_condition(
&mut self,
condition: Interned<G::Condition>,
dest_node: Interned<QueryNode>,
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
) -> Result<ControlFlow<(), bool>> {
assert!(dest_node != ctx.graph.query_graph.end_node);
if self.forbidden_conditions.contains(condition)
|| self.nodes_to_skip.contains(dest_node)
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
{
return Ok(ControlFlow::Continue(false));
}
// Checking that from the destination node, there is at least
// one cost that we can visit that corresponds to our remaining budget.
if !ctx
.all_costs_from_node
.get(dest_node)
.iter()
.any(|next_cost| *next_cost == self.remaining_cost)
{
return Ok(ControlFlow::Continue(false));
}
self.path.push(condition);
self.visited_nodes.insert(dest_node);
self.visited_conditions.insert(condition);
let old_forb_cond = self.forbidden_conditions.clone();
if let Some(next_forbidden) =
ctx.dead_ends_cache.forbidden_conditions_after_prefix(self.path.iter().copied())
{
self.forbidden_conditions.union(&next_forbidden);
}
let old_nodes_to_skip = self.nodes_to_skip.clone();
self.nodes_to_skip.union(edge_new_nodes_to_skip);
let cf = self.visit_node(dest_node, visit, ctx)?;
self.nodes_to_skip = old_nodes_to_skip;
self.forbidden_conditions = old_forb_cond;
self.visited_conditions.remove(condition);
self.visited_nodes.remove(dest_node);
self.path.pop();
Ok(cf)
}
}
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn find_all_costs_to_end(&self) -> MappedInterner<QueryNode, Vec<u64>> {
let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]);
self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| {
if cur_node == self.query_graph.end_node {
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
return;
}
let mut self_costs = Vec::<u64>::new();
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
let succ_node = edge.dest_node;
let succ_costs = costs_to_end.get(succ_node);
for succ_cost in succ_costs {
self_costs.push(edge.cost as u64 + succ_cost);
}
}
self_costs.sort_unstable();
self_costs.dedup();
*costs_to_end.get_mut(cur_node) = self_costs;
});
costs_to_end
}
pub fn update_all_costs_before_node(
&self,
node_with_removed_outgoing_conditions: Interned<QueryNode>,
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
) {
// Traverse the graph backward from the target node, recomputing the cost for each of its predecessors.
// We first check that no other node is contributing the same total cost to a predecessor before removing
// the cost from the predecessor.
self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| {
let mut costs_to_remove = FxHashSet::default();
costs_to_remove.extend(costs.get(cur_node).iter().copied());
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
for cost in costs.get(edge.dest_node).iter() {
costs_to_remove.remove(&(*cost + edge.cost as u64));
if costs_to_remove.is_empty() {
return;
}
}
}
if costs_to_remove.is_empty() {
return;
}
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
for c in costs_to_remove {
new_costs.remove(&c);
}
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
});
}
/// Traverse the graph backwards from the given node such that every time
/// a node is visited, we are guaranteed that all its successors either:
/// 1. have already been visited; OR
/// 2. were not reachable from the given node
pub fn traverse_breadth_first_backward(
&self,
from: Interned<QueryNode>,
mut visit: impl FnMut(Interned<QueryNode>),
) {
let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
{
// go backward to get the set of all reachable nodes from the given node
// the nodes that are not reachable will be set as `visited`
let mut stack = VecDeque::new();
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
enqueued.insert(from);
stack.push_back(from);
while let Some(n) = stack.pop_front() {
if reachable.contains(n) {
continue;
}
reachable.insert(n);
for prev_node in self.query_graph.nodes.get(n).predecessors.iter() {
if !enqueued.contains(prev_node) && !reachable.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
};
let mut unreachable_or_visited =
SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
for (n, _) in self.query_graph.nodes.iter() {
if !reachable.contains(n) {
unreachable_or_visited.insert(n);
}
}
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
let mut stack = VecDeque::new();
enqueued.insert(from);
stack.push_back(from);
while let Some(cur_node) = stack.pop_front() {
if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) {
stack.push_back(cur_node);
continue;
}
unreachable_or_visited.insert(cur_node);
visit(cur_node);
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
}
}
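// A minimal, std-only sketch of the invariant documented on `traverse_breadth_first_backward`
// (hypothetical names, not used above), on a tiny DAG with `usize` node ids: a node is only
// visited once all of its successors have been visited, by re-enqueueing it otherwise. Unlike
// the real method, this sketch assumes every node is backward-reachable from `from`; the real
// code pre-marks unreachable nodes as visited.
#[cfg(test)]
mod backward_traversal_sketch {
    use std::collections::{HashSet, VecDeque};

    /// `succ[n]` / `pred[n]` list the successors / predecessors of node `n`.
    fn traverse_backward(succ: &[Vec<usize>], pred: &[Vec<usize>], from: usize) -> Vec<usize> {
        let mut visited = HashSet::new();
        let mut enqueued = HashSet::from([from]);
        let mut stack = VecDeque::from([from]);
        let mut order = vec![];
        while let Some(n) = stack.pop_front() {
            // Requeue the node if one of its successors has not been visited yet.
            if succ[n].iter().any(|s| !visited.contains(s)) {
                stack.push_back(n);
                continue;
            }
            visited.insert(n);
            order.push(n);
            for &p in &pred[n] {
                if enqueued.insert(p) {
                    stack.push_back(p);
                }
            }
        }
        order
    }

    #[test]
    fn successors_are_visited_first() {
        // 0 -> 1 -> 3 and 0 -> 2 -> 3, traversed backward from node 3.
        let succ = vec![vec![1, 2], vec![3], vec![3], vec![]];
        let pred = vec![vec![], vec![0], vec![0], vec![1, 2]];
        let order = traverse_backward(&succ, &pred, 3);
        assert_eq!(order[0], 3);
        assert_eq!(*order.last().unwrap(), 0);
    }
}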


@@ -0,0 +1,58 @@
use std::marker::PhantomData;
use fxhash::FxHashMap;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::Interned;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
use crate::Result;
/// A cache storing the document ids associated with each ranking rule edge
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
pub cache: FxHashMap<Interned<G::Condition>, ComputedCondition>,
_phantom: PhantomData<G>,
}
impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
fn default() -> Self {
Self { cache: Default::default(), _phantom: Default::default() }
}
}
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
pub fn get_subsets_used_by_condition(
&mut self,
interned_condition: Interned<G::Condition>,
) -> (&Option<LocatedQueryTermSubset>, &LocatedQueryTermSubset) {
let c = &self.cache[&interned_condition];
(&c.start_term_subset, &c.end_term_subset)
}
/// Retrieve the document ids for the given edge condition.
///
/// If the cache does not yet contain these docids, they are computed
/// and inserted in the cache.
pub fn get_computed_condition<'s>(
&'s mut self,
ctx: &mut SearchContext<'_>,
interned_condition: Interned<G::Condition>,
graph: &mut RankingRuleGraph<G>,
universe: &RoaringBitmap,
) -> Result<&'s ComputedCondition> {
if self.cache.contains_key(&interned_condition) {
let computed = self.cache.get_mut(&interned_condition).unwrap();
if computed.universe_len == universe.len() {
return Ok(computed);
} else {
computed.docids &= universe;
computed.universe_len = universe.len();
return Ok(computed);
}
}
let condition = graph.conditions_interner.get_mut(interned_condition);
let computed = G::resolve_condition(ctx, condition, universe)?;
// Can we put an assert here for computed.universe_len == universe.len() ?
let _ = self.cache.insert(interned_condition, computed);
let computed = &self.cache[&interned_condition];
Ok(computed)
}
}
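// A minimal sketch of the universe-shrinking behaviour implemented above (hypothetical types,
// not used by the cache): once a condition has been computed for a larger universe, a later
// call with a smaller universe only intersects the stored docids instead of recomputing them.
#[cfg(test)]
mod shrinking_cache_sketch {
    use roaring::RoaringBitmap;

    struct ToyCachedCondition {
        docids: RoaringBitmap,
        universe_len: u64,
    }

    /// Mirrors the cached branch of `get_computed_condition`.
    fn refresh(cached: &mut ToyCachedCondition, universe: &RoaringBitmap) {
        if cached.universe_len != universe.len() {
            cached.docids &= universe;
            cached.universe_len = universe.len();
        }
    }

    #[test]
    fn docids_stay_within_the_universe() {
        let mut cached = ToyCachedCondition { docids: (0..10).collect(), universe_len: 100 };
        let universe: RoaringBitmap = (5..50).collect();
        refresh(&mut cached, &universe);
        assert_eq!(cached.docids, (5..10).collect::<RoaringBitmap>());
        assert_eq!(cached.universe_len, universe.len());
    }
}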


@@ -0,0 +1,100 @@
use crate::search::new::interner::{FixedSizeInterner, Interned};
use crate::search::new::small_bitmap::SmallBitmap;
pub struct DeadEndsCache<T> {
// conditions and next could/should be part of the same vector
conditions: Vec<Interned<T>>,
next: Vec<Self>,
pub forbidden: SmallBitmap<T>,
}
impl<T> Clone for DeadEndsCache<T> {
fn clone(&self) -> Self {
Self {
conditions: self.conditions.clone(),
next: self.next.clone(),
forbidden: self.forbidden.clone(),
}
}
}
impl<T> DeadEndsCache<T> {
pub fn new(for_interner: &FixedSizeInterner<T>) -> Self {
Self {
conditions: vec![],
next: vec![],
forbidden: SmallBitmap::for_interned_values_in(for_interner),
}
}
pub fn forbid_condition(&mut self, condition: Interned<T>) {
self.forbidden.insert(condition);
}
fn advance(&mut self, condition: Interned<T>) -> Option<&mut Self> {
if let Some(idx) = self.conditions.iter().position(|c| *c == condition) {
Some(&mut self.next[idx])
} else {
None
}
}
pub fn forbidden_conditions_for_all_prefixes_up_to(
&mut self,
prefix: impl Iterator<Item = Interned<T>>,
) -> SmallBitmap<T> {
let mut forbidden = self.forbidden.clone();
let mut cursor = self;
for c in prefix {
if let Some(next) = cursor.advance(c) {
cursor = next;
forbidden.union(&cursor.forbidden);
} else {
break;
}
}
forbidden
}
pub fn forbidden_conditions_after_prefix(
&mut self,
prefix: impl Iterator<Item = Interned<T>>,
) -> Option<SmallBitmap<T>> {
let mut cursor = self;
for c in prefix {
if let Some(next) = cursor.advance(c) {
cursor = next;
} else {
return None;
}
}
Some(cursor.forbidden.clone())
}
pub fn forbid_condition_after_prefix(
&mut self,
mut prefix: impl Iterator<Item = Interned<T>>,
forbidden: Interned<T>,
) {
match prefix.next() {
None => {
self.forbidden.insert(forbidden);
}
Some(first_condition) => {
if let Some(idx) = self.conditions.iter().position(|c| *c == first_condition) {
return self.next[idx].forbid_condition_after_prefix(prefix, forbidden);
}
let mut rest = DeadEndsCache {
conditions: vec![],
next: vec![],
forbidden: SmallBitmap::new(self.forbidden.universe_length()),
};
rest.forbid_condition_after_prefix(prefix, forbidden);
self.conditions.push(first_condition);
self.next.push(rest);
}
}
}
// pub fn debug_print(&self, indent: usize) {
// println!("{} {:?}", " ".repeat(indent), self.forbidden.iter().collect::<Vec<_>>());
// for (condition, next) in self.conditions.iter().zip(self.next.iter()) {
// println!("{} {condition}:", " ".repeat(indent));
// next.debug_print(indent + 2);
// }
// }
}
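// A minimal, std-only sketch of the same prefix-tree idea with plain `u8` condition ids
// (hypothetical names, not used above), mirroring the example from the `cheapest_paths`
// module documentation: after taking `c` and then `f`, `[h, i]` are forbidden in addition
// to everything forbidden by the shorter prefixes.
#[cfg(test)]
mod prefix_tree_sketch {
    use std::collections::{BTreeSet, HashMap};

    #[derive(Default)]
    struct ToyDeadEnds {
        forbidden: BTreeSet<u8>,
        next: HashMap<u8, ToyDeadEnds>,
    }

    impl ToyDeadEnds {
        /// Like `forbid_condition_after_prefix`.
        fn forbid_after_prefix(&mut self, prefix: &[u8], forbidden: u8) {
            match prefix.split_first() {
                None => {
                    self.forbidden.insert(forbidden);
                }
                Some((first, rest)) => {
                    self.next.entry(*first).or_default().forbid_after_prefix(rest, forbidden)
                }
            }
        }

        /// Like `forbidden_conditions_for_all_prefixes_up_to`: the union of the forbidden
        /// sets of every prefix of `path`.
        fn forbidden_up_to(&self, path: &[u8]) -> BTreeSet<u8> {
            let mut out = self.forbidden.clone();
            let mut cursor = self;
            for c in path {
                match cursor.next.get(c) {
                    Some(next) => {
                        cursor = next;
                        out.extend(cursor.forbidden.iter().copied());
                    }
                    None => break,
                }
            }
            out
        }
    }

    #[test]
    fn follows_the_module_doc_example() {
        let (a, b, c, e, f, h, i) = (0u8, 1, 2, 4, 5, 7, 8);
        let mut cache = ToyDeadEnds::default();
        cache.forbidden.extend([a, b]);
        cache.forbid_after_prefix(&[c], e);
        cache.forbid_after_prefix(&[c, f], h);
        cache.forbid_after_prefix(&[c, f], i);
        assert_eq!(cache.forbidden_up_to(&[c, f]), BTreeSet::from([a, b, e, h, i]));
    }
}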


@@ -0,0 +1,92 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::Word;
use crate::{Result, SearchContext};
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ExactnessCondition {
ExactInAttribute(LocatedQueryTermSubset),
Any(LocatedQueryTermSubset),
}
pub enum ExactnessGraph {}
fn compute_docids(
ctx: &mut SearchContext<'_>,
dest_node: &LocatedQueryTermSubset,
universe: &RoaringBitmap,
) -> Result<RoaringBitmap> {
let exact_term = if let Some(exact_term) = dest_node.term_subset.exact_term(ctx) {
exact_term
} else {
return Ok(Default::default());
};
let candidates = match exact_term {
// TODO: I moved the intersection with the universe here
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)? & universe,
ExactTerm::Word(word) => {
ctx.word_docids(Some(universe), Word::Original(word))?.unwrap_or_default()
}
};
Ok(candidates)
}
impl RankingRuleGraphTrait for ExactnessGraph {
type Condition = ExactnessCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let (docids, end_term_subset) = match condition {
ExactnessCondition::ExactInAttribute(dest_node) => {
let mut end_term_subset = dest_node.clone();
end_term_subset.term_subset.keep_only_exact_term(ctx);
end_term_subset.term_subset.make_mandatory();
(compute_docids(ctx, dest_node, universe)?, end_term_subset)
}
ExactnessCondition::Any(dest_node) => {
let docids =
compute_query_term_subset_docids(ctx, Some(universe), &dest_node.term_subset)?;
(docids, dest_node.clone())
}
};
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset,
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn build_edges(
_ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_source_node: Option<&LocatedQueryTermSubset>,
dest_node: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
let exact_condition = ExactnessCondition::ExactInAttribute(dest_node.clone());
let exact_condition = conditions_interner.insert(exact_condition);
let skip_condition = ExactnessCondition::Any(dest_node.clone());
let skip_condition = conditions_interner.insert(skip_condition);
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::ExactWords(score_details::ExactWords::from_rank(rank))
}
}


@@ -0,0 +1,112 @@
use fxhash::FxHashSet;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
use crate::search::new::SearchContext;
use crate::{FieldId, InternalError, Result};
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct FidCondition {
term: LocatedQueryTermSubset,
fid: Option<FieldId>,
}
pub enum FidGraph {}
impl RankingRuleGraphTrait for FidGraph {
type Condition = FidCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let FidCondition { term, .. } = condition;
let docids = if let Some(fid) = condition.fid {
compute_query_term_subset_docids_within_field_id(
ctx,
Some(universe),
&term.term_subset,
fid,
)?
} else {
RoaringBitmap::new()
};
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset: term.clone(),
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn build_edges(
ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
let term = to_term;
let mut all_fields = FxHashSet::default();
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
let fields = ctx.get_db_word_fids(word.interned())?;
all_fields.extend(fields);
}
for phrase in term.term_subset.all_phrases(ctx)? {
for &word in phrase.words(ctx).iter().flatten() {
let fields = ctx.get_db_word_fids(word)?;
all_fields.extend(fields);
}
}
if let Some(word_prefix) = term.term_subset.use_prefix_db(ctx) {
let fields = ctx.get_db_word_prefix_fids(word_prefix.interned())?;
all_fields.extend(fields);
}
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
let mut edges = vec![];
for fid in all_fields.iter().copied() {
let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
edges.push((
weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
));
}
// Always look up the max_fid if we haven't already, and add an artificial condition for max scoring
let max_weight: Option<u16> = weights_map.max_weight();
if let Some(max_weight) = max_weight {
if !all_fields.contains(&max_weight) {
edges.push((
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone
fid: None,
}),
));
}
}
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Fid(rank)
}
}


@@ -0,0 +1,160 @@
/*! Module implementing the graph used for the graph-based ranking rules
and its related algorithms.
A ranking rule graph is built on top of the [`QueryGraph`]: the nodes stay
the same but the edges are replaced.
*/
mod build;
mod cheapest_paths;
mod condition_docids_cache;
mod dead_ends_cache;
/// Implementation of the `exactness` ranking rule
mod exactness;
/// Implementation of the `attribute` ranking rule
mod fid;
/// Implementation of the `position` ranking rule
mod position;
/// Implementation of the `proximity` ranking rule
mod proximity;
/// Implementation of the `typo` ranking rule
mod typo;
/// Implementation of the `words` ranking rule
mod words;
use std::collections::BTreeSet;
use std::hash::Hash;
pub use cheapest_paths::PathVisitor;
pub use condition_docids_cache::ConditionDocIdsCache;
pub use dead_ends_cache::DeadEndsCache;
pub use exactness::ExactnessGraph;
pub use fid::{FidCondition, FidGraph};
pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};
use roaring::RoaringBitmap;
pub use typo::{TypoCondition, TypoGraph};
pub use words::{WordsCondition, WordsGraph};
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
use super::query_term::LocatedQueryTermSubset;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::score_details::{Rank, ScoreDetails};
use crate::Result;
pub struct ComputedCondition {
pub docids: RoaringBitmap,
pub universe_len: u64,
pub start_term_subset: Option<LocatedQueryTermSubset>,
pub end_term_subset: LocatedQueryTermSubset,
}
/// An edge in the ranking rule graph.
///
/// It contains:
/// 1. The source and destination nodes
/// 2. The cost of traversing this edge
/// 3. The condition associated with it
/// 4. The list of nodes that have to be skipped
/// if this edge is traversed.
#[derive(Clone)]
pub struct Edge<E> {
pub source_node: Interned<QueryNode>,
pub dest_node: Interned<QueryNode>,
pub cost: u32,
pub condition: Option<Interned<E>>,
pub nodes_to_skip: SmallBitmap<QueryNode>,
}
impl<E> Hash for Edge<E> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.source_node.hash(state);
self.dest_node.hash(state);
self.cost.hash(state);
self.condition.hash(state);
}
}
impl<E> Eq for Edge<E> {}
impl<E> PartialEq for Edge<E> {
fn eq(&self, other: &Self) -> bool {
self.source_node == other.source_node
&& self.dest_node == other.dest_node
&& self.cost == other.cost
&& self.condition == other.condition
}
}
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
///
/// It mostly describes how to:
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
/// 2. Compute the document ids satisfying a condition
pub trait RankingRuleGraphTrait: Sized + 'static {
type Condition: Sized + Clone + PartialEq + Eq + Hash;
/// Compute the document ids associated with the given edge condition,
/// restricted to the given universe.
fn resolve_condition(
ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition>;
/// Return the costs and conditions of the edges going from the source node to the destination node
fn build_edges(
ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
source_node: Option<&LocatedQueryTermSubset>,
dest_node: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
/// Convert the rank of a path to its corresponding score for the ranking rule
fn rank_to_score(rank: Rank) -> ScoreDetails;
}
/// The graph used by graph-based ranking rules.
///
/// It is built on top of a [`QueryGraph`], keeping the same nodes
/// but replacing the edges.
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
pub query_graph: QueryGraph,
pub edges_store: FixedSizeInterner<Option<Edge<G::Condition>>>,
pub edges_of_node: MappedInterner<QueryNode, SmallBitmap<Option<Edge<G::Condition>>>>,
pub conditions_interner: FixedSizeInterner<G::Condition>,
}
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
fn clone(&self) -> Self {
Self {
query_graph: self.query_graph.clone(),
edges_store: self.edges_store.clone(),
edges_of_node: self.edges_of_node.clone(),
conditions_interner: self.conditions_interner.clone(),
}
}
}
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
/// Remove all edges with the given condition
/// Return a set of all the source nodes of the removed edges
pub fn remove_edges_with_condition(
&mut self,
condition_to_remove: Interned<G::Condition>,
) -> BTreeSet<Interned<QueryNode>> {
let mut source_nodes = BTreeSet::new();
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
let Some(edge) = edge_opt.as_mut() else { continue };
let Some(condition) = edge.condition else { continue };
if condition == condition_to_remove {
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
*edge_opt = None;
self.edges_of_node.get_mut(source_node).remove(edge_id);
source_nodes.insert(source_node);
}
}
source_nodes
}
}


@@ -0,0 +1,143 @@
use fxhash::{FxHashMap, FxHashSet};
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
use crate::search::new::SearchContext;
use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct PositionCondition {
term: LocatedQueryTermSubset,
positions: Vec<u16>,
}
pub enum PositionGraph {}
impl RankingRuleGraphTrait for PositionGraph {
type Condition = PositionCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let PositionCondition { term, positions } = condition;
let mut docids = RoaringBitmap::new();
// TODO use MultiOps to do the big union
for position in positions {
// maybe compute_query_term_subset_docids_within_position should accept a universe as argument
docids |= compute_query_term_subset_docids_within_position(
ctx,
Some(universe),
&term.term_subset,
*position,
)?;
}
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset: term.clone(),
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn build_edges(
ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
let term = to_term;
let mut all_positions = FxHashSet::default();
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
let positions = ctx.get_db_word_positions(word.interned())?;
all_positions.extend(positions);
}
for phrase in term.term_subset.all_phrases(ctx)? {
// Only check the position of the first word in the phrase.
// This is not correct, but it is the best we can do, since
// it is difficult/impossible to know the expected position
// of a word in a phrase.
// There is probably a more correct way to do it, though.
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
let positions = ctx.get_db_word_positions(*word)?;
all_positions.extend(positions);
}
}
if let Some(word_prefix) = term.term_subset.use_prefix_db(ctx) {
let positions = ctx.get_db_word_prefix_positions(word_prefix.interned())?;
all_positions.extend(positions);
}
let mut positions_for_costs = FxHashMap::<u32, Vec<u16>>::default();
for position in all_positions {
// FIXME: bucketed position???
let distance = position.abs_diff(*term.positions.start());
let cost = {
let mut cost = 0;
for i in 0..term.term_ids.len() {
// This is actually not fully correct and slightly penalises ngrams unfairly,
// because if two words are in the same bucketed position (e.g. 32) and consecutive,
// then their position cost will be 32+32=64, but an ngram of these two words at the
// same position will have a cost of 32+32+1=65.
cost += cost_from_distance(distance as u32 + i as u32);
}
cost
};
positions_for_costs.entry(cost).or_default().push(position);
}
let max_cost = term.term_ids.len() as u32 * 10;
let max_cost_exists = positions_for_costs.contains_key(&max_cost);
let mut edges = vec![];
for (cost, positions) in positions_for_costs {
edges.push((
cost,
conditions_interner.insert(PositionCondition { term: term.clone(), positions }),
));
}
if !max_cost_exists {
// artificial empty condition for computing max cost
edges.push((
max_cost,
conditions_interner
.insert(PositionCondition { term: term.clone(), positions: Vec::default() }),
));
}
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Position(rank)
}
}
fn cost_from_distance(distance: u32) -> u32 {
match distance {
0 => 0,
1 => 1,
2..=4 => 2,
5..=7 => 3,
8..=11 => 4,
12..=16 => 5,
17..=24 => 6,
25..=64 => 7,
65..=256 => 8,
257..=1024 => 9,
_ => 10,
}
}
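// A minimal check of the bucketing above (hypothetical test module): the cost grows
// monotonically with the distance and caps at 10.
#[cfg(test)]
mod cost_from_distance_sketch {
    use super::cost_from_distance;

    #[test]
    fn bucketing() {
        assert_eq!(cost_from_distance(0), 0);
        assert_eq!(cost_from_distance(3), 2);
        assert_eq!(cost_from_distance(16), 5);
        assert_eq!(cost_from_distance(64), 7);
        assert_eq!(cost_from_distance(100_000), 10);
    }
}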


@@ -0,0 +1,56 @@
#![allow(clippy::too_many_arguments)]
use super::ProximityCondition;
use crate::proximity::MAX_DISTANCE;
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
use crate::Result;
pub fn build_edges(
_ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<ProximityCondition>,
left_term: Option<&LocatedQueryTermSubset>,
right_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<ProximityCondition>)>> {
let right_ngram_max = right_term.term_ids.len().saturating_sub(1);
let Some(left_term) = left_term else {
return Ok(vec![(
right_ngram_max as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
)]);
};
if left_term.positions.end() + 1 != *right_term.positions.start() {
// We want to ignore this pair of terms and unconditionally walk through the
// edge without computing the docids.
// This can happen when, in a query like `the sun flowers are beautiful`, the term
// `flowers` is removed by the `words` ranking rule.
// The remaining query graph represents `the sun .. are beautiful`,
// but `sun` and `are` have no proximity condition between them.
return Ok(vec![(
right_ngram_max as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
)]);
}
let mut conditions = vec![];
for cost in right_ngram_max..(((MAX_DISTANCE as usize) - 1) + right_ngram_max) {
conditions.push((
cost as u32,
conditions_interner.insert(ProximityCondition::Uninit {
left_term: left_term.clone(),
right_term: right_term.clone(),
cost: (cost + 1) as u8,
}),
))
}
conditions.push((
((MAX_DISTANCE - 1) + (right_ngram_max as u32)),
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
));
Ok(conditions)
}
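// A minimal, std-only sketch of the shape of the edge costs produced above (hypothetical
// names): one conditional edge per cost in `right_ngram_max..(MAX_DISTANCE - 1) + right_ngram_max`,
// plus a final unconditional edge at the upper bound. The example assumes, purely for
// illustration, a right term of ngram length 2 (so `right_ngram_max == 1`) and a maximum
// distance of 8.
#[cfg(test)]
mod proximity_costs_sketch {
    fn edge_costs(right_ngram_max: u32, max_distance: u32) -> Vec<u32> {
        let mut out: Vec<u32> = (right_ngram_max..(max_distance - 1) + right_ngram_max).collect();
        out.push((max_distance - 1) + right_ngram_max);
        out
    }

    #[test]
    fn one_cost_per_distance_plus_an_unconditional_edge() {
        assert_eq!(edge_costs(1, 8), vec![1, 2, 3, 4, 5, 6, 7, 8]);
    }
}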


@@ -0,0 +1,251 @@
#![allow(clippy::too_many_arguments)]
use std::collections::BTreeSet;
use roaring::RoaringBitmap;
use super::ProximityCondition;
use crate::search::new::interner::Interned;
use crate::search::new::query_term::{Phrase, QueryTermSubset};
use crate::search::new::ranking_rule_graph::ComputedCondition;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::{SearchContext, Word};
use crate::Result;
pub fn compute_docids(
ctx: &mut SearchContext<'_>,
condition: &ProximityCondition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let (left_term, right_term, cost) = match condition {
ProximityCondition::Uninit { left_term, right_term, cost } => {
(left_term, right_term, *cost)
}
ProximityCondition::Term { term } => {
return Ok(ComputedCondition {
docids: compute_query_term_subset_docids(ctx, Some(universe), &term.term_subset)?,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset: term.clone(),
});
}
};
let right_term_ngram_len = right_term.term_ids.len() as u8;
// e.g. for the simple words `sun .. flower`
// the cost is 5
// the forward proximity is 5
// the backward proximity is 4
//
// for the 2gram `the sunflower`
// the cost is 5
// the forward proximity is 4
// the backward proximity is 3
let forward_proximity = 1 + cost - right_term_ngram_len;
let backward_proximity = cost - right_term_ngram_len;
let mut docids = RoaringBitmap::new();
if let Some(right_prefix) = right_term.term_subset.use_prefix_db(ctx) {
for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)?
{
compute_prefix_edges(
ctx,
left_word.interned(),
right_prefix.interned(),
left_phrase,
forward_proximity,
backward_proximity,
&mut docids,
universe,
)?;
}
}
for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)? {
// Before computing the edges, check that the left word and left phrase
// aren't disjoint from the universe, but only do it if there is more than
// one word derivation to the right.
//
// This is an optimisation to avoid checking for an excessive number of
// pairs.
let right_derivs = first_word_of_term_iter(ctx, &right_term.term_subset)?;
if right_derivs.len() > 1 {
let universe = &universe;
if let Some(left_phrase) = left_phrase {
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
continue;
}
} else if let Some(left_word_docids) = ctx.word_docids(Some(universe), left_word)? {
if left_word_docids.is_empty() {
continue;
}
}
}
for (right_word, right_phrase) in right_derivs {
compute_non_prefix_edges(
ctx,
left_word.interned(),
right_word,
left_phrase,
right_phrase,
forward_proximity,
backward_proximity,
&mut docids,
universe,
)?;
}
}
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: Some(left_term.clone()),
end_term_subset: right_term.clone(),
})
}
fn compute_prefix_edges(
ctx: &mut SearchContext<'_>,
left_word: Interned<String>,
right_prefix: Interned<String>,
left_phrase: Option<Interned<Phrase>>,
forward_proximity: u8,
backward_proximity: u8,
docids: &mut RoaringBitmap,
universe: &RoaringBitmap,
) -> Result<()> {
let mut used_left_words = BTreeSet::new();
let mut used_left_phrases = BTreeSet::new();
let mut used_right_prefix = BTreeSet::new();
let mut universe = universe.clone();
if let Some(phrase) = left_phrase {
// TODO we can clearly give the universe to this method
// Unfortunately, it is deserializing/computing stuff and
// keeping the result as a materialized bitmap.
let phrase_docids = ctx.get_phrase_docids(phrase)?;
if !phrase_docids.is_empty() {
used_left_phrases.insert(phrase);
}
universe &= phrase_docids;
if universe.is_empty() {
return Ok(());
}
}
if let Some(new_docids) = ctx.get_db_word_prefix_pair_proximity_docids(
Some(&universe),
left_word,
right_prefix,
forward_proximity,
)? {
if !new_docids.is_empty() {
used_left_words.insert(left_word);
used_right_prefix.insert(right_prefix);
*docids |= new_docids;
}
}
// No swapping when computing the proximity between a phrase and a word
if left_phrase.is_none() {
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
Some(&universe),
right_prefix,
left_word,
backward_proximity,
)? {
if !new_docids.is_empty() {
used_left_words.insert(left_word);
used_right_prefix.insert(right_prefix);
*docids |= new_docids;
}
}
}
Ok(())
}
fn compute_non_prefix_edges(
ctx: &mut SearchContext<'_>,
word1: Interned<String>,
word2: Interned<String>,
left_phrase: Option<Interned<Phrase>>,
right_phrase: Option<Interned<Phrase>>,
forward_proximity: u8,
backward_proximity: u8,
docids: &mut RoaringBitmap,
universe: &RoaringBitmap,
) -> Result<()> {
let mut universe = universe.clone();
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
universe &= ctx.get_phrase_docids(phrase)?;
if universe.is_empty() {
return Ok(());
}
}
if let Some(new_docids) =
ctx.get_db_word_pair_proximity_docids(Some(&universe), word1, word2, forward_proximity)?
{
if !new_docids.is_empty() {
*docids |= new_docids;
}
}
if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() {
if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(
Some(&universe),
word2,
word1,
backward_proximity,
)? {
if !new_docids.is_empty() {
*docids |= new_docids;
}
}
}
Ok(())
}
fn last_words_of_term_derivations(
ctx: &mut SearchContext<'_>,
t: &QueryTermSubset,
) -> Result<BTreeSet<(Option<Interned<Phrase>>, Word)>> {
let mut result = BTreeSet::new();
for w in t.all_single_words_except_prefix_db(ctx)? {
result.insert((None, w));
}
for p in t.all_phrases(ctx)? {
let phrase = ctx.phrase_interner.get(p);
let last_term_of_phrase = phrase.words.last().unwrap();
if let Some(last_word) = last_term_of_phrase {
result.insert((Some(p), Word::Original(*last_word)));
}
}
Ok(result)
}
fn first_word_of_term_iter(
ctx: &mut SearchContext<'_>,
t: &QueryTermSubset,
) -> Result<BTreeSet<(Interned<String>, Option<Interned<Phrase>>)>> {
let mut result = BTreeSet::new();
let all_words = t.all_single_words_except_prefix_db(ctx)?;
for w in all_words {
result.insert((w.interned(), None));
}
for p in t.all_phrases(ctx)? {
let phrase = ctx.phrase_interner.get(p);
let first_term_of_phrase = phrase.words.first().unwrap();
if let Some(first_word) = first_term_of_phrase {
result.insert((*first_word, Some(p)));
}
}
Ok(result)
}
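// A minimal check of the proximity arithmetic used in `compute_docids` (hypothetical helper,
// extracted so the two worked examples from the comment can be verified).
#[cfg(test)]
mod proximity_arithmetic_sketch {
    /// Returns `(forward_proximity, backward_proximity)` for a given cost and right ngram length.
    fn proximities(cost: u8, right_term_ngram_len: u8) -> (u8, u8) {
        (1 + cost - right_term_ngram_len, cost - right_term_ngram_len)
    }

    #[test]
    fn matches_the_worked_examples() {
        // `sun .. flower`: a single word on the right, cost 5.
        assert_eq!(proximities(5, 1), (5, 4));
        // `the sunflower` as a 2gram on the right, cost 5.
        assert_eq!(proximities(5, 2), (4, 3));
    }
}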


@@ -0,0 +1,47 @@
pub mod build;
pub mod compute_docids;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ProximityCondition {
Uninit { left_term: LocatedQueryTermSubset, right_term: LocatedQueryTermSubset, cost: u8 },
Term { term: LocatedQueryTermSubset },
}
pub enum ProximityGraph {}
impl RankingRuleGraphTrait for ProximityGraph {
type Condition = ProximityCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
compute_docids::compute_docids(ctx, condition, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn build_edges(
ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
source_term: Option<&LocatedQueryTermSubset>,
dest_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
build::build_edges(ctx, conditions_interner, source_term, dest_term)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Proximity(rank)
}
}


@@ -0,0 +1,85 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::SearchContext;
use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct TypoCondition {
term: LocatedQueryTermSubset,
nbr_typos: u8,
}
pub enum TypoGraph {}
impl RankingRuleGraphTrait for TypoGraph {
type Condition = TypoCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let TypoCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids should accept a universe as argument
let docids = compute_query_term_subset_docids(ctx, Some(universe), &term.term_subset)?;
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset: term.clone(),
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn build_edges(
ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
let term = to_term;
let mut edges = vec![];
// Ngrams have a base typo cost
// 2-gram -> equivalent to 1 typo
// 3-gram -> equivalent to 2 typos
let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 };
for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) {
let mut term = term.clone();
match nbr_typos {
0 => {
term.term_subset.clear_one_typo_subset();
term.term_subset.clear_two_typo_subset();
}
1 => {
term.term_subset.clear_zero_typo_subset();
term.term_subset.clear_two_typo_subset();
}
2 => {
term.term_subset.clear_zero_typo_subset();
term.term_subset.clear_one_typo_subset();
}
_ => panic!(),
};
edges.push((
nbr_typos as u32 + base_cost,
conditions_interner.insert(TypoCondition { term, nbr_typos }),
));
}
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
}
}
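// A minimal sketch of the typo edge costs produced above (hypothetical helper): the cost of an
// edge is the number of typos plus the base cost, which is 0 for a single word and the ngram
// length otherwise. The 2gram example assumes, purely for illustration, that a single typo is
// allowed.
#[cfg(test)]
mod typo_costs_sketch {
    fn typo_costs(term_ids_len: u32, max_typo_cost: u32) -> Vec<u32> {
        let base_cost = if term_ids_len == 1 { 0 } else { term_ids_len };
        (0..=max_typo_cost).map(|nbr_typos| nbr_typos + base_cost).collect()
    }

    #[test]
    fn base_cost_shifts_the_costs() {
        assert_eq!(typo_costs(1, 2), vec![0, 1, 2]);
        assert_eq!(typo_costs(2, 1), vec![2, 3]);
    }
}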


@@ -0,0 +1,53 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::SearchContext;
use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct WordsCondition {
term: LocatedQueryTermSubset,
}
pub enum WordsGraph {}
impl RankingRuleGraphTrait for WordsGraph {
type Condition = WordsCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn resolve_condition(
ctx: &mut SearchContext<'_>,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let WordsCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids should accept a universe as argument
let docids = compute_query_term_subset_docids(ctx, Some(universe), &term.term_subset)?;
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset: term.clone(),
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn build_edges(
_ctx: &mut SearchContext<'_>,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
}
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Words(score_details::Words::from_rank(rank))
}
}