mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-29 18:04:47 +00:00
Refactor of the Interner
This commit is contained in:
@ -1,7 +1,7 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::Interner;
|
||||
use crate::search::new::interner::{DedupInterner, Interner};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, SearchContext};
|
||||
use crate::Result;
|
||||
@ -15,40 +15,43 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
|
||||
/// Build the ranking rule graph from the given query graph
|
||||
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
||||
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
||||
let QueryGraph { nodes: graph_nodes, .. } = &query_graph;
|
||||
|
||||
let mut conditions_interner = Interner::default();
|
||||
let mut conditions_interner = DedupInterner::default();
|
||||
|
||||
let mut edges_store = vec![];
|
||||
let mut edges_of_node = vec![];
|
||||
let mut edges_store = Interner::default();
|
||||
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
|
||||
|
||||
for (source_idx, source_node) in graph_nodes.iter().enumerate() {
|
||||
edges_of_node.push(HashSet::new());
|
||||
let new_edges = edges_of_node.last_mut().unwrap();
|
||||
for (source_id, source_node) in graph_nodes.iter() {
|
||||
let new_edges = edges_of_node.get_mut(source_id);
|
||||
|
||||
for dest_idx in graph_edges[source_idx].successors.iter() {
|
||||
let dest_node = &graph_nodes[dest_idx as usize];
|
||||
for dest_idx in source_node.successors.iter() {
|
||||
let dest_node = graph_nodes.get(dest_idx);
|
||||
let edges = G::build_edges(ctx, &mut conditions_interner, source_node, dest_node)?;
|
||||
if edges.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (cost, condition) in edges {
|
||||
edges_store.push(Some(Edge {
|
||||
source_node: source_idx as u16,
|
||||
let new_edge_id = edges_store.push(Some(Edge {
|
||||
source_node: source_id,
|
||||
dest_node: dest_idx,
|
||||
cost,
|
||||
condition,
|
||||
}));
|
||||
new_edges.insert(edges_store.len() as u16 - 1);
|
||||
new_edges.insert(new_edge_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
let edges_of_node = edges_of_node
|
||||
.into_iter()
|
||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
|
||||
.collect();
|
||||
let edges_store = edges_store.freeze();
|
||||
let edges_of_node =
|
||||
edges_of_node.map(|edges| SmallBitmap::from_iter(edges.iter().copied(), &edges_store));
|
||||
|
||||
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node, conditions_interner })
|
||||
Ok(RankingRuleGraph {
|
||||
query_graph,
|
||||
edges_store,
|
||||
edges_of_node,
|
||||
conditions_interner: conditions_interner.freeze(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -3,8 +3,10 @@
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::QueryNode;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::Result;
|
||||
|
||||
@ -17,11 +19,11 @@ pub struct Path {
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
pub fn visit_paths_of_cost(
|
||||
&mut self,
|
||||
from: usize,
|
||||
from: Interned<QueryNode>,
|
||||
cost: u16,
|
||||
all_distances: &[Vec<(u16, SmallBitmap)>],
|
||||
empty_paths_cache: &mut EmptyPathsCache,
|
||||
mut visit: impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||
mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
let _ = self.visit_paths_of_cost_rec(
|
||||
from,
|
||||
@ -30,76 +32,108 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
empty_paths_cache,
|
||||
&mut visit,
|
||||
&mut vec![],
|
||||
&mut SmallBitmap::new(self.edges_store.len() as u16),
|
||||
empty_paths_cache.empty_edges.clone(),
|
||||
&mut SmallBitmap::new(self.edges_store.len()),
|
||||
&mut empty_paths_cache.conditions.clone(),
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
pub fn visit_paths_of_cost_rec(
|
||||
&mut self,
|
||||
from: usize,
|
||||
from: Interned<QueryNode>,
|
||||
cost: u16,
|
||||
all_distances: &[Vec<(u16, SmallBitmap)>],
|
||||
empty_paths_cache: &mut EmptyPathsCache,
|
||||
visit: &mut impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
||||
prev_edges: &mut Vec<u16>,
|
||||
cur_path: &mut SmallBitmap,
|
||||
mut forbidden_edges: SmallBitmap,
|
||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||
visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>,
|
||||
prev_conditions: &mut Vec<u16>,
|
||||
cur_path: &mut SmallBitmap<G::EdgeCondition>,
|
||||
forbidden_conditions: &mut SmallBitmap<G::EdgeCondition>,
|
||||
) -> Result<bool> {
|
||||
let mut any_valid = false;
|
||||
|
||||
let edges = self.edges_of_node[from].clone();
|
||||
let edges = self.edges_of_node.get(from).clone();
|
||||
for edge_idx in edges.iter() {
|
||||
let Some(edge) = self.edges_store[edge_idx as usize].as_ref() else { continue };
|
||||
if cost < edge.cost as u16
|
||||
|| forbidden_edges.contains(edge_idx)
|
||||
|| !all_distances[edge.dest_node as usize].iter().any(
|
||||
|(next_cost, necessary_edges)| {
|
||||
(*next_cost == cost - edge.cost as u16)
|
||||
&& !forbidden_edges.intersects(necessary_edges)
|
||||
},
|
||||
)
|
||||
{
|
||||
let Some(edge) = self.edges_store.get(edge_idx).as_ref() else { continue };
|
||||
if cost < edge.cost as u16 {
|
||||
continue;
|
||||
}
|
||||
cur_path.insert(edge_idx);
|
||||
prev_edges.push(edge_idx);
|
||||
let next_any_valid = match edge.condition {
|
||||
EdgeCondition::Unconditional => {
|
||||
if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
visit(prev_conditions, self, empty_paths_cache)?;
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
edge.dest_node,
|
||||
cost - edge.cost as u16,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
visit,
|
||||
prev_conditions,
|
||||
cur_path,
|
||||
forbidden_conditions,
|
||||
)?
|
||||
}
|
||||
}
|
||||
EdgeCondition::Conditional(condition) => {
|
||||
if forbidden_conditions.contains(condition)
|
||||
|| !all_distances.get(edge.dest_node).iter().any(
|
||||
|(next_cost, necessary_conditions)| {
|
||||
(*next_cost == cost - edge.cost as u16)
|
||||
&& !forbidden_conditions.intersects(necessary_conditions)
|
||||
},
|
||||
)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
cur_path.insert(condition);
|
||||
// TODO: typed path set
|
||||
prev_conditions.push(condition.into_inner());
|
||||
|
||||
let mut new_forbidden_edges = forbidden_edges.clone();
|
||||
new_forbidden_edges.union(&empty_paths_cache.empty_couple_edges[edge_idx as usize]);
|
||||
empty_paths_cache.empty_prefixes.final_edges_after_prefix(prev_edges, &mut |x| {
|
||||
new_forbidden_edges.insert(x);
|
||||
});
|
||||
|
||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
visit(prev_edges, self, empty_paths_cache)?;
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
edge.dest_node as usize,
|
||||
cost - edge.cost as u16,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
visit,
|
||||
prev_edges,
|
||||
cur_path,
|
||||
new_forbidden_edges,
|
||||
)?
|
||||
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
||||
new_forbidden_conditions
|
||||
.union(empty_paths_cache.condition_couples.get(condition));
|
||||
empty_paths_cache.prefixes.final_edges_after_prefix(
|
||||
prev_conditions,
|
||||
&mut |x| {
|
||||
new_forbidden_conditions.insert(Interned::new(x));
|
||||
},
|
||||
);
|
||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
visit(prev_conditions, self, empty_paths_cache)?;
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
edge.dest_node,
|
||||
cost - edge.cost as u16,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
visit,
|
||||
prev_conditions,
|
||||
cur_path,
|
||||
&mut new_forbidden_conditions,
|
||||
)?
|
||||
};
|
||||
cur_path.remove(condition);
|
||||
prev_conditions.pop();
|
||||
next_any_valid
|
||||
}
|
||||
};
|
||||
any_valid |= next_any_valid;
|
||||
cur_path.remove(edge_idx);
|
||||
prev_edges.pop();
|
||||
|
||||
if next_any_valid {
|
||||
if empty_paths_cache.path_is_empty(prev_edges, cur_path) {
|
||||
if empty_paths_cache.path_is_dead_end(prev_conditions, cur_path) {
|
||||
return Ok(any_valid);
|
||||
}
|
||||
forbidden_edges.union(&empty_paths_cache.empty_edges);
|
||||
for edge in prev_edges.iter() {
|
||||
forbidden_edges.union(&empty_paths_cache.empty_couple_edges[*edge as usize]);
|
||||
forbidden_conditions.union(&empty_paths_cache.conditions);
|
||||
for prev_condition in prev_conditions.iter() {
|
||||
forbidden_conditions.union(
|
||||
empty_paths_cache.condition_couples.get(Interned::new(*prev_condition)),
|
||||
);
|
||||
}
|
||||
empty_paths_cache.empty_prefixes.final_edges_after_prefix(prev_edges, &mut |x| {
|
||||
forbidden_edges.insert(x);
|
||||
empty_paths_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
|
||||
forbidden_conditions.insert(Interned::new(x));
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -107,36 +141,41 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
Ok(any_valid)
|
||||
}
|
||||
|
||||
pub fn initialize_distances_with_necessary_edges(&self) -> Vec<Vec<(u16, SmallBitmap)>> {
|
||||
let mut distances_to_end: Vec<Vec<(u16, SmallBitmap)>> =
|
||||
vec![vec![]; self.query_graph.nodes.len()];
|
||||
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len() as u16);
|
||||
pub fn initialize_distances_with_necessary_edges(
|
||||
&self,
|
||||
) -> MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode> {
|
||||
let mut distances_to_end = self.query_graph.nodes.map(|_| vec![]);
|
||||
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
|
||||
|
||||
let mut node_stack = VecDeque::new();
|
||||
|
||||
distances_to_end[self.query_graph.end_node as usize] =
|
||||
vec![(0, SmallBitmap::new(self.edges_store.len() as u16))];
|
||||
*distances_to_end.get_mut(self.query_graph.end_node) =
|
||||
vec![(0, SmallBitmap::for_interned_values_in(&self.conditions_interner))];
|
||||
|
||||
for prev_node in
|
||||
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
||||
{
|
||||
node_stack.push_back(prev_node as usize);
|
||||
for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() {
|
||||
node_stack.push_back(prev_node);
|
||||
enqueued.insert(prev_node);
|
||||
}
|
||||
|
||||
while let Some(cur_node) = node_stack.pop_front() {
|
||||
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
|
||||
let mut self_distances = BTreeMap::<u16, SmallBitmap<G::EdgeCondition>>::new();
|
||||
|
||||
let cur_node_edges = &self.edges_of_node[cur_node];
|
||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||
for edge_idx in cur_node_edges.iter() {
|
||||
let edge = self.edges_store[edge_idx as usize].as_ref().unwrap();
|
||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||
let condition = match edge.condition {
|
||||
EdgeCondition::Unconditional => None,
|
||||
EdgeCondition::Conditional(condition) => Some(condition),
|
||||
};
|
||||
let succ_node = edge.dest_node;
|
||||
let succ_distances = &distances_to_end[succ_node as usize];
|
||||
for (succ_distance, succ_necessary_edges) in succ_distances {
|
||||
let potential_necessary_edges = SmallBitmap::from_iter(
|
||||
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
|
||||
self.edges_store.len() as u16,
|
||||
);
|
||||
let succ_distances = distances_to_end.get(succ_node);
|
||||
for (succ_distance, succ_necessary_conditions) in succ_distances {
|
||||
let mut potential_necessary_edges =
|
||||
SmallBitmap::for_interned_values_in(&self.conditions_interner);
|
||||
for condition in condition.into_iter().chain(succ_necessary_conditions.iter()) {
|
||||
potential_necessary_edges.insert(condition);
|
||||
}
|
||||
|
||||
match self_distances.entry(edge.cost as u16 + succ_distance) {
|
||||
Entry::Occupied(mut prev_necessary_edges) => {
|
||||
prev_necessary_edges.get_mut().intersection(&potential_necessary_edges);
|
||||
@ -147,10 +186,14 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
}
|
||||
}
|
||||
}
|
||||
distances_to_end[cur_node] = self_distances.into_iter().collect();
|
||||
for prev_node in self.query_graph.edges[cur_node].predecessors.iter() {
|
||||
let distances_to_end_cur_node = distances_to_end.get_mut(cur_node);
|
||||
for (cost, necessary_edges) in self_distances.iter() {
|
||||
distances_to_end_cur_node.push((*cost, necessary_edges.clone()));
|
||||
}
|
||||
*distances_to_end.get_mut(cur_node) = self_distances.into_iter().collect();
|
||||
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||
if !enqueued.contains(prev_node) {
|
||||
node_stack.push_back(prev_node as usize);
|
||||
node_stack.push_back(prev_node);
|
||||
enqueued.insert(prev_node);
|
||||
}
|
||||
}
|
||||
|
@ -9,17 +9,17 @@ use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
/// A cache storing the document ids associated with each ranking rule edge condition
|
||||
pub struct EdgeConditionsCache<G: RankingRuleGraphTrait> {
|
||||
pub struct EdgeConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||
pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>,
|
||||
_phantom: PhantomData<G>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Default for EdgeConditionsCache<G> {
|
||||
impl<G: RankingRuleGraphTrait> Default for EdgeConditionDocIdsCache<G> {
|
||||
fn default() -> Self {
|
||||
Self { cache: Default::default(), _phantom: Default::default() }
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> EdgeConditionsCache<G> {
|
||||
impl<G: RankingRuleGraphTrait> EdgeConditionDocIdsCache<G> {
|
||||
/// Retrieve the document ids for the given edge condition.
|
||||
///
|
||||
/// If the cache does not yet contain these docids, they are computed
|
||||
|
@ -1,59 +1,82 @@
|
||||
use super::path_set::PathSet;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use super::{path_set::PathSet, RankingRuleGraphTrait};
|
||||
use crate::search::new::{
|
||||
interner::{FixedSizeInterner, Interned, MappedInterner},
|
||||
small_bitmap::SmallBitmap,
|
||||
};
|
||||
|
||||
/// A cache which stores sufficient conditions for a path
|
||||
/// to resolve to an empty set of candidates within the current
|
||||
/// universe.
|
||||
#[derive(Clone)]
|
||||
pub struct EmptyPathsCache {
|
||||
/// The set of edge indexes that resolve to no documents.
|
||||
pub empty_edges: SmallBitmap,
|
||||
pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
||||
/// The set of edge conditions that resolve to no documents.
|
||||
pub conditions: SmallBitmap<G::EdgeCondition>,
|
||||
/// A set of path prefixes that resolve to no documents.
|
||||
pub empty_prefixes: PathSet,
|
||||
/// A set of empty couples of edge indexes that resolve to no documents.
|
||||
pub empty_couple_edges: Vec<SmallBitmap>,
|
||||
pub prefixes: PathSet,
|
||||
/// For each edge condition, the set of other edge conditions which, when found on the same path, resolve to no documents.
|
||||
pub condition_couples: MappedInterner<SmallBitmap<G::EdgeCondition>, G::EdgeCondition>,
|
||||
}
|
||||
impl EmptyPathsCache {
|
||||
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
||||
pub fn new(all_edges_len: u16) -> Self {
|
||||
impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
empty_edges: SmallBitmap::new(all_edges_len),
|
||||
empty_prefixes: PathSet::default(),
|
||||
empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize],
|
||||
conditions: self.conditions.clone(),
|
||||
prefixes: self.prefixes.clone(),
|
||||
condition_couples: self.condition_couples.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||
/// Create a new cache for a ranking rule graph whose edge conditions are stored in `all_edge_conditions`.
|
||||
pub fn new(all_edge_conditions: &FixedSizeInterner<G::EdgeCondition>) -> Self {
|
||||
Self {
|
||||
conditions: SmallBitmap::for_interned_values_in(all_edge_conditions),
|
||||
prefixes: PathSet::default(),
|
||||
condition_couples: all_edge_conditions
|
||||
.map(|_| SmallBitmap::for_interned_values_in(all_edge_conditions)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Store in the cache that every path containing the given edge resolves to no documents.
|
||||
pub fn forbid_edge(&mut self, edge_idx: u16) {
|
||||
self.empty_edges.insert(edge_idx);
|
||||
self.empty_couple_edges[edge_idx as usize].clear();
|
||||
self.empty_prefixes.remove_edge(&edge_idx);
|
||||
for edges2 in self.empty_couple_edges.iter_mut() {
|
||||
edges2.remove(edge_idx);
|
||||
pub fn add_condition(&mut self, condition: Interned<G::EdgeCondition>) {
|
||||
self.conditions.insert(condition);
|
||||
self.condition_couples.get_mut(condition).clear();
|
||||
self.prefixes.remove_edge(condition.into_inner()); // TODO: typed PathSet
|
||||
for (_, edges2) in self.condition_couples.iter_mut() {
|
||||
edges2.remove(condition);
|
||||
}
|
||||
}
|
||||
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
||||
pub fn forbid_prefix(&mut self, prefix: &[u16]) {
|
||||
self.empty_prefixes.insert(prefix.iter().copied());
|
||||
pub fn add_prefix(&mut self, prefix: &[u16]) {
|
||||
// TODO: typed PathSet
|
||||
self.prefixes.insert(prefix.iter().copied());
|
||||
}
|
||||
|
||||
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||
pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) {
|
||||
self.empty_couple_edges[edge1 as usize].insert(edge2);
|
||||
pub fn add_condition_couple(
|
||||
&mut self,
|
||||
edge1: Interned<G::EdgeCondition>,
|
||||
edge2: Interned<G::EdgeCondition>,
|
||||
) {
|
||||
self.condition_couples.get_mut(edge1).insert(edge2);
|
||||
}
|
||||
|
||||
/// Returns true if the cache can determine that the given path resolves to no documents.
|
||||
pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool {
|
||||
if path_bitmap.intersects(&self.empty_edges) {
|
||||
pub fn path_is_dead_end(
|
||||
&self,
|
||||
path: &[u16],
|
||||
path_bitmap: &SmallBitmap<G::EdgeCondition>,
|
||||
) -> bool {
|
||||
if path_bitmap.intersects(&self.conditions) {
|
||||
return true;
|
||||
}
|
||||
for edge in path.iter() {
|
||||
let forbidden_other_edges = &self.empty_couple_edges[*edge as usize];
|
||||
// TODO: typed path
|
||||
let forbidden_other_edges = self.condition_couples.get(Interned::new(*edge));
|
||||
if path_bitmap.intersects(forbidden_other_edges) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if self.empty_prefixes.contains_prefix_of_path(path) {
|
||||
if self.prefixes.contains_prefix_of_path(path) {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
|
@ -18,13 +18,13 @@ mod typo;
|
||||
|
||||
use std::hash::Hash;
|
||||
|
||||
pub use edge_docids_cache::EdgeConditionsCache;
|
||||
pub use empty_paths_cache::EmptyPathsCache;
|
||||
pub use proximity::ProximityGraph;
|
||||
pub use edge_docids_cache::EdgeConditionDocIdsCache;
|
||||
pub use empty_paths_cache::DeadEndPathCache;
|
||||
pub use proximity::{ProximityEdge, ProximityGraph};
|
||||
use roaring::RoaringBitmap;
|
||||
pub use typo::TypoGraph;
|
||||
pub use typo::{TypoEdge, TypoGraph};
|
||||
|
||||
use super::interner::{Interned, Interner};
|
||||
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
|
||||
use super::logger::SearchLogger;
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
@ -63,8 +63,8 @@ impl<E> Clone for EdgeCondition<E> {
|
||||
/// 3. The condition associated with it
|
||||
#[derive(Clone)]
|
||||
pub struct Edge<E> {
|
||||
pub source_node: u16,
|
||||
pub dest_node: u16,
|
||||
pub source_node: Interned<QueryNode>,
|
||||
pub dest_node: Interned<QueryNode>,
|
||||
pub cost: u8,
|
||||
pub condition: EdgeCondition<E>,
|
||||
}
|
||||
@ -96,7 +96,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
|
||||
fn build_edges<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||
source_node: &QueryNode,
|
||||
dest_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
||||
@ -104,9 +104,9 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &[Vec<u16>],
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &[Vec<(u16, SmallBitmap)>],
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::EdgeCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
);
|
||||
@ -118,9 +118,9 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
/// but replacing the edges.
|
||||
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
||||
pub query_graph: QueryGraph,
|
||||
pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
|
||||
pub edges_of_node: Vec<SmallBitmap>,
|
||||
pub conditions_interner: Interner<G::EdgeCondition>,
|
||||
pub edges_store: FixedSizeInterner<Option<Edge<G::EdgeCondition>>>,
|
||||
pub edges_of_node: MappedInterner<SmallBitmap<Option<Edge<G::EdgeCondition>>>, QueryNode>,
|
||||
pub conditions_interner: FixedSizeInterner<G::EdgeCondition>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
||||
fn clone(&self) -> Self {
|
||||
@ -133,13 +133,20 @@ impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
/// Remove the given edge from the ranking rule graph
|
||||
pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
|
||||
let edge_opt = &mut self.edges_store[edge_index as usize];
|
||||
let Some(edge) = &edge_opt else { return };
|
||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||
*edge_opt = None;
|
||||
|
||||
self.edges_of_node[source_node as usize].remove(edge_index);
|
||||
/// Remove all edges with the given condition
|
||||
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::EdgeCondition>) {
|
||||
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
||||
let Some(edge) = edge_opt.as_mut() else { continue };
|
||||
match edge.condition {
|
||||
EdgeCondition::Unconditional => continue,
|
||||
EdgeCondition::Conditional(condition) => {
|
||||
if condition == condition_to_remove {
|
||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||
*edge_opt = None;
|
||||
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -27,10 +27,10 @@ impl PathSet {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_edge(&mut self, forbidden_edge: &u16) {
|
||||
pub fn remove_edge(&mut self, forbidden_edge: u16) {
|
||||
let mut i = 0;
|
||||
while i < self.nodes.len() {
|
||||
let should_remove = if &self.nodes[i].0 == forbidden_edge {
|
||||
let should_remove = if self.nodes[i].0 == forbidden_edge {
|
||||
true
|
||||
} else if !self.nodes[i].1.nodes.is_empty() {
|
||||
self.nodes[i].1.remove_edge(forbidden_edge);
|
||||
|
@ -3,7 +3,8 @@ use std::collections::BTreeMap;
|
||||
|
||||
use super::ProximityEdge;
|
||||
use crate::search::new::db_cache::DatabaseCache;
|
||||
use crate::search::new::interner::{Interned, Interner};
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
|
||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
||||
@ -13,7 +14,7 @@ use heed::RoTxn;
|
||||
|
||||
fn last_word_of_term_iter<'t>(
|
||||
t: &'t QueryTerm,
|
||||
phrase_interner: &'t Interner<Phrase>,
|
||||
phrase_interner: &'t DedupInterner<Phrase>,
|
||||
) -> impl Iterator<Item = (Option<Interned<Phrase>>, Interned<String>)> + 't {
|
||||
t.all_single_words_except_prefix_db().map(|w| (None, w)).chain(t.all_phrases().flat_map(
|
||||
move |p| {
|
||||
@ -24,7 +25,7 @@ fn last_word_of_term_iter<'t>(
|
||||
}
|
||||
fn first_word_of_term_iter<'t>(
|
||||
t: &'t QueryTerm,
|
||||
phrase_interner: &'t Interner<Phrase>,
|
||||
phrase_interner: &'t DedupInterner<Phrase>,
|
||||
) -> impl Iterator<Item = (Interned<String>, Option<Interned<Phrase>>)> + 't {
|
||||
t.all_single_words_except_prefix_db().map(|w| (w, None)).chain(t.all_phrases().flat_map(
|
||||
move |p| {
|
||||
@ -36,7 +37,7 @@ fn first_word_of_term_iter<'t>(
|
||||
|
||||
pub fn build_edges<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
conditions_interner: &mut Interner<ProximityEdge>,
|
||||
conditions_interner: &mut DedupInterner<ProximityEdge>,
|
||||
from_node: &QueryNode,
|
||||
to_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
||||
@ -50,19 +51,19 @@ pub fn build_edges<'ctx>(
|
||||
term_docids: _,
|
||||
} = ctx;
|
||||
|
||||
let (left_term, left_end_position) = match from_node {
|
||||
QueryNode::Term(LocatedQueryTerm { value, positions }) => {
|
||||
let (left_term, left_end_position) = match &from_node.data {
|
||||
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||
(term_interner.get(*value), *positions.end())
|
||||
}
|
||||
QueryNode::Deleted => return Ok(vec![]),
|
||||
QueryNode::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNode::End => return Ok(vec![]),
|
||||
QueryNodeData::Deleted => return Ok(vec![]),
|
||||
QueryNodeData::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNodeData::End => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let right_term = match &to_node {
|
||||
QueryNode::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNode::Deleted | QueryNode::Start => return Ok(vec![]),
|
||||
QueryNode::Term(term) => term,
|
||||
let right_term = match &to_node.data {
|
||||
QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
|
||||
QueryNodeData::Term(term) => term,
|
||||
};
|
||||
let LocatedQueryTerm { value: right_value, positions: right_positions } = right_term;
|
||||
|
||||
@ -145,7 +146,7 @@ fn add_prefix_edges<'ctx>(
|
||||
index: &mut &crate::Index,
|
||||
txn: &'ctx RoTxn,
|
||||
db_cache: &mut DatabaseCache<'ctx>,
|
||||
word_interner: &mut Interner<String>,
|
||||
word_interner: &mut DedupInterner<String>,
|
||||
right_ngram_length: usize,
|
||||
left_word: Interned<String>,
|
||||
right_prefix: Interned<String>,
|
||||
@ -207,7 +208,7 @@ fn add_non_prefix_edges<'ctx>(
|
||||
index: &mut &crate::Index,
|
||||
txn: &'ctx RoTxn,
|
||||
db_cache: &mut DatabaseCache<'ctx>,
|
||||
word_interner: &mut Interner<String>,
|
||||
word_interner: &mut DedupInterner<String>,
|
||||
right_ngram_length: usize,
|
||||
word1: Interned<String>,
|
||||
word2: Interned<String>,
|
||||
|
@ -3,9 +3,9 @@ pub mod compute_docids;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{Interned, Interner};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::Phrase;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
@ -56,7 +56,7 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
|
||||
fn build_edges<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||
source_node: &QueryNode,
|
||||
dest_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||
@ -66,19 +66,12 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
fn log_state(
|
||||
graph: &super::RankingRuleGraph<Self>,
|
||||
paths: &[Vec<u16>],
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &[Vec<(u16, SmallBitmap)>],
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>,
|
||||
cost: u16,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
logger.log_proximity_state(
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
universe,
|
||||
distances.to_vec(),
|
||||
cost,
|
||||
);
|
||||
logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,10 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{Interned, Interner};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||
@ -55,13 +56,13 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
|
||||
fn build_edges<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||
_from_node: &QueryNode,
|
||||
to_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||
let SearchContext { term_interner, .. } = ctx;
|
||||
match to_node {
|
||||
QueryNode::Term(LocatedQueryTerm { value, positions }) => {
|
||||
match &to_node.data {
|
||||
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||
let mut edges = vec![];
|
||||
// Ngrams have a base typo cost
|
||||
// 2-gram -> equivalent to 1 typo
|
||||
@ -130,20 +131,20 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
}
|
||||
Ok(edges)
|
||||
}
|
||||
QueryNode::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNode::Deleted | QueryNode::Start => panic!(),
|
||||
QueryNodeData::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &[Vec<u16>],
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &[Vec<(u16, SmallBitmap)>],
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||
cost: u16,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances.to_vec(), cost);
|
||||
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user