Add some documentation and use bitmaps instead of hashmaps when possible

This commit is contained in:
Loïc Lecrenier
2023-02-21 12:33:32 +01:00
parent 132191360b
commit 66d0c63694
10 changed files with 298 additions and 232 deletions

View File

@ -1,6 +1,9 @@
use std::collections::{BTreeMap, HashSet};
use itertools::Itertools;
use roaring::RoaringBitmap;
use crate::new::NodeIndex;
use super::{
empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, Edge, EdgeIndex, RankingRuleGraph,
@ -14,18 +17,11 @@ pub struct Path {
}
struct DijkstraState {
unvisited: HashSet<usize>, // should be a small bitset
distances: Vec<u64>, // or binary heap (f64, usize)
unvisited: RoaringBitmap, // should be a small bitset?
distances: Vec<u64>, // or binary heap, or btreemap? (f64, usize)
edges: Vec<EdgeIndex>,
edge_costs: Vec<u8>,
paths: Vec<Option<usize>>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PathEdgeId<Id> {
pub from: usize,
pub to: usize,
pub id: Id,
paths: Vec<Option<NodeIndex>>,
}
pub struct KCheapestPathsState {
@ -127,9 +123,10 @@ impl KCheapestPathsState {
// for all the paths already found that share a common prefix with the root path
// we delete the edge from the spur node to the next one
for edge_index_to_remove in self.cheapest_paths.edge_indices_after_prefix(root_path) {
let was_removed = graph.node_edges[*spur_node].remove(&edge_index_to_remove.0);
let was_removed =
graph.node_edges[spur_node.0 as usize].remove(edge_index_to_remove.0 as u32);
if was_removed {
tmp_removed_edges.push(edge_index_to_remove.0);
tmp_removed_edges.push(edge_index_to_remove.0 as u32);
}
}
@ -137,7 +134,7 @@ impl KCheapestPathsState {
// we will combine it with the root path to get a potential kth cheapest path
let spur_path = graph.cheapest_path_to_end(*spur_node);
// restore the temporarily removed edges
graph.node_edges[*spur_node].extend(tmp_removed_edges);
graph.node_edges[spur_node.0 as usize].extend(tmp_removed_edges);
let Some(spur_path) = spur_path else { continue; };
let total_cost = root_cost + spur_path.cost;
@ -182,68 +179,73 @@ impl KCheapestPathsState {
}
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
fn cheapest_path_to_end(&self, from: usize) -> Option<Path> {
fn cheapest_path_to_end(&self, from: NodeIndex) -> Option<Path> {
let mut dijkstra = DijkstraState {
unvisited: (0..self.query_graph.nodes.len()).collect(),
unvisited: (0..self.query_graph.nodes.len() as u32).collect(),
distances: vec![u64::MAX; self.query_graph.nodes.len()],
edges: vec![EdgeIndex(usize::MAX); self.query_graph.nodes.len()],
edge_costs: vec![u8::MAX; self.query_graph.nodes.len()],
paths: vec![None; self.query_graph.nodes.len()],
};
dijkstra.distances[from] = 0;
dijkstra.distances[from.0 as usize] = 0;
// TODO: could use a binary heap here to store the distances
while let Some(&cur_node) =
dijkstra.unvisited.iter().min_by_key(|&&n| dijkstra.distances[n])
// TODO: could use a binary heap here to store the distances, or a btreemap
while let Some(cur_node) =
dijkstra.unvisited.iter().min_by_key(|&n| dijkstra.distances[n as usize])
{
let cur_node_dist = dijkstra.distances[cur_node];
let cur_node_dist = dijkstra.distances[cur_node as usize];
if cur_node_dist == u64::MAX {
return None;
}
if cur_node == self.query_graph.end_node {
if cur_node == self.query_graph.end_node.0 {
break;
}
let succ_cur_node: HashSet<_> = self.node_edges[cur_node]
.iter()
.map(|e| self.all_edges[*e].as_ref().unwrap().to_node)
.collect();
// this is expensive, but shouldn't be:
// ideally I could quickly get a bitmap of all of a node's successors,
// then take the intersection with unvisited
let succ_cur_node: &RoaringBitmap = &self.successors[cur_node as usize];
// .iter()
// .map(|e| self.all_edges[e as usize].as_ref().unwrap().to_node.0)
// .collect();
// TODO: this intersection may be slow but shouldn't be,
// can use a bitmap intersection instead
let unvisited_succ_cur_node = succ_cur_node.intersection(&dijkstra.unvisited);
for &succ in unvisited_succ_cur_node {
let Some((cheapest_edge, cheapest_edge_cost)) = self.cheapest_edge(cur_node, succ) else {
let unvisited_succ_cur_node = succ_cur_node & &dijkstra.unvisited;
for succ in unvisited_succ_cur_node {
// cheapest_edge() is also potentially too expensive
let Some((cheapest_edge, cheapest_edge_cost)) = self.cheapest_edge(NodeIndex(cur_node), NodeIndex(succ)) else {
continue
};
// println!("cur node dist {cur_node_dist}");
let old_dist_succ = &mut dijkstra.distances[succ];
let old_dist_succ = &mut dijkstra.distances[succ as usize];
let new_potential_distance = cur_node_dist + cheapest_edge_cost as u64;
if new_potential_distance < *old_dist_succ {
*old_dist_succ = new_potential_distance;
dijkstra.edges[succ] = cheapest_edge;
dijkstra.edge_costs[succ] = cheapest_edge_cost;
dijkstra.paths[succ] = Some(cur_node);
dijkstra.edges[succ as usize] = cheapest_edge;
dijkstra.edge_costs[succ as usize] = cheapest_edge_cost;
dijkstra.paths[succ as usize] = Some(NodeIndex(cur_node));
}
}
dijkstra.unvisited.remove(&cur_node);
dijkstra.unvisited.remove(cur_node);
}
let mut cur = self.query_graph.end_node;
// let mut edge_costs = vec![];
// let mut distances = vec![];
let mut path_edges = vec![];
while let Some(n) = dijkstra.paths[cur] {
path_edges.push(dijkstra.edges[cur]);
while let Some(n) = dijkstra.paths[cur.0 as usize] {
path_edges.push(dijkstra.edges[cur.0 as usize]);
cur = n;
}
path_edges.reverse();
Some(Path { edges: path_edges, cost: dijkstra.distances[self.query_graph.end_node] })
Some(Path {
edges: path_edges,
cost: dijkstra.distances[self.query_graph.end_node.0 as usize],
})
}
// TODO: this implementation is VERY fragile, as we assume that the edges are ordered by cost
// already. Change it.
pub fn cheapest_edge(&self, cur_node: usize, succ: usize) -> Option<(EdgeIndex, u8)> {
pub fn cheapest_edge(&self, cur_node: NodeIndex, succ: NodeIndex) -> Option<(EdgeIndex, u8)> {
self.visit_edges(cur_node, succ, |edge_idx, edge| {
std::ops::ControlFlow::Break((edge_idx, edge.cost))
})