mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-23 20:16:29 +00:00
Intern ranking rule graph edge conditions as well
This commit is contained in:
@@ -40,10 +40,11 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::ranking_rule_graph::{
|
use super::ranking_rule_graph::{
|
||||||
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, ProximityGraph,
|
EdgeCondition, EdgeConditionsCache, EmptyPathsCache, ProximityGraph, RankingRuleGraph,
|
||||||
|
RankingRuleGraphTrait, TypoGraph,
|
||||||
};
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
|
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
|
||||||
@@ -78,7 +79,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
/// The current graph
|
/// The current graph
|
||||||
graph: RankingRuleGraph<G>,
|
graph: RankingRuleGraph<G>,
|
||||||
/// Cache to retrieve the docids associated with each edge
|
/// Cache to retrieve the docids associated with each edge
|
||||||
edge_docids_cache: EdgeDocidsCache<G>,
|
edge_conditions_cache: EdgeConditionsCache<G>,
|
||||||
/// Cache used to optimistically discard paths that resolve to no documents.
|
/// Cache used to optimistically discard paths that resolve to no documents.
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
/// A structure giving the list of possible costs from each node to the end node,
|
/// A structure giving the list of possible costs from each node to the end node,
|
||||||
@@ -94,25 +95,27 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
|
fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
graph: &mut RankingRuleGraph<G>,
|
graph: &mut RankingRuleGraph<G>,
|
||||||
edge_docids_cache: &mut EdgeDocidsCache<G>,
|
edge_docids_cache: &mut EdgeConditionsCache<G>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut EmptyPathsCache,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for edge_index in 0..graph.edges_store.len() as u16 {
|
for edge_index in 0..graph.edges_store.len() as u16 {
|
||||||
if graph.edges_store[edge_index as usize].is_none() {
|
let Some(edge) = graph.edges_store[edge_index as usize].as_ref() else {
|
||||||
continue;
|
continue;
|
||||||
}
|
};
|
||||||
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
|
let condition = edge.condition;
|
||||||
match docids {
|
|
||||||
BitmapOrAllRef::Bitmap(docids) => {
|
match condition {
|
||||||
|
EdgeCondition::Unconditional => continue,
|
||||||
|
EdgeCondition::Conditional(condition) => {
|
||||||
|
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
|
||||||
if docids.is_disjoint(universe) {
|
if docids.is_disjoint(universe) {
|
||||||
graph.remove_ranking_rule_edge(edge_index);
|
graph.remove_ranking_rule_edge(edge_index);
|
||||||
empty_paths_cache.forbid_edge(edge_index);
|
empty_paths_cache.forbid_edge(edge_index);
|
||||||
edge_docids_cache.cache.remove(&edge_index);
|
edge_docids_cache.cache.remove(&condition);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BitmapOrAllRef::All => continue,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -132,7 +135,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||||
let mut edge_docids_cache = EdgeDocidsCache::default();
|
let mut edge_docids_cache = EdgeConditionsCache::default();
|
||||||
let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16);
|
let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16);
|
||||||
|
|
||||||
// First simplify the graph as much as possible, by computing the docids of the edges
|
// First simplify the graph as much as possible, by computing the docids of the edges
|
||||||
@@ -150,7 +153,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
|
|
||||||
let state = GraphBasedRankingRuleState {
|
let state = GraphBasedRankingRuleState {
|
||||||
graph,
|
graph,
|
||||||
edge_docids_cache,
|
edge_conditions_cache: edge_docids_cache,
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
all_distances,
|
all_distances,
|
||||||
cur_distance_idx: 0,
|
cur_distance_idx: 0,
|
||||||
@@ -174,11 +177,11 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
// should never happen
|
// should never happen
|
||||||
let mut state = self.state.take().unwrap();
|
let mut state = self.state.take().unwrap();
|
||||||
|
|
||||||
// TODO: does this have a real positive performance cost?
|
// TODO: does this have a real positive performance impact?
|
||||||
remove_empty_edges(
|
remove_empty_edges(
|
||||||
ctx,
|
ctx,
|
||||||
&mut state.graph,
|
&mut state.graph,
|
||||||
&mut state.edge_docids_cache,
|
&mut state.edge_conditions_cache,
|
||||||
universe,
|
universe,
|
||||||
&mut state.empty_paths_cache,
|
&mut state.empty_paths_cache,
|
||||||
)?;
|
)?;
|
||||||
@@ -201,17 +204,17 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
|
|
||||||
let GraphBasedRankingRuleState {
|
let GraphBasedRankingRuleState {
|
||||||
graph,
|
graph,
|
||||||
edge_docids_cache,
|
edge_conditions_cache: edge_docids_cache,
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
all_distances,
|
all_distances,
|
||||||
cur_distance_idx: _,
|
cur_distance_idx: _,
|
||||||
} = &mut state;
|
} = &mut state;
|
||||||
|
|
||||||
let original_universe = universe;
|
// let original_universe = universe;
|
||||||
let mut universe = universe.clone();
|
let mut universe = universe.clone();
|
||||||
|
|
||||||
// TODO: remove this unnecessary clone
|
// TODO: remove this unnecessary clone
|
||||||
let original_graph = graph.clone();
|
// let original_graph = graph.clone();
|
||||||
// and this vector as well
|
// and this vector as well
|
||||||
let mut paths = vec![];
|
let mut paths = vec![];
|
||||||
|
|
||||||
@@ -241,12 +244,15 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
|
|
||||||
for &edge_index in path {
|
for &edge_index in path {
|
||||||
visited_edges.push(edge_index);
|
visited_edges.push(edge_index);
|
||||||
let edge_docids =
|
let edge = graph.edges_store[edge_index as usize].as_ref().unwrap();
|
||||||
edge_docids_cache.get_edge_docids(ctx, edge_index, graph, &universe)?;
|
let condition = match edge.condition {
|
||||||
let edge_docids = match edge_docids {
|
EdgeCondition::Unconditional => continue,
|
||||||
BitmapOrAllRef::Bitmap(b) => b,
|
EdgeCondition::Conditional(condition) => condition,
|
||||||
BitmapOrAllRef::All => continue,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let edge_docids =
|
||||||
|
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
|
||||||
|
|
||||||
cached_edge_docids.push((edge_index, edge_docids.clone()));
|
cached_edge_docids.push((edge_index, edge_docids.clone()));
|
||||||
|
|
||||||
// If the edge is empty, then the path will be empty as well, we update the graph
|
// If the edge is empty, then the path will be empty as well, we update the graph
|
||||||
@@ -257,7 +263,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
// 2. remove this edge from the ranking rule graph
|
// 2. remove this edge from the ranking rule graph
|
||||||
graph.remove_ranking_rule_edge(edge_index);
|
graph.remove_ranking_rule_edge(edge_index);
|
||||||
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
||||||
edge_docids_cache.cache.remove(&edge_index);
|
edge_docids_cache.cache.remove(&condition);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
path_docids &= edge_docids;
|
path_docids &= edge_docids;
|
||||||
@@ -279,6 +285,8 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
|
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// We should maybe instead try to compute:
|
||||||
|
// 0th & nth & 1st & n-1th & 2nd & etc...
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -289,15 +297,15 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
G::log_state(
|
// G::log_state(
|
||||||
&original_graph,
|
// &original_graph,
|
||||||
&paths,
|
// &paths,
|
||||||
&state.empty_paths_cache,
|
// &state.empty_paths_cache,
|
||||||
original_universe,
|
// original_universe,
|
||||||
&state.all_distances,
|
// &state.all_distances,
|
||||||
cost,
|
// cost,
|
||||||
logger,
|
// logger,
|
||||||
);
|
// );
|
||||||
|
|
||||||
// TODO: Graph-based ranking rules do not (yet) modify the query graph. We could, however,
|
// TODO: Graph-based ranking rules do not (yet) modify the query graph. We could, however,
|
||||||
// remove nodes and/or terms within nodes that weren't present in any of the paths.
|
// remove nodes and/or terms within nodes that weren't present in any of the paths.
|
||||||
|
@@ -14,6 +14,21 @@ impl<T> Interned<T> {
|
|||||||
Self { idx, _phantom: PhantomData }
|
Self { idx, _phantom: PhantomData }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: the stable store should be replaced by a bump allocator
|
||||||
|
// and the interned value should be a pointer wrapper
|
||||||
|
// then we can get its value with `interned.get()` instead of `interner.get(interned)`
|
||||||
|
// and as a bonus, its validity is tracked with Rust's lifetime system
|
||||||
|
// one problem is that we need two lifetimes: one for the bump allocator, one for the
|
||||||
|
// hashmap
|
||||||
|
// but that's okay, we can use:
|
||||||
|
// ```
|
||||||
|
// struct Interner<'bump> {
|
||||||
|
// bump: &'bump Bump,
|
||||||
|
// lookup: FxHashMap
|
||||||
|
// }
|
||||||
|
// ```
|
||||||
|
|
||||||
/// An [`Interner`] is used to store a unique copy of a value of type `T`. This value
|
/// An [`Interner`] is used to store a unique copy of a value of type `T`. This value
|
||||||
/// is then identified by a lightweight index of type [`Interned<T>`], which can
|
/// is then identified by a lightweight index of type [`Interned<T>`], which can
|
||||||
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
|
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
|
||||||
|
@@ -35,11 +35,6 @@ use crate::search::new::query_term::located_query_terms_from_string;
|
|||||||
use crate::search::new::words::Words;
|
use crate::search::new::words::Words;
|
||||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
pub enum BitmapOrAllRef<'s> {
|
|
||||||
Bitmap(&'s RoaringBitmap),
|
|
||||||
All,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct SearchContext<'search> {
|
pub struct SearchContext<'search> {
|
||||||
pub index: &'search Index,
|
pub index: &'search Index,
|
||||||
pub txn: &'search RoTxn<'search>,
|
pub txn: &'search RoTxn<'search>,
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
|
use crate::search::new::interner::Interner;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::search::new::{QueryGraph, SearchContext};
|
use crate::search::new::{QueryGraph, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@@ -10,6 +11,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
||||||
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
||||||
|
|
||||||
|
let mut conditions_interner = Interner::default();
|
||||||
|
|
||||||
let mut edges_store = vec![];
|
let mut edges_store = vec![];
|
||||||
let mut edges_of_node = vec![];
|
let mut edges_of_node = vec![];
|
||||||
|
|
||||||
@@ -21,18 +24,22 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
|
|
||||||
for successor_idx in graph_edges[node_idx].successors.iter() {
|
for successor_idx in graph_edges[node_idx].successors.iter() {
|
||||||
let dest_node = &graph_nodes[successor_idx as usize];
|
let dest_node = &graph_nodes[successor_idx as usize];
|
||||||
let edges =
|
let edges = G::build_step_visit_destination_node(
|
||||||
G::build_step_visit_destination_node(ctx, dest_node, &source_node_data)?;
|
ctx,
|
||||||
|
&mut conditions_interner,
|
||||||
|
dest_node,
|
||||||
|
&source_node_data,
|
||||||
|
)?;
|
||||||
if edges.is_empty() {
|
if edges.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (cost, details) in edges {
|
for (cost, condition) in edges {
|
||||||
edges_store.push(Some(Edge {
|
edges_store.push(Some(Edge {
|
||||||
source_node: node_idx as u16,
|
source_node: node_idx as u16,
|
||||||
dest_node: successor_idx,
|
dest_node: successor_idx,
|
||||||
cost,
|
cost,
|
||||||
condition: details,
|
condition,
|
||||||
}));
|
}));
|
||||||
new_edges.insert(edges_store.len() as u16 - 1);
|
new_edges.insert(edges_store.len() as u16 - 1);
|
||||||
}
|
}
|
||||||
@@ -43,6 +50,6 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
|
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node })
|
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node, conditions_interner })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -3,22 +3,23 @@ use std::marker::PhantomData;
|
|||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::{BitmapOrAllRef, SearchContext};
|
use crate::search::new::interner::Interned;
|
||||||
|
use crate::search::new::SearchContext;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// A cache storing the document ids associated with each ranking rule edge
|
/// A cache storing the document ids associated with each ranking rule edge
|
||||||
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
|
pub struct EdgeConditionsCache<G: RankingRuleGraphTrait> {
|
||||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||||
pub cache: FxHashMap<u16, RoaringBitmap>,
|
pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>,
|
||||||
_phantom: PhantomData<G>,
|
_phantom: PhantomData<G>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
|
impl<G: RankingRuleGraphTrait> Default for EdgeConditionsCache<G> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { cache: Default::default(), _phantom: Default::default() }
|
Self { cache: Default::default(), _phantom: Default::default() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
impl<G: RankingRuleGraphTrait> EdgeConditionsCache<G> {
|
||||||
/// Retrieve the document ids for the given edge condition.
|
/// Retrieve the document ids for the given edge condition.
|
||||||
///
|
///
|
||||||
/// If the cache does not yet contain these docids, they are computed
|
/// If the cache does not yet contain these docids, they are computed
|
||||||
@@ -27,30 +28,25 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
|||||||
&'s mut self,
|
&'s mut self,
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
// TODO: should be Interned<EdgeCondition>
|
// TODO: should be Interned<EdgeCondition>
|
||||||
edge_index: u16,
|
interned_edge_condition: Interned<G::EdgeCondition>,
|
||||||
graph: &RankingRuleGraph<G>,
|
graph: &RankingRuleGraph<G>,
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<BitmapOrAllRef<'s>> {
|
) -> Result<&'s RoaringBitmap> {
|
||||||
let edge = graph.edges_store[edge_index as usize].as_ref().unwrap();
|
if self.cache.contains_key(&interned_edge_condition) {
|
||||||
|
// TODO: should we update the bitmap in the cache if the new universe
|
||||||
match &edge.condition {
|
// reduces it?
|
||||||
EdgeCondition::Unconditional => Ok(BitmapOrAllRef::All),
|
// TODO: maybe have a generation: u32 to track every time the universe was
|
||||||
EdgeCondition::Conditional(details) => {
|
// reduced. Then only attempt to recompute the intersection when there is a chance
|
||||||
if self.cache.contains_key(&edge_index) {
|
// that edge_docids & universe changed
|
||||||
// TODO: should we update the bitmap in the cache if the new universe
|
return Ok(&self.cache[&interned_edge_condition]);
|
||||||
// reduces it?
|
|
||||||
// TODO: maybe have a generation: u32 to track every time the universe was
|
|
||||||
// reduced. Then only attempt to recompute the intersection when there is a chance
|
|
||||||
// that edge_docids & universe changed
|
|
||||||
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
|
||||||
}
|
|
||||||
// TODO: maybe universe doesn't belong here
|
|
||||||
let docids = universe & G::resolve_edge_condition(ctx, details, universe)?;
|
|
||||||
let _ = self.cache.insert(edge_index, docids);
|
|
||||||
let docids = &self.cache[&edge_index];
|
|
||||||
Ok(BitmapOrAllRef::Bitmap(docids))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
// TODO: maybe universe doesn't belong here
|
||||||
|
let edge_condition = graph.conditions_interner.get(interned_edge_condition);
|
||||||
|
// TODO: faster way to do this?
|
||||||
|
let docids = universe & G::resolve_edge_condition(ctx, edge_condition, universe)?;
|
||||||
|
let _ = self.cache.insert(interned_edge_condition, docids);
|
||||||
|
let docids = &self.cache[&interned_edge_condition];
|
||||||
|
Ok(docids)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -16,12 +16,15 @@ mod proximity;
|
|||||||
/// Implementation of the `typo` ranking rule
|
/// Implementation of the `typo` ranking rule
|
||||||
mod typo;
|
mod typo;
|
||||||
|
|
||||||
pub use edge_docids_cache::EdgeDocidsCache;
|
use std::hash::Hash;
|
||||||
|
|
||||||
|
pub use edge_docids_cache::EdgeConditionsCache;
|
||||||
pub use empty_paths_cache::EmptyPathsCache;
|
pub use empty_paths_cache::EmptyPathsCache;
|
||||||
pub use proximity::ProximityGraph;
|
pub use proximity::ProximityGraph;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
pub use typo::TypoGraph;
|
pub use typo::TypoGraph;
|
||||||
|
|
||||||
|
use super::interner::{Interned, Interner};
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
@@ -36,10 +39,20 @@ use crate::Result;
|
|||||||
/// proximity ranking rule, the condition could be that a word is N-close to another one.
|
/// proximity ranking rule, the condition could be that a word is N-close to another one.
|
||||||
/// When the edge is traversed, some database operations are executed to retrieve the set
|
/// When the edge is traversed, some database operations are executed to retrieve the set
|
||||||
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
|
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum EdgeCondition<E> {
|
pub enum EdgeCondition<E> {
|
||||||
Unconditional,
|
Unconditional,
|
||||||
Conditional(E),
|
Conditional(Interned<E>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E> Copy for EdgeCondition<E> {}
|
||||||
|
|
||||||
|
impl<E> Clone for EdgeCondition<E> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
match self {
|
||||||
|
Self::Unconditional => Self::Unconditional,
|
||||||
|
Self::Conditional(arg0) => Self::Conditional(*arg0),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An edge in the ranking rule graph.
|
/// An edge in the ranking rule graph.
|
||||||
@@ -48,7 +61,7 @@ pub enum EdgeCondition<E> {
|
|||||||
/// 1. The source and destination nodes
|
/// 1. The source and destination nodes
|
||||||
/// 2. The cost of traversing this edge
|
/// 2. The cost of traversing this edge
|
||||||
/// 3. The condition associated with it
|
/// 3. The condition associated with it
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Edge<E> {
|
pub struct Edge<E> {
|
||||||
pub source_node: u16,
|
pub source_node: u16,
|
||||||
pub dest_node: u16,
|
pub dest_node: u16,
|
||||||
@@ -106,7 +119,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
/// The condition of an edge connecting two query nodes. The condition
|
/// The condition of an edge connecting two query nodes. The condition
|
||||||
/// should be sufficient to compute the edge's cost and associated document ids
|
/// should be sufficient to compute the edge's cost and associated document ids
|
||||||
/// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
|
/// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
|
||||||
type EdgeCondition: Sized + Clone;
|
type EdgeCondition: Sized + Clone + PartialEq + Eq + Hash;
|
||||||
|
|
||||||
/// A structure used in the construction of the graph, created when a
|
/// A structure used in the construction of the graph, created when a
|
||||||
/// query graph source node is visited. It is used to determine the cost
|
/// query graph source node is visited. It is used to determine the cost
|
||||||
@@ -138,6 +151,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
|
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
|
||||||
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
|
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
||||||
dest_node: &QueryNode,
|
dest_node: &QueryNode,
|
||||||
source_node_data: &'from_data Self::BuildVisitedFromNode,
|
source_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
||||||
@@ -161,16 +175,18 @@ pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
|||||||
pub query_graph: QueryGraph,
|
pub query_graph: QueryGraph,
|
||||||
pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
|
pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
|
||||||
pub edges_of_node: Vec<SmallBitmap>,
|
pub edges_of_node: Vec<SmallBitmap>,
|
||||||
|
pub conditions_interner: Interner<G::EdgeCondition>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
// impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
||||||
fn clone(&self) -> Self {
|
// fn clone(&self) -> Self {
|
||||||
Self {
|
// Self {
|
||||||
query_graph: self.query_graph.clone(),
|
// query_graph: self.query_graph.clone(),
|
||||||
edges_store: self.edges_store.clone(),
|
// edges_store: self.edges_store.clone(),
|
||||||
edges_of_node: self.edges_of_node.clone(),
|
// edges_of_node: self.edges_of_node.clone(),
|
||||||
}
|
// conditions_interner: self.conditions_interner.clone(),
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
// }
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
/// Remove the given edge from the ranking rule graph
|
/// Remove the given edge from the ranking rule graph
|
||||||
pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
|
pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
|
||||||
|
@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
|
|||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
|
||||||
use super::ProximityEdge;
|
use super::ProximityEdge;
|
||||||
|
use crate::search::new::interner::Interner;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||||
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
||||||
@@ -59,6 +60,7 @@ pub fn visit_from_node(
|
|||||||
|
|
||||||
pub fn visit_to_node<'search, 'from_data>(
|
pub fn visit_to_node<'search, 'from_data>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
|
conditions_interner: &mut Interner<ProximityEdge>,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
from_node_data: &'from_data (WordDerivations, i8),
|
from_node_data: &'from_data (WordDerivations, i8),
|
||||||
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
||||||
@@ -224,22 +226,23 @@ pub fn visit_to_node<'search, 'from_data>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut new_edges = cost_proximity_word_pairs
|
let mut new_edges =
|
||||||
.into_iter()
|
cost_proximity_word_pairs
|
||||||
.flat_map(|(cost, proximity_word_pairs)| {
|
.into_iter()
|
||||||
let mut edges = vec![];
|
.flat_map(|(cost, proximity_word_pairs)| {
|
||||||
for (proximity, word_pairs) in proximity_word_pairs {
|
let mut edges = vec![];
|
||||||
edges.push((
|
for (proximity, word_pairs) in proximity_word_pairs {
|
||||||
cost,
|
edges.push((
|
||||||
EdgeCondition::Conditional(ProximityEdge {
|
cost,
|
||||||
pairs: word_pairs.into_boxed_slice(),
|
EdgeCondition::Conditional(conditions_interner.insert(ProximityEdge {
|
||||||
proximity,
|
pairs: word_pairs.into_boxed_slice(),
|
||||||
}),
|
proximity,
|
||||||
))
|
})),
|
||||||
}
|
))
|
||||||
edges
|
}
|
||||||
})
|
edges
|
||||||
.collect::<Vec<_>>();
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeCondition::Unconditional));
|
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeCondition::Unconditional));
|
||||||
Ok(new_edges)
|
Ok(new_edges)
|
||||||
}
|
}
|
||||||
|
@@ -5,23 +5,21 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
use super::{EdgeCondition, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::Interned;
|
use crate::search::new::interner::{Interned, Interner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_term::WordDerivations;
|
use crate::search::new::query_term::WordDerivations;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
// TODO: intern the proximity edges as well?
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub enum WordPair {
|
pub enum WordPair {
|
||||||
Words { left: Interned<String>, right: Interned<String> },
|
Words { left: Interned<String>, right: Interned<String> },
|
||||||
WordPrefix { left: Interned<String>, right_prefix: Interned<String> },
|
WordPrefix { left: Interned<String>, right_prefix: Interned<String> },
|
||||||
WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> },
|
WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct ProximityEdge {
|
pub struct ProximityEdge {
|
||||||
pairs: Box<[WordPair]>,
|
pairs: Box<[WordPair]>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
@@ -55,10 +53,11 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
|
|
||||||
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
|
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||||
build::visit_to_node(ctx, to_node, from_node_data)
|
build::visit_to_node(ctx, conditions_interner, to_node, from_node_data)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
|
@@ -2,14 +2,14 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::Interned;
|
use crate::search::new::interner::{Interned, Interner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub enum TypoEdge {
|
pub enum TypoEdge {
|
||||||
Phrase { phrase: Interned<Phrase> },
|
Phrase { phrase: Interned<Phrase> },
|
||||||
Word { derivations: Interned<WordDerivations>, nbr_typos: u8 },
|
Word { derivations: Interned<WordDerivations>, nbr_typos: u8 },
|
||||||
@@ -78,15 +78,19 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
|
|
||||||
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
|
conditions_interner: &mut Interner<Self::EdgeCondition>,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
_from_node_data: &'from_data Self::BuildVisitedFromNode,
|
_from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||||
let SearchContext { derivations_interner, .. } = ctx;
|
let SearchContext { derivations_interner, .. } = ctx;
|
||||||
match to_node {
|
match to_node {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => match *value {
|
QueryNode::Term(LocatedQueryTerm { value, .. }) => match *value {
|
||||||
QueryTerm::Phrase { phrase } => {
|
QueryTerm::Phrase { phrase } => Ok(vec![(
|
||||||
Ok(vec![(0, EdgeCondition::Conditional(TypoEdge::Phrase { phrase }))])
|
0,
|
||||||
}
|
EdgeCondition::Conditional(
|
||||||
|
conditions_interner.insert(TypoEdge::Phrase { phrase }),
|
||||||
|
),
|
||||||
|
)]),
|
||||||
QueryTerm::Word { derivations } => {
|
QueryTerm::Word { derivations } => {
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
|
|
||||||
@@ -136,10 +140,12 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
if !new_derivations.is_empty() {
|
if !new_derivations.is_empty() {
|
||||||
edges.push((
|
edges.push((
|
||||||
nbr_typos,
|
nbr_typos,
|
||||||
EdgeCondition::Conditional(TypoEdge::Word {
|
EdgeCondition::Conditional(conditions_interner.insert(
|
||||||
derivations: derivations_interner.insert(new_derivations),
|
TypoEdge::Word {
|
||||||
nbr_typos,
|
derivations: derivations_interner.insert(new_derivations),
|
||||||
}),
|
nbr_typos,
|
||||||
|
},
|
||||||
|
)),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user