Rewrite cheapest path algorithm and empty path cache

The rewritten code is now much simpler and has much better performance.
This commit is contained in:
Loïc Lecrenier
2023-03-02 21:27:42 +01:00
parent caa1e1b923
commit c27ea2677f
14 changed files with 782 additions and 530 deletions

View File

@ -16,9 +16,9 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => {
match value1 {
QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()),
QueryTerm::Phrase(phrase1) => {
QueryTerm::Phrase { phrase: phrase1 } => {
// TODO: remove second unwrap
let original = phrase1.last().unwrap().as_ref().unwrap().clone();
let original = phrase1.words.last().unwrap().as_ref().unwrap().clone();
(
WordDerivations {
original: original.clone(),
@ -26,6 +26,8 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
one_typo: vec![],
two_typos: vec![],
use_prefix_db: false,
synonyms: vec![],
split_words: None,
},
*pos1.end(),
)
@ -39,6 +41,8 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
one_typo: vec![],
two_typos: vec![],
use_prefix_db: false,
synonyms: vec![],
split_words: None,
},
-100,
),
@ -63,9 +67,9 @@ pub fn visit_to_node<'transaction, 'from_data>(
let (derivations2, pos2, ngram_len2) = match value2 {
QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()),
QueryTerm::Phrase(phrase2) => {
QueryTerm::Phrase { phrase: phrase2 } => {
// TODO: remove second unwrap
let original = phrase2.last().unwrap().as_ref().unwrap().clone();
let original = phrase2.words.last().unwrap().as_ref().unwrap().clone();
(
WordDerivations {
original: original.clone(),
@ -73,6 +77,8 @@ pub fn visit_to_node<'transaction, 'from_data>(
one_typo: vec![],
two_typos: vec![],
use_prefix_db: false,
synonyms: vec![],
split_words: None,
},
*pos2.start(),
1,

View File

@ -2,18 +2,21 @@ pub mod build;
pub mod compute_docids;
use heed::RoTxn;
use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache;
use super::paths_map::PathsMap;
use super::{EdgeDetails, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache;
use crate::new::logger::SearchLogger;
use crate::new::query_term::WordDerivations;
use crate::new::QueryNode;
use crate::new::{QueryGraph, QueryNode};
use crate::{Index, Result};
// TODO: intern the strings, refer to them by their pointer?
#[derive(Debug, Clone)]
pub enum WordPair {
// TODO: add WordsSwapped and WordPrefixSwapped case
Words { left: String, right: String },
WordsSwapped { left: String, right: String },
WordPrefix { left: String, right_prefix: String },
@ -22,6 +25,7 @@ pub enum WordPair {
/// A list of word pairs together with a single proximity value.
// NOTE(review): presumably the per-edge payload of `ProximityGraph` — confirm against
// the `RankingRuleGraphTrait` implementation.
#[derive(Clone)]
pub struct ProximityEdge {
// TODO: use a list of pointers to the word pairs instead?
/// The word pairs carried by this edge.
pairs: Vec<WordPair>,
/// The proximity value shared by all pairs of this edge.
// NOTE(review): presumably the word distance the pairs are looked up at — TODO confirm.
proximity: u8,
}
@ -67,10 +71,20 @@ impl RankingRuleGraphTrait for ProximityGraph {
/// Reports the current state of the ranking-rule graph to the search logger by
/// forwarding the arguments to `logger.log_proximity_state`.
// NOTE(review): this span comes from a rendered diff, so the pre-change and post-change
// lines are interleaved below (two `paths` parameters, two `logger` parameters, two
// `log_proximity_state` calls). The slice-based `paths` plus `universe`, `distances` and
// `cost` look like the post-change signature — confirm against the actual file.
fn log_state(
graph: &super::RankingRuleGraph<Self>,
paths: &PathsMap<u64>,
paths: &[Vec<u32>],
empty_paths_cache: &EmptyPathsCache,
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
universe: &RoaringBitmap,
distances: &[Vec<u64>],
cost: u64,
logger: &mut dyn SearchLogger<QueryGraph>,
) {
logger.log_proximity_state(graph, paths, empty_paths_cache);
logger.log_proximity_state(
graph,
paths,
empty_paths_cache,
universe,
// The borrowed slice is copied into an owned `Vec` before being passed to the logger.
distances.to_vec(),
cost,
);
}
}