mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-29 18:04:47 +00:00
Intern all strings and phrases in the search logic
This commit is contained in:
@ -1,18 +1,10 @@
|
||||
use heed::RoTxn;
|
||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::{QueryGraph, SearchContext};
|
||||
use crate::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::new::QueryGraph;
|
||||
use crate::{Index, Result};
|
||||
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
pub fn build<'db_cache, 'transaction: 'db_cache>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
query_graph: QueryGraph,
|
||||
) -> Result<Self> {
|
||||
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
||||
let mut ranking_rule_graph =
|
||||
Self { query_graph, all_edges: vec![], node_edges: vec![], successors: vec![] };
|
||||
|
||||
@ -22,12 +14,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
let new_edges = ranking_rule_graph.node_edges.last_mut().unwrap();
|
||||
let new_successors = ranking_rule_graph.successors.last_mut().unwrap();
|
||||
|
||||
let Some(from_node_data) = G::build_visit_from_node(index, txn, db_cache, node)? else { continue };
|
||||
let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue };
|
||||
|
||||
for successor_idx in ranking_rule_graph.query_graph.edges[node_idx].successors.iter() {
|
||||
let to_node = &ranking_rule_graph.query_graph.nodes[successor_idx as usize];
|
||||
let mut edges =
|
||||
G::build_visit_to_node(index, txn, db_cache, to_node, &from_node_data)?;
|
||||
let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?;
|
||||
if edges.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
@ -1,13 +1,10 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use fxhash::FxHashMap;
|
||||
use heed::RoTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::new::BitmapOrAllRef;
|
||||
use crate::{Index, Result};
|
||||
use crate::new::{BitmapOrAllRef, SearchContext};
|
||||
use crate::Result;
|
||||
use fxhash::FxHashMap;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
// TODO: the cache should have a G::EdgeDetails as key
|
||||
// but then it means that we should have a quick way of
|
||||
@ -25,11 +22,9 @@ impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
||||
pub fn get_edge_docids<'s, 'transaction>(
|
||||
pub fn get_edge_docids<'s, 'search>(
|
||||
&'s mut self,
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge_index: u32,
|
||||
graph: &RankingRuleGraph<G>,
|
||||
// TODO: maybe universe doesn't belong here
|
||||
@ -46,7 +41,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
||||
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
||||
}
|
||||
// TODO: maybe universe doesn't belong here
|
||||
let docids = universe & G::compute_docids(index, txn, db_cache, details)?;
|
||||
let docids = universe & G::compute_docids(ctx, details)?;
|
||||
let _ = self.cache.insert(edge_index, docids);
|
||||
let docids = &self.cache[&edge_index];
|
||||
Ok(BitmapOrAllRef::Bitmap(docids))
|
||||
|
@ -7,20 +7,15 @@ mod proximity;
|
||||
mod resolve_paths;
|
||||
mod typo;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
pub use edge_docids_cache::EdgeDocidsCache;
|
||||
pub use empty_paths_cache::EmptyPathsCache;
|
||||
pub use proximity::ProximityGraph;
|
||||
pub use typo::TypoGraph;
|
||||
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use heed::RoTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::db_cache::DatabaseCache;
|
||||
use super::logger::SearchLogger;
|
||||
use super::{QueryGraph, QueryNode};
|
||||
use crate::{Index, Result};
|
||||
use std::ops::ControlFlow;
|
||||
pub use typo::TypoGraph;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum EdgeDetails<E> {
|
||||
@ -42,6 +37,48 @@ pub struct EdgePointer<'graph, E> {
|
||||
pub edge: &'graph Edge<E>,
|
||||
}
|
||||
|
||||
// pub struct SubWordDerivations {
|
||||
// words: FxHashSet<Interned<String>>,
|
||||
// synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings
|
||||
// split_words: bool,
|
||||
// use_prefix_db: bool,
|
||||
// }
|
||||
|
||||
// pub struct EdgeWordDerivations {
|
||||
// // TODO: not Option, instead: Any | All | Subset(SubWordDerivations)
|
||||
// from_words: Option<SubWordDerivations>, // ???
|
||||
// to_words: Option<SubWordDerivations>, // + use prefix db?
|
||||
// }
|
||||
|
||||
// fn aggregate_edge_word_derivations(
|
||||
// graph: (),
|
||||
// edges: Vec<usize>,
|
||||
// ) -> BTreeMap<usize, SubWordDerivations> {
|
||||
// todo!()
|
||||
// }
|
||||
|
||||
// fn reduce_word_term_to_sub_word_derivations(
|
||||
// term: &mut WordDerivations,
|
||||
// derivations: &SubWordDerivations,
|
||||
// ) {
|
||||
// let mut new_one_typo = vec![];
|
||||
// for w in term.one_typo {
|
||||
// if derivations.words.contains(w) {
|
||||
// new_one_typo.push(w);
|
||||
// }
|
||||
// }
|
||||
// if term.use_prefix_db && !derivations.use_prefix_db {
|
||||
// term.use_prefix_db = false;
|
||||
// }
|
||||
// // etc.
|
||||
// }
|
||||
|
||||
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
|
||||
// edge: G::EdgeDetails,
|
||||
// ) -> SubWordDerivations {
|
||||
// todo!()
|
||||
// }
|
||||
|
||||
pub trait RankingRuleGraphTrait: Sized {
|
||||
/// The details of an edge connecting two query nodes. These details
|
||||
/// should be sufficient to compute the edge's cost and associated document ids
|
||||
@ -55,10 +92,8 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String;
|
||||
|
||||
/// Compute the document ids associated with the given edge.
|
||||
fn compute_docids<'transaction>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn compute_docids<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge_details: &Self::EdgeDetails,
|
||||
) -> Result<RoaringBitmap>;
|
||||
|
||||
@ -66,19 +101,15 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
///
|
||||
/// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node),
|
||||
/// which builds the actual edges.
|
||||
fn build_visit_from_node<'transaction>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn build_visit_from_node<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
from_node: &QueryNode,
|
||||
) -> Result<Option<Self::BuildVisitedFromNode>>;
|
||||
|
||||
/// Return the cost and details of the edges going from the previously visited node
|
||||
/// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`.
|
||||
fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
|
||||
|
@ -1,30 +1,30 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use heed::RoTxn;
|
||||
use itertools::Itertools;
|
||||
|
||||
use super::ProximityEdge;
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::new::ranking_rule_graph::proximity::WordPair;
|
||||
use crate::new::ranking_rule_graph::EdgeDetails;
|
||||
use crate::new::QueryNode;
|
||||
use crate::{Index, Result};
|
||||
use crate::new::{QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
use itertools::Itertools;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, i8)>> {
|
||||
pub fn visit_from_node(
|
||||
ctx: &mut SearchContext,
|
||||
from_node: &QueryNode,
|
||||
) -> Result<Option<(WordDerivations, i8)>> {
|
||||
Ok(Some(match from_node {
|
||||
QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => match value1 {
|
||||
QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()),
|
||||
QueryTerm::Phrase { phrase: phrase1 } => {
|
||||
if let Some(original) = phrase1.words.last().unwrap().as_ref() {
|
||||
let phrase1 = ctx.phrase_interner.get(*phrase1);
|
||||
if let Some(original) = *phrase1.words.last().unwrap() {
|
||||
(
|
||||
WordDerivations {
|
||||
original: original.clone(),
|
||||
zero_typo: vec![original.to_owned()],
|
||||
one_typo: vec![],
|
||||
two_typos: vec![],
|
||||
original,
|
||||
zero_typo: Box::new([original]),
|
||||
one_typo: Box::new([]),
|
||||
two_typos: Box::new([]),
|
||||
use_prefix_db: false,
|
||||
synonyms: vec![],
|
||||
synonyms: Box::new([]),
|
||||
split_words: None,
|
||||
},
|
||||
*pos1.end(),
|
||||
@ -37,12 +37,12 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
|
||||
},
|
||||
QueryNode::Start => (
|
||||
WordDerivations {
|
||||
original: String::new(),
|
||||
zero_typo: vec![],
|
||||
one_typo: vec![],
|
||||
two_typos: vec![],
|
||||
original: ctx.word_interner.insert(String::new()),
|
||||
zero_typo: Box::new([]),
|
||||
one_typo: Box::new([]),
|
||||
two_typos: Box::new([]),
|
||||
use_prefix_db: false,
|
||||
synonyms: vec![],
|
||||
synonyms: Box::new([]),
|
||||
split_words: None,
|
||||
},
|
||||
-100,
|
||||
@ -51,10 +51,8 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn visit_to_node<'transaction, 'from_data>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
pub fn visit_to_node<'search, 'from_data>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data (WordDerivations, i8),
|
||||
) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> {
|
||||
@ -69,15 +67,16 @@ pub fn visit_to_node<'transaction, 'from_data>(
|
||||
let (derivations2, pos2, ngram_len2) = match value2 {
|
||||
QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()),
|
||||
QueryTerm::Phrase { phrase: phrase2 } => {
|
||||
if let Some(original) = phrase2.words.first().unwrap().as_ref() {
|
||||
let phrase2 = ctx.phrase_interner.get(*phrase2);
|
||||
if let Some(original) = *phrase2.words.first().unwrap() {
|
||||
(
|
||||
WordDerivations {
|
||||
original: original.clone(),
|
||||
zero_typo: vec![original.to_owned()],
|
||||
one_typo: vec![],
|
||||
two_typos: vec![],
|
||||
original,
|
||||
zero_typo: Box::new([original]),
|
||||
one_typo: Box::new([]),
|
||||
two_typos: Box::new([]),
|
||||
use_prefix_db: false,
|
||||
synonyms: vec![],
|
||||
synonyms: Box::new([]),
|
||||
split_words: None,
|
||||
},
|
||||
*pos2.start(),
|
||||
@ -106,19 +105,16 @@ pub fn visit_to_node<'transaction, 'from_data>(
|
||||
|
||||
let derivations1 = derivations1.all_derivations_except_prefix_db();
|
||||
// TODO: eventually, we want to get rid of the uses of `original`
|
||||
let original_word_2 = derivations2.original.clone();
|
||||
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
|
||||
|
||||
if updb2 {
|
||||
for word1 in derivations1.clone() {
|
||||
for proximity in 1..=(8 - ngram_len2) {
|
||||
let cost = (proximity + ngram_len2 - 1) as u8;
|
||||
if db_cache
|
||||
if ctx
|
||||
.get_word_prefix_pair_proximity_docids(
|
||||
index,
|
||||
txn,
|
||||
word1,
|
||||
original_word_2.as_str(),
|
||||
derivations2.original,
|
||||
proximity as u8,
|
||||
)?
|
||||
.is_some()
|
||||
@ -129,16 +125,14 @@ pub fn visit_to_node<'transaction, 'from_data>(
|
||||
.entry(proximity as u8)
|
||||
.or_default()
|
||||
.push(WordPair::WordPrefix {
|
||||
left: word1.to_owned(),
|
||||
right_prefix: original_word_2.to_owned(),
|
||||
left: word1,
|
||||
right_prefix: derivations2.original,
|
||||
});
|
||||
}
|
||||
if db_cache
|
||||
if ctx
|
||||
.get_prefix_word_pair_proximity_docids(
|
||||
index,
|
||||
txn,
|
||||
original_word_2.as_str(),
|
||||
word1.as_str(),
|
||||
derivations2.original,
|
||||
word1,
|
||||
proximity as u8 - 1,
|
||||
)?
|
||||
.is_some()
|
||||
@ -149,8 +143,8 @@ pub fn visit_to_node<'transaction, 'from_data>(
|
||||
.entry(proximity as u8)
|
||||
.or_default()
|
||||
.push(WordPair::WordPrefixSwapped {
|
||||
left_prefix: original_word_2.to_owned(),
|
||||
right: word1.to_owned(),
|
||||
left_prefix: derivations2.original,
|
||||
right: word1,
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -164,28 +158,23 @@ pub fn visit_to_node<'transaction, 'from_data>(
|
||||
for (word1, word2) in product_derivations {
|
||||
for proximity in 1..=(8 - ngram_len2) {
|
||||
let cost = (proximity + ngram_len2 - 1) as u8;
|
||||
if db_cache
|
||||
.get_word_pair_proximity_docids(index, txn, word1, word2, proximity as u8)?
|
||||
.is_some()
|
||||
{
|
||||
if ctx.get_word_pair_proximity_docids(word1, word2, proximity as u8)?.is_some() {
|
||||
cost_proximity_word_pairs
|
||||
.entry(cost)
|
||||
.or_default()
|
||||
.entry(proximity as u8)
|
||||
.or_default()
|
||||
.push(WordPair::Words { left: word1.to_owned(), right: word2.to_owned() });
|
||||
.push(WordPair::Words { left: word1, right: word2 });
|
||||
}
|
||||
if proximity > 1
|
||||
&& db_cache
|
||||
.get_word_pair_proximity_docids(index, txn, word2, word1, proximity as u8 - 1)?
|
||||
.is_some()
|
||||
&& ctx.get_word_pair_proximity_docids(word2, word1, proximity as u8 - 1)?.is_some()
|
||||
{
|
||||
cost_proximity_word_pairs
|
||||
.entry(cost)
|
||||
.or_default()
|
||||
.entry(proximity as u8 - 1)
|
||||
.or_default()
|
||||
.push(WordPair::Words { left: word2.to_owned(), right: word1.to_owned() });
|
||||
.push(WordPair::Words { left: word2, right: word1 });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,10 @@
|
||||
use heed::RoTxn;
|
||||
use super::{ProximityEdge, WordPair};
|
||||
use crate::new::SearchContext;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
use super::{ProximityEdge, WordPair};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
pub fn compute_docids<'transaction>(
|
||||
index: &crate::Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
pub fn compute_docids<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &ProximityEdge,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let ProximityEdge { pairs, proximity } = edge;
|
||||
@ -16,12 +12,14 @@ pub fn compute_docids<'transaction>(
|
||||
for pair in pairs.iter() {
|
||||
let bytes = match pair {
|
||||
WordPair::Words { left, right } => {
|
||||
db_cache.get_word_pair_proximity_docids(index, txn, left, right, *proximity)
|
||||
ctx.get_word_pair_proximity_docids(*left, *right, *proximity)
|
||||
}
|
||||
WordPair::WordPrefix { left, right_prefix } => {
|
||||
ctx.get_word_prefix_pair_proximity_docids(*left, *right_prefix, *proximity)
|
||||
}
|
||||
WordPair::WordPrefixSwapped { left_prefix, right } => {
|
||||
ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity)
|
||||
}
|
||||
WordPair::WordPrefix { left, right_prefix } => db_cache
|
||||
.get_word_prefix_pair_proximity_docids(index, txn, left, right_prefix, *proximity),
|
||||
WordPair::WordPrefixSwapped { left_prefix, right } => db_cache
|
||||
.get_prefix_word_pair_proximity_docids(index, txn, left_prefix, right, *proximity),
|
||||
}?;
|
||||
let bitmap =
|
||||
bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();
|
||||
|
@ -1,25 +1,22 @@
|
||||
pub mod build;
|
||||
pub mod compute_docids;
|
||||
|
||||
use heed::RoTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
|
||||
use super::{EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::new::interner::Interned;
|
||||
use crate::new::logger::SearchLogger;
|
||||
use crate::new::query_term::WordDerivations;
|
||||
use crate::new::{QueryGraph, QueryNode};
|
||||
use crate::{Index, Result};
|
||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
// TODO: intern the strings, refer to them by their pointer?
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Clone)]
|
||||
pub enum WordPair {
|
||||
Words { left: String, right: String },
|
||||
WordPrefix { left: String, right_prefix: String },
|
||||
WordPrefixSwapped { left_prefix: String, right: String },
|
||||
Words { left: Interned<String>, right: Interned<String> },
|
||||
WordPrefix { left: Interned<String>, right_prefix: Interned<String> },
|
||||
WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> },
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -40,32 +37,26 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
format!(", prox {proximity}, {} pairs", pairs.len())
|
||||
}
|
||||
|
||||
fn compute_docids<'db_cache, 'transaction>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn compute_docids<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &Self::EdgeDetails,
|
||||
) -> Result<roaring::RoaringBitmap> {
|
||||
compute_docids::compute_docids(index, txn, db_cache, edge)
|
||||
compute_docids::compute_docids(ctx, edge)
|
||||
}
|
||||
|
||||
fn build_visit_from_node<'transaction>(
|
||||
_index: &Index,
|
||||
_txn: &'transaction RoTxn,
|
||||
_db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn build_visit_from_node<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
from_node: &QueryNode,
|
||||
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||
build::visit_from_node(from_node)
|
||||
build::visit_from_node(ctx, from_node)
|
||||
}
|
||||
|
||||
fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
||||
build::visit_to_node(index, txn, db_cache, to_node, from_node_data)
|
||||
build::visit_to_node(ctx, to_node, from_node_data)
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
|
@ -1,23 +1,18 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use heed::RoTxn;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
use super::edge_docids_cache::EdgeDocidsCache;
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
|
||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
|
||||
use crate::new::BitmapOrAllRef;
|
||||
use crate::{Index, Result};
|
||||
use crate::new::{BitmapOrAllRef, SearchContext};
|
||||
use crate::Result;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
pub fn resolve_paths<'transaction>(
|
||||
// TODO: reduce the universe after computing each path
|
||||
// TODO: deserialize roaring bitmap within a universe
|
||||
pub fn resolve_paths<'search>(
|
||||
&mut self,
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge_docids_cache: &mut EdgeDocidsCache<G>,
|
||||
empty_paths_cache: &mut EmptyPathsCache,
|
||||
universe: &RoaringBitmap,
|
||||
@ -52,8 +47,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
let mut cached_edge_docids = vec![];
|
||||
'edge_loop: for edge_index in edge_indexes {
|
||||
visited_edges.push(edge_index);
|
||||
let edge_docids = edge_docids_cache
|
||||
.get_edge_docids(index, txn, db_cache, edge_index, self, universe)?;
|
||||
let edge_docids =
|
||||
edge_docids_cache.get_edge_docids(ctx, edge_index, self, universe)?;
|
||||
match edge_docids {
|
||||
BitmapOrAllRef::Bitmap(edge_docids) => {
|
||||
cached_edge_docids.push((edge_index, edge_docids.clone()));
|
||||
|
@ -1,19 +1,17 @@
|
||||
use heed::{BytesDecode, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::new::interner::Interned;
|
||||
use crate::new::logger::SearchLogger;
|
||||
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||
use crate::new::resolve_query_graph::resolve_phrase;
|
||||
use crate::new::{QueryGraph, QueryNode};
|
||||
use crate::{Index, Result, RoaringBitmapCodec};
|
||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::{Result, RoaringBitmapCodec};
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum TypoEdge {
|
||||
Phrase { phrase: Phrase },
|
||||
Phrase { phrase: Interned<Phrase> },
|
||||
Word { derivations: WordDerivations, nbr_typos: u8 },
|
||||
}
|
||||
|
||||
@ -30,14 +28,12 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_docids<'db_cache, 'transaction>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn compute_docids<'db_cache, 'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &Self::EdgeDetails,
|
||||
) -> Result<RoaringBitmap> {
|
||||
match edge {
|
||||
TypoEdge::Phrase { phrase } => resolve_phrase(index, txn, db_cache, phrase),
|
||||
TypoEdge::Phrase { phrase } => resolve_phrase(ctx, *phrase),
|
||||
TypoEdge::Word { derivations, nbr_typos } => {
|
||||
let words = match nbr_typos {
|
||||
0 => &derivations.zero_typo,
|
||||
@ -46,16 +42,14 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
_ => panic!(),
|
||||
};
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for word in words.iter() {
|
||||
let Some(bytes) = db_cache.get_word_docids(index, txn, word)? else { continue };
|
||||
for word in words.iter().copied() {
|
||||
let Some(bytes) = ctx.get_word_docids(word)? else { continue };
|
||||
let bitmap =
|
||||
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
||||
docids |= bitmap;
|
||||
}
|
||||
if *nbr_typos == 0 {
|
||||
if let Some(bytes) =
|
||||
db_cache.get_prefix_docids(index, txn, &derivations.original)?
|
||||
{
|
||||
if let Some(bytes) = ctx.get_prefix_docids(derivations.original)? {
|
||||
let bitmap =
|
||||
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
||||
docids |= bitmap;
|
||||
@ -66,26 +60,22 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_visit_from_node<'transaction>(
|
||||
_index: &Index,
|
||||
_txn: &'transaction RoTxn,
|
||||
_db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn build_visit_from_node<'search>(
|
||||
_ctx: &mut SearchContext<'search>,
|
||||
_from_node: &QueryNode,
|
||||
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||
Ok(Some(()))
|
||||
}
|
||||
|
||||
fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
|
||||
_index: &Index,
|
||||
_txn: &'transaction RoTxn,
|
||||
_db_cache: &mut DatabaseCache<'transaction>,
|
||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
||||
_ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
_from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
||||
match to_node {
|
||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
||||
QueryTerm::Phrase { phrase } => {
|
||||
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase: phrase.clone() }))])
|
||||
&QueryTerm::Phrase { phrase } => {
|
||||
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase }))])
|
||||
}
|
||||
QueryTerm::Word { derivations } => {
|
||||
let mut edges = vec![];
|
||||
|
Reference in New Issue
Block a user