mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	Exact attribute with state
This commit is contained in:
		@@ -1,5 +1,5 @@
 | 
			
		||||
use heed::BytesDecode;
 | 
			
		||||
use roaring::MultiOps;
 | 
			
		||||
use roaring::{MultiOps, RoaringBitmap};
 | 
			
		||||
 | 
			
		||||
use super::query_graph::QueryGraph;
 | 
			
		||||
use super::ranking_rules::{RankingRule, RankingRuleOutput};
 | 
			
		||||
@@ -7,19 +7,18 @@ use crate::search::new::query_graph::QueryNodeData;
 | 
			
		||||
use crate::search::new::query_term::ExactTerm;
 | 
			
		||||
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger};
 | 
			
		||||
 | 
			
		||||
/// FIXME:
 | 
			
		||||
/// A ranking rule that produces 3 disjoint buckets:
 | 
			
		||||
///
 | 
			
		||||
/// - A lot of work done in next_bucket that start_iteration could do.
 | 
			
		||||
/// - Consider calling the graph based rule directly from this one.
 | 
			
		||||
/// - currently we did exact term, don't forget about prefix
 | 
			
		||||
/// - some tests
 | 
			
		||||
/// 1. Documents from the universe whose value is exactly the query.
 | 
			
		||||
/// 2. Documents from the universe not in (1) whose value starts with the query.
 | 
			
		||||
/// 3. Documents from the universe not in (1) or (2).
 | 
			
		||||
pub struct ExactAttribute {
 | 
			
		||||
    query_graph: Option<QueryGraph>,
 | 
			
		||||
    state: State,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl ExactAttribute {
 | 
			
		||||
    pub fn new() -> Self {
 | 
			
		||||
        Self { query_graph: None }
 | 
			
		||||
        Self { state: Default::default() }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -30,23 +29,69 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
 | 
			
		||||
 | 
			
		||||
    fn start_iteration(
 | 
			
		||||
        &mut self,
 | 
			
		||||
        _ctx: &mut SearchContext<'ctx>,
 | 
			
		||||
        ctx: &mut SearchContext<'ctx>,
 | 
			
		||||
        _logger: &mut dyn SearchLogger<QueryGraph>,
 | 
			
		||||
        _universe: &roaring::RoaringBitmap,
 | 
			
		||||
        universe: &roaring::RoaringBitmap,
 | 
			
		||||
        query: &QueryGraph,
 | 
			
		||||
    ) -> Result<()> {
 | 
			
		||||
        self.query_graph = Some(query.clone());
 | 
			
		||||
        self.state = State::start_iteration(ctx, universe, query)?;
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn next_bucket(
 | 
			
		||||
        &mut self,
 | 
			
		||||
        ctx: &mut SearchContext<'ctx>,
 | 
			
		||||
        _ctx: &mut SearchContext<'ctx>,
 | 
			
		||||
        _logger: &mut dyn SearchLogger<QueryGraph>,
 | 
			
		||||
        universe: &roaring::RoaringBitmap,
 | 
			
		||||
    ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
 | 
			
		||||
        // iterate on the nodes of the graph, retain LocatedQueryTermSubset
 | 
			
		||||
        let query_graph = self.query_graph.as_ref().unwrap();
 | 
			
		||||
        let state = std::mem::take(&mut self.state);
 | 
			
		||||
        let (state, output) = State::next(state, universe);
 | 
			
		||||
        self.state = state;
 | 
			
		||||
 | 
			
		||||
        Ok(output)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn end_iteration(
 | 
			
		||||
        &mut self,
 | 
			
		||||
        _ctx: &mut SearchContext<'ctx>,
 | 
			
		||||
        _logger: &mut dyn SearchLogger<QueryGraph>,
 | 
			
		||||
    ) {
 | 
			
		||||
        self.state = Default::default();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Inner state of the ranking rule.
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
enum State {
 | 
			
		||||
    /// State between two iterations
 | 
			
		||||
    #[default]
 | 
			
		||||
    Uninitialized,
 | 
			
		||||
    /// The next call to `next` will output the documents in the universe that have an attribute that is the exact query
 | 
			
		||||
    ExactAttribute(QueryGraph, Vec<FieldCandidates>),
 | 
			
		||||
    /// The next call to `next` will output the documents in the universe that have an attribute that starts with the exact query,
 | 
			
		||||
    /// but isn't the exact query.
 | 
			
		||||
    AttributeStarts(QueryGraph, Vec<FieldCandidates>),
 | 
			
		||||
    /// The next calls to `next` will output the input universe.
 | 
			
		||||
    Empty(QueryGraph),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// The candidates sorted by attributes
 | 
			
		||||
///
 | 
			
		||||
/// Each of the bitmap in a single `FieldCandidates` struct applies to the same field.
 | 
			
		||||
struct FieldCandidates {
 | 
			
		||||
    /// The candidates that start with all the words of the query in the field
 | 
			
		||||
    start_with_exact: RoaringBitmap,
 | 
			
		||||
    /// The candidates that have the same number of words as the query in the field
 | 
			
		||||
    exact_word_count: RoaringBitmap,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl State {
 | 
			
		||||
    fn start_iteration(
 | 
			
		||||
        ctx: &mut SearchContext<'_>,
 | 
			
		||||
        universe: &RoaringBitmap,
 | 
			
		||||
        query_graph: &QueryGraph,
 | 
			
		||||
    ) -> Result<Self> {
 | 
			
		||||
        let mut exact_term_position_ids: Vec<(ExactTerm, u16, u8)> =
 | 
			
		||||
            Vec::with_capacity(query_graph.nodes.len() as usize);
 | 
			
		||||
        for (_, node) in query_graph.nodes.iter() {
 | 
			
		||||
@@ -55,11 +100,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
 | 
			
		||||
                    let exact_term = if let Some(exact_term) = term.term_subset.exact_term(ctx) {
 | 
			
		||||
                        exact_term
 | 
			
		||||
                    } else {
 | 
			
		||||
                        // FIXME: Use `None` or some function indicating that we're passing down the bucket to our child rules
 | 
			
		||||
                        return Ok(Some(RankingRuleOutput {
 | 
			
		||||
                            query: query_graph.clone(),
 | 
			
		||||
                            candidates: universe.clone(),
 | 
			
		||||
                        }));
 | 
			
		||||
                        continue;
 | 
			
		||||
                    };
 | 
			
		||||
                    exact_term_position_ids.push((
 | 
			
		||||
                        exact_term,
 | 
			
		||||
@@ -73,14 +114,17 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
 | 
			
		||||
 | 
			
		||||
        exact_term_position_ids.sort_by_key(|(_, _, id)| *id);
 | 
			
		||||
        // bail if there is a "hole" (missing word) in remaining query graph
 | 
			
		||||
        if let Some((_, _, first_id)) = exact_term_position_ids.first() {
 | 
			
		||||
            if *first_id != 0 {
 | 
			
		||||
                return Ok(State::Empty(query_graph.clone()));
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            return Ok(State::Empty(query_graph.clone()));
 | 
			
		||||
        }
 | 
			
		||||
        let mut previous_id = 0;
 | 
			
		||||
        for (_, _, id) in exact_term_position_ids.iter().copied() {
 | 
			
		||||
            if id < previous_id || id - previous_id > 1 {
 | 
			
		||||
                // FIXME: Use `None` or some function indicating that we're passing down the bucket to our child rules
 | 
			
		||||
                return Ok(Some(RankingRuleOutput {
 | 
			
		||||
                    query: query_graph.clone(),
 | 
			
		||||
                    candidates: universe.clone(),
 | 
			
		||||
                }));
 | 
			
		||||
                return Ok(State::Empty(query_graph.clone()));
 | 
			
		||||
            } else {
 | 
			
		||||
                previous_id = id;
 | 
			
		||||
            }
 | 
			
		||||
@@ -102,11 +146,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
 | 
			
		||||
            .collect();
 | 
			
		||||
        for (words, position) in &words_positions {
 | 
			
		||||
            if candidates.is_empty() {
 | 
			
		||||
                // FIXME: Use `None` or some function indicating that we're passing down the bucket to our child rules
 | 
			
		||||
                return Ok(Some(RankingRuleOutput {
 | 
			
		||||
                    query: query_graph.clone(),
 | 
			
		||||
                    candidates: universe.clone(),
 | 
			
		||||
                }));
 | 
			
		||||
                return Ok(State::Empty(query_graph.clone()));
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            'words: for (offset, word) in words.iter().enumerate() {
 | 
			
		||||
@@ -116,8 +156,11 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
 | 
			
		||||
                } else {
 | 
			
		||||
                    continue 'words;
 | 
			
		||||
                };
 | 
			
		||||
                // Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of
 | 
			
		||||
                // longer phrases we'll be losing on precision here.
 | 
			
		||||
                let bucketed_position = crate::bucketed_position(position + offset);
 | 
			
		||||
                let word_position_docids = CboRoaringBitmapCodec::bytes_decode(
 | 
			
		||||
                    ctx.get_db_word_position_docids(*word, position + offset)?.unwrap_or_default(),
 | 
			
		||||
                    ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default(),
 | 
			
		||||
                )
 | 
			
		||||
                .unwrap_or_default();
 | 
			
		||||
                candidates &= word_position_docids;
 | 
			
		||||
@@ -127,16 +170,12 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
 | 
			
		||||
        let candidates = candidates;
 | 
			
		||||
 | 
			
		||||
        if candidates.is_empty() {
 | 
			
		||||
            // FIXME: Use `None` or some function indicating that we're passing down the bucket to our child rules
 | 
			
		||||
            return Ok(Some(RankingRuleOutput {
 | 
			
		||||
                query: query_graph.clone(),
 | 
			
		||||
                candidates: universe.clone(),
 | 
			
		||||
            }));
 | 
			
		||||
            return Ok(State::Empty(query_graph.clone()));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?.unwrap_or_default();
 | 
			
		||||
 | 
			
		||||
        let mut candidates_per_attributes = Vec::with_capacity(searchable_fields_ids.len());
 | 
			
		||||
        let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
 | 
			
		||||
 | 
			
		||||
        // then check that there exists at least one attribute that has all of the terms
 | 
			
		||||
        for fid in searchable_fields_ids {
 | 
			
		||||
@@ -156,20 +195,59 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
 | 
			
		||||
            )?;
 | 
			
		||||
            intersection &= &candidates;
 | 
			
		||||
            if !intersection.is_empty() {
 | 
			
		||||
                candidates_per_attributes.push(intersection);
 | 
			
		||||
                let candidates_with_exact_word_count = ctx
 | 
			
		||||
                    .index
 | 
			
		||||
                    .field_id_word_count_docids
 | 
			
		||||
                    .get(ctx.txn, &(fid, exact_term_position_ids.len() as u8))?
 | 
			
		||||
                    .unwrap_or_default();
 | 
			
		||||
                candidates_per_attribute.push(FieldCandidates {
 | 
			
		||||
                    start_with_exact: intersection,
 | 
			
		||||
                    exact_word_count: candidates_with_exact_word_count,
 | 
			
		||||
                });
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        // note: we could have "false positives" where there exist both different attributes that collectively
 | 
			
		||||
        // have the terms in the correct order and a single attribute that has all the terms, but in the incorrect order.
 | 
			
		||||
 | 
			
		||||
        let candidates = MultiOps::union(candidates_per_attributes.into_iter());
 | 
			
		||||
        Ok(Some(RankingRuleOutput { query: query_graph.clone(), candidates }))
 | 
			
		||||
        Ok(State::ExactAttribute(query_graph.clone(), candidates_per_attribute))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn end_iteration(
 | 
			
		||||
        &mut self,
 | 
			
		||||
        _ctx: &mut SearchContext<'ctx>,
 | 
			
		||||
        _logger: &mut dyn SearchLogger<QueryGraph>,
 | 
			
		||||
    ) {
 | 
			
		||||
    fn next(
 | 
			
		||||
        state: State,
 | 
			
		||||
        universe: &RoaringBitmap,
 | 
			
		||||
    ) -> (State, Option<RankingRuleOutput<QueryGraph>>) {
 | 
			
		||||
        let (state, output) = match state {
 | 
			
		||||
            State::Uninitialized => (state, None),
 | 
			
		||||
            State::ExactAttribute(query_graph, candidates_per_attribute) => {
 | 
			
		||||
                let mut candidates = MultiOps::union(candidates_per_attribute.iter().map(
 | 
			
		||||
                    |FieldCandidates { start_with_exact, exact_word_count }| {
 | 
			
		||||
                        start_with_exact & exact_word_count
 | 
			
		||||
                    },
 | 
			
		||||
                ));
 | 
			
		||||
                candidates &= universe;
 | 
			
		||||
                (
 | 
			
		||||
                    State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
 | 
			
		||||
                    Some(RankingRuleOutput { query: query_graph, candidates }),
 | 
			
		||||
                )
 | 
			
		||||
            }
 | 
			
		||||
            State::AttributeStarts(query_graph, candidates_per_attribute) => {
 | 
			
		||||
                let mut candidates = MultiOps::union(candidates_per_attribute.into_iter().map(
 | 
			
		||||
                    |FieldCandidates { mut start_with_exact, exact_word_count }| {
 | 
			
		||||
                        start_with_exact -= exact_word_count;
 | 
			
		||||
                        start_with_exact
 | 
			
		||||
                    },
 | 
			
		||||
                ));
 | 
			
		||||
                candidates &= universe;
 | 
			
		||||
                (
 | 
			
		||||
                    State::Empty(query_graph.clone()),
 | 
			
		||||
                    Some(RankingRuleOutput { query: query_graph, candidates }),
 | 
			
		||||
                )
 | 
			
		||||
            }
 | 
			
		||||
            State::Empty(query_graph) => (
 | 
			
		||||
                State::Empty(query_graph.clone()),
 | 
			
		||||
                Some(RankingRuleOutput { query: query_graph, candidates: universe.clone() }),
 | 
			
		||||
            ),
 | 
			
		||||
        };
 | 
			
		||||
        (state, output)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user