Add distinct_fid function and expose distinct_single_docid

This commit is contained in:
Louis Dureuil 2025-05-28 17:57:31 +02:00
parent 3c13feebf7
commit fd4b192a39
No known key found for this signature in database
3 changed files with 22 additions and 14 deletions

View File

@ -4,7 +4,9 @@ use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
use crate::search::new::distinct::{
apply_distinct_rule, distinct_fid, distinct_single_docid, DistinctOutput,
};
use crate::{Result, TimeBudget};
pub struct BucketSortOutput {
@ -35,16 +37,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.ranking_rules(&ranking_rules);
logger.initial_universe(universe);
let distinct_field = match distinct {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
let distinct_fid = if let Some(field) = distinct_field {
ctx.index.fields_ids_map(ctx.txn)?.id(field)
} else {
None
};
let distinct_fid = distinct_fid(distinct, ctx.index, ctx.txn)?;
if universe.len() < from as u64 {
return Ok(BucketSortOutput {

View File

@ -9,7 +9,7 @@ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result, SearchContext};
use crate::{FieldId, Index, Result, SearchContext};
pub struct DistinctOutput {
pub remaining: RoaringBitmap,
@ -121,3 +121,18 @@ pub fn facet_string_values<'a>(
fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] {
concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
}
pub fn distinct_fid(
query_distinct_field: Option<&str>,
index: &Index,
rtxn: &RoTxn<'_>,
) -> Result<Option<FieldId>> {
let distinct_field = match query_distinct_field {
Some(distinct) => Some(distinct),
None => index.distinct_field(rtxn)?,
};
let distinct_fid =
if let Some(field) = distinct_field { index.fields_ids_map(rtxn)?.id(field) } else { None };
Ok(distinct_fid)
}

View File

@ -28,6 +28,7 @@ use std::time::Duration;
use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::{Language, TokenizerBuilder};
use db_cache::DatabaseCache;
pub use distinct::{distinct_fid, distinct_single_docid};
use exact_attribute::ExactAttribute;
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
use heed::RoTxn;
@ -47,8 +48,7 @@ use sort::Sort;
use self::distinct::facet_string_values;
use self::geo_sort::GeoSort;
pub use self::geo_sort::Parameter as GeoSortParameter;
pub use self::geo_sort::Strategy as GeoSortStrategy;
pub use self::geo_sort::{Parameter as GeoSortParameter, Strategy as GeoSortStrategy};
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;