Various changes

- DistributionShift in Search object (to be set from model in embed?)
- Fix issue where embedder index wasn't computed at search time
- Accept as default embedder either the "default" one, or the only embedder when there is only one
This commit is contained in:
Louis Dureuil
2023-12-13 15:38:44 +01:00
parent 12940d79a9
commit e0cc775dc4
12 changed files with 141 additions and 33 deletions

View File

@ -266,6 +266,7 @@ fn get_ranking_rules_for_vector<'ctx>(
limit_plus_offset: usize,
target: &[f32],
distribution_shift: Option<DistributionShift>,
embedder_name: &str,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
// query graph search
@ -292,6 +293,7 @@ fn get_ranking_rules_for_vector<'ctx>(
vector_candidates,
limit_plus_offset,
distribution_shift,
embedder_name,
)?;
ranking_rules.push(Box::new(vector_sort));
vector = true;
@ -513,6 +515,8 @@ pub fn execute_vector_search(
geo_strategy: geo_sort::Strategy,
from: usize,
length: usize,
distribution_shift: Option<DistributionShift>,
embedder_name: &str,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
@ -524,7 +528,8 @@ pub fn execute_vector_search(
geo_strategy,
from + length,
vector,
None,
distribution_shift,
embedder_name,
)?;
let mut placeholder_search_logger = logger::DefaultSearchLogger;

View File

@ -15,16 +15,21 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
cached_sorted_docids: std::vec::IntoIter<(DocumentId, f32, Vec<f32>)>,
limit: usize,
distribution_shift: Option<DistributionShift>,
embedder_index: u8,
}
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
pub fn new(
_ctx: &SearchContext,
ctx: &SearchContext,
target: Vec<f32>,
vector_candidates: RoaringBitmap,
limit: usize,
distribution_shift: Option<DistributionShift>,
embedder_name: &str,
) -> Result<Self> {
/// FIXME: unwrap
let embedder_index = ctx.index.embedder_category_id.get(ctx.txn, embedder_name)?.unwrap();
Ok(Self {
query: None,
target,
@ -32,6 +37,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
cached_sorted_docids: Default::default(),
limit,
distribution_shift,
embedder_index,
})
}
@ -40,9 +46,10 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
ctx: &mut SearchContext<'_>,
vector_candidates: &RoaringBitmap,
) -> Result<()> {
let writer_index = (self.embedder_index as u16) << 8;
let readers: std::result::Result<Vec<_>, _> = (0..=u8::MAX)
.map_while(|k| {
arroy::Reader::open(ctx.txn, k.into(), ctx.index.vector_arroy)
arroy::Reader::open(ctx.txn, writer_index | (k as u16), ctx.index.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata => Ok(None),