mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-26 15:59:10 +00:00
Compare commits
6 Commits
prototype-
...
lazy-word-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a5a834f27 | ||
|
|
418fa47963 | ||
|
|
0656a0d515 | ||
|
|
f9807ba32e | ||
|
|
8c8cc59a6c | ||
|
|
f540a69ac3 |
@@ -173,16 +173,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
ranking_rule_scores.push(ScoreDetails::Skipped);
|
||||
|
||||
// remove candidates from the universe without adding them to result if their score is below the threshold
|
||||
if let Some(ranking_score_threshold) = ranking_score_threshold {
|
||||
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
|
||||
if current_score < ranking_score_threshold {
|
||||
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
|
||||
back!();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let is_below_threshold =
|
||||
ranking_score_threshold.is_some_and(|ranking_score_threshold| {
|
||||
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
|
||||
current_score < ranking_score_threshold
|
||||
});
|
||||
|
||||
maybe_add_to_results!(bucket);
|
||||
if is_below_threshold {
|
||||
all_candidates -= &bucket;
|
||||
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
|
||||
} else {
|
||||
maybe_add_to_results!(bucket);
|
||||
}
|
||||
|
||||
ranking_rule_scores.pop();
|
||||
|
||||
@@ -237,23 +239,24 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
);
|
||||
|
||||
// remove candidates from the universe without adding them to result if their score is below the threshold
|
||||
if let Some(ranking_score_threshold) = ranking_score_threshold {
|
||||
let is_below_threshold = ranking_score_threshold.is_some_and(|ranking_score_threshold| {
|
||||
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
|
||||
if current_score < ranking_score_threshold {
|
||||
all_candidates -=
|
||||
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
|
||||
back!();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
current_score < ranking_score_threshold
|
||||
});
|
||||
|
||||
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
|
||||
|
||||
if cur_ranking_rule_index == ranking_rules_len - 1
|
||||
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|
||||
|| cur_offset + (next_bucket.candidates.len() as usize) < from
|
||||
|| is_below_threshold
|
||||
{
|
||||
maybe_add_to_results!(next_bucket.candidates);
|
||||
if is_below_threshold {
|
||||
all_candidates -= &next_bucket.candidates;
|
||||
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
|
||||
} else {
|
||||
maybe_add_to_results!(next_bucket.candidates);
|
||||
}
|
||||
ranking_rule_scores.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -144,7 +144,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
||||
let mut merger_iter = builder.build().into_stream_merger_iter()?;
|
||||
let mut current_field_id = None;
|
||||
let mut fst;
|
||||
let mut fst_merger_builder: Option<FstMergerBuilder> = None;
|
||||
let mut fst_merger_builder: Option<FstMergerBuilder<_>> = None;
|
||||
while let Some((key, deladd)) = merger_iter.next()? {
|
||||
let (field_id, normalized_facet_string) =
|
||||
BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?;
|
||||
@@ -153,12 +153,13 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
||||
if let (Some(current_field_id), Some(fst_merger_builder)) =
|
||||
(current_field_id, fst_merger_builder)
|
||||
{
|
||||
let mmap = fst_merger_builder.build(&mut callback)?;
|
||||
index.facet_id_string_fst.remap_data_type::<Bytes>().put(
|
||||
wtxn,
|
||||
&current_field_id,
|
||||
&mmap,
|
||||
)?;
|
||||
if let Some(mmap) = fst_merger_builder.build(&mut callback)? {
|
||||
index.facet_id_string_fst.remap_data_type::<Bytes>().put(
|
||||
wtxn,
|
||||
&current_field_id,
|
||||
&mmap,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
fst = index.facet_id_string_fst.get(rtxn, &field_id)?;
|
||||
@@ -209,8 +210,9 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
||||
}
|
||||
|
||||
if let (Some(field_id), Some(fst_merger_builder)) = (current_field_id, fst_merger_builder) {
|
||||
let mmap = fst_merger_builder.build(&mut callback)?;
|
||||
index.facet_id_string_fst.remap_data_type::<Bytes>().put(wtxn, &field_id, &mmap)?;
|
||||
if let Some(mmap) = fst_merger_builder.build(&mut callback)? {
|
||||
index.facet_id_string_fst.remap_data_type::<Bytes>().put(wtxn, &field_id, &mmap)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1,25 +1,27 @@
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
|
||||
use fst::{Set, SetBuilder, Streamer};
|
||||
use fst::{IntoStreamer, Set, SetBuilder, Streamer};
|
||||
use memmap2::Mmap;
|
||||
use tempfile::tempfile;
|
||||
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::{InternalError, Result};
|
||||
|
||||
pub struct FstMergerBuilder<'a> {
|
||||
pub struct FstMergerBuilder<'a, D: AsRef<[u8]>> {
|
||||
fst: Option<&'a Set<D>>,
|
||||
stream: Option<fst::set::Stream<'a>>,
|
||||
fst_builder: SetBuilder<BufWriter<File>>,
|
||||
fst_builder: Option<SetBuilder<BufWriter<File>>>,
|
||||
last: Option<Vec<u8>>,
|
||||
inserted_words: usize,
|
||||
}
|
||||
|
||||
impl<'a> FstMergerBuilder<'a> {
|
||||
pub fn new<D: AsRef<[u8]>>(fst: Option<&'a Set<D>>) -> Result<Self> {
|
||||
impl<'a, D: AsRef<[u8]>> FstMergerBuilder<'a, D> {
|
||||
pub fn new(fst: Option<&'a Set<D>>) -> Result<Self> {
|
||||
Ok(Self {
|
||||
fst,
|
||||
stream: fst.map(|fst| fst.stream()),
|
||||
fst_builder: SetBuilder::new(BufWriter::new(tempfile()?))?,
|
||||
fst_builder: None,
|
||||
last: None,
|
||||
inserted_words: 0,
|
||||
})
|
||||
@@ -110,11 +112,17 @@ impl<'a> FstMergerBuilder<'a> {
|
||||
is_modified: bool,
|
||||
insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
// Addition: We insert the word
|
||||
// Deletion: We delete the word by not inserting it
|
||||
if deladd == DelAdd::Addition {
|
||||
self.inserted_words += 1;
|
||||
self.fst_builder.insert(bytes)?;
|
||||
if is_modified && self.fst_builder.is_none() {
|
||||
self.build_new_fst(bytes)?;
|
||||
}
|
||||
|
||||
if let Some(fst_builder) = self.fst_builder.as_mut() {
|
||||
// Addition: We insert the word
|
||||
// Deletion: We delete the word by not inserting it
|
||||
if deladd == DelAdd::Addition {
|
||||
self.inserted_words += 1;
|
||||
fst_builder.insert(bytes)?;
|
||||
}
|
||||
}
|
||||
|
||||
insertion_callback(bytes, deladd, is_modified)?;
|
||||
@@ -122,6 +130,19 @@ impl<'a> FstMergerBuilder<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Lazily build the new fst
|
||||
fn build_new_fst(&mut self, bytes: &[u8]) -> Result<()> {
|
||||
let mut fst_builder = SetBuilder::new(BufWriter::new(tempfile()?))?;
|
||||
|
||||
if let Some(fst) = self.fst {
|
||||
fst_builder.extend_stream(fst.range().lt(bytes).into_stream())?;
|
||||
}
|
||||
|
||||
self.fst_builder = Some(fst_builder);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn drain_stream(
|
||||
&mut self,
|
||||
insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
|
||||
@@ -142,16 +163,20 @@ impl<'a> FstMergerBuilder<'a> {
|
||||
pub fn build(
|
||||
mut self,
|
||||
insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
|
||||
) -> Result<Mmap> {
|
||||
) -> Result<Option<Mmap>> {
|
||||
self.drain_stream(insertion_callback)?;
|
||||
|
||||
let fst_file = self
|
||||
.fst_builder
|
||||
.into_inner()?
|
||||
.into_inner()
|
||||
.map_err(|_| InternalError::IndexingMergingKeys { process: "building-fst" })?;
|
||||
let fst_mmap = unsafe { Mmap::map(&fst_file)? };
|
||||
match self.fst_builder {
|
||||
Some(fst_builder) => {
|
||||
let fst_file = fst_builder
|
||||
.into_inner()?
|
||||
.into_inner()
|
||||
.map_err(|_| InternalError::IndexingMergingKeys { process: "building-fst" })?;
|
||||
let fst_mmap = unsafe { Mmap::map(&fst_file)? };
|
||||
|
||||
Ok(fst_mmap)
|
||||
Ok(Some(fst_mmap))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,7 +118,9 @@ fn compute_word_fst(
|
||||
}
|
||||
|
||||
let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, &rtxn)?;
|
||||
index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?;
|
||||
if let Some(word_fst_mmap) = word_fst_mmap {
|
||||
index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?;
|
||||
}
|
||||
if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data {
|
||||
index.main.remap_types::<Str, Bytes>().put(
|
||||
wtxn,
|
||||
|
||||
@@ -10,14 +10,14 @@ use crate::index::PrefixSettings;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::{InternalError, Prefix, Result};
|
||||
|
||||
pub struct WordFstBuilder<'a> {
|
||||
word_fst_builder: FstMergerBuilder<'a>,
|
||||
pub struct WordFstBuilder<'a, D: AsRef<[u8]>> {
|
||||
word_fst_builder: FstMergerBuilder<'a, D>,
|
||||
prefix_fst_builder: Option<PrefixFstBuilder>,
|
||||
registered_words: usize,
|
||||
}
|
||||
|
||||
impl<'a> WordFstBuilder<'a> {
|
||||
pub fn new(words_fst: &'a Set<std::borrow::Cow<'a, [u8]>>) -> Result<Self> {
|
||||
impl<'a, D: AsRef<[u8]>> WordFstBuilder<'a, D> {
|
||||
pub fn new(words_fst: &'a Set<D>) -> Result<Self> {
|
||||
Ok(Self {
|
||||
word_fst_builder: FstMergerBuilder::new(Some(words_fst))?,
|
||||
prefix_fst_builder: None,
|
||||
@@ -50,7 +50,7 @@ impl<'a> WordFstBuilder<'a> {
|
||||
mut self,
|
||||
index: &crate::Index,
|
||||
rtxn: &heed::RoTxn,
|
||||
) -> Result<(Mmap, Option<PrefixData>)> {
|
||||
) -> Result<(Option<Mmap>, Option<PrefixData>)> {
|
||||
let words_fst_mmap = self.word_fst_builder.build(&mut |bytes, deladd, is_modified| {
|
||||
if let Some(prefix_fst_builder) = &mut self.prefix_fst_builder {
|
||||
prefix_fst_builder.insert_word(bytes, deladd, is_modified)
|
||||
|
||||
Reference in New Issue
Block a user