Merge branch 'main' into release-v1.14.0-tmp

This commit is contained in:
Tamo
2025-04-14 12:35:47 +02:00
committed by GitHub
113 changed files with 1268 additions and 852 deletions

View File

@ -537,7 +537,7 @@ impl<'ctx> SearchContext<'ctx> {
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
if self.restricted_fids.as_ref().is_some_and(|fids| !fids.contains(&fid)) {
return Ok(None);
}
@ -558,7 +558,7 @@ impl<'ctx> SearchContext<'ctx> {
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
if self.restricted_fids.as_ref().is_some_and(|fids| !fids.contains(&fid)) {
return Ok(None);
}

View File

@ -72,7 +72,7 @@ pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match;
let interval_score = get_interval_score(&matches[interval_first..=interval_last]);
let is_interval_score_better = &best_interval
.as_ref()
.map_or(true, |MatchIntervalWithScore { score, .. }| interval_score > *score);
.is_none_or(|MatchIntervalWithScore { score, .. }| interval_score > *score);
if *is_interval_score_better {
best_interval = Some(MatchIntervalWithScore {

View File

@ -8,6 +8,7 @@ use std::cmp::{max, min};
use charabia::{Language, SeparatorKind, Token, Tokenizer};
use either::Either;
use itertools::Itertools;
pub use matching_words::MatchingWords;
use matching_words::{MatchType, PartialMatch};
use r#match::{Match, MatchPosition};
@ -122,7 +123,7 @@ pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
}
impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
impl<'t> Matcher<'t, '_, '_, '_> {
/// Iterates over tokens and saves any of them that match the query.
fn compute_matches(&mut self) -> &mut Self {
/// some words are counted as matches only if they are close together and in the right order,
@ -229,8 +230,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
.iter()
.map(|m| MatchBounds {
start: tokens[m.get_first_token_pos()].byte_start,
// TODO: Why is this in chars, while start is in bytes?
length: m.char_count,
length: self.calc_byte_length(tokens, m),
indices: if array_indices.is_empty() {
None
} else {
@ -241,6 +241,18 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
}
}
/// Computes the length of match `m` by summing one per-character length for
/// each of the first `m.char_count` characters found in the tokens from
/// `m.get_first_token_pos()` to `m.get_last_token_pos()` (inclusive).
///
/// For a token that carries a `char_map`, the first component of each entry
/// is added (presumably the byte length of that character in the original
/// text; confirm against the tokenizer's documentation). For a token without
/// a `char_map`, the UTF-8 length of each character of its lemma is added.
///
/// Returns `0` when `m.char_count` is `0` or when the token range is empty.
///
/// # Panics
///
/// Panics if a token position that has to be visited is out of bounds for
/// `tokens`.
fn calc_byte_length(&self, tokens: &[Token<'t>], m: &Match) -> usize {
    let mut remaining_chars = m.char_count;
    let mut byte_length = 0;

    // Positions are indexed one at a time (rather than slicing `tokens`) so
    // that a token is only touched when more characters are still needed.
    for token_pos in m.get_first_token_pos()..=m.get_last_token_pos() {
        if remaining_chars == 0 {
            break;
        }

        let token = &tokens[token_pos];
        match &token.char_map {
            Some(char_map) => {
                // `take` copies the current budget, so decrementing the
                // counter inside the loop does not affect this iterator.
                for (original, _) in char_map.iter().take(remaining_chars) {
                    byte_length += *original as usize;
                    remaining_chars -= 1;
                }
            }
            None => {
                for c in token.lemma().chars().take(remaining_chars) {
                    byte_length += c.len_utf8();
                    remaining_chars -= 1;
                }
            }
        }
    }

    byte_length
}
/// Returns the bounds in byte index of the crop window.
fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] {
let (

View File

@ -327,7 +327,7 @@ impl QueryGraph {
let mut peekable = term_with_frequency.into_iter().peekable();
while let Some((idx, frequency)) = peekable.next() {
term_weight.insert(idx, weight);
if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
if peekable.peek().is_some_and(|(_, f)| frequency != *f) {
weight += 1;
}
}

View File

@ -398,7 +398,7 @@ fn split_best_frequency(
let right = ctx.word_interner.insert(right.to_owned());
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(None, left, right, 1)? {
if best.map_or(true, |(old, _, _)| frequency > old) {
if best.is_none_or(|(old, _, _)| frequency > old) {
best = Some((frequency, left, right));
}
}

View File

@ -203,7 +203,7 @@ pub fn number_of_typos_allowed<'ctx>(
Ok(Box::new(move |word: &str| {
if !authorize_typos
|| word.len() < min_len_one_typo as usize
|| exact_words.as_ref().map_or(false, |fst| fst.contains(word))
|| exact_words.as_ref().is_some_and(|fst| fst.contains(word))
{
0
} else if word.len() < min_len_two_typos as usize {

View File

@ -17,7 +17,7 @@ use crate::Result;
/// Cache of bitmaps keyed by interned phrase.
///
/// NOTE(review): judging by the type name, each bitmap presumably holds the
/// ids of the documents containing the phrase — confirm against the code that
/// fills the cache.
pub struct PhraseDocIdsCache {
/// One `RoaringBitmap` per interned phrase that has been inserted so far.
pub cache: FxHashMap<Interned<Phrase>, RoaringBitmap>,
}
impl<'ctx> SearchContext<'ctx> {
impl SearchContext<'_> {
/// Get the document ids associated with the given phrase
pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> {
if self.phrase_docids.cache.contains_key(&phrase) {

View File

@ -263,7 +263,7 @@ impl SmallBitmapInternal {
/// Returns `true` when the bit selected for `x` is set.
///
/// `get_set_index` supplies the word to inspect (`set`) and the bit position
/// inside that word (which shadows `x`); the result is whether that single bit
/// of `set` is non-zero.
pub fn contains(&self, x: u16) -> bool {
let (set, x) = self.get_set_index(x);
// NOTE(review): the two lines below are the pre- and post-change spellings of
// the same expression. In Rust `<<` binds tighter than `&`, and `&` binds
// tighter than `!=`, so both evaluate `(set & (0b1 << x)) != 0`; the
// parenthesised form only makes the grouping explicit.
set & 0b1 << x != 0
set & (0b1 << x) != 0
}
pub fn insert(&mut self, x: u16) {
@ -381,7 +381,7 @@ pub enum SmallBitmapInternalIter<'b> {
Tiny(u64),
Small { cur: u64, next: &'b [u64], base: u16 },
}
impl<'b> Iterator for SmallBitmapInternalIter<'b> {
impl Iterator for SmallBitmapInternalIter<'_> {
type Item = u16;
fn next(&mut self) -> Option<Self::Item> {