mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-23 13:16:27 +00:00
word prox: Remove again the interim BVec
This commit is contained in:
@ -42,8 +42,6 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
let rtxn = &context.rtxn;
|
||||
|
||||
let mut key_buffer = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
let mut del_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
let mut add_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
|
||||
let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield();
|
||||
let new_fields_ids_map = &mut *new_fields_ids_map;
|
||||
@ -65,7 +63,8 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |(w1, w2), prox| {
|
||||
del_word_pair_proximity.push(((w1, w2), prox));
|
||||
let key = build_key(prox, &w1, &w2, &mut key_buffer);
|
||||
cached_sorter.insert_del_u32(key, docid)
|
||||
},
|
||||
)?;
|
||||
}
|
||||
@ -77,7 +76,8 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |(w1, w2), prox| {
|
||||
del_word_pair_proximity.push(((w1, w2), prox));
|
||||
let key = build_key(prox, &w1, &w2, &mut key_buffer);
|
||||
cached_sorter.insert_del_u32(key, docid)
|
||||
},
|
||||
)?;
|
||||
let document = inner.merged(rtxn, index, context.db_fields_ids_map)?;
|
||||
@ -87,7 +87,8 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |(w1, w2), prox| {
|
||||
add_word_pair_proximity.push(((w1, w2), prox));
|
||||
let key = build_key(prox, &w1, &w2, &mut key_buffer);
|
||||
cached_sorter.insert_add_u32(key, docid)
|
||||
},
|
||||
)?;
|
||||
}
|
||||
@ -99,25 +100,13 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |(w1, w2), prox| {
|
||||
add_word_pair_proximity.push(((w1, w2), prox));
|
||||
let key = build_key(prox, &w1, &w2, &mut key_buffer);
|
||||
cached_sorter.insert_add_u32(key, docid)
|
||||
},
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
del_word_pair_proximity.sort_unstable();
|
||||
del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
|
||||
for ((w1, w2), prox) in del_word_pair_proximity.iter() {
|
||||
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||
cached_sorter.insert_del_u32(key, docid)?;
|
||||
}
|
||||
|
||||
add_word_pair_proximity.sort_unstable();
|
||||
add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
|
||||
for ((w1, w2), prox) in add_word_pair_proximity.iter() {
|
||||
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||
cached_sorter.insert_add_u32(key, docid)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@ -138,24 +127,26 @@ fn build_key<'a>(
|
||||
|
||||
fn word_positions_into_word_pair_proximity(
|
||||
word_positions: &mut VecDeque<(Rc<str>, u16)>,
|
||||
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
|
||||
) {
|
||||
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
let (head_word, head_position) = word_positions.pop_front().unwrap();
|
||||
for (word, position) in word_positions.iter() {
|
||||
let prox = index_proximity(head_position as u32, *position as u32) as u8;
|
||||
if prox > 0 && prox < MAX_DISTANCE as u8 {
|
||||
word_pair_proximity((head_word.clone(), word.clone()), prox);
|
||||
word_pair_proximity((head_word.clone(), word.clone()), prox)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn drain_word_positions(
|
||||
word_positions: &mut VecDeque<(Rc<str>, u16)>,
|
||||
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
|
||||
) {
|
||||
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
while !word_positions.is_empty() {
|
||||
word_positions_into_word_pair_proximity(word_positions, word_pair_proximity);
|
||||
word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn process_document_tokens<'doc>(
|
||||
@ -163,20 +154,20 @@ fn process_document_tokens<'doc>(
|
||||
document_tokenizer: &DocumentTokenizer,
|
||||
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||
word_positions: &mut VecDeque<(Rc<str>, u16)>,
|
||||
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
|
||||
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
let mut field_id = None;
|
||||
let mut token_fn = |_fname: &str, fid: FieldId, pos: u16, word: &str| {
|
||||
if field_id != Some(fid) {
|
||||
field_id = Some(fid);
|
||||
drain_word_positions(word_positions, word_pair_proximity);
|
||||
drain_word_positions(word_positions, word_pair_proximity)?;
|
||||
}
|
||||
// drain the proximity window until the head word is considered close to the word we are inserting.
|
||||
while word_positions
|
||||
.front()
|
||||
.map_or(false, |(_w, p)| index_proximity(*p as u32, pos as u32) >= MAX_DISTANCE)
|
||||
{
|
||||
word_positions_into_word_pair_proximity(word_positions, word_pair_proximity);
|
||||
word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?;
|
||||
}
|
||||
|
||||
// insert the new word.
|
||||
@ -185,6 +176,6 @@ fn process_document_tokens<'doc>(
|
||||
};
|
||||
document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
|
||||
|
||||
drain_word_positions(word_positions, word_pair_proximity);
|
||||
drain_word_positions(word_positions, word_pair_proximity)?;
|
||||
Ok(())
|
||||
}
|
||||
|
Reference in New Issue
Block a user