Compare commits

...

2 Commits

Author      SHA1        Message                          Date
Kerollmops  2822cac55d  Clean up some comments           2025-11-17 16:12:18 +01:00
Kerollmops  070b8cad4d  Fix test_tokenize_document test  2025-11-17 16:05:06 +01:00
2 changed files with 17 additions and 15 deletions


@@ -555,15 +555,13 @@ impl SettingsChangeWordDocidsExtractors {
         let cached_sorter = cached_sorter_ref.as_mut().unwrap();
         let doc_alloc = &context.doc_alloc;
-        // Note: Whenever we delete a searchable field, we should remove the
+        // Note: Whenever we delete a searchable field, we must remove the
         // corresponding field from the word_fid_docids and fid_word_count_docids
         // in a more optimized way. Like deleting every key that starts with
         // the field id instead of deleting entries document by document.
         // TODO do this outside the loop
         let new_fields_ids_map = settings_delta.new_fields_ids_map();
         let old_fields_ids_map = context.index.fields_ids_map_with_metadata(&context.rtxn)?;
         let old_searchable = settings_delta.old_searchable_attributes().as_ref();
         let new_searchable = settings_delta.new_searchable_attributes().as_ref();
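
Aside: the TODO in the hunk above proposes dropping all entries for a deleted field in one pass over the key range that shares the field-id prefix, rather than deleting entries document by document. A minimal sketch of that idea over a plain BTreeMap standing in for the real databases (the one-byte field-id prefix and the key layout are illustrative, not milli's actual encoding):

use std::collections::BTreeMap;
use std::ops::Bound;

/// Remove every entry whose key starts with `prefix`, walking only the
/// contiguous key range that shares the prefix instead of deleting the
/// entries one document at a time.
fn delete_by_prefix(map: &mut BTreeMap<Vec<u8>, Vec<u8>>, prefix: &[u8]) {
    let keys: Vec<Vec<u8>> = map
        .range::<[u8], _>((Bound::Included(prefix), Bound::Unbounded))
        .take_while(|(key, _)| key.starts_with(prefix))
        .map(|(key, _)| key.clone())
        .collect();
    for key in keys {
        map.remove(&key);
    }
}

fn main() {
    let mut docids = BTreeMap::new();
    // Hypothetical keys: a one-byte field id followed by a word.
    docids.insert(vec![0, b'c', b'a', b't'], vec![1u8]);
    docids.insert(vec![0, b'd', b'o', b'g'], vec![2u8]);
    docids.insert(vec![1, b'c', b'a', b't'], vec![1u8]);
    // Field 0 was removed from the searchable attributes: drop its whole range.
    delete_by_prefix(&mut docids, &[0]);
    assert_eq!(docids.len(), 1);
}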
@@ -581,9 +579,10 @@ impl SettingsChangeWordDocidsExtractors {
             SkipDocument,
         }
-        // Here we do a preliminary check to determine the action
-        // to take. This check doesn't trigger the tokenizer.
         let mut action = ActionToOperate::SkipDocument;
+        // Here we do a preliminary check to determine the action to take.
+        // This check doesn't trigger the tokenizer as we never return
+        // PatternMatch::Match.
         document_tokenizer.tokenize_document(
             current_document,
             &mut |field_name| {
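
Aside: the rewritten comment above describes a preliminary pass that classifies the document from its field names alone; because the selector callback never returns PatternMatch::Match, tokenize_document performs no actual tokenization during this pass. A rough sketch of that pattern with stand-in types (the NoMatch variant name and the boolean result are assumptions; the real code uses milli's PatternMatch and an ActionToOperate value):

/// Stand-in for milli's PatternMatch; only `Match` is visible in this diff,
/// the `NoMatch` name is assumed for illustration.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
enum PatternMatch {
    Match,
    NoMatch,
}

/// Preliminary pass: decide whether a document needs reprocessing by looking
/// only at field names, and never return `PatternMatch::Match` so that no
/// field content is ever tokenized.
fn needs_reprocessing(
    field_names: &[&str],
    old_searchable: &[&str],
    new_searchable: &[&str],
) -> bool {
    let mut needs_work = false;
    let mut selector = |field_name: &str| -> PatternMatch {
        // A field that enters or leaves the searchable set forces reprocessing.
        if old_searchable.contains(&field_name) != new_searchable.contains(&field_name) {
            needs_work = true;
        }
        PatternMatch::NoMatch // never `Match`, so the tokenizer stays idle
    };
    for name in field_names {
        let _ = selector(name);
    }
    needs_work
}

fn main() {
    let decision = needs_reprocessing(&["title", "overview"], &["title"], &["title", "overview"]);
    assert!(decision); // "overview" became searchable, so the document is reprocessed
}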
@@ -672,7 +671,6 @@ impl SettingsChangeWordDocidsExtractors {
         let old_disabled_typos_terms = settings_delta.old_disabled_typos_terms();
         let new_disabled_typos_terms = settings_delta.new_disabled_typos_terms();
         let mut token_fn = |_field_name: &str, field_id, pos, word: &str| {
             let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
             let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();


@@ -183,7 +183,7 @@ mod test {
     use super::*;
     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
     use crate::update::new::document::{DocumentFromVersions, Versions};
-    use crate::{FieldsIdsMap, GlobalFieldsIdsMap};
+    use crate::{FieldsIdsMap, GlobalFieldsIdsMap, UserError};
     #[test]
     fn test_tokenize_document() {
@@ -243,15 +243,19 @@ mod test {
         let document = Versions::single(document);
         let document = DocumentFromVersions::new(&document);
+        let mut should_tokenize = |field_name: &str| {
+            let Some(field_id) = global_fields_ids_map.id_or_insert(field_name) else {
+                return Err(UserError::AttributeLimitReached.into());
+            };
+            Ok((field_id, PatternMatch::Match))
+        };
         document_tokenizer
-            .tokenize_document(
-                document,
-                &mut global_fields_ids_map,
-                &mut |_fname, fid, pos, word| {
-                    words.insert([fid, pos], word.to_string());
-                    Ok(())
-                },
-            )
+            .tokenize_document(document, &mut should_tokenize, &mut |_fname, fid, pos, word| {
+                words.insert([fid, pos], word.to_string());
+                Ok(())
+            })
             .unwrap();
         snapshot!(format!("{:#?}", words), @r###"