remove byte offset in index_seq

This commit is contained in:
mpostma
2021-01-04 14:54:19 +01:00
committed by many
parent 2a145e288c
commit c290719984

View File

@@ -73,25 +73,18 @@ where
where where
I: IntoIterator<Item = &'s str>, I: IntoIterator<Item = &'s str>,
{ {
let mut byte_offset = 0;
let mut word_offset = 0; let mut word_offset = 0;
for text in text_iter.into_iter() { for text in text_iter.into_iter() {
let current_byte_offset = byte_offset;
let current_word_offset = word_offset; let current_word_offset = word_offset;
let analyzed_text = self.analyzer.analyze(text); let analyzed_text = self.analyzer.analyze(text);
let tokens = process_tokens(analyzed_text.tokens()) let tokens = process_tokens(analyzed_text.tokens())
.map(|(i, mut t)| { .map(|(i, t)| (i + current_word_offset, t))
t.byte_start += current_byte_offset;
t.byte_end += current_byte_offset;
(i + current_word_offset, t)
})
.enumerate(); .enumerate();
for (token_pos, (word_pos, token)) in tokens { for (token_pos, (word_pos, token)) in tokens {
word_offset = word_pos + 1; word_offset = word_pos + 1;
byte_offset = token.byte_end + 1;
let must_continue = index_token( let must_continue = index_token(
token, token,