Support multiple space seperated words

This commit is contained in:
Kerollmops
2020-05-31 16:09:34 +02:00
parent 24587148fd
commit 6c726df9b9
5 changed files with 116 additions and 16 deletions

16
src/lib.rs Normal file
View File

@ -0,0 +1,16 @@
use std::collections::HashMap;
use std::hash::BuildHasherDefault;
use fxhash::FxHasher32;
use slice_group_by::StrGroupBy;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
pub type SmallVec32 = smallvec::SmallVec<[u8; 32]>;
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
pub type DocumentId = u32;
pub fn alphanumeric_tokens(string: &str) -> impl Iterator<Item = &str> {
let is_alphanumeric = |s: &&str| s.chars().next().map_or(false, char::is_alphanumeric);
string.linear_group_by_key(|c| c.is_alphanumeric()).filter(is_alphanumeric)
}