update charabia

This commit is contained in:
ManyTheFish
2023-10-26 17:01:10 +02:00
parent ca52021079
commit 4c6fddb1cb
4 changed files with 761 additions and 32 deletions

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.3", default-features = false }
charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
@@ -82,7 +82,7 @@ md5 = "0.7.0"
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
@@ -106,3 +106,6 @@ thai = ["charabia/thai"]
# allow greek specialized tokenization
greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]