4365: Update charabia r=dureuill a=ManyTheFish

Update Charabia v0.8.7,

- Add Vietnamese Normalization (Ð and Đ into d)

Fixes #4357

Charabia versions:
- https://github.com/meilisearch/charabia/releases/tag/v0.8.6
- https://github.com/meilisearch/charabia/releases/tag/v0.8.7

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot]
2024-02-14 16:57:25 +00:00
committed by GitHub
4 changed files with 94 additions and 719 deletions

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.8.5", default-features = false }
charabia = { version = "0.8.7", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11"
deserr = "0.6.1"
@ -102,7 +102,16 @@ meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
all-tokenizations = [
"charabia/chinese",
"charabia/hebrew",
"charabia/japanese",
"charabia/thai",
"charabia/korean",
"charabia/greek",
"charabia/khmer",
"charabia/vietnamese",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
@ -130,5 +139,7 @@ greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]
vietnamese = ["charabia/vietnamese"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]