Add Turkish normalization

This commit is contained in:
ManyTheFish
2024-09-25 11:03:17 +02:00
parent efdc5739d7
commit e9580fe619
3 changed files with 9 additions and 1 deletions

View File

@ -108,6 +108,7 @@ all-tokenizations = [
"charabia/vietnamese",
"charabia/swedish-recomposition",
"charabia/german-segmentation",
"charabia/turkish",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB
@ -146,5 +147,8 @@ german = ["charabia/german-segmentation"]
# force Swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"]
# allow Turkish specialized normalization
turkish = ["charabia/turkish"]
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]