Update Charabia

This commit is contained in:
ManyTheFish
2023-06-28 18:52:32 +02:00
parent 9deeec88e0
commit 84845de9ef
9 changed files with 150 additions and 140 deletions

220
Cargo.lock generated
View File

@ -152,7 +152,7 @@ dependencies = [
"pin-project-lite",
"tokio-rustls 0.23.4",
"tokio-util",
"webpki-roots",
"webpki-roots 0.22.6",
]
[[package]]
@ -706,23 +706,24 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413155d93157bff9130895c3bd83970ac7f35659ca57226a96aa35cf1e8e102c"
dependencies = [
"aho-corasick",
"cow-utils",
"csv",
"deunicode",
"either",
"finl_unicode",
"fst",
"irg-kvariants",
"jieba-rs",
"lindera",
"lindera-core",
"lindera-dictionary",
"lindera-tokenizer",
"once_cell",
"pinyin",
"serde",
"slice-group-by",
"unicode-normalization",
"unicode-segmentation",
"wana_kana",
"whatlang",
]
@ -2135,15 +2136,6 @@ dependencies = [
"simple_asn1",
]
[[package]]
name = "kanaria"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff"
dependencies = [
"bitflags",
]
[[package]]
name = "language-tags"
version = "0.3.2"
@ -2211,38 +2203,11 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "lindera"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72be283281bec2768687b1784be03a678609b51f2f90f6f9d9b4f07953e6dd25"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"encoding",
"kanaria",
"lindera-cc-cedict-builder",
"lindera-core",
"lindera-dictionary",
"lindera-filter",
"lindera-ipadic-builder",
"lindera-ko-dic-builder",
"lindera-unidic-builder",
"regex",
"serde",
"serde_json",
"thiserror",
"unicode-blocks",
"unicode-normalization",
"yada",
]
[[package]]
name = "lindera-cc-cedict-builder"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10fbafd37adab44ccc2668a40fba2dbc4e665cb3c36018c15dfe2e2b830e28ce"
checksum = "4c6bf79b29a90bcd22036e494d6cc9ac3abe9ab604b21f3258ba6dc1ce501801"
dependencies = [
"anyhow",
"bincode",
@ -2259,9 +2224,9 @@ dependencies = [
[[package]]
name = "lindera-compress"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9196bf5995503f6878a090dfee6114ba86430c72f67ef3624246b564869937"
checksum = "8f2e99e67736352bbb6ed1c273643975822505067ca32194b0981040bc50527a"
dependencies = [
"anyhow",
"flate2",
@ -2270,9 +2235,9 @@ dependencies = [
[[package]]
name = "lindera-core"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5f0baa9932f682e9c5b388897330f155d3c40de80016e60125897fde5e0e246"
checksum = "7c3935e966409156f22cb4b334b21b0dce84b7aa1cad62214b466489d249c8e5"
dependencies = [
"anyhow",
"bincode",
@ -2287,9 +2252,9 @@ dependencies = [
[[package]]
name = "lindera-decompress"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6e63fa6ef0bc3ce2c26d372aa6185b7a316194494a84f81678f5da2893bf4a2"
checksum = "7476406abb63c49d7f59c88b9b868ee8d2981495ea7e2c3ad129902f9916b3c6"
dependencies = [
"anyhow",
"flate2",
@ -2298,63 +2263,50 @@ dependencies = [
[[package]]
name = "lindera-dictionary"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd765c36166016de87a1f447ea971573e4c63e334836c46ad0020f0408c88bfc"
checksum = "808b7d2b3cabc25a4022526d484a4cfd1d5924dc76a26e0379707698841acef2"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"lindera-cc-cedict-builder",
"lindera-core",
"lindera-ipadic",
"lindera-ko-dic",
"serde",
]
[[package]]
name = "lindera-filter"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5345e37fb9521ab3cee19283bed135d46b3521dc1fd13a49fa0992379056203"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"kanaria",
"lindera-core",
"lindera-dictionary",
"once_cell",
"regex",
"serde",
"serde_json",
"unicode-blocks",
"unicode-normalization",
"unicode-segmentation",
"yada",
]
[[package]]
name = "lindera-ipadic"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60eeb356295f784e7db4cfd2c6772f2bd059e565a7744e246642a07bc333a88a"
dependencies = [
"bincode",
"byteorder",
"encoding",
"flate2",
"lindera-core",
"lindera-decompress",
"lindera-ipadic-builder",
"once_cell",
"tar",
"lindera-ipadic-neologd-builder",
"lindera-ko-dic",
"lindera-ko-dic-builder",
"lindera-unidic",
"lindera-unidic-builder",
"serde",
]
[[package]]
name = "lindera-ipadic-builder"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a16a2a88db9d956f5086bc976deb9951ca2dbbfef41a002df0a7bfb2c845aab"
checksum = "31f373a280958c930e5ee4a1e4db3a0ee0542afaf02d3b5cacb8cab4e298648e"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-core",
"lindera-decompress",
"log",
"serde",
"yada",
]
[[package]]
name = "lindera-ipadic-neologd-builder"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92eff98e9ed1a7a412b91709c2343457a04ef02fa0c27c27e3a5892f5591eae9"
dependencies = [
"anyhow",
"bincode",
@ -2364,7 +2316,6 @@ dependencies = [
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
@ -2374,9 +2325,9 @@ dependencies = [
[[package]]
name = "lindera-ko-dic"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb479b170a841b8cfbe602d772e30849ffe0562b219190a378368968b8c8f66"
checksum = "74c6d5bf7d8092bd6d10de7a5d74b70ea7cf234586235b0d6cdb903b05a6c9e2"
dependencies = [
"bincode",
"byteorder",
@ -2391,9 +2342,9 @@ dependencies = [
[[package]]
name = "lindera-ko-dic-builder"
version = "0.23.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b9b58213552560717c48e7833444a20d2d7fe26a6e565f7ce0cbbf85784c7cf"
checksum = "f0a4add6d3c1e41ec9e2690d33e287d0223fb59a30ccee4980c23f31368cae1e"
dependencies = [
"anyhow",
"bincode",
@ -2410,10 +2361,42 @@ dependencies = [
]
[[package]]
name = "lindera-unidic-builder"
version = "0.23.0"
name = "lindera-tokenizer"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6858147cdaf4a7b564c08a247449d3aca38e9b4812499651af08afbf85324596"
checksum = "cb6a8acbd068019d1cdac7316f0dcb87f8e33ede2b13aa237f45114f9750afb8"
dependencies = [
"bincode",
"byteorder",
"lindera-core",
"lindera-dictionary",
"once_cell",
"serde",
"serde_json",
]
[[package]]
name = "lindera-unidic"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14abf0613d350b30d3b0406a33b1de8fa8d829f26516909421702174785991c8"
dependencies = [
"bincode",
"byteorder",
"encoding",
"lindera-core",
"lindera-decompress",
"lindera-unidic-builder",
"once_cell",
"ureq",
"zip",
]
[[package]]
name = "lindera-unidic-builder"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e204ed53d9bd63227d1e6a6c1f122ca039e00a8634ac32e7fb0281eeec8615c4"
dependencies = [
"anyhow",
"bincode",
@ -2422,6 +2405,7 @@ dependencies = [
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
@ -3427,7 +3411,7 @@ dependencies = [
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"webpki-roots",
"webpki-roots 0.22.6",
"winreg",
]
@ -4210,12 +4194,6 @@ version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
[[package]]
name = "unicode-blocks"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "943e3f1f50cc455d072e0801ccb71ff893b0c88060b1169f92e35fb5bb881cc6"
[[package]]
name = "unicode-ident"
version = "1.0.9"
@ -4249,6 +4227,21 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]]
name = "ureq"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
dependencies = [
"base64 0.21.2",
"log",
"once_cell",
"rustls 0.21.1",
"rustls-webpki",
"url",
"webpki-roots 0.23.1",
]
[[package]]
name = "url"
version = "2.3.1"
@ -4457,6 +4450,15 @@ dependencies = [
"webpki",
]
[[package]]
name = "webpki-roots"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
dependencies = [
"rustls-webpki",
]
[[package]]
name = "whatlang"
version = "0.16.2"