mirror of
https://github.com/LiteyukiStudio/nonebot-plugin-marshoai.git
synced 2025-08-02 11:40:04 +00:00
✨ 优化网页内容获取功能,添加摘要生成支持,重构相关函数
This commit is contained in:
@ -0,0 +1,35 @@
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from newspaper import Article # type: ignore
|
||||
from sumy.nlp.tokenizers import Tokenizer # type: ignore
|
||||
from sumy.parsers.plaintext import PlaintextParser # type: ignore
|
||||
from sumy.summarizers.lsa import LsaSummarizer # type: ignore
|
||||
|
||||
# Shared thread pool (default worker count). make_html_summary submits the
# synchronous summarization work to it through loop.run_in_executor.
executor = ThreadPoolExecutor()
|
||||
|
||||
|
||||
async def make_html_summary(
    html_content: str, language: str = "english", length: int = 3
) -> str:
    """Generate a summary of the given content without blocking the event loop.

    The summarization itself is synchronous, so it is submitted to the
    module-level ``executor`` and the result is awaited.

    Args:
        html_content (str): HTML content to summarize; forwarded unchanged
            to the worker function.
        language (str, optional): Language name forwarded to the tokenizer.
            Defaults to "english".
        length (int, optional): Summary length forwarded to the summarizer.
            Defaults to 3.

    Returns:
        str: The generated summary.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented way to obtain it and never implicitly creates a new loop.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        executor, _make_summary, html_content, language, length
    )
|
||||
|
||||
|
||||
def _make_summary(html_content: str, language: str, length: int) -> str:
    """Synchronously summarize ``html_content`` and return it as one string.

    The text is parsed with ``PlaintextParser`` using a ``Tokenizer`` built
    for ``language``, then summarized by ``LsaSummarizer`` with ``length``
    as its size argument. The resulting items are converted to ``str`` and
    joined with single spaces.
    """
    tokenizer = Tokenizer(language)
    document = PlaintextParser.from_string(html_content, tokenizer).document
    summarize = LsaSummarizer()
    picked = summarize(document, length)
    return " ".join(map(str, picked))
|
||||
|
Reference in New Issue
Block a user