mirror of
https://github.com/nonebot/nonebot2.git
synced 2025-07-27 16:21:28 +00:00
Try to add the first natural language processor - 'translate'
@@ -1,12 +1,39 @@
-import jieba
+import os
+import importlib

 from command import CommandRegistry
+from commands import core
+from nl_processor import parse_potential_commands
+from little_shit import get_nl_processors_dir
+from command import hub as cmdhub

-__registry__ = cr = CommandRegistry()
+
+def _init():
+    _load_processors()
+
+
+__registry__ = cr = CommandRegistry(init_func=_init)


 @cr.register('process')
 @cr.restrict(full_command_only=True)
-def process(args_text, ctx_msg, internal=False):
-    print('自然语言消息处理', args_text)
-    print(list(jieba.cut_for_search(args_text)))
+def process(sentence, ctx_msg, internal=False):
+    sentence = sentence.strip()
+    potential_commands = parse_potential_commands(sentence)
+    potential_commands = sorted(filter(lambda x: x and x[0] > 60, potential_commands), key=lambda x: x[0], reverse=True)
+    if len(potential_commands) > 0:
+        most_possible_cmd = potential_commands[0]
+        ctx_msg['parsed_data'] = most_possible_cmd[3]
+        cmdhub.call(most_possible_cmd[1], most_possible_cmd[2], ctx_msg)
+    else:
+        core.echo('我暂时不理解你在说什么哦~', ctx_msg, internal)
+
+
+def _load_processors():
+    processor_mod_files = filter(
+        lambda filename: filename.endswith('.py') and not filename.startswith('_'),
+        os.listdir(get_nl_processors_dir())
+    )
+    command_mods = [os.path.splitext(file)[0] for file in processor_mod_files]
+    for mod_name in command_mods:
+        importlib.import_module('nl_processors.' + mod_name)
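Each natural language processor returns either None or a 4-tuple of (confidence, command name, command args, parsed data). process() drops candidates that score 60 or below, sorts the rest in descending order of confidence, and dispatches the winner through cmdhub; the fallback echo above says "I don't understand what you're saying yet~". A minimal sketch of that selection step with made-up candidates (only 'translate.translate_to' is a real command from this commit; the guard `c and c[0] > 60` is needed because unmatched processors return None):

candidates = [
    None,                                               # a processor that did not match
    (90, 'translate.translate_to', '英文 你好', None),  # from nl_processors/translate.py
    (40, 'some.other_command', 'args', None),           # hypothetical low-confidence hit
]
best = sorted((c for c in candidates if c and c[0] > 60), key=lambda c: c[0], reverse=True)
if best:
    confidence, cmd_name, cmd_args, parsed_data = best[0]
    # cmdhub.call(cmd_name, cmd_args, ctx_msg) would run the winning command here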
@@ -47,7 +47,27 @@ _lang_alias_map = {
     '汉语': 'zh',
     '英文': 'en',
     '日文': 'jp',
-    '韩文': 'kor'
+    '韩文': 'kor',
+    '法文': 'fra',
+    '西班牙文': 'spa',
+    '阿拉伯文': 'ara',
+    '俄文': 'ru',
+    '葡萄牙文': 'pt',
+    '德文': 'de',
+    '意大利文': 'it',
+    '希腊文': 'el',
+    '荷兰文': 'nl',
+    '波兰文': 'pl',
+    '保加利亚文': 'bul',
+    '爱沙尼亚文': 'est',
+    '丹麦文': 'dan',
+    '芬兰文': 'fin',
+    '捷克文': 'cs',
+    '罗马尼亚文': 'rom',
+    '斯洛文尼亚文': 'slo',
+    '瑞典文': 'swe',
+    '匈牙利文': 'hu',
+    '越南文': 'vie'
 }
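The added entries map Chinese names for French, Spanish, Arabic and so on to short language codes ('fra', 'spa', 'ara', 'vie', …). These look like Baidu Translate API codes, consistent with the Baidu speech recognition already used elsewhere in this commit; resolution is a plain dict lookup:

# Keys are Chinese language names, values are the translation API's codes.
_lang_alias_map['法文']    # -> 'fra' (French)
_lang_alias_map['越南文']  # -> 'vie' (Vietnamese)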
@@ -67,7 +87,7 @@ def translate(args_text, ctx_msg):
     return translate_to('简体中文 ' + args_text, ctx_msg)


-@cr.register('translate_to', 'translate-to', '翻译到', '翻译成')
+@cr.register('translate_to', 'translate-to', '翻译到', '翻译成', '翻译为')
 def translate_to(args_text, ctx_msg):
     args = args_text.strip().split(' ', 1)
     if len(args) < 2 or (args[0] not in _lang_map and args[0] not in _lang_alias_map):
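translate_to (now also reachable via the alias '翻译为', "translate as") treats the first space-separated token of its argument as the target language and the remainder as the text to translate, via str.split with maxsplit=1. A quick sketch of that parsing step, using a language name from the alias map above:

args_text = '英文 你好世界'
args = args_text.strip().split(' ', 1)  # -> ['英文', '你好世界']
# args[0] must be a key of _lang_map or _lang_alias_map, otherwise the command rejects the input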
@@ -80,6 +80,7 @@ def _filter(ctx_msg):
     if text:
         reply = '识别结果(百度语音识别):\n%s\n\n下面将把识别到的内容作为文字消息处理……' % text
         ctx_msg['text'] = text
+        ctx_msg['from_voice'] = True
     else:
         reply = '抱歉哦,没有识别出你说的是什么'
     core.echo(reply, ctx_msg)
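The voice filter (its replies translate as "Recognition result (Baidu speech recognition): … the recognized content will now be handled as a text message" and "Sorry, I couldn't recognize what you said") now also tags the message with from_voice = True, so later handlers can tell transcribed speech from typed text. A hypothetical downstream check might look like:

# Hypothetical consumer; 'from_voice' is only set when speech recognition succeeded.
if ctx_msg.get('from_voice'):
    pass  # e.g. phrase replies knowing the user spoke rather than typed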
@@ -26,6 +26,10 @@ def get_commands_dir():
     return _mkdir_if_not_exists_and_return_path(os.path.join(get_root_dir(), 'commands'))


+def get_nl_processors_dir():
+    return _mkdir_if_not_exists_and_return_path(os.path.join(get_root_dir(), 'nl_processors'))
+
+
 def get_db_dir():
     return _mkdir_if_not_exists_and_return_path(os.path.join(get_root_dir(), 'data', 'db'))
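get_nl_processors_dir() creates the plugin directory on first use, and _load_processors() imports every module in it whose filename ends in .py and does not start with an underscore. After this commit the expected layout is therefore roughly (second entry hypothetical, shown only to illustrate the underscore rule):

nl_processors/
    translate.py   # imported at startup as nl_processors.translate
    _private.py    # would be skipped because of the leading underscore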
nl_processor.py  (new file, 38 lines)
@@ -0,0 +1,38 @@
+import re
+
+import jieba.posseg
+
+_processors = []
+_processors_without_keyword = []
+
+
+def as_processor(keywords=None):
+    def decorator(func):
+        if keywords:
+            _processors.append((keywords, func))
+        else:
+            _processors_without_keyword.append(func)
+        return func
+
+    return decorator
+
+
+def parse_potential_commands(sentence):
+    segmentation = list(jieba.posseg.cut(sentence=sentence))
+    print('分词结果:', segmentation)
+    potential_commands = []
+    for processor in _processors:
+        processed = False
+        for regex in processor[0]:
+            for word, flag in segmentation:
+                if re.match(regex, word):
+                    potential_commands.append(processor[1](sentence, segmentation))
+                    processed = True
+                    # A word matched, so skip the rest of the words
+                    break
+            if processed:
+                # This processor has already run, so skip its remaining keywords
+                break
+    for func in _processors_without_keyword:
+        potential_commands.append(func(sentence, segmentation))
+    return potential_commands
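as_processor registers a function together with keyword regexes that gate it: a processor only runs when some word of the jieba segmentation (the debug print above, '分词结果', means "segmentation result") matches one of its keywords, while keyword-less processors run on every sentence. A minimal hypothetical processor registered this way (the name, keyword, and scoring are made up for illustration):

from nl_processor import as_processor

@as_processor(keywords=('天气',))  # runs when a segmented word matches this regex ("weather")
def _weather_processor(sentence, segmentation):
    if '天气' in sentence:
        # (confidence, command name, command args, parsed data)
        return 80, 'weather.weather', sentence, None
    return None  # no match: the dispatcher must tolerate None results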
nl_processors/translate.py  (new file, 31 lines)
@@ -0,0 +1,31 @@
+import re
+
+from nl_processor import as_processor
+
+_query_lang_matcher = [
+    re.compile('[把将]?[ ,.,。]?(.*?)[ ,.,。]?(?:这[个]?(?:词[组]?|句(?:子|话)?|短语))翻译[成为到](\w+?[文语])(?![ ::,,.。])'),
+    re.compile('(\w+?)[ ,.,。]?(?:这[个]?(?:词[组]?|句(?:子|话)?|短语))?[的用](\w+?[文语])')
+]
+
+_lang_query_matcher = [
+    re.compile('[把将]?(?:(?:这[个]?|[下后][面]?)(?:词[组]?|句(?:子|话)?|短语))翻译[成为到](\w+?[文语])[ ::,,.。](.*)'),
+    re.compile('[用]?(\w+[文语])\w+?(?:说|讲|表达|表示)(.*)(?:这[个]?(?:词[组]?|句(?:子|话)?|短语))'),
+    re.compile('[用]?(\w+[文语])\w+?(?:说|讲|表达|表示)(.*)')
+]
+
+
+@as_processor(keywords=('翻译(为|成|到)?', '.+(文|语)'))
+def _processor(sentence, segmentation):
+    lang = None
+    query = None
+    for matcher in _query_lang_matcher + _lang_query_matcher:
+        m = matcher.match(sentence)
+        if m:
+            if matcher in _lang_query_matcher:
+                lang, query = m.group(1), m.group(2)
+            else:
+                lang, query = m.group(2), m.group(1)
+            break
+    if lang and query:
+        return 90, 'translate.translate_to', ' '.join((lang.strip(), query.strip(' ,,'))), None
+    return None
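The two matcher lists cover the two natural word orders: "query … translate into language" (_query_lang_matcher, query in group 1) and "translate into language: query" / "say query in language" (_lang_query_matcher, language in group 1). On a match the processor returns a fixed confidence of 90 plus an args string already shaped for translate_to. A rough worked example, as read from the patterns above rather than verified against the live bot:

sentence = '把这句话翻译成英文:你好'  # "translate this sentence into English: hello"
# Matches the first pattern in _lang_query_matcher:
#   group(1) = '英文' (target language), group(2) = '你好' (text to translate)
# so _processor returns:
#   (90, 'translate.translate_to', '英文 你好', None)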