Files
py-logiliteal/src/logiliteal/utils/regex.py
2025-07-29 14:17:22 +08:00

128 lines
4.6 KiB
Python

"""
正则表达式处理工具
"""
import re
from collections import deque
from .styles import set_color, set_bg_color
def process_links(text: str, no_process: bool = False) -> str:
    """Convert HTML and Markdown links into OSC 8 terminal hyperlinks.

    Three syntaxes are recognised: ``<a href="url">label</a>``,
    ``<link href="url">label</link>`` and ``[label](url)``.  Every link is
    first swapped for a unique placeholder so that a later pattern cannot
    match inside an already-recognised link; the placeholders are then
    expanded into escape sequences.

    Args:
        text: The text that may contain link markup.
        no_process: When true, ``text`` is returned untouched.

    Returns:
        ``text`` with every recognised link replaced by
        ``ESC ]8;;url ESC \\ <styled label> ESC ]8;; ESC \\``.  The label is
        prefixed with the underline code and passed to ``set_color`` with
        ``"#5f93ff"``.
    """
    if no_process:
        return text

    # placeholder -> (url, label), in the order the links were found.
    placeholders = {}

    def stash(url, label):
        """Record one link and return the placeholder standing in for it."""
        placeholder = f"__LINK_PLACEHOLDER_{len(placeholders) + 1}__"
        # An empty URL falls back to "#".
        placeholders[placeholder] = (url if url else "#", label)
        return placeholder

    # Each syntax gets its own callback so the capture-group order is known
    # from the pattern itself instead of being guessed from the content
    # (guessing swapped URL and label for relative hrefs and for Markdown
    # labels that start with "http").
    def replace_html(m):
        # HTML form: group 1 is the href, group 2 the label.
        return stash(m.group(1), m.group(2))

    def replace_markdown(m):
        # Markdown form: group 1 is the label, group 2 the URL.
        return stash(m.group(2).strip(), m.group(1))

    text = re.sub(r'<a\s+href="([^"]+)">(.*?)</a>', replace_html, text, flags=re.DOTALL)
    text = re.sub(r'<link\s+href="([^"]+)">(.*?)</link>', replace_html, text, flags=re.DOTALL)
    text = re.sub(r'\[(.*?)\]\((.*?)\)', replace_markdown, text)

    # Expand the most recently created placeholders first: a later link may
    # hold an earlier placeholder inside its label, and that inner
    # placeholder only becomes visible in ``text`` once the outer one has
    # been expanded.
    for placeholder, (url, label) in reversed(list(placeholders.items())):
        # Built outside the f-string: a backslash escape inside an f-string
        # replacement field is a syntax error before Python 3.12.
        styled_label = set_color("\033[4m" + label, "#5f93ff")
        ansi_link = f'\033]8;;{url}\033\\{styled_label}\033]8;;\033\\'
        text = text.replace(placeholder, ansi_link)
    return text
def process_markdown_formats(text: str, no_process: bool = False) -> str:
    """Convert inline Markdown emphasis markers into ANSI SGR sequences.

    Supported markers: ``**bold**``, ``*italic*``, ``__underline__`` and
    ``~~strikethrough~~``.  The emphasised content must be non-empty and
    must not start with the marker character, so literal runs such as
    ``2 ** 3``, ``________`` or ``~~~~`` are left untouched instead of being
    replaced by an empty pair of escape codes.

    Args:
        text: The text that may contain Markdown emphasis.
        no_process: When true, ``text`` is returned untouched.

    Returns:
        ``text`` with each emphasised span wrapped in the matching
        enable/disable escape codes.
    """
    if no_process:
        return text

    # Order matters: bold (**) has to be consumed before italic (*),
    # otherwise the italic rule would pair up the doubled asterisks.
    rules = (
        (r'\*\*(?!\*)(.+?)\*\*', '\033[1m\\g<1>\033[22m'),  # bold
        (r'\*(?!\*)(.+?)\*', '\033[3m\\g<1>\033[23m'),      # italic
        (r'__(?!_)(.+?)__', '\033[4m\\g<1>\033[24m'),       # underline
        (r'~~(?!~)(.+?)~~', '\033[9m\\g<1>\033[29m'),       # strikethrough
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text
def process_html_styles(text: str, no_process: bool = False) -> str:
    """Convert simple HTML style tags into ANSI SGR sequences.

    Supported tags: ``<i>`` (italic), ``<b>`` (bold), ``<u>`` (underline)
    and ``<s>`` (strikethrough).  An ``<i>`` or ``<s>`` tag that is never
    closed applies up to the end of the text; ``<b>`` and ``<u>`` must be
    closed explicitly.

    Each pattern only matches a tag whose content holds no further ``<``,
    i.e. an innermost tag.  The rules are therefore re-applied until nothing
    changes, so nested tags are resolved from the inside out whatever the
    nesting order (``<i><b>x</b></i>`` as well as ``<b><i>x</i></b>``).

    Args:
        text: The text that may contain style tags.
        no_process: When true, ``text`` is returned untouched.

    Returns:
        ``text`` with each recognised tag pair replaced by the matching
        enable/disable escape codes.
    """
    if no_process:
        return text

    rules = (
        (re.compile(r'<i>([^<]*?)(</i>|$)', re.DOTALL), '\033[3m', '\033[23m'),
        (re.compile(r'<b>([^<]*?)</b>'), '\033[1m', '\033[22m'),
        (re.compile(r'<u>([^<]*?)</u>'), '\033[4m', '\033[24m'),
        (re.compile(r'<s>([^<]*?)(</s>|$)', re.DOTALL), '\033[9m', '\033[29m'),
    )

    # Every substitution removes one opening tag and inserts no "<", so the
    # number of "<" characters strictly decreases and the loop terminates.
    changed = True
    while changed:
        changed = False
        for pattern, start, end in rules:
            # start/end are bound as defaults to avoid late-binding closures.
            text, count = pattern.subn(
                lambda m, start=start, end=end: start + m.group(1) + end,
                text,
            )
            if count:
                changed = True
    return text
def process_special_tags(text: str, no_process: bool = False) -> str:
    """Expand the special tags ``<br>``, ``<c>`` and ``<p>``.

    * ``<br>`` becomes a newline.
    * ``<c>`` becomes the ANSI reset sequence.
    * ``<p>...</p>`` becomes the paragraph content surrounded by a reset
      sequence and a newline on each side.  A ``<p>`` without a closing tag
      runs to the end of the text, and a stray ``</p>`` becomes a reset
      followed by a newline.

    Args:
        text: The text that may contain special tags.
        no_process: When true, ``text`` is returned untouched.

    Returns:
        ``text`` with the special tags expanded.
    """
    if no_process:
        return text

    text = text.replace('<br>', '\n')
    text = text.replace('<c>', '\033[0m')

    def wrap_paragraph(m):
        # A callable avoids replacement-template escaping altogether: the
        # former raw template contained ``\\g<1>``, which the template parser
        # reads as an escaped backslash followed by the literal text
        # ``g<1>``, so the paragraph content was lost.
        return '\n\033[0m' + m.group(1) + '\033[0m\n'

    # Closed paragraphs first, then any <p> that is never closed.
    text = re.sub(r'<p>(.*?)</p>', wrap_paragraph, text, flags=re.DOTALL)
    text = re.sub(r'<p>(.*?)(</p>|$)', wrap_paragraph, text, flags=re.DOTALL)
    # Whatever closing tags remain have no matching opener.
    text = text.replace('</p>', '\033[0m\n')
    return text
def process_color_formatting(text: str, no_process: bool = False) -> str:
    """Apply ``<#RRGGBB>...</>`` color tags as 24-bit ANSI foreground colors.

    Color tags may be nested; ``</>`` closes the innermost open color and is
    ignored when no color is open.  Each run of text located inside at least
    one open tag is wrapped in the escape code of the innermost color and
    followed by a full reset.  Text outside any tag is passed through
    unchanged, including text that merely looks like a hex color
    (``123456``, ``facade``).

    Args:
        text: The text that may contain color tags.
        no_process: When true, ``text`` is returned untouched.

    Returns:
        ``text`` with the color tags removed and the enclosed text colored.
    """
    if no_process:
        return text

    color_tag = re.compile(r'<#([0-9a-fA-F]{6})>')
    # Only the outer group captures.  A nested capture group would make
    # re.split() also emit the bare hex digits as a separate list item, which
    # would then be mistaken for ordinary text.
    tokens = re.split(r'(<#[0-9a-fA-F]{6}>|</>)', text)

    result = []
    color_stack = []
    for token in tokens:
        if not token:
            continue
        opening = color_tag.fullmatch(token)
        if opening:
            color_stack.append(opening.group(1))
        elif token == '</>':
            if color_stack:
                color_stack.pop()
        elif color_stack:
            hex_color = color_stack[-1]
            r, g, b = (int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
            result.append(f'\033[38;2;{r};{g};{b}m{token}\033[0m')
        else:
            result.append(token)

    processed_text = ''.join(result)
    # Dropping a tag can bring two fragments together so that they form a new
    # tag (e.g. "<#ff00" + "00>"); strip those as well.
    return re.sub(r'<#[0-9a-fA-F]{6}>|</>', '', processed_text)