Files
012-kaopeilian/backend/app/core/sanitize.py
yuliang_guo d59a4355a5
All checks were successful
continuous-integration/drone/push Build is passing
fix: 修复安全问题 - 登录失败返回401 + XSS过滤
- 登录失败返回 HTTP 401 而非 200
- 添加 XSS 输入过滤工具函数
- 课程名称和描述字段添加 XSS 过滤验证器
2026-01-31 10:39:07 +08:00

137 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
输入清理和XSS防护工具
"""
import re
import html
from typing import Optional
# Dangerous HTML tags and attributes.
# Tag names are matched case-insensitively and by prefix (see sanitize_html).
DANGEROUS_TAGS = [
    'script', 'iframe', 'object', 'embed', 'form', 'input',
    'textarea', 'button', 'select', 'style', 'link', 'meta',
    'base', 'applet', 'frame', 'frameset', 'layer', 'ilayer',
    'bgsound', 'xml', 'blink', 'marquee'
]

# Attribute names whose whole ``name=value`` pair is removed.
DANGEROUS_ATTRS = [
    'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover',
    'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup',
    'onload', 'onerror', 'onabort', 'onblur', 'onchange', 'onfocus',
    'onreset', 'onsubmit', 'onunload', 'onbeforeunload', 'onresize',
    'onscroll', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave',
    'ondragover', 'ondragstart', 'ondrop', 'onmousewheel', 'onwheel',
    'oncopy', 'oncut', 'onpaste', 'oncontextmenu', 'oninput', 'oninvalid',
    'onsearch', 'onselect', 'ontoggle', 'formaction', 'xlink:href'
]

# One attribute value: a properly paired double- or single-quoted string, or
# an unquoted run that stops at whitespace or at the tag's closing '>'.
_ATTR_VALUE_PATTERN = r"""(?:"[^"]*"|'[^']*'|[^\s>]*)"""

# Script-capable URL schemes: javascript:, vbscript: and data:text/html.
_DANGEROUS_SCHEME_RE = re.compile(
    r'(?:javascript|vbscript)\s*:|data\s*:\s*text/html',
    re.IGNORECASE,
)


def _build_sanitize_patterns():
    """Return the compiled removal patterns, in the order they are applied."""
    # Built on each call so that runtime changes to the module-level lists are
    # still honoured; the ``re`` module caches the compiled result.
    patterns = []
    if DANGEROUS_TAGS:  # an empty alternation would match every tag
        tags = '|'.join(re.escape(tag) for tag in DANGEROUS_TAGS)
        # Opening and closing tags, with any attributes or trailing
        # whitespace (e.g. "</script >").
        patterns.append(
            re.compile(rf'</?(?:{tags})[^>]*>', re.IGNORECASE)
        )
    if DANGEROUS_ATTRS:
        attrs = '|'.join(re.escape(attr) for attr in DANGEROUS_ATTRS)
        patterns.append(
            re.compile(
                r'\s*(?:' + attrs + r')\s*=\s*' + _ATTR_VALUE_PATTERN,
                re.IGNORECASE,
            )
        )
    patterns.append(_DANGEROUS_SCHEME_RE)
    return patterns


def sanitize_html(text: Optional[str]) -> Optional[str]:
    """
    Remove dangerous tags, attributes and URL schemes from HTML text.

    The removal passes are repeated until the text stops changing, so a
    payload that only becomes dangerous after one removal (for example
    ``<scr<script>ipt>`` or ``javajavascript:script:``) is stripped as well.

    NOTE(review): this is a blacklist filter. It does not decode HTML
    entities and does not handle tags left unterminated at the end of the
    input, so output should still be escaped when rendered.

    Args:
        text: Input text. ``None`` and non-string values are returned as-is.
    Returns:
        The text with every match of the dangerous patterns removed.
    """
    if text is None:
        return None
    if not isinstance(text, str):
        return text

    patterns = _build_sanitize_patterns()
    result = text
    while True:
        cleaned = result
        for pattern in patterns:
            cleaned = pattern.sub('', cleaned)
        # Every changing pass shortens the string, so this loop terminates.
        if cleaned == result:
            return result
        result = cleaned
def escape_html(text: Optional[str]) -> Optional[str]:
    """
    Escape the HTML special characters in a string.

    Args:
        text: Input text. ``None`` and non-string values are returned as-is.
    Returns:
        The text with ``&``, ``<``, ``>`` and both quote characters replaced
        by HTML entities.
    """
    # Only real strings are escaped; None and any other type pass through.
    if isinstance(text, str):
        return html.escape(text, quote=True)
    return text
def strip_tags(text: Optional[str]) -> Optional[str]:
    """
    Remove every HTML tag from a string, leaving only the text between tags.

    Args:
        text: Input text. ``None`` and non-string values are returned as-is.
    Returns:
        The text with every ``<...>`` span deleted.
    """
    # None and non-string values pass through untouched.
    if not isinstance(text, str):
        return text
    # A tag is anything from '<' up to the next '>'.
    return re.sub('<[^>]*>', '', text)
def sanitize_input(text: Optional[str], strict: bool = False) -> Optional[str]:
    """
    Clean user-supplied input.

    Args:
        text: Input text. ``None`` is returned unchanged.
        strict: When true, remove every HTML tag via ``strip_tags``;
            otherwise remove only dangerous content via ``sanitize_html``.
    Returns:
        The cleaned text.
    """
    if text is None:
        return None
    # Pick the cleaner for the requested mode, then apply it.
    cleaner = strip_tags if strict else sanitize_html
    return cleaner(text)