feat: 集成MinIO对象存储服务

- 新增storage_service.py封装MinIO操作
- 修改upload.py使用storage_service上传文件
- 修改course_service.py使用storage_service删除文件
- 适配preview.py支持从MinIO获取文件
- 适配knowledge_analysis_v2.py支持MinIO存储
- 在config.py添加MinIO配置项
- 添加minio依赖到requirements.txt

支持特性：
- 自动降级到本地存储（MinIO不可用时）
- 保持URL格式兼容（/static/uploads/）
- 文件自动缓存到本地（用于预览和分析）

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-03 14:06:22 +08:00
parent fca82e2d44
commit 2f47193059
13 changed files with 1071 additions and 629 deletions
--- a/backend/app/core/sanitize.py
+++ b/backend/app/core/sanitize.py
@@ -1,136 +1,136 @@
-"""
-输入清理和XSS防护工具
-"""
-import re
-import html
-from typing import Optional
-
-
-# 危险的HTML标签和属性
-DANGEROUS_TAGS = [
-    'script', 'iframe', 'object', 'embed', 'form', 'input', 
-    'textarea', 'button', 'select', 'style', 'link', 'meta',
-    'base', 'applet', 'frame', 'frameset', 'layer', 'ilayer',
-    'bgsound', 'xml', 'blink', 'marquee'
-]
-
-DANGEROUS_ATTRS = [
-    'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover',
-    'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup',
-    'onload', 'onerror', 'onabort', 'onblur', 'onchange', 'onfocus',
-    'onreset', 'onsubmit', 'onunload', 'onbeforeunload', 'onresize',
-    'onscroll', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave',
-    'ondragover', 'ondragstart', 'ondrop', 'onmousewheel', 'onwheel',
-    'oncopy', 'oncut', 'onpaste', 'oncontextmenu', 'oninput', 'oninvalid',
-    'onsearch', 'onselect', 'ontoggle', 'formaction', 'xlink:href'
-]
-
-
-def sanitize_html(text: Optional[str]) -> Optional[str]:
-    """
-    清理HTML内容，移除危险标签和属性
-    
-    Args:
-        text: 输入文本
-        
-    Returns:
-        清理后的文本
-    """
-    if text is None:
-        return None
-    
-    if not isinstance(text, str):
-        return text
-    
-    result = text
-    
-    # 移除危险标签
-    for tag in DANGEROUS_TAGS:
-        # 移除开标签
-        pattern = re.compile(rf'<{tag}[^>]*>', re.IGNORECASE)
-        result = pattern.sub('', result)
-        # 移除闭标签
-        pattern = re.compile(rf'</{tag}>', re.IGNORECASE)
-        result = pattern.sub('', result)
-    
-    # 移除危险属性
-    for attr in DANGEROUS_ATTRS:
-        pattern = re.compile(rf'\s*{attr}\s*=\s*["\'][^"\']*["\']', re.IGNORECASE)
-        result = pattern.sub('', result)
-        # 也处理没有引号的情况
-        pattern = re.compile(rf'\s*{attr}\s*=\s*\S+', re.IGNORECASE)
-        result = pattern.sub('', result)
-    
-    # 移除 javascript: 协议
-    pattern = re.compile(r'javascript\s*:', re.IGNORECASE)
-    result = pattern.sub('', result)
-    
-    # 移除 data: 协议（可能包含恶意代码）
-    pattern = re.compile(r'data\s*:\s*text/html', re.IGNORECASE)
-    result = pattern.sub('', result)
-    
-    # 移除 vbscript: 协议
-    pattern = re.compile(r'vbscript\s*:', re.IGNORECASE)
-    result = pattern.sub('', result)
-    
-    return result
-
-
-def escape_html(text: Optional[str]) -> Optional[str]:
-    """
-    转义HTML特殊字符
-    
-    Args:
-        text: 输入文本
-        
-    Returns:
-        转义后的文本
-    """
-    if text is None:
-        return None
-    
-    if not isinstance(text, str):
-        return text
-    
-    return html.escape(text, quote=True)
-
-
-def strip_tags(text: Optional[str]) -> Optional[str]:
-    """
-    完全移除所有HTML标签
-    
-    Args:
-        text: 输入文本
-        
-    Returns:
-        移除标签后的纯文本
-    """
-    if text is None:
-        return None
-    
-    if not isinstance(text, str):
-        return text
-    
-    # 移除所有HTML标签
-    clean = re.compile('<[^>]*>')
-    return clean.sub('', text)
-
-
-def sanitize_input(text: Optional[str], strict: bool = False) -> Optional[str]:
-    """
-    清理用户输入
-    
-    Args:
-        text: 输入文本
-        strict: 是否使用严格模式（完全移除所有HTML标签）
-        
-    Returns:
-        清理后的文本
-    """
-    if text is None:
-        return None
-    
-    if strict:
-        return strip_tags(text)
-    else:
-        return sanitize_html(text)
+"""
+输入清理和XSS防护工具
+"""
+import re
+import html
+from typing import Optional
+
+
+# Dangerous HTML tags and attributes (blocklists of names, all lowercase)
+DANGEROUS_TAGS = [
+    'script', 'iframe', 'object', 'embed', 'form', 'input', 
+    'textarea', 'button', 'select', 'style', 'link', 'meta',
+    'base', 'applet', 'frame', 'frameset', 'layer', 'ilayer',
+    'bgsound', 'xml', 'blink', 'marquee'
+]
+
+DANGEROUS_ATTRS = [
+    'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover',
+    'onmousemove', 'onmouseout', 'onkeypress', 'onkeydown', 'onkeyup',
+    'onload', 'onerror', 'onabort', 'onblur', 'onchange', 'onfocus',
+    'onreset', 'onsubmit', 'onunload', 'onbeforeunload', 'onresize',
+    'onscroll', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave',
+    'ondragover', 'ondragstart', 'ondrop', 'onmousewheel', 'onwheel',
+    'oncopy', 'oncut', 'onpaste', 'oncontextmenu', 'oninput', 'oninvalid',
+    'onsearch', 'onselect', 'ontoggle', 'formaction', 'xlink:href'
+]
+
+
+def sanitize_html(text: Optional[str]) -> Optional[str]:
+    """
+    Strip blocklisted tags, attributes and URL schemes from HTML text.
+
+    Args:
+        text: Input text; None and non-str values are returned unchanged.
+
+    Returns:
+        The text after the case-insensitive regex removals below.
+    """
+    if text is None:
+        return None
+    
+    if not isinstance(text, str):
+        return text
+    
+    result = text
+    
+    # Remove dangerous tags. NOTE(review): each removal runs once, so '<scr<script>ipt>' re-forms '<script>' afterwards.
+    for tag in DANGEROUS_TAGS:
+        # Remove opening tags (no name boundary: '<form' also matches '<formula>')
+        pattern = re.compile(rf'<{tag}[^>]*>', re.IGNORECASE)
+        result = pattern.sub('', result)
+        # Remove closing tags (only the exact '</tag>' form is matched)
+        pattern = re.compile(rf'</{tag}>', re.IGNORECASE)
+        result = pattern.sub('', result)
+    
+    # Remove dangerous attributes, quoted values first
+    for attr in DANGEROUS_ATTRS:
+        pattern = re.compile(rf'\s*{attr}\s*=\s*["\'][^"\']*["\']', re.IGNORECASE)
+        result = pattern.sub('', result)
+        # Also handle unquoted values. NOTE(review): \S+ consumes up to the next whitespace, including '>' and following text.
+        pattern = re.compile(rf'\s*{attr}\s*=\s*\S+', re.IGNORECASE)
+        result = pattern.sub('', result)
+    
+    # Remove the javascript: scheme
+    pattern = re.compile(r'javascript\s*:', re.IGNORECASE)
+    result = pattern.sub('', result)
+    
+    # Remove the data: scheme when followed by text/html (may carry malicious code)
+    pattern = re.compile(r'data\s*:\s*text/html', re.IGNORECASE)
+    result = pattern.sub('', result)
+    
+    # Remove the vbscript: scheme
+    pattern = re.compile(r'vbscript\s*:', re.IGNORECASE)
+    result = pattern.sub('', result)
+    
+    return result
+
+
+def escape_html(text: Optional[str]) -> Optional[str]:
+    """
+    Escape HTML special characters using html.escape.
+
+    Args:
+        text: Input text; None and non-str values are returned unchanged.
+
+    Returns:
+        The escaped text; quote characters are escaped too (quote=True).
+    """
+    if text is None:
+        return None
+    
+    if not isinstance(text, str):
+        return text
+    
+    return html.escape(text, quote=True)
+
+
+def strip_tags(text: Optional[str]) -> Optional[str]:
+    """
+    Remove all HTML tags from the text.
+
+    Args:
+        text: Input text; None and non-str values are returned unchanged.
+
+    Returns:
+        The text with every '<...>' span deleted.
+    """
+    if text is None:
+        return None
+    
+    if not isinstance(text, str):
+        return text
+    
+    # Delete every span running from '<' to the next '>'
+    clean = re.compile('<[^>]*>')
+    return clean.sub('', text)
+
+
+def sanitize_input(text: Optional[str], strict: bool = False) -> Optional[str]:
+    """
+    Clean user input, dispatching on the strict flag.
+
+    Args:
+        text: Input text; None is returned unchanged.
+        strict: When true, remove all HTML tags via strip_tags; otherwise use sanitize_html.
+
+    Returns:
+        The cleaned text.
+    """
+    if text is None:
+        return None
+    
+    if strict:
+        return strip_tags(text)
+    else:
+        return sanitize_html(text)