feat: 初始化考培练系统项目

- 从服务器拉取完整代码
- 按框架规范整理项目结构
- 配置 Drone CI 测试环境部署
- 包含后端(FastAPI)、前端(Vue3)、管理端

技术栈: Vue3 + TypeScript + FastAPI + MySQL
2026-01-24 19:33:28 +08:00
commit 998211c483
1197 changed files with 228429 additions and 0 deletions
--- a/backend/app/services/document_converter.py
+++ b/backend/app/services/document_converter.py
@@ -0,0 +1,305 @@
+"""
+文档转换服务
+使用 LibreOffice 将 Office 文档转换为 PDF
+"""
+import os
+import logging
+import subprocess
+from pathlib import Path
+from typing import Optional
+from datetime import datetime
+
+from app.core.config import settings
+
+logger = logging.getLogger(__name__)
+
+
+class DocumentConverterService:
+    """文档转换服务类"""
+    
+    # 支持转换的文件格式
+    SUPPORTED_FORMATS = {'.docx', '.doc', '.pptx', '.ppt', '.xlsx', '.xls'}
+    
+    # Excel文件格式（需要特殊处理页面布局）
+    EXCEL_FORMATS = {'.xlsx', '.xls'}
+    
+    def __init__(self):
+        """初始化转换服务"""
+        self.converted_path = Path(settings.UPLOAD_PATH) / "converted"
+        self.converted_path.mkdir(parents=True, exist_ok=True)
+    
+    def get_converted_file_path(self, course_id: int, material_id: int) -> Path:
+        """
+        获取转换后的文件路径
+        
+        Args:
+            course_id: 课程ID
+            material_id: 资料ID
+            
+        Returns:
+            转换后的PDF文件路径
+        """
+        course_dir = self.converted_path / str(course_id)
+        course_dir.mkdir(parents=True, exist_ok=True)
+        return course_dir / f"{material_id}.pdf"
+    
+    def need_convert(self, source_file: Path, converted_file: Path) -> bool:
+        """
+        判断是否需要重新转换
+        
+        Args:
+            source_file: 源文件路径
+            converted_file: 转换后的文件路径
+            
+        Returns:
+            是否需要转换
+        """
+        # 如果转换文件不存在，需要转换
+        if not converted_file.exists():
+            return True
+        
+        # 如果源文件不存在，不需要转换
+        if not source_file.exists():
+            return False
+        
+        # 如果源文件修改时间晚于转换文件，需要重新转换
+        source_mtime = source_file.stat().st_mtime
+        converted_mtime = converted_file.stat().st_mtime
+        
+        return source_mtime > converted_mtime
+    
+    def convert_excel_to_html(
+        self,
+        source_file: str,
+        course_id: int,
+        material_id: int
+    ) -> Optional[str]:
+        """
+        将Excel文件转换为HTML（避免PDF分页问题）
+        
+        Args:
+            source_file: 源文件路径
+            course_id: 课程ID
+            material_id: 资料ID
+            
+        Returns:
+            转换后的HTML文件URL，失败返回None
+        """
+        try:
+            try:
+                import openpyxl
+                from openpyxl.utils import get_column_letter
+            except ImportError as ie:
+                logger.error(f"Excel转换依赖缺失: openpyxl 未安装。请运行 pip install openpyxl 或重建Docker镜像。错误: {str(ie)}")
+                return None
+            
+            source_path = Path(source_file)
+            logger.info(f"开始Excel转HTML: source={source_file}, course_id={course_id}, material_id={material_id}")
+            
+            # 获取HTML输出路径
+            course_dir = self.converted_path / str(course_id)
+            course_dir.mkdir(parents=True, exist_ok=True)
+            html_file = course_dir / f"{material_id}.html"
+            
+            # 检查缓存
+            if html_file.exists():
+                source_mtime = source_path.stat().st_mtime
+                html_mtime = html_file.stat().st_mtime
+                if source_mtime <= html_mtime:
+                    logger.info(f"使用缓存的HTML文件: {html_file}")
+                    return f"/static/uploads/converted/{course_id}/{material_id}.html"
+            
+            # 读取Excel文件
+            wb = openpyxl.load_workbook(source_file, data_only=True)
+            
+            # 构建HTML
+            html_content = '''<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <style>
+        body { font-family: Arial, sans-serif; padding: 20px; background: #f5f5f5; }
+        .sheet-tabs { display: flex; gap: 10px; margin-bottom: 20px; flex-wrap: wrap; }
+        .sheet-tab { padding: 8px 16px; background: #fff; border: 1px solid #ddd; border-radius: 4px; cursor: pointer; }
+        .sheet-tab.active { background: #409eff; color: white; border-color: #409eff; }
+        .sheet-content { display: none; }
+        .sheet-content.active { display: block; }
+        table { border-collapse: collapse; width: 100%; background: white; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
+        th, td { border: 1px solid #e4e7ed; padding: 8px 12px; text-align: left; white-space: nowrap; }
+        th { background: #f5f7fa; font-weight: 600; position: sticky; top: 0; }
+        tr:nth-child(even) { background: #fafafa; }
+        tr:hover { background: #ecf5ff; }
+        .table-wrapper { overflow-x: auto; max-height: 80vh; }
+    </style>
+</head>
+<body>
+'''
+            
+            # 生成sheet选项卡
+            sheet_names = wb.sheetnames
+            html_content += '<div class="sheet-tabs">\n'
+            for i, name in enumerate(sheet_names):
+                active = 'active' if i == 0 else ''
+                html_content += f'<div class="sheet-tab {active}" onclick="showSheet({i})">{name}</div>\n'
+            html_content += '</div>\n'
+            
+            # 生成每个sheet的表格
+            for i, sheet_name in enumerate(sheet_names):
+                ws = wb[sheet_name]
+                active = 'active' if i == 0 else ''
+                html_content += f'<div class="sheet-content {active}" id="sheet-{i}">\n'
+                html_content += '<div class="table-wrapper"><table>\n'
+                
+                # 获取有效数据范围
+                max_row = ws.max_row or 1
+                max_col = ws.max_column or 1
+                
+                for row_idx in range(1, min(max_row + 1, 1001)):  # 限制最多1000行
+                    html_content += '<tr>'
+                    for col_idx in range(1, min(max_col + 1, 51)):  # 限制最多50列
+                        cell = ws.cell(row=row_idx, column=col_idx)
+                        value = cell.value if cell.value is not None else ''
+                        tag = 'th' if row_idx == 1 else 'td'
+                        # 转义HTML特殊字符
+                        if isinstance(value, str):
+                            value = value.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+                        html_content += f'<{tag}>{value}</{tag}>'
+                    html_content += '</tr>\n'
+                
+                html_content += '</table></div></div>\n'
+            
+            # 添加JavaScript
+            html_content += '''
+<script>
+function showSheet(index) {
+    document.querySelectorAll('.sheet-tab').forEach((tab, i) => {
+        tab.classList.toggle('active', i === index);
+    });
+    document.querySelectorAll('.sheet-content').forEach((content, i) => {
+        content.classList.toggle('active', i === index);
+    });
+}
+</script>
+</body>
+</html>'''
+            
+            # 写入HTML文件
+            with open(html_file, 'w', encoding='utf-8') as f:
+                f.write(html_content)
+            
+            logger.info(f"Excel转HTML成功: {html_file}")
+            return f"/static/uploads/converted/{course_id}/{material_id}.html"
+            
+        except Exception as e:
+            logger.error(f"Excel转HTML失败: {source_file}, 错误: {str(e)}", exc_info=True)
+            return None
+    
+    def convert_to_pdf(
+        self, 
+        source_file: str, 
+        course_id: int, 
+        material_id: int
+    ) -> Optional[str]:
+        """
+        将Office文档转换为PDF
+        
+        Args:
+            source_file: 源文件路径（绝对路径或相对路径）
+            course_id: 课程ID
+            material_id: 资料ID
+            
+        Returns:
+            转换后的PDF文件URL，失败返回None
+        """
+        try:
+            source_path = Path(source_file)
+            
+            # 检查源文件是否存在
+            if not source_path.exists():
+                logger.error(f"源文件不存在: {source_file}")
+                return None
+            
+            # 检查文件格式是否支持
+            file_ext = source_path.suffix.lower()
+            if file_ext not in self.SUPPORTED_FORMATS:
+                logger.error(f"不支持的文件格式: {file_ext}")
+                return None
+            
+            # Excel文件使用HTML预览（避免分页问题）
+            if file_ext in self.EXCEL_FORMATS:
+                return self.convert_excel_to_html(source_file, course_id, material_id)
+            
+            # 获取转换后的文件路径
+            converted_file = self.get_converted_file_path(course_id, material_id)
+            
+            # 检查是否需要转换
+            if not self.need_convert(source_path, converted_file):
+                logger.info(f"使用缓存的转换文件: {converted_file}")
+                return f"/static/uploads/converted/{course_id}/{material_id}.pdf"
+            
+            # 执行转换
+            logger.info(f"开始转换文档: {source_file} -> {converted_file}")
+            
+            # 使用 LibreOffice 转换
+            # --headless: 无界面模式
+            # --convert-to pdf: 转换为PDF
+            # --outdir: 输出目录
+            output_dir = converted_file.parent
+            
+            cmd = [
+                'libreoffice',
+                '--headless',
+                '--convert-to', 'pdf',
+                '--outdir', str(output_dir),
+                str(source_path)
+            ]
+            
+            # 执行转换命令（设置超时时间为60秒）
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=60,
+                check=True
+            )
+            
+            # LibreOffice 转换后的文件名是源文件名.pdf
+            # 需要重命名为 material_id.pdf
+            temp_converted = output_dir / f"{source_path.stem}.pdf"
+            if temp_converted.exists() and temp_converted != converted_file:
+                temp_converted.rename(converted_file)
+            
+            # 检查转换结果
+            if converted_file.exists():
+                logger.info(f"文档转换成功: {converted_file}")
+                return f"/static/uploads/converted/{course_id}/{material_id}.pdf"
+            else:
+                logger.error(f"文档转换失败，输出文件不存在: {converted_file}")
+                return None
+                
+        except subprocess.TimeoutExpired:
+            logger.error(f"文档转换超时: {source_file}")
+            return None
+        except subprocess.CalledProcessError as e:
+            logger.error(f"文档转换失败: {source_file}, 错误: {e.stderr}")
+            return None
+        except Exception as e:
+            logger.error(f"文档转换异常: {source_file}, 错误: {str(e)}", exc_info=True)
+            return None
+    
+    def is_convertible(self, file_ext: str) -> bool:
+        """
+        判断文件格式是否可转换
+        
+        Args:
+            file_ext: 文件扩展名（带点，如 .docx）
+            
+        Returns:
+            是否可转换
+        """
+        return file_ext.lower() in self.SUPPORTED_FORMATS
+
+
+# Module-level shared instance. Constructing it runs __init__, which creates the
+# converted-files directory, so importing this module touches the filesystem.
+document_converter = DocumentConverterService()
+