feat: 初始化考培练系统项目
- 从服务器拉取完整代码 - 按框架规范整理项目结构 - 配置 Drone CI 测试环境部署 - 包含后端(FastAPI)、前端(Vue3)、管理端 技术栈: Vue3 + TypeScript + FastAPI + MySQL
This commit is contained in:
305
backend/app/services/document_converter.py
Normal file
305
backend/app/services/document_converter.py
Normal file
@@ -0,0 +1,305 @@
|
||||
"""
|
||||
文档转换服务
|
||||
使用 LibreOffice 将 Office 文档转换为 PDF
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DocumentConverterService:
|
||||
"""文档转换服务类"""
|
||||
|
||||
# 支持转换的文件格式
|
||||
SUPPORTED_FORMATS = {'.docx', '.doc', '.pptx', '.ppt', '.xlsx', '.xls'}
|
||||
|
||||
# Excel文件格式(需要特殊处理页面布局)
|
||||
EXCEL_FORMATS = {'.xlsx', '.xls'}
|
||||
|
||||
def __init__(self):
|
||||
"""初始化转换服务"""
|
||||
self.converted_path = Path(settings.UPLOAD_PATH) / "converted"
|
||||
self.converted_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def get_converted_file_path(self, course_id: int, material_id: int) -> Path:
|
||||
"""
|
||||
获取转换后的文件路径
|
||||
|
||||
Args:
|
||||
course_id: 课程ID
|
||||
material_id: 资料ID
|
||||
|
||||
Returns:
|
||||
转换后的PDF文件路径
|
||||
"""
|
||||
course_dir = self.converted_path / str(course_id)
|
||||
course_dir.mkdir(parents=True, exist_ok=True)
|
||||
return course_dir / f"{material_id}.pdf"
|
||||
|
||||
def need_convert(self, source_file: Path, converted_file: Path) -> bool:
|
||||
"""
|
||||
判断是否需要重新转换
|
||||
|
||||
Args:
|
||||
source_file: 源文件路径
|
||||
converted_file: 转换后的文件路径
|
||||
|
||||
Returns:
|
||||
是否需要转换
|
||||
"""
|
||||
# 如果转换文件不存在,需要转换
|
||||
if not converted_file.exists():
|
||||
return True
|
||||
|
||||
# 如果源文件不存在,不需要转换
|
||||
if not source_file.exists():
|
||||
return False
|
||||
|
||||
# 如果源文件修改时间晚于转换文件,需要重新转换
|
||||
source_mtime = source_file.stat().st_mtime
|
||||
converted_mtime = converted_file.stat().st_mtime
|
||||
|
||||
return source_mtime > converted_mtime
|
||||
|
||||
def convert_excel_to_html(
|
||||
self,
|
||||
source_file: str,
|
||||
course_id: int,
|
||||
material_id: int
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
将Excel文件转换为HTML(避免PDF分页问题)
|
||||
|
||||
Args:
|
||||
source_file: 源文件路径
|
||||
course_id: 课程ID
|
||||
material_id: 资料ID
|
||||
|
||||
Returns:
|
||||
转换后的HTML文件URL,失败返回None
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
import openpyxl
|
||||
from openpyxl.utils import get_column_letter
|
||||
except ImportError as ie:
|
||||
logger.error(f"Excel转换依赖缺失: openpyxl 未安装。请运行 pip install openpyxl 或重建Docker镜像。错误: {str(ie)}")
|
||||
return None
|
||||
|
||||
source_path = Path(source_file)
|
||||
logger.info(f"开始Excel转HTML: source={source_file}, course_id={course_id}, material_id={material_id}")
|
||||
|
||||
# 获取HTML输出路径
|
||||
course_dir = self.converted_path / str(course_id)
|
||||
course_dir.mkdir(parents=True, exist_ok=True)
|
||||
html_file = course_dir / f"{material_id}.html"
|
||||
|
||||
# 检查缓存
|
||||
if html_file.exists():
|
||||
source_mtime = source_path.stat().st_mtime
|
||||
html_mtime = html_file.stat().st_mtime
|
||||
if source_mtime <= html_mtime:
|
||||
logger.info(f"使用缓存的HTML文件: {html_file}")
|
||||
return f"/static/uploads/converted/{course_id}/{material_id}.html"
|
||||
|
||||
# 读取Excel文件
|
||||
wb = openpyxl.load_workbook(source_file, data_only=True)
|
||||
|
||||
# 构建HTML
|
||||
html_content = '''<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; padding: 20px; background: #f5f5f5; }
|
||||
.sheet-tabs { display: flex; gap: 10px; margin-bottom: 20px; flex-wrap: wrap; }
|
||||
.sheet-tab { padding: 8px 16px; background: #fff; border: 1px solid #ddd; border-radius: 4px; cursor: pointer; }
|
||||
.sheet-tab.active { background: #409eff; color: white; border-color: #409eff; }
|
||||
.sheet-content { display: none; }
|
||||
.sheet-content.active { display: block; }
|
||||
table { border-collapse: collapse; width: 100%; background: white; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
|
||||
th, td { border: 1px solid #e4e7ed; padding: 8px 12px; text-align: left; white-space: nowrap; }
|
||||
th { background: #f5f7fa; font-weight: 600; position: sticky; top: 0; }
|
||||
tr:nth-child(even) { background: #fafafa; }
|
||||
tr:hover { background: #ecf5ff; }
|
||||
.table-wrapper { overflow-x: auto; max-height: 80vh; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
'''
|
||||
|
||||
# 生成sheet选项卡
|
||||
sheet_names = wb.sheetnames
|
||||
html_content += '<div class="sheet-tabs">\n'
|
||||
for i, name in enumerate(sheet_names):
|
||||
active = 'active' if i == 0 else ''
|
||||
html_content += f'<div class="sheet-tab {active}" onclick="showSheet({i})">{name}</div>\n'
|
||||
html_content += '</div>\n'
|
||||
|
||||
# 生成每个sheet的表格
|
||||
for i, sheet_name in enumerate(sheet_names):
|
||||
ws = wb[sheet_name]
|
||||
active = 'active' if i == 0 else ''
|
||||
html_content += f'<div class="sheet-content {active}" id="sheet-{i}">\n'
|
||||
html_content += '<div class="table-wrapper"><table>\n'
|
||||
|
||||
# 获取有效数据范围
|
||||
max_row = ws.max_row or 1
|
||||
max_col = ws.max_column or 1
|
||||
|
||||
for row_idx in range(1, min(max_row + 1, 1001)): # 限制最多1000行
|
||||
html_content += '<tr>'
|
||||
for col_idx in range(1, min(max_col + 1, 51)): # 限制最多50列
|
||||
cell = ws.cell(row=row_idx, column=col_idx)
|
||||
value = cell.value if cell.value is not None else ''
|
||||
tag = 'th' if row_idx == 1 else 'td'
|
||||
# 转义HTML特殊字符
|
||||
if isinstance(value, str):
|
||||
value = value.replace('&', '&').replace('<', '<').replace('>', '>')
|
||||
html_content += f'<{tag}>{value}</{tag}>'
|
||||
html_content += '</tr>\n'
|
||||
|
||||
html_content += '</table></div></div>\n'
|
||||
|
||||
# 添加JavaScript
|
||||
html_content += '''
|
||||
<script>
|
||||
function showSheet(index) {
|
||||
document.querySelectorAll('.sheet-tab').forEach((tab, i) => {
|
||||
tab.classList.toggle('active', i === index);
|
||||
});
|
||||
document.querySelectorAll('.sheet-content').forEach((content, i) => {
|
||||
content.classList.toggle('active', i === index);
|
||||
});
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>'''
|
||||
|
||||
# 写入HTML文件
|
||||
with open(html_file, 'w', encoding='utf-8') as f:
|
||||
f.write(html_content)
|
||||
|
||||
logger.info(f"Excel转HTML成功: {html_file}")
|
||||
return f"/static/uploads/converted/{course_id}/{material_id}.html"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Excel转HTML失败: {source_file}, 错误: {str(e)}", exc_info=True)
|
||||
return None
|
||||
|
||||
def convert_to_pdf(
|
||||
self,
|
||||
source_file: str,
|
||||
course_id: int,
|
||||
material_id: int
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
将Office文档转换为PDF
|
||||
|
||||
Args:
|
||||
source_file: 源文件路径(绝对路径或相对路径)
|
||||
course_id: 课程ID
|
||||
material_id: 资料ID
|
||||
|
||||
Returns:
|
||||
转换后的PDF文件URL,失败返回None
|
||||
"""
|
||||
try:
|
||||
source_path = Path(source_file)
|
||||
|
||||
# 检查源文件是否存在
|
||||
if not source_path.exists():
|
||||
logger.error(f"源文件不存在: {source_file}")
|
||||
return None
|
||||
|
||||
# 检查文件格式是否支持
|
||||
file_ext = source_path.suffix.lower()
|
||||
if file_ext not in self.SUPPORTED_FORMATS:
|
||||
logger.error(f"不支持的文件格式: {file_ext}")
|
||||
return None
|
||||
|
||||
# Excel文件使用HTML预览(避免分页问题)
|
||||
if file_ext in self.EXCEL_FORMATS:
|
||||
return self.convert_excel_to_html(source_file, course_id, material_id)
|
||||
|
||||
# 获取转换后的文件路径
|
||||
converted_file = self.get_converted_file_path(course_id, material_id)
|
||||
|
||||
# 检查是否需要转换
|
||||
if not self.need_convert(source_path, converted_file):
|
||||
logger.info(f"使用缓存的转换文件: {converted_file}")
|
||||
return f"/static/uploads/converted/{course_id}/{material_id}.pdf"
|
||||
|
||||
# 执行转换
|
||||
logger.info(f"开始转换文档: {source_file} -> {converted_file}")
|
||||
|
||||
# 使用 LibreOffice 转换
|
||||
# --headless: 无界面模式
|
||||
# --convert-to pdf: 转换为PDF
|
||||
# --outdir: 输出目录
|
||||
output_dir = converted_file.parent
|
||||
|
||||
cmd = [
|
||||
'libreoffice',
|
||||
'--headless',
|
||||
'--convert-to', 'pdf',
|
||||
'--outdir', str(output_dir),
|
||||
str(source_path)
|
||||
]
|
||||
|
||||
# 执行转换命令(设置超时时间为60秒)
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
check=True
|
||||
)
|
||||
|
||||
# LibreOffice 转换后的文件名是源文件名.pdf
|
||||
# 需要重命名为 material_id.pdf
|
||||
temp_converted = output_dir / f"{source_path.stem}.pdf"
|
||||
if temp_converted.exists() and temp_converted != converted_file:
|
||||
temp_converted.rename(converted_file)
|
||||
|
||||
# 检查转换结果
|
||||
if converted_file.exists():
|
||||
logger.info(f"文档转换成功: {converted_file}")
|
||||
return f"/static/uploads/converted/{course_id}/{material_id}.pdf"
|
||||
else:
|
||||
logger.error(f"文档转换失败,输出文件不存在: {converted_file}")
|
||||
return None
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error(f"文档转换超时: {source_file}")
|
||||
return None
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"文档转换失败: {source_file}, 错误: {e.stderr}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"文档转换异常: {source_file}, 错误: {str(e)}", exc_info=True)
|
||||
return None
|
||||
|
||||
def is_convertible(self, file_ext: str) -> bool:
|
||||
"""
|
||||
判断文件格式是否可转换
|
||||
|
||||
Args:
|
||||
file_ext: 文件扩展名(带点,如 .docx)
|
||||
|
||||
Returns:
|
||||
是否可转换
|
||||
"""
|
||||
return file_ext.lower() in self.SUPPORTED_FORMATS
|
||||
|
||||
|
||||
# Module-level service instance. It is constructed at import time, so
# importing this module also creates the "converted" output directory
# (see DocumentConverterService.__init__).
document_converter = DocumentConverterService()
|
||||
|
||||
Reference in New Issue
Block a user