Files
012-kaopeilian/backend/app/api/v1/preview.py
yuliang_guo 2f47193059
All checks were successful
continuous-integration/drone/push Build is passing
feat: 集成MinIO对象存储服务
- 新增storage_service.py封装MinIO操作
- 修改upload.py使用storage_service上传文件
- 修改course_service.py使用storage_service删除文件
- 适配preview.py支持从MinIO获取文件
- 适配knowledge_analysis_v2.py支持MinIO存储
- 在config.py添加MinIO配置项
- 添加minio依赖到requirements.txt

支持特性:
- 自动降级到本地存储(MinIO不可用时)
- 保持URL格式兼容(/static/uploads/)
- 文件自动缓存到本地(用于预览和分析)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-03 14:06:22 +08:00

292 lines
9.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
文件预览API
提供课程资料的在线预览功能
支持MinIO和本地文件系统两种存储后端
"""
import logging
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.core.deps import get_db, get_current_user
from app.schemas.base import ResponseModel
from app.core.config import settings
from app.models.user import User
from app.models.course import CourseMaterial
from app.services.document_converter import document_converter
from app.services.storage_service import storage_service
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router on which the preview endpoints in this module are registered
# (via the @router.get decorators further down).
router = APIRouter()
class PreviewType:
    """Preview type constants.

    Supported formats: TXT, Markdown, MDX, PDF, HTML, Excel, Word, CSV,
    VTT and Properties files.
    """

    PDF = "pdf"
    TEXT = "text"
    HTML = "html"
    # Excel workbook that is converted to HTML for previewing.
    EXCEL_HTML = "excel_html"
    VIDEO = "video"
    AUDIO = "audio"
    IMAGE = "image"
    # No preview is available; the file is only offered for download.
    DOWNLOAD = "download"
# Maps a lower-case file extension (with its leading dot) to the preview
# type used for it. Only formats that need no conversion are listed here.
FILE_TYPE_MAPPING = {
    # PDF: previewed directly.
    '.pdf': PreviewType.PDF,
    # Text formats: the file content is displayed as-is.
    **dict.fromkeys(
        ('.txt', '.md', '.mdx', '.csv', '.vtt', '.properties'),
        PreviewType.TEXT,
    ),
    # HTML: previewed inside an iframe.
    **dict.fromkeys(('.html', '.htm'), PreviewType.HTML),
}
def get_preview_type(file_ext: str) -> str:
    """Pick the preview type for a file extension.

    Args:
        file_ext: File extension including the leading dot (e.g. ``.pdf``).
            The lookup is case-insensitive.

    Returns:
        One of the ``PreviewType`` constants.
    """
    ext = file_ext.lower()

    # Formats listed in the mapping table are used without conversion.
    mapped = FILE_TYPE_MAPPING.get(ext)
    if mapped is not None:
        return mapped

    # Excel workbooks are previewed as HTML to avoid pagination problems.
    if ext == '.xlsx' or ext == '.xls':
        return PreviewType.EXCEL_HTML

    # Other documents the converter accepts are previewed as PDF; anything
    # else can only be downloaded.
    if document_converter.is_convertible(ext):
        return PreviewType.PDF
    return PreviewType.DOWNLOAD
async def get_file_path_from_url(file_url: str) -> Optional[Path]:
    """Resolve a stored file URL to a path on the local filesystem.

    The part of the URL after ``/static/uploads/`` is treated as the object
    name and passed to ``storage_service.get_file_path``, which is expected
    to handle the MinIO download/caching when the file is not local.

    Args:
        file_url: File URL such as ``/static/uploads/courses/1/xxx.pdf``.

    Returns:
        The local file path, or ``None`` when the URL is not a string, does
        not start with ``/static/uploads/``, yields an unusable object name,
        or the storage lookup raises.
    """
    prefix = '/static/uploads/'
    if not isinstance(file_url, str) or not file_url.startswith(prefix):
        return None

    # Strip only the leading prefix. str.replace() would also delete the same
    # text if it happened to occur again later in the path.
    object_name = file_url[len(prefix):]

    # Refuse names that are empty, absolute, or contain a parent-directory
    # segment, so the resolved path cannot point outside the storage root.
    if (
        not object_name
        or object_name.startswith('/')
        or '..' in object_name.split('/')
    ):
        return None

    try:
        return await storage_service.get_file_path(object_name)
    except Exception as e:
        # Best effort: callers treat None as "file not available".
        logger.error(f"获取文件路径失败: {e}")
        return None
def _fall_back_to_download(response_data: dict, file_url: str) -> None:
    """Switch the response to download mode, pointing at the original file."""
    response_data["preview_type"] = PreviewType.DOWNLOAD
    response_data["preview_url"] = file_url


async def _require_local_file(file_url: str) -> Path:
    """Return the local path for ``file_url``, raising a 404 if it is missing."""
    file_path = await get_file_path_from_url(file_url)
    if not file_path or not file_path.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="文件不存在"
        )
    return file_path


@router.get("/material/{material_id}", response_model=ResponseModel[dict])
async def get_material_preview(
    material_id: int,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Return the information the client needs to preview a course material.

    Args:
        material_id: ID of the material.
        current_user: Authenticated user (only used for logging here).
        db: Database session.

    Returns:
        A ``ResponseModel`` whose data holds ``preview_type``, ``file_name``,
        ``original_url``, ``file_size`` and ``preview_url``; text previews
        also carry ``content`` and converted documents carry
        ``is_converted``.

    Raises:
        HTTPException: 404 when the material does not exist or the file of a
            document that needs conversion cannot be found; 500 on any
            unexpected error.
    """
    try:
        # Look up the material, ignoring soft-deleted rows.
        stmt = select(CourseMaterial).where(
            CourseMaterial.id == material_id,
            CourseMaterial.is_deleted == False  # noqa: E712 - builds a SQL expression
        )
        result = await db.execute(stmt)
        material = result.scalar_one_or_none()
        if not material:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="资料不存在"
            )

        # TODO: permission check - confirm the current user may access this
        # course's materials, e.g. via the position_courses table and the
        # user's positions.

        # The preview type is decided from the extension of the material name.
        file_ext = Path(material.name).suffix.lower()
        preview_type = get_preview_type(file_ext)
        logger.info(
            f"资料预览请求 - material_id: {material_id}, "
            f"file_type: {file_ext}, preview_type: {preview_type}, "
            f"user_id: {current_user.id}"
        )

        response_data = {
            "preview_type": preview_type,
            "file_name": material.name,
            "original_url": material.file_url,
            "file_size": material.file_size,
        }

        # NOTE(review): the file read and the converter calls below are
        # synchronous and run on the event loop; confirm the converter is
        # thread-safe before moving them to a worker thread.
        if preview_type == PreviewType.TEXT:
            # Text: inline the file content; any problem degrades to download.
            file_path = await get_file_path_from_url(material.file_url)
            if file_path and file_path.exists():
                try:
                    # utf-8-sig reads plain UTF-8 and also drops a leading
                    # BOM, which would otherwise show up in the content.
                    with open(file_path, 'r', encoding='utf-8-sig') as f:
                        response_data["content"] = f.read()
                    response_data["preview_url"] = None
                except Exception as e:
                    logger.error(f"读取文本文件失败: {str(e)}")
                    _fall_back_to_download(response_data, material.file_url)
            else:
                _fall_back_to_download(response_data, material.file_url)
        elif preview_type == PreviewType.EXCEL_HTML:
            # Excel: convert to HTML; the frontend renders it as "html".
            file_path = await _require_local_file(material.file_url)
            converted_url = document_converter.convert_excel_to_html(
                str(file_path),
                material.course_id,
                material.id
            )
            if converted_url:
                response_data["preview_url"] = converted_url
                response_data["preview_type"] = PreviewType.HTML
                response_data["is_converted"] = True
            else:
                logger.warning(f"Excel转HTML失败改为下载模式 - material_id: {material_id}")
                _fall_back_to_download(response_data, material.file_url)
                response_data["is_converted"] = False
        elif preview_type == PreviewType.PDF and document_converter.is_convertible(file_ext):
            # Office documents: convert to PDF first.
            file_path = await _require_local_file(material.file_url)
            converted_url = document_converter.convert_to_pdf(
                str(file_path),
                material.course_id,
                material.id
            )
            if converted_url:
                response_data["preview_url"] = converted_url
                response_data["is_converted"] = True
            else:
                logger.warning(f"文档转换失败,改为下载模式 - material_id: {material_id}")
                _fall_back_to_download(response_data, material.file_url)
                response_data["is_converted"] = False
        else:
            # Everything else is served from its original URL.
            response_data["preview_url"] = material.file_url

        return ResponseModel(data=response_data, message="获取预览信息成功")
    except HTTPException:
        # Deliberate HTTP errors (404s above) pass through untouched.
        raise
    except Exception as e:
        logger.error(f"获取资料预览信息失败: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="获取预览信息失败"
        )
def _probe_libreoffice():
    """Run ``libreoffice --version`` and return ``(installed, version)``.

    ``installed`` is True only when the command exits with code 0;
    ``version`` is the stripped stdout in that case and ``None`` otherwise.
    Any failure to run the command is reported as ``(False, None)``.
    """
    import subprocess

    try:
        result = subprocess.run(
            ['libreoffice', '--version'],
            capture_output=True,
            text=True,
            timeout=5
        )
        installed = result.returncode == 0
        version = result.stdout.strip() if installed else None
        return installed, version
    except Exception:
        return False, None


@router.get("/check-converter", response_model=ResponseModel[dict])
async def check_converter_status(
    current_user: User = Depends(get_current_user),
):
    """Report the status of the document conversion service (for debugging).

    Returns:
        A ``ResponseModel`` whose data holds ``libreoffice_installed``,
        ``libreoffice_version``, ``supported_formats`` and
        ``converted_path``.

    Raises:
        HTTPException: 500 when the status cannot be assembled.
    """
    try:
        import asyncio

        # The probe waits on a subprocess for up to 5 seconds; run it in the
        # default executor so the event loop keeps serving other requests.
        loop = asyncio.get_running_loop()
        libreoffice_installed, libreoffice_version = await loop.run_in_executor(
            None, _probe_libreoffice
        )

        return ResponseModel(
            data={
                "libreoffice_installed": libreoffice_installed,
                "libreoffice_version": libreoffice_version,
                "supported_formats": list(document_converter.SUPPORTED_FORMATS),
                "converted_path": str(document_converter.converted_path),
            },
            message="转换服务状态检查完成"
        )
    except Exception as e:
        logger.error(f"检查转换服务状态失败: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="检查转换服务状态失败"
        )