""" 文件预览API 提供课程资料的在线预览功能 支持MinIO和本地文件系统两种存储后端 """ import logging from pathlib import Path from typing import Optional from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select from app.core.deps import get_db, get_current_user from app.schemas.base import ResponseModel from app.core.config import settings from app.models.user import User from app.models.course import CourseMaterial from app.services.document_converter import document_converter from app.services.storage_service import storage_service logger = logging.getLogger(__name__) router = APIRouter() class PreviewType: """预览类型常量 支持格式:TXT、Markdown、MDX、PDF、HTML、Excel、Word、CSV、VTT、Properties """ PDF = "pdf" TEXT = "text" HTML = "html" EXCEL_HTML = "excel_html" # Excel转HTML预览 VIDEO = "video" AUDIO = "audio" IMAGE = "image" DOWNLOAD = "download" # 文件类型到预览类型的映射 FILE_TYPE_MAPPING = { # PDF - 直接预览 '.pdf': PreviewType.PDF, # 文本 - 直接显示内容 '.txt': PreviewType.TEXT, '.md': PreviewType.TEXT, '.mdx': PreviewType.TEXT, '.csv': PreviewType.TEXT, '.vtt': PreviewType.TEXT, '.properties': PreviewType.TEXT, # HTML - 在iframe中预览 '.html': PreviewType.HTML, '.htm': PreviewType.HTML, } def get_preview_type(file_ext: str) -> str: """ 根据文件扩展名获取预览类型 Args: file_ext: 文件扩展名(带点,如 .pdf) Returns: 预览类型 """ file_ext_lower = file_ext.lower() # 直接映射的类型 if file_ext_lower in FILE_TYPE_MAPPING: return FILE_TYPE_MAPPING[file_ext_lower] # Excel文件使用HTML预览(避免分页问题) if file_ext_lower in {'.xlsx', '.xls'}: return PreviewType.EXCEL_HTML # 其他Office文档,需要转换为PDF预览 if document_converter.is_convertible(file_ext_lower): return PreviewType.PDF # 其他类型,只提供下载 return PreviewType.DOWNLOAD async def get_file_path_from_url(file_url: str) -> Optional[Path]: """ 从文件URL获取本地文件路径 支持MinIO和本地文件系统。如果文件在MinIO中,会先下载到本地缓存。 Args: file_url: 文件URL(如 /static/uploads/courses/1/xxx.pdf) Returns: 本地文件路径,如果无效返回None """ try: # 移除 /static/uploads/ 前缀 if file_url.startswith('/static/uploads/'): object_name = file_url.replace('/static/uploads/', '') # 使用storage_service获取文件路径(自动处理MinIO下载) return await storage_service.get_file_path(object_name) return None except Exception as e: logger.error(f"获取文件路径失败: {e}") return None @router.get("/material/{material_id}", response_model=ResponseModel[dict]) async def get_material_preview( material_id: int, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): """ 获取资料预览信息 Args: material_id: 资料ID Returns: 预览信息,包括预览类型、预览URL等 """ try: # 查询资料信息 stmt = select(CourseMaterial).where( CourseMaterial.id == material_id, CourseMaterial.is_deleted == False ) result = await db.execute(stmt) material = result.scalar_one_or_none() if not material: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="资料不存在" ) # TODO: 权限检查 - 确认当前用户是否有权访问该课程的资料 # 可以通过查询 position_courses 表和用户的岗位关系来判断 # 获取文件扩展名 file_ext = Path(material.name).suffix.lower() # 确定预览类型 preview_type = get_preview_type(file_ext) logger.info( f"资料预览请求 - material_id: {material_id}, " f"file_type: {file_ext}, preview_type: {preview_type}, " f"user_id: {current_user.id}" ) # 构建响应数据 response_data = { "preview_type": preview_type, "file_name": material.name, "original_url": material.file_url, "file_size": material.file_size, } # 根据预览类型处理 if preview_type == PreviewType.TEXT: # 文本类型,读取文件内容 file_path = await get_file_path_from_url(material.file_url) if file_path and file_path.exists(): try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() response_data["content"] = content response_data["preview_url"] = None except Exception as e: logger.error(f"读取文本文件失败: {str(e)}") # 读取失败,改为下载模式 response_data["preview_type"] = PreviewType.DOWNLOAD response_data["preview_url"] = material.file_url else: response_data["preview_type"] = PreviewType.DOWNLOAD response_data["preview_url"] = material.file_url elif preview_type == PreviewType.EXCEL_HTML: # Excel文件转换为HTML预览 file_path = await get_file_path_from_url(material.file_url) if file_path and file_path.exists(): converted_url = document_converter.convert_excel_to_html( str(file_path), material.course_id, material.id ) if converted_url: response_data["preview_url"] = converted_url response_data["preview_type"] = "html" # 前端使用html类型渲染 response_data["is_converted"] = True else: logger.warning(f"Excel转HTML失败,改为下载模式 - material_id: {material_id}") response_data["preview_type"] = PreviewType.DOWNLOAD response_data["preview_url"] = material.file_url response_data["is_converted"] = False else: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="文件不存在" ) elif preview_type == PreviewType.PDF and document_converter.is_convertible(file_ext): # Office文档,需要转换为PDF file_path = await get_file_path_from_url(material.file_url) if file_path and file_path.exists(): # 执行转换 converted_url = document_converter.convert_to_pdf( str(file_path), material.course_id, material.id ) if converted_url: response_data["preview_url"] = converted_url response_data["is_converted"] = True else: # 转换失败,改为下载模式 logger.warning(f"文档转换失败,改为下载模式 - material_id: {material_id}") response_data["preview_type"] = PreviewType.DOWNLOAD response_data["preview_url"] = material.file_url response_data["is_converted"] = False else: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="文件不存在" ) else: # 其他类型,直接返回原始URL response_data["preview_url"] = material.file_url return ResponseModel(data=response_data, message="获取预览信息成功") except HTTPException: raise except Exception as e: logger.error(f"获取资料预览信息失败: {str(e)}", exc_info=True) raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="获取预览信息失败" ) @router.get("/check-converter", response_model=ResponseModel[dict]) async def check_converter_status( current_user: User = Depends(get_current_user), ): """ 检查文档转换服务状态(用于调试) Returns: 转换服务状态信息 """ try: import subprocess # 检查 LibreOffice 是否安装 try: result = subprocess.run( ['libreoffice', '--version'], capture_output=True, text=True, timeout=5 ) libreoffice_installed = result.returncode == 0 libreoffice_version = result.stdout.strip() if libreoffice_installed else None except Exception: libreoffice_installed = False libreoffice_version = None return ResponseModel( data={ "libreoffice_installed": libreoffice_installed, "libreoffice_version": libreoffice_version, "supported_formats": list(document_converter.SUPPORTED_FORMATS), "converted_path": str(document_converter.converted_path), }, message="转换服务状态检查完成" ) except Exception as e: logger.error(f"检查转换服务状态失败: {str(e)}", exc_info=True) raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="检查转换服务状态失败" )