# 012-kaopeilian/backend/app/api/v1/preview.py

"""
文件预览API
提供课程资料的在线预览功能

支持MinIO和本地文件系统两种存储后端
"""
import logging
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select

from app.core.deps import get_db, get_current_user
from app.schemas.base import ResponseModel
from app.core.config import settings
from app.models.user import User
from app.models.course import CourseMaterial
from app.services.document_converter import document_converter
from app.services.storage_service import storage_service

logger = logging.getLogger(__name__)
router = APIRouter()


class PreviewType:
    """Preview type constants returned to the client.

    Supported formats: TXT, Markdown, MDX, PDF, HTML, Excel, Word, CSV, VTT,
    Properties.
    """

    # Document-style previews.
    PDF = "pdf"
    TEXT = "text"
    HTML = "html"
    # Excel workbook that is converted to HTML for previewing.
    EXCEL_HTML = "excel_html"

    # Media previews.
    VIDEO = "video"
    AUDIO = "audio"
    IMAGE = "image"

    # No inline preview; the file is only offered for download.
    DOWNLOAD = "download"


# Maps a lowercase, dot-prefixed file extension to its preview type.
FILE_TYPE_MAPPING = {
    # PDF - previewed directly.
    '.pdf': PreviewType.PDF,

    # Text - the file content is displayed directly.
    **dict.fromkeys(
        ('.txt', '.md', '.mdx', '.csv', '.vtt', '.properties'),
        PreviewType.TEXT,
    ),

    # HTML - previewed inside an iframe.
    **dict.fromkeys(('.html', '.htm'), PreviewType.HTML),
}


def get_preview_type(file_ext: str) -> str:
    """
    Pick the preview type for a file extension.

    Args:
        file_ext: File extension including the leading dot (e.g. ``.pdf``);
            matching is case-insensitive.

    Returns:
        One of the ``PreviewType`` constants.
    """
    ext = file_ext.lower()

    # Extensions with a fixed, direct mapping.
    mapped = FILE_TYPE_MAPPING.get(ext)
    if mapped is not None:
        return mapped

    # Excel workbooks are previewed as HTML (avoids pagination problems).
    if ext in ('.xlsx', '.xls'):
        return PreviewType.EXCEL_HTML

    # Other Office documents are converted to PDF for preview; anything the
    # converter does not accept is download-only.
    if document_converter.is_convertible(ext):
        return PreviewType.PDF
    return PreviewType.DOWNLOAD


async def get_file_path_from_url(file_url: str) -> Optional[Path]:
    """
    Resolve a stored file URL to a local filesystem path.

    Only URLs that start with ``/static/uploads/`` are handled. The remainder
    of the URL is passed to ``storage_service.get_file_path`` as the object
    name. Per the module notes the storage service covers both MinIO and the
    local filesystem; how it fetches/caches MinIO objects is not visible here.

    Args:
        file_url: File URL, e.g. ``/static/uploads/courses/1/xxx.pdf``.

    Returns:
        The path returned by the storage service, or ``None`` when the URL is
        missing, is not a string, does not start with the uploads prefix,
        names no object, or the storage lookup raised.
    """
    prefix = '/static/uploads/'

    # Reject missing / non-string / foreign URLs up front instead of relying on
    # an AttributeError being swallowed below.
    if not isinstance(file_url, str) or not file_url.startswith(prefix):
        return None

    # Strip only the *leading* prefix. str.replace() would also delete any
    # later occurrence of the prefix inside the object name and corrupt it.
    object_name = file_url[len(prefix):]
    if not object_name:
        return None

    # NOTE(review): object_name is forwarded as-is; presumably the storage
    # service rejects path traversal ("..") - confirm.
    try:
        return await storage_service.get_file_path(object_name)
    except Exception as e:
        # Best-effort lookup: callers treat None as "file unavailable".
        logger.error(f"获取文件路径失败: {e}", exc_info=True)
        return None


def _use_download_fallback(response_data: dict, file_url: Optional[str]) -> None:
    """Switch the response to download mode, pointing at the original file URL."""
    response_data["preview_type"] = PreviewType.DOWNLOAD
    response_data["preview_url"] = file_url


def _read_text_content(file_path: Path) -> Optional[str]:
    """Read a text file as UTF-8; return None (after logging) if that fails."""
    try:
        # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, so the
        # BOM does not leak into the content sent to the client.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return f.read()
    except Exception as e:
        # Deliberately best-effort: any read/decode problem degrades the
        # response to download mode instead of failing the request.
        logger.error(f"读取文本文件失败: {str(e)}")
        return None


async def _require_local_file(file_url: str) -> Path:
    """Return the local path for ``file_url`` or raise 404 if it is unavailable."""
    file_path = await get_file_path_from_url(file_url)
    if not file_path or not file_path.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="文件不存在"
        )
    return file_path


@router.get("/material/{material_id}", response_model=ResponseModel[dict])
async def get_material_preview(
    material_id: int,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """
    Return preview information for a course material.

    The preview type is derived from the extension of the material's name:
    text files return their content inline, Excel files are converted to HTML,
    other convertible documents are converted to PDF, and everything else
    returns the original file URL.

    Args:
        material_id: ID of the material.

    Returns:
        ResponseModel whose data holds ``preview_type``, ``file_name``,
        ``original_url``, ``file_size``, ``preview_url`` and, depending on the
        branch, ``content`` (text) or ``is_converted`` (converted documents).

    Raises:
        HTTPException: 404 when the material does not exist, or when the file
            of a document that needs conversion cannot be found; 500 on any
            other unexpected error.
    """
    try:
        # Look up the (non-deleted) material. "== False" is intentional: it
        # builds a SQL expression, not a Python comparison.
        stmt = select(CourseMaterial).where(
            CourseMaterial.id == material_id,
            CourseMaterial.is_deleted == False  # noqa: E712
        )
        result = await db.execute(stmt)
        material = result.scalar_one_or_none()

        if not material:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="资料不存在"
            )

        # TODO: permission check - verify that the current user may access the
        # materials of this course, e.g. via the position_courses table and the
        # user's position relations. Until then any authenticated user can
        # preview any material.

        file_ext = Path(material.name).suffix.lower()
        preview_type = get_preview_type(file_ext)

        logger.info(
            f"资料预览请求 - material_id: {material_id}, "
            f"file_type: {file_ext}, preview_type: {preview_type}, "
            f"user_id: {current_user.id}"
        )

        response_data = {
            "preview_type": preview_type,
            "file_name": material.name,
            "original_url": material.file_url,
            "file_size": material.file_size,
        }

        # NOTE(review): the converter calls below are synchronous; they
        # presumably block the event loop while a conversion runs - confirm
        # whether document_converter is safe to move to a worker thread.
        if preview_type == PreviewType.TEXT:
            # Text: return the content inline. A missing or unreadable file
            # degrades to download mode rather than an error.
            file_path = await get_file_path_from_url(material.file_url)
            content = None
            if file_path and file_path.exists():
                content = _read_text_content(file_path)

            if content is None:
                _use_download_fallback(response_data, material.file_url)
            else:
                response_data["content"] = content
                response_data["preview_url"] = None

        elif preview_type == PreviewType.EXCEL_HTML:
            # Excel: convert to HTML for preview.
            file_path = await _require_local_file(material.file_url)
            converted_url = document_converter.convert_excel_to_html(
                str(file_path),
                material.course_id,
                material.id
            )
            if converted_url:
                response_data["preview_url"] = converted_url
                # The frontend renders the converted file with the html type.
                response_data["preview_type"] = PreviewType.HTML
                response_data["is_converted"] = True
            else:
                logger.warning(f"Excel转HTML失败，改为下载模式 - material_id: {material_id}")
                _use_download_fallback(response_data, material.file_url)
                response_data["is_converted"] = False

        elif preview_type == PreviewType.PDF and document_converter.is_convertible(file_ext):
            # Office document: convert to PDF for preview.
            file_path = await _require_local_file(material.file_url)
            converted_url = document_converter.convert_to_pdf(
                str(file_path),
                material.course_id,
                material.id
            )
            if converted_url:
                response_data["preview_url"] = converted_url
                response_data["is_converted"] = True
            else:
                # Conversion failed: fall back to download mode.
                logger.warning(f"文档转换失败，改为下载模式 - material_id: {material_id}")
                _use_download_fallback(response_data, material.file_url)
                response_data["is_converted"] = False

        else:
            # Every other type: hand back the original URL unchanged.
            response_data["preview_url"] = material.file_url

        return ResponseModel(data=response_data, message="获取预览信息成功")

    except HTTPException:
        # Preserve deliberate HTTP errors (404s) instead of masking them as 500.
        raise
    except Exception as e:
        logger.error(f"获取资料预览信息失败: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="获取预览信息失败"
        )


@router.get("/check-converter", response_model=ResponseModel[dict])
async def check_converter_status(
    current_user: User = Depends(get_current_user),
):
    """
    Report the status of the document conversion service (for debugging).

    Runs ``libreoffice --version`` to detect whether LibreOffice is available
    and returns that together with the converter's supported formats and
    output path.

    Returns:
        ResponseModel whose data holds ``libreoffice_installed``,
        ``libreoffice_version``, ``supported_formats`` and ``converted_path``.

    Raises:
        HTTPException: 500 when building the status response fails.
    """
    try:
        import asyncio
        import functools
        import subprocess

        libreoffice_installed = False
        libreoffice_version = None

        # The probe may wait up to 5 seconds; run it in the default executor so
        # the event loop keeps serving other requests in the meantime.
        probe = functools.partial(
            subprocess.run,
            ['libreoffice', '--version'],
            capture_output=True,
            text=True,
            timeout=5
        )
        try:
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, probe)
        except Exception as e:
            # Missing binary, timeout, etc. are all reported as "not
            # installed", but the reason is logged instead of being dropped.
            logger.warning(f"LibreOffice 检测失败: {e}")
        else:
            libreoffice_installed = result.returncode == 0
            if libreoffice_installed:
                libreoffice_version = result.stdout.strip()

        return ResponseModel(
            data={
                "libreoffice_installed": libreoffice_installed,
                "libreoffice_version": libreoffice_version,
                "supported_formats": list(document_converter.SUPPORTED_FORMATS),
                "converted_path": str(document_converter.converted_path),
            },
            message="转换服务状态检查完成"
        )

    except Exception as e:
        logger.error(f"检查转换服务状态失败: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="检查转换服务状态失败"
        )