feat: 集成MinIO对象存储服务
All checks were successful
continuous-integration/drone/push Build is passing

- 新增storage_service.py封装MinIO操作
- 修改upload.py使用storage_service上传文件
- 修改course_service.py使用storage_service删除文件
- 适配preview.py支持从MinIO获取文件
- 适配knowledge_analysis_v2.py支持MinIO存储
- 在config.py添加MinIO配置项
- 添加minio依赖到requirements.txt

支持特性:
- 自动降级到本地存储(MinIO不可用时)
- 保持URL格式兼容(/static/uploads/)
- 文件自动缓存到本地(用于预览和分析)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
yuliang_guo
2026-02-03 14:06:22 +08:00
parent fca82e2d44
commit 2f47193059
13 changed files with 1071 additions and 629 deletions

View File

@@ -0,0 +1,422 @@
"""
统一文件存储服务
支持MinIO对象存储兼容本地文件系统
使用方式:
from app.services.storage_service import storage_service
# 上传文件
file_url = await storage_service.upload(file_data, "courses/1/doc.pdf")
# 下载文件
file_data = await storage_service.download("courses/1/doc.pdf")
# 删除文件
await storage_service.delete("courses/1/doc.pdf")
"""
import os
import io
import logging
from pathlib import Path
from typing import Optional, Union, BinaryIO
from datetime import timedelta
from minio import Minio
from minio.error import S3Error
from app.core.config import settings
logger = logging.getLogger(__name__)
class StorageService:
"""
统一文件存储服务
支持两种存储后端:
1. MinIO对象存储推荐生产环境
2. 本地文件系统开发环境或MinIO不可用时的降级方案
"""
def __init__(self):
self._client: Optional[Minio] = None
self._initialized = False
self._use_minio = False
def _ensure_initialized(self):
"""确保服务已初始化"""
if self._initialized:
return
self._initialized = True
# 检查是否启用MinIO
if not settings.MINIO_ENABLED:
logger.info("MinIO未启用使用本地文件存储")
self._use_minio = False
return
try:
self._client = Minio(
settings.MINIO_ENDPOINT,
access_key=settings.MINIO_ACCESS_KEY,
secret_key=settings.MINIO_SECRET_KEY,
secure=settings.MINIO_SECURE,
)
# 验证连接并确保bucket存在
bucket_name = self._get_bucket_name()
if not self._client.bucket_exists(bucket_name):
self._client.make_bucket(bucket_name)
logger.info(f"创建MinIO bucket: {bucket_name}")
# 设置bucket策略为公开读取
self._set_bucket_public_read(bucket_name)
self._use_minio = True
logger.info(f"MinIO存储服务初始化成功 - endpoint: {settings.MINIO_ENDPOINT}, bucket: {bucket_name}")
except Exception as e:
logger.warning(f"MinIO初始化失败降级为本地存储: {e}")
self._use_minio = False
def _get_bucket_name(self) -> str:
"""获取当前租户的bucket名称"""
return f"kpl-{settings.TENANT_CODE}"
def _set_bucket_public_read(self, bucket_name: str):
"""设置bucket为公开读取"""
try:
# 设置匿名读取策略
policy = {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {"AWS": "*"},
"Action": ["s3:GetObject"],
"Resource": [f"arn:aws:s3:::{bucket_name}/*"]
}
]
}
import json
self._client.set_bucket_policy(bucket_name, json.dumps(policy))
except Exception as e:
logger.warning(f"设置bucket公开读取策略失败: {e}")
def _normalize_object_name(self, object_name: str) -> str:
"""标准化对象名称,移除前缀斜杠"""
if object_name.startswith('/'):
object_name = object_name[1:]
if object_name.startswith('static/uploads/'):
object_name = object_name.replace('static/uploads/', '')
return object_name
def _get_file_url(self, object_name: str) -> str:
"""获取文件访问URL"""
object_name = self._normalize_object_name(object_name)
# 统一返回 /static/uploads/ 格式的URL由Nginx代理到MinIO
return f"/static/uploads/{object_name}"
def _get_local_path(self, object_name: str) -> Path:
"""获取本地文件路径"""
object_name = self._normalize_object_name(object_name)
return Path(settings.UPLOAD_PATH) / object_name
async def upload(
self,
file_data: Union[bytes, BinaryIO],
object_name: str,
content_type: Optional[str] = None,
) -> str:
"""
上传文件
Args:
file_data: 文件数据bytes或文件对象
object_name: 对象名称(如 courses/1/doc.pdf
content_type: 文件MIME类型
Returns:
文件访问URL
"""
self._ensure_initialized()
object_name = self._normalize_object_name(object_name)
# 转换为bytes
if isinstance(file_data, bytes):
data = file_data
else:
data = file_data.read()
if self._use_minio:
return await self._upload_to_minio(data, object_name, content_type)
else:
return await self._upload_to_local(data, object_name)
async def _upload_to_minio(
self,
data: bytes,
object_name: str,
content_type: Optional[str] = None,
) -> str:
"""上传到MinIO"""
try:
bucket_name = self._get_bucket_name()
# 自动检测content_type
if not content_type:
content_type = self._guess_content_type(object_name)
self._client.put_object(
bucket_name,
object_name,
io.BytesIO(data),
length=len(data),
content_type=content_type,
)
file_url = self._get_file_url(object_name)
logger.info(f"文件上传到MinIO成功: {object_name} -> {file_url}")
return file_url
except S3Error as e:
logger.error(f"MinIO上传失败: {e}")
# 降级到本地存储
return await self._upload_to_local(data, object_name)
async def _upload_to_local(self, data: bytes, object_name: str) -> str:
"""上传到本地文件系统"""
try:
file_path = self._get_local_path(object_name)
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'wb') as f:
f.write(data)
file_url = self._get_file_url(object_name)
logger.info(f"文件上传到本地成功: {object_name} -> {file_url}")
return file_url
except Exception as e:
logger.error(f"本地文件上传失败: {e}")
raise
async def download(self, object_name: str) -> Optional[bytes]:
"""
下载文件
Args:
object_name: 对象名称
Returns:
文件数据如果文件不存在返回None
"""
self._ensure_initialized()
object_name = self._normalize_object_name(object_name)
if self._use_minio:
return await self._download_from_minio(object_name)
else:
return await self._download_from_local(object_name)
async def _download_from_minio(self, object_name: str) -> Optional[bytes]:
"""从MinIO下载"""
try:
bucket_name = self._get_bucket_name()
response = self._client.get_object(bucket_name, object_name)
data = response.read()
response.close()
response.release_conn()
return data
except S3Error as e:
if e.code == 'NoSuchKey':
logger.warning(f"MinIO文件不存在: {object_name}")
# 尝试从本地读取(兼容迁移过渡期)
return await self._download_from_local(object_name)
logger.error(f"MinIO下载失败: {e}")
return None
async def _download_from_local(self, object_name: str) -> Optional[bytes]:
"""从本地文件系统下载"""
try:
file_path = self._get_local_path(object_name)
if not file_path.exists():
logger.warning(f"本地文件不存在: {file_path}")
return None
with open(file_path, 'rb') as f:
return f.read()
except Exception as e:
logger.error(f"本地文件下载失败: {e}")
return None
async def delete(self, object_name: str) -> bool:
"""
删除文件
Args:
object_name: 对象名称
Returns:
是否删除成功
"""
self._ensure_initialized()
object_name = self._normalize_object_name(object_name)
success = True
# MinIO删除
if self._use_minio:
try:
bucket_name = self._get_bucket_name()
self._client.remove_object(bucket_name, object_name)
logger.info(f"MinIO文件删除成功: {object_name}")
except S3Error as e:
if e.code != 'NoSuchKey':
logger.error(f"MinIO文件删除失败: {e}")
success = False
# 同时删除本地文件(确保彻底清理)
try:
file_path = self._get_local_path(object_name)
if file_path.exists():
os.remove(file_path)
logger.info(f"本地文件删除成功: {file_path}")
except Exception as e:
logger.warning(f"本地文件删除失败: {e}")
return success
async def exists(self, object_name: str) -> bool:
"""
检查文件是否存在
Args:
object_name: 对象名称
Returns:
文件是否存在
"""
self._ensure_initialized()
object_name = self._normalize_object_name(object_name)
if self._use_minio:
try:
bucket_name = self._get_bucket_name()
self._client.stat_object(bucket_name, object_name)
return True
except S3Error:
pass
# 检查本地文件
file_path = self._get_local_path(object_name)
return file_path.exists()
async def get_file_path(self, object_name: str) -> Optional[Path]:
"""
获取文件的本地路径(用于需要本地文件操作的场景)
如果文件在MinIO中会先下载到临时目录
Args:
object_name: 对象名称
Returns:
本地文件路径如果文件不存在返回None
"""
self._ensure_initialized()
object_name = self._normalize_object_name(object_name)
# 先检查本地是否存在
local_path = self._get_local_path(object_name)
if local_path.exists():
return local_path
# 如果MinIO启用尝试下载到本地缓存
if self._use_minio:
try:
data = await self._download_from_minio(object_name)
if data:
# 保存到本地缓存
local_path.parent.mkdir(parents=True, exist_ok=True)
with open(local_path, 'wb') as f:
f.write(data)
logger.info(f"从MinIO下载文件到本地缓存: {object_name}")
return local_path
except Exception as e:
logger.error(f"下载MinIO文件到本地失败: {e}")
return None
def get_presigned_url(self, object_name: str, expires: int = 3600) -> Optional[str]:
"""
获取预签名URL用于直接访问MinIO
Args:
object_name: 对象名称
expires: 过期时间(秒)
Returns:
预签名URL如果MinIO未启用返回None
"""
self._ensure_initialized()
if not self._use_minio:
return None
object_name = self._normalize_object_name(object_name)
try:
bucket_name = self._get_bucket_name()
url = self._client.presigned_get_object(
bucket_name,
object_name,
expires=timedelta(seconds=expires)
)
return url
except S3Error as e:
logger.error(f"获取预签名URL失败: {e}")
return None
def _guess_content_type(self, filename: str) -> str:
"""根据文件名猜测MIME类型"""
ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
content_types = {
'pdf': 'application/pdf',
'doc': 'application/msword',
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'xls': 'application/vnd.ms-excel',
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'ppt': 'application/vnd.ms-powerpoint',
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'txt': 'text/plain',
'md': 'text/markdown',
'html': 'text/html',
'htm': 'text/html',
'csv': 'text/csv',
'json': 'application/json',
'xml': 'application/xml',
'zip': 'application/zip',
'png': 'image/png',
'jpg': 'image/jpeg',
'jpeg': 'image/jpeg',
'gif': 'image/gif',
'webp': 'image/webp',
'mp3': 'audio/mpeg',
'wav': 'audio/wav',
'mp4': 'video/mp4',
'webm': 'video/webm',
}
return content_types.get(ext, 'application/octet-stream')
@property
def is_minio_enabled(self) -> bool:
"""检查MinIO是否启用"""
self._ensure_initialized()
return self._use_minio
# 全局单例
storage_service = StorageService()