All checks were successful
continuous-integration/drone/push Build is passing
- 新增storage_service.py封装MinIO操作 - 修改upload.py使用storage_service上传文件 - 修改course_service.py使用storage_service删除文件 - 适配preview.py支持从MinIO获取文件 - 适配knowledge_analysis_v2.py支持MinIO存储 - 在config.py添加MinIO配置项 - 添加minio依赖到requirements.txt 支持特性: - 自动降级到本地存储(MinIO不可用时) - 保持URL格式兼容(/static/uploads/) - 文件自动缓存到本地(用于预览和分析) Co-authored-by: Cursor <cursoragent@cursor.com>
423 lines
14 KiB
Python
423 lines
14 KiB
Python
"""
|
||
统一文件存储服务
|
||
支持MinIO对象存储,兼容本地文件系统
|
||
|
||
使用方式:
|
||
from app.services.storage_service import storage_service
|
||
|
||
# 上传文件
|
||
file_url = await storage_service.upload(file_data, "courses/1/doc.pdf")
|
||
|
||
# 下载文件
|
||
file_data = await storage_service.download("courses/1/doc.pdf")
|
||
|
||
# 删除文件
|
||
await storage_service.delete("courses/1/doc.pdf")
|
||
"""
|
||
|
||
import os
|
||
import io
|
||
import logging
|
||
from pathlib import Path
|
||
from typing import Optional, Union, BinaryIO
|
||
from datetime import timedelta
|
||
|
||
from minio import Minio
|
||
from minio.error import S3Error
|
||
|
||
from app.core.config import settings
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class StorageService:
|
||
"""
|
||
统一文件存储服务
|
||
|
||
支持两种存储后端:
|
||
1. MinIO对象存储(推荐,生产环境)
|
||
2. 本地文件系统(开发环境或MinIO不可用时的降级方案)
|
||
"""
|
||
|
||
def __init__(self):
|
||
self._client: Optional[Minio] = None
|
||
self._initialized = False
|
||
self._use_minio = False
|
||
|
||
def _ensure_initialized(self):
|
||
"""确保服务已初始化"""
|
||
if self._initialized:
|
||
return
|
||
|
||
self._initialized = True
|
||
|
||
# 检查是否启用MinIO
|
||
if not settings.MINIO_ENABLED:
|
||
logger.info("MinIO未启用,使用本地文件存储")
|
||
self._use_minio = False
|
||
return
|
||
|
||
try:
|
||
self._client = Minio(
|
||
settings.MINIO_ENDPOINT,
|
||
access_key=settings.MINIO_ACCESS_KEY,
|
||
secret_key=settings.MINIO_SECRET_KEY,
|
||
secure=settings.MINIO_SECURE,
|
||
)
|
||
|
||
# 验证连接并确保bucket存在
|
||
bucket_name = self._get_bucket_name()
|
||
if not self._client.bucket_exists(bucket_name):
|
||
self._client.make_bucket(bucket_name)
|
||
logger.info(f"创建MinIO bucket: {bucket_name}")
|
||
|
||
# 设置bucket策略为公开读取
|
||
self._set_bucket_public_read(bucket_name)
|
||
|
||
self._use_minio = True
|
||
logger.info(f"MinIO存储服务初始化成功 - endpoint: {settings.MINIO_ENDPOINT}, bucket: {bucket_name}")
|
||
|
||
except Exception as e:
|
||
logger.warning(f"MinIO初始化失败,降级为本地存储: {e}")
|
||
self._use_minio = False
|
||
|
||
def _get_bucket_name(self) -> str:
|
||
"""获取当前租户的bucket名称"""
|
||
return f"kpl-{settings.TENANT_CODE}"
|
||
|
||
def _set_bucket_public_read(self, bucket_name: str):
|
||
"""设置bucket为公开读取"""
|
||
try:
|
||
# 设置匿名读取策略
|
||
policy = {
|
||
"Version": "2012-10-17",
|
||
"Statement": [
|
||
{
|
||
"Effect": "Allow",
|
||
"Principal": {"AWS": "*"},
|
||
"Action": ["s3:GetObject"],
|
||
"Resource": [f"arn:aws:s3:::{bucket_name}/*"]
|
||
}
|
||
]
|
||
}
|
||
import json
|
||
self._client.set_bucket_policy(bucket_name, json.dumps(policy))
|
||
except Exception as e:
|
||
logger.warning(f"设置bucket公开读取策略失败: {e}")
|
||
|
||
def _normalize_object_name(self, object_name: str) -> str:
|
||
"""标准化对象名称,移除前缀斜杠"""
|
||
if object_name.startswith('/'):
|
||
object_name = object_name[1:]
|
||
if object_name.startswith('static/uploads/'):
|
||
object_name = object_name.replace('static/uploads/', '')
|
||
return object_name
|
||
|
||
def _get_file_url(self, object_name: str) -> str:
|
||
"""获取文件访问URL"""
|
||
object_name = self._normalize_object_name(object_name)
|
||
# 统一返回 /static/uploads/ 格式的URL,由Nginx代理到MinIO
|
||
return f"/static/uploads/{object_name}"
|
||
|
||
def _get_local_path(self, object_name: str) -> Path:
|
||
"""获取本地文件路径"""
|
||
object_name = self._normalize_object_name(object_name)
|
||
return Path(settings.UPLOAD_PATH) / object_name
|
||
|
||
async def upload(
|
||
self,
|
||
file_data: Union[bytes, BinaryIO],
|
||
object_name: str,
|
||
content_type: Optional[str] = None,
|
||
) -> str:
|
||
"""
|
||
上传文件
|
||
|
||
Args:
|
||
file_data: 文件数据(bytes或文件对象)
|
||
object_name: 对象名称(如 courses/1/doc.pdf)
|
||
content_type: 文件MIME类型
|
||
|
||
Returns:
|
||
文件访问URL
|
||
"""
|
||
self._ensure_initialized()
|
||
object_name = self._normalize_object_name(object_name)
|
||
|
||
# 转换为bytes
|
||
if isinstance(file_data, bytes):
|
||
data = file_data
|
||
else:
|
||
data = file_data.read()
|
||
|
||
if self._use_minio:
|
||
return await self._upload_to_minio(data, object_name, content_type)
|
||
else:
|
||
return await self._upload_to_local(data, object_name)
|
||
|
||
async def _upload_to_minio(
|
||
self,
|
||
data: bytes,
|
||
object_name: str,
|
||
content_type: Optional[str] = None,
|
||
) -> str:
|
||
"""上传到MinIO"""
|
||
try:
|
||
bucket_name = self._get_bucket_name()
|
||
|
||
# 自动检测content_type
|
||
if not content_type:
|
||
content_type = self._guess_content_type(object_name)
|
||
|
||
self._client.put_object(
|
||
bucket_name,
|
||
object_name,
|
||
io.BytesIO(data),
|
||
length=len(data),
|
||
content_type=content_type,
|
||
)
|
||
|
||
file_url = self._get_file_url(object_name)
|
||
logger.info(f"文件上传到MinIO成功: {object_name} -> {file_url}")
|
||
return file_url
|
||
|
||
except S3Error as e:
|
||
logger.error(f"MinIO上传失败: {e}")
|
||
# 降级到本地存储
|
||
return await self._upload_to_local(data, object_name)
|
||
|
||
async def _upload_to_local(self, data: bytes, object_name: str) -> str:
|
||
"""上传到本地文件系统"""
|
||
try:
|
||
file_path = self._get_local_path(object_name)
|
||
file_path.parent.mkdir(parents=True, exist_ok=True)
|
||
|
||
with open(file_path, 'wb') as f:
|
||
f.write(data)
|
||
|
||
file_url = self._get_file_url(object_name)
|
||
logger.info(f"文件上传到本地成功: {object_name} -> {file_url}")
|
||
return file_url
|
||
|
||
except Exception as e:
|
||
logger.error(f"本地文件上传失败: {e}")
|
||
raise
|
||
|
||
async def download(self, object_name: str) -> Optional[bytes]:
|
||
"""
|
||
下载文件
|
||
|
||
Args:
|
||
object_name: 对象名称
|
||
|
||
Returns:
|
||
文件数据,如果文件不存在返回None
|
||
"""
|
||
self._ensure_initialized()
|
||
object_name = self._normalize_object_name(object_name)
|
||
|
||
if self._use_minio:
|
||
return await self._download_from_minio(object_name)
|
||
else:
|
||
return await self._download_from_local(object_name)
|
||
|
||
async def _download_from_minio(self, object_name: str) -> Optional[bytes]:
|
||
"""从MinIO下载"""
|
||
try:
|
||
bucket_name = self._get_bucket_name()
|
||
response = self._client.get_object(bucket_name, object_name)
|
||
data = response.read()
|
||
response.close()
|
||
response.release_conn()
|
||
return data
|
||
except S3Error as e:
|
||
if e.code == 'NoSuchKey':
|
||
logger.warning(f"MinIO文件不存在: {object_name}")
|
||
# 尝试从本地读取(兼容迁移过渡期)
|
||
return await self._download_from_local(object_name)
|
||
logger.error(f"MinIO下载失败: {e}")
|
||
return None
|
||
|
||
async def _download_from_local(self, object_name: str) -> Optional[bytes]:
|
||
"""从本地文件系统下载"""
|
||
try:
|
||
file_path = self._get_local_path(object_name)
|
||
if not file_path.exists():
|
||
logger.warning(f"本地文件不存在: {file_path}")
|
||
return None
|
||
|
||
with open(file_path, 'rb') as f:
|
||
return f.read()
|
||
except Exception as e:
|
||
logger.error(f"本地文件下载失败: {e}")
|
||
return None
|
||
|
||
async def delete(self, object_name: str) -> bool:
|
||
"""
|
||
删除文件
|
||
|
||
Args:
|
||
object_name: 对象名称
|
||
|
||
Returns:
|
||
是否删除成功
|
||
"""
|
||
self._ensure_initialized()
|
||
object_name = self._normalize_object_name(object_name)
|
||
|
||
success = True
|
||
|
||
# MinIO删除
|
||
if self._use_minio:
|
||
try:
|
||
bucket_name = self._get_bucket_name()
|
||
self._client.remove_object(bucket_name, object_name)
|
||
logger.info(f"MinIO文件删除成功: {object_name}")
|
||
except S3Error as e:
|
||
if e.code != 'NoSuchKey':
|
||
logger.error(f"MinIO文件删除失败: {e}")
|
||
success = False
|
||
|
||
# 同时删除本地文件(确保彻底清理)
|
||
try:
|
||
file_path = self._get_local_path(object_name)
|
||
if file_path.exists():
|
||
os.remove(file_path)
|
||
logger.info(f"本地文件删除成功: {file_path}")
|
||
except Exception as e:
|
||
logger.warning(f"本地文件删除失败: {e}")
|
||
|
||
return success
|
||
|
||
async def exists(self, object_name: str) -> bool:
|
||
"""
|
||
检查文件是否存在
|
||
|
||
Args:
|
||
object_name: 对象名称
|
||
|
||
Returns:
|
||
文件是否存在
|
||
"""
|
||
self._ensure_initialized()
|
||
object_name = self._normalize_object_name(object_name)
|
||
|
||
if self._use_minio:
|
||
try:
|
||
bucket_name = self._get_bucket_name()
|
||
self._client.stat_object(bucket_name, object_name)
|
||
return True
|
||
except S3Error:
|
||
pass
|
||
|
||
# 检查本地文件
|
||
file_path = self._get_local_path(object_name)
|
||
return file_path.exists()
|
||
|
||
async def get_file_path(self, object_name: str) -> Optional[Path]:
|
||
"""
|
||
获取文件的本地路径(用于需要本地文件操作的场景)
|
||
|
||
如果文件在MinIO中,会先下载到临时目录
|
||
|
||
Args:
|
||
object_name: 对象名称
|
||
|
||
Returns:
|
||
本地文件路径,如果文件不存在返回None
|
||
"""
|
||
self._ensure_initialized()
|
||
object_name = self._normalize_object_name(object_name)
|
||
|
||
# 先检查本地是否存在
|
||
local_path = self._get_local_path(object_name)
|
||
if local_path.exists():
|
||
return local_path
|
||
|
||
# 如果MinIO启用,尝试下载到本地缓存
|
||
if self._use_minio:
|
||
try:
|
||
data = await self._download_from_minio(object_name)
|
||
if data:
|
||
# 保存到本地缓存
|
||
local_path.parent.mkdir(parents=True, exist_ok=True)
|
||
with open(local_path, 'wb') as f:
|
||
f.write(data)
|
||
logger.info(f"从MinIO下载文件到本地缓存: {object_name}")
|
||
return local_path
|
||
except Exception as e:
|
||
logger.error(f"下载MinIO文件到本地失败: {e}")
|
||
|
||
return None
|
||
|
||
def get_presigned_url(self, object_name: str, expires: int = 3600) -> Optional[str]:
|
||
"""
|
||
获取预签名URL(用于直接访问MinIO)
|
||
|
||
Args:
|
||
object_name: 对象名称
|
||
expires: 过期时间(秒)
|
||
|
||
Returns:
|
||
预签名URL,如果MinIO未启用返回None
|
||
"""
|
||
self._ensure_initialized()
|
||
|
||
if not self._use_minio:
|
||
return None
|
||
|
||
object_name = self._normalize_object_name(object_name)
|
||
|
||
try:
|
||
bucket_name = self._get_bucket_name()
|
||
url = self._client.presigned_get_object(
|
||
bucket_name,
|
||
object_name,
|
||
expires=timedelta(seconds=expires)
|
||
)
|
||
return url
|
||
except S3Error as e:
|
||
logger.error(f"获取预签名URL失败: {e}")
|
||
return None
|
||
|
||
def _guess_content_type(self, filename: str) -> str:
|
||
"""根据文件名猜测MIME类型"""
|
||
ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
|
||
content_types = {
|
||
'pdf': 'application/pdf',
|
||
'doc': 'application/msword',
|
||
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||
'xls': 'application/vnd.ms-excel',
|
||
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||
'ppt': 'application/vnd.ms-powerpoint',
|
||
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||
'txt': 'text/plain',
|
||
'md': 'text/markdown',
|
||
'html': 'text/html',
|
||
'htm': 'text/html',
|
||
'csv': 'text/csv',
|
||
'json': 'application/json',
|
||
'xml': 'application/xml',
|
||
'zip': 'application/zip',
|
||
'png': 'image/png',
|
||
'jpg': 'image/jpeg',
|
||
'jpeg': 'image/jpeg',
|
||
'gif': 'image/gif',
|
||
'webp': 'image/webp',
|
||
'mp3': 'audio/mpeg',
|
||
'wav': 'audio/wav',
|
||
'mp4': 'video/mp4',
|
||
'webm': 'video/webm',
|
||
}
|
||
return content_types.get(ext, 'application/octet-stream')
|
||
|
||
@property
|
||
def is_minio_enabled(self) -> bool:
|
||
"""检查MinIO是否启用"""
|
||
self._ensure_initialized()
|
||
return self._use_minio
|
||
|
||
|
||
# 全局单例
|
||
storage_service = StorageService()
|