All checks were successful
continuous-integration/drone/push Build is passing
- progress.py: get_db, get_current_user 从 app.core.deps 导入 - speech.py: 同上 - recommendation.py: 同上
145 lines
4.1 KiB
Python
145 lines
4.1 KiB
Python
"""
|
||
语音识别 API
|
||
"""
|
||
from typing import Optional
|
||
from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException
|
||
from pydantic import BaseModel
|
||
|
||
from app.core.deps import get_db, get_current_user
|
||
from app.models.user import User
|
||
from app.services.speech_recognition import (
|
||
get_speech_recognition_service,
|
||
SpeechRecognitionError,
|
||
)
|
||
|
||
# Router on which this module's speech recognition endpoints are registered.
router = APIRouter()
|
||
|
||
|
||
class SpeechRecognitionRequest(BaseModel):
    """Speech recognition request (text form)."""

    # Text submitted by the client.
    text: str
    # Optional session identifier; omitted values default to None.
    session_id: Optional[int] = None
|
||
|
||
|
||
class SpeechRecognitionResponse(BaseModel):
    """Speech recognition response."""

    # Status code carried in the response body; defaults to 200.
    code: int = 200
    # Status message carried in the response body; defaults to the success text.
    message: str = "识别成功"
    # Result payload; required, its keys are chosen by each endpoint.
    data: dict
|
||
|
||
|
||
@router.post("/recognize/text", response_model=SpeechRecognitionResponse)
async def recognize_text(
    request: SpeechRecognitionRequest,
    current_user: User = Depends(get_current_user),
):
    """
    Handle speech text that the frontend has already recognized.

    Intended for transferring text produced by the Web Speech API.
    """
    recognizer = get_speech_recognition_service("simple")

    try:
        recognized = await recognizer.recognize_text(request.text)
        payload = {
            "text": recognized,
            "session_id": request.session_id,
        }
        return SpeechRecognitionResponse(code=200, message="识别成功", data=payload)
    except SpeechRecognitionError as err:
        # Recognition failures are reported to the client as HTTP 400.
        raise HTTPException(status_code=400, detail=str(err))
|
||
|
||
|
||
@router.post("/recognize/audio", response_model=SpeechRecognitionResponse)
async def recognize_audio(
    audio: UploadFile = File(...),
    format: str = Form(default="wav"),
    sample_rate: int = Form(default=16000),
    engine: str = Form(default="aliyun"),
    current_user: User = Depends(get_current_user),
):
    """
    Recognize speech from an uploaded audio file.

    Supported audio formats: wav, pcm, mp3, ogg, opus
    Supported recognition engines: aliyun, xunfei

    Args:
        audio: Uploaded audio file.
        format: Audio format name passed through to the recognition service.
        sample_rate: Sample rate passed through to the recognition service.
        engine: Identifier used to select the recognition service.
        current_user: Authenticated user resolved by the dependency.

    Returns:
        SpeechRecognitionResponse whose data holds the recognized text plus
        the format, sample rate and engine that were used.

    Raises:
        HTTPException: 400 if the upload is empty, larger than 10MB, or the
            service raises SpeechRecognitionError; 501 if the service raises
            NotImplementedError.
    """
    max_audio_bytes = 10 * 1024 * 1024  # 10MB limit

    # Read at most one byte past the limit: that is enough to detect an
    # oversized upload without copying an arbitrarily large file into memory.
    audio_data = await audio.read(max_audio_bytes + 1)

    if not audio_data:
        raise HTTPException(status_code=400, detail="音频文件为空")

    if len(audio_data) > max_audio_bytes:
        raise HTTPException(status_code=400, detail="音频文件过大,最大支持 10MB")

    service = get_speech_recognition_service(engine)

    try:
        text = await service.recognize_audio(audio_data, format, sample_rate)
        return SpeechRecognitionResponse(
            code=200,
            message="识别成功",
            data={
                "text": text,
                "format": format,
                "sample_rate": sample_rate,
                "engine": engine,
            },
        )
    except SpeechRecognitionError as e:
        # Keep the original exception as the cause for server-side tracebacks.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except NotImplementedError as e:
        raise HTTPException(status_code=501, detail=str(e)) from e
|
||
|
||
|
||
@router.get("/engines")
async def get_available_engines(
    current_user: User = Depends(get_current_user),
):
    """
    Return the list of speech recognition engines and their availability.
    """
    import os

    def _all_set(*names):
        # True only when every named environment variable has a non-empty value.
        return all(os.getenv(name) for name in names)

    simple_engine = {
        "id": "simple",
        "name": "浏览器语音识别",
        "description": "使用浏览器内置的 Web Speech API 进行语音识别",
        "available": True,
    }
    aliyun_engine = {
        "id": "aliyun",
        "name": "阿里云智能语音",
        "description": "使用阿里云 NLS 服务进行高精度语音识别",
        "available": _all_set(
            "ALIYUN_ACCESS_KEY_ID",
            "ALIYUN_ACCESS_KEY_SECRET",
            "ALIYUN_NLS_APP_KEY",
        ),
    }
    xunfei_engine = {
        "id": "xunfei",
        "name": "讯飞语音识别",
        "description": "使用讯飞 IAT 服务进行语音识别",
        "available": _all_set(
            "XUNFEI_APP_ID",
            "XUNFEI_API_KEY",
            "XUNFEI_API_SECRET",
        ),
    }

    listing = {
        "engines": [simple_engine, aliyun_engine, xunfei_engine],
        "default": "simple",
    }
    return {"code": 200, "message": "获取成功", "data": listing}
|