Files
012-kaopeilian/backend/app/api/v1/endpoints/speech.py
yuliang_guo 406efa6f14
All checks were successful
continuous-integration/drone/push Build is passing
fix: 修复 endpoints 模块导入路径
- progress.py: get_db, get_current_user 从 app.core.deps 导入
- speech.py: 同上
- recommendation.py: 同上
2026-01-30 15:04:01 +08:00

145 lines
4.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
语音识别 API
"""
from typing import Optional
from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException
from pydantic import BaseModel
from app.core.deps import get_db, get_current_user
from app.models.user import User
from app.services.speech_recognition import (
get_speech_recognition_service,
SpeechRecognitionError,
)
router = APIRouter()
class SpeechRecognitionRequest(BaseModel):
"""语音识别请求(文本形式)"""
text: str
session_id: Optional[int] = None
class SpeechRecognitionResponse(BaseModel):
"""语音识别响应"""
code: int = 200
message: str = "识别成功"
data: dict
@router.post("/recognize/text", response_model=SpeechRecognitionResponse)
async def recognize_text(
request: SpeechRecognitionRequest,
current_user: User = Depends(get_current_user),
):
"""
处理前端已识别的语音文本
用于 Web Speech API 识别后的文本传输
"""
service = get_speech_recognition_service("simple")
try:
text = await service.recognize_text(request.text)
return SpeechRecognitionResponse(
code=200,
message="识别成功",
data={
"text": text,
"session_id": request.session_id,
}
)
except SpeechRecognitionError as e:
raise HTTPException(status_code=400, detail=str(e))
@router.post("/recognize/audio", response_model=SpeechRecognitionResponse)
async def recognize_audio(
audio: UploadFile = File(...),
format: str = Form(default="wav"),
sample_rate: int = Form(default=16000),
engine: str = Form(default="aliyun"),
current_user: User = Depends(get_current_user),
):
"""
识别音频文件
支持的音频格式wav, pcm, mp3, ogg, opus
支持的识别引擎aliyun, xunfei
"""
# 读取音频数据
audio_data = await audio.read()
if len(audio_data) == 0:
raise HTTPException(status_code=400, detail="音频文件为空")
if len(audio_data) > 10 * 1024 * 1024: # 10MB 限制
raise HTTPException(status_code=400, detail="音频文件过大,最大支持 10MB")
service = get_speech_recognition_service(engine)
try:
text = await service.recognize_audio(audio_data, format, sample_rate)
return SpeechRecognitionResponse(
code=200,
message="识别成功",
data={
"text": text,
"format": format,
"sample_rate": sample_rate,
"engine": engine,
}
)
except SpeechRecognitionError as e:
raise HTTPException(status_code=400, detail=str(e))
except NotImplementedError as e:
raise HTTPException(status_code=501, detail=str(e))
@router.get("/engines")
async def get_available_engines(
current_user: User = Depends(get_current_user),
):
"""
获取可用的语音识别引擎列表
"""
import os
engines = [
{
"id": "simple",
"name": "浏览器语音识别",
"description": "使用浏览器内置的 Web Speech API 进行语音识别",
"available": True,
},
{
"id": "aliyun",
"name": "阿里云智能语音",
"description": "使用阿里云 NLS 服务进行高精度语音识别",
"available": all([
os.getenv("ALIYUN_ACCESS_KEY_ID"),
os.getenv("ALIYUN_ACCESS_KEY_SECRET"),
os.getenv("ALIYUN_NLS_APP_KEY"),
]),
},
{
"id": "xunfei",
"name": "讯飞语音识别",
"description": "使用讯飞 IAT 服务进行语音识别",
"available": all([
os.getenv("XUNFEI_APP_ID"),
os.getenv("XUNFEI_API_KEY"),
os.getenv("XUNFEI_API_SECRET"),
]),
},
]
return {
"code": 200,
"message": "获取成功",
"data": {
"engines": engines,
"default": "simple",
}
}