""" 语音识别 API """ from typing import Optional from fastapi import APIRouter, Depends, File, UploadFile, Form, HTTPException from pydantic import BaseModel from app.core.deps import get_db, get_current_user from app.models.user import User from app.services.speech_recognition import ( get_speech_recognition_service, SpeechRecognitionError, ) router = APIRouter() class SpeechRecognitionRequest(BaseModel): """语音识别请求(文本形式)""" text: str session_id: Optional[int] = None class SpeechRecognitionResponse(BaseModel): """语音识别响应""" code: int = 200 message: str = "识别成功" data: dict @router.post("/recognize/text", response_model=SpeechRecognitionResponse) async def recognize_text( request: SpeechRecognitionRequest, current_user: User = Depends(get_current_user), ): """ 处理前端已识别的语音文本 用于 Web Speech API 识别后的文本传输 """ service = get_speech_recognition_service("simple") try: text = await service.recognize_text(request.text) return SpeechRecognitionResponse( code=200, message="识别成功", data={ "text": text, "session_id": request.session_id, } ) except SpeechRecognitionError as e: raise HTTPException(status_code=400, detail=str(e)) @router.post("/recognize/audio", response_model=SpeechRecognitionResponse) async def recognize_audio( audio: UploadFile = File(...), format: str = Form(default="wav"), sample_rate: int = Form(default=16000), engine: str = Form(default="aliyun"), current_user: User = Depends(get_current_user), ): """ 识别音频文件 支持的音频格式:wav, pcm, mp3, ogg, opus 支持的识别引擎:aliyun, xunfei """ # 读取音频数据 audio_data = await audio.read() if len(audio_data) == 0: raise HTTPException(status_code=400, detail="音频文件为空") if len(audio_data) > 10 * 1024 * 1024: # 10MB 限制 raise HTTPException(status_code=400, detail="音频文件过大,最大支持 10MB") service = get_speech_recognition_service(engine) try: text = await service.recognize_audio(audio_data, format, sample_rate) return SpeechRecognitionResponse( code=200, message="识别成功", data={ "text": text, "format": format, "sample_rate": sample_rate, "engine": engine, } ) except SpeechRecognitionError as e: raise HTTPException(status_code=400, detail=str(e)) except NotImplementedError as e: raise HTTPException(status_code=501, detail=str(e)) @router.get("/engines") async def get_available_engines( current_user: User = Depends(get_current_user), ): """ 获取可用的语音识别引擎列表 """ import os engines = [ { "id": "simple", "name": "浏览器语音识别", "description": "使用浏览器内置的 Web Speech API 进行语音识别", "available": True, }, { "id": "aliyun", "name": "阿里云智能语音", "description": "使用阿里云 NLS 服务进行高精度语音识别", "available": all([ os.getenv("ALIYUN_ACCESS_KEY_ID"), os.getenv("ALIYUN_ACCESS_KEY_SECRET"), os.getenv("ALIYUN_NLS_APP_KEY"), ]), }, { "id": "xunfei", "name": "讯飞语音识别", "description": "使用讯飞 IAT 服务进行语音识别", "available": all([ os.getenv("XUNFEI_APP_ID"), os.getenv("XUNFEI_API_KEY"), os.getenv("XUNFEI_API_SECRET"), ]), }, ] return { "code": 200, "message": "获取成功", "data": { "engines": engines, "default": "simple", } }