feat: 初始化考培练系统项目
- 从服务器拉取完整代码 - 按框架规范整理项目结构 - 配置 Drone CI 测试环境部署 - 包含后端(FastAPI)、前端(Vue3)、管理端 技术栈: Vue3 + TypeScript + FastAPI + MySQL
This commit is contained in:
151
backend/app/services/ai/__init__.py
Normal file
151
backend/app/services/ai/__init__.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""
|
||||
AI 服务模块
|
||||
|
||||
包含:
|
||||
- AIService: 本地 AI 服务(支持 4sapi + OpenRouter 降级)
|
||||
- LLM JSON Parser: 大模型 JSON 输出解析器
|
||||
- KnowledgeAnalysisServiceV2: 知识点分析服务(Python 原生实现)
|
||||
- ExamGeneratorService: 试题生成服务(Python 原生实现)
|
||||
- CourseChatServiceV2: 课程对话服务(Python 原生实现)
|
||||
- PracticeSceneService: 陪练场景准备服务(Python 原生实现)
|
||||
- AbilityAnalysisService: 智能工牌能力分析服务(Python 原生实现)
|
||||
- AnswerJudgeService: 答案判断服务(Python 原生实现)
|
||||
- PracticeAnalysisService: 陪练分析报告服务(Python 原生实现)
|
||||
"""
|
||||
|
||||
from .ai_service import (
|
||||
AIService,
|
||||
AIResponse,
|
||||
AIConfig,
|
||||
AIServiceError,
|
||||
AIProvider,
|
||||
DEFAULT_MODEL,
|
||||
MODEL_ANALYSIS,
|
||||
MODEL_CREATIVE,
|
||||
MODEL_IMAGE_GEN,
|
||||
quick_chat,
|
||||
)
|
||||
|
||||
from .llm_json_parser import (
|
||||
parse_llm_json,
|
||||
parse_with_fallback,
|
||||
safe_json_loads,
|
||||
clean_llm_output,
|
||||
diagnose_json_error,
|
||||
validate_json_schema,
|
||||
ParseResult,
|
||||
JSONParseError,
|
||||
JSONUnrecoverableError,
|
||||
)
|
||||
|
||||
from .knowledge_analysis_v2 import (
|
||||
KnowledgeAnalysisServiceV2,
|
||||
knowledge_analysis_service_v2,
|
||||
)
|
||||
|
||||
from .exam_generator_service import (
|
||||
ExamGeneratorService,
|
||||
ExamGeneratorConfig,
|
||||
exam_generator_service,
|
||||
generate_exam,
|
||||
)
|
||||
|
||||
from .course_chat_service import (
|
||||
CourseChatServiceV2,
|
||||
course_chat_service_v2,
|
||||
)
|
||||
|
||||
from .practice_scene_service import (
|
||||
PracticeSceneService,
|
||||
PracticeScene,
|
||||
PracticeSceneResult,
|
||||
practice_scene_service,
|
||||
prepare_practice_knowledge,
|
||||
)
|
||||
|
||||
from .ability_analysis_service import (
|
||||
AbilityAnalysisService,
|
||||
AbilityAnalysisResult,
|
||||
AbilityDimension,
|
||||
CourseRecommendation,
|
||||
ability_analysis_service,
|
||||
)
|
||||
|
||||
from .answer_judge_service import (
|
||||
AnswerJudgeService,
|
||||
JudgeResult,
|
||||
answer_judge_service,
|
||||
judge_answer,
|
||||
)
|
||||
|
||||
from .practice_analysis_service import (
|
||||
PracticeAnalysisService,
|
||||
PracticeAnalysisResult,
|
||||
ScoreBreakdownItem,
|
||||
AbilityDimensionItem,
|
||||
DialogueAnnotation,
|
||||
Suggestion,
|
||||
practice_analysis_service,
|
||||
analyze_practice_session,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# AI Service
|
||||
"AIService",
|
||||
"AIResponse",
|
||||
"AIConfig",
|
||||
"AIServiceError",
|
||||
"AIProvider",
|
||||
"DEFAULT_MODEL",
|
||||
"MODEL_ANALYSIS",
|
||||
"MODEL_CREATIVE",
|
||||
"MODEL_IMAGE_GEN",
|
||||
"quick_chat",
|
||||
# JSON Parser
|
||||
"parse_llm_json",
|
||||
"parse_with_fallback",
|
||||
"safe_json_loads",
|
||||
"clean_llm_output",
|
||||
"diagnose_json_error",
|
||||
"validate_json_schema",
|
||||
"ParseResult",
|
||||
"JSONParseError",
|
||||
"JSONUnrecoverableError",
|
||||
# Knowledge Analysis V2
|
||||
"KnowledgeAnalysisServiceV2",
|
||||
"knowledge_analysis_service_v2",
|
||||
# Exam Generator V2
|
||||
"ExamGeneratorService",
|
||||
"ExamGeneratorConfig",
|
||||
"exam_generator_service",
|
||||
"generate_exam",
|
||||
# Course Chat V2
|
||||
"CourseChatServiceV2",
|
||||
"course_chat_service_v2",
|
||||
# Practice Scene V2
|
||||
"PracticeSceneService",
|
||||
"PracticeScene",
|
||||
"PracticeSceneResult",
|
||||
"practice_scene_service",
|
||||
"prepare_practice_knowledge",
|
||||
# Ability Analysis V2
|
||||
"AbilityAnalysisService",
|
||||
"AbilityAnalysisResult",
|
||||
"AbilityDimension",
|
||||
"CourseRecommendation",
|
||||
"ability_analysis_service",
|
||||
# Answer Judge V2
|
||||
"AnswerJudgeService",
|
||||
"JudgeResult",
|
||||
"answer_judge_service",
|
||||
"judge_answer",
|
||||
# Practice Analysis V2
|
||||
"PracticeAnalysisService",
|
||||
"PracticeAnalysisResult",
|
||||
"ScoreBreakdownItem",
|
||||
"AbilityDimensionItem",
|
||||
"DialogueAnnotation",
|
||||
"Suggestion",
|
||||
"practice_analysis_service",
|
||||
"analyze_practice_session",
|
||||
]
|
||||
479
backend/app/services/ai/ability_analysis_service.py
Normal file
479
backend/app/services/ai/ability_analysis_service.py
Normal file
@@ -0,0 +1,479 @@
|
||||
"""
|
||||
智能工牌能力分析与课程推荐服务 - Python 原生实现
|
||||
|
||||
功能:
|
||||
- 分析员工与顾客的对话记录
|
||||
- 评估多维度能力得分
|
||||
- 基于能力短板推荐课程
|
||||
|
||||
提供稳定可靠的能力分析和课程推荐能力。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.exceptions import ExternalServiceError
|
||||
|
||||
from .ai_service import AIService, AIResponse
|
||||
from .llm_json_parser import parse_with_fallback, clean_llm_output
|
||||
from .prompts.ability_analysis_prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
USER_PROMPT,
|
||||
ABILITY_ANALYSIS_SCHEMA,
|
||||
ABILITY_DIMENSIONS,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ==================== 数据结构 ====================
|
||||
|
||||
@dataclass
class AbilityDimension:
    """One scored ability dimension in an analysis result."""
    name: str      # dimension name (taken from the AI output / ABILITY_DIMENSIONS)
    score: float   # numeric score for this dimension
    feedback: str  # AI-written feedback text for this dimension
|
||||
|
||||
|
||||
@dataclass
class CourseRecommendation:
    """A single recommended course produced by the analysis."""
    course_id: int              # id of the recommended course (validated against the DB list)
    course_name: str            # course display name
    recommendation_reason: str  # AI-written reason for the recommendation
    priority: str               # one of: high, medium, low
    match_score: float          # how well the course matches the ability gap
|
||||
|
||||
|
||||
@dataclass
class AbilityAnalysisResult:
    """Overall result of one ability-analysis run (success or failure)."""
    success: bool          # False => see `error`; other fields keep defaults
    total_score: float = 0.0
    ability_dimensions: List[AbilityDimension] = field(default_factory=list)
    course_recommendations: List[CourseRecommendation] = field(default_factory=list)
    ai_provider: str = ""  # provider actually used (4sapi / openrouter)
    ai_model: str = ""     # model actually used
    ai_tokens: int = 0     # total tokens consumed
    ai_latency_ms: int = 0 # AI call latency in milliseconds
    error: str = ""        # error description when success is False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result (including nested dataclasses) to a plain dict."""
        return {
            "success": self.success,
            "total_score": self.total_score,
            "ability_dimensions": [
                {"name": d.name, "score": d.score, "feedback": d.feedback}
                for d in self.ability_dimensions
            ],
            "course_recommendations": [
                {
                    "course_id": c.course_id,
                    "course_name": c.course_name,
                    "recommendation_reason": c.recommendation_reason,
                    "priority": c.priority,
                    "match_score": c.match_score,
                }
                for c in self.course_recommendations
            ],
            "ai_provider": self.ai_provider,
            "ai_model": self.ai_model,
            "ai_tokens": self.ai_tokens,
            "ai_latency_ms": self.ai_latency_ms,
            "error": self.error,
        }
|
||||
|
||||
|
||||
@dataclass
class UserPositionInfo:
    """A position (job role) the analyzed user belongs to."""
    position_id: int
    position_name: str
    code: str                         # position code; "" when NULL in DB
    description: str                  # "" when NULL in DB
    skills: Optional[Dict[str, Any]]  # decoded JSON skills column; None if absent/unparseable
    level: str                        # "" when NULL in DB
    status: str                       # "" when NULL in DB
|
||||
|
||||
|
||||
@dataclass
class CourseInfo:
    """A published course that may be recommended to the user."""
    id: int
    name: str
    description: str            # "" when NULL in DB
    category: str               # "" when NULL in DB
    tags: Optional[List[str]]   # decoded JSON tags column; None if absent/unparseable
    difficulty_level: int       # defaults to 3 when NULL in DB
    duration_hours: float       # defaults to 0 when NULL in DB
|
||||
|
||||
|
||||
# ==================== 服务类 ====================
|
||||
|
||||
class AbilityAnalysisService:
    """
    Smart work-badge ability analysis service (pure-Python implementation).

    Analyzes an employee/customer dialogue transcript, scores multiple
    ability dimensions and recommends courses based on the weak spots.

    Example:
        ```python
        service = AbilityAnalysisService()
        result = await service.analyze(
            db=db_session,
            user_id=1,
            dialogue_history="顾客:你好,我想了解一下你们的服务..."
        )
        print(result.total_score)
        print(result.course_recommendations)
        ```
    """

    def __init__(self):
        """Create the service with its own AIService tagged for per-module stats."""
        self.ai_service = AIService(module_code="ability_analysis")
|
||||
|
||||
async def analyze(
    self,
    db: AsyncSession,
    user_id: int,
    dialogue_history: str
) -> AbilityAnalysisResult:
    """
    Analyze an employee's ability from a dialogue transcript and recommend courses.

    Args:
        db: Database session (multi-tenant: each tenant passes its own session)
        user_id: User ID
        dialogue_history: Dialogue transcript text

    Returns:
        AbilityAnalysisResult with scores, per-dimension feedback and course
        recommendations; on any failure returns ``success=False`` plus ``error``
        — this method never raises.
    """
    try:
        logger.info(f"开始能力分析 - user_id: {user_id}")

        # 1. Validate input: an empty/blank transcript cannot be analyzed
        if not dialogue_history or not dialogue_history.strip():
            return AbilityAnalysisResult(
                success=False,
                error="对话记录不能为空"
            )

        # 2. Load the user's positions — gives the AI job context
        user_positions = await self._get_user_positions(db, user_id)
        user_info_str = self._format_user_info(user_positions)

        logger.info(f"用户岗位信息: {len(user_positions)} 个岗位")

        # 3. Load all published courses — candidate pool for recommendations
        courses = await self._get_published_courses(db)
        courses_str = self._format_courses(courses)

        logger.info(f"可选课程: {len(courses)} 门")

        # 4. Call the AI for the actual analysis
        ai_response = await self._call_ai_analysis(
            dialogue_history=dialogue_history,
            user_info=user_info_str,
            courses=courses_str
        )

        logger.info(
            f"AI 分析完成 - provider: {ai_response.provider}, "
            f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
        )

        # 5. Parse the model's JSON output (multi-layer fallback parsing)
        analysis_data = self._parse_analysis_result(ai_response.content, courses)

        # 6. Build the typed result object; missing keys fall back to neutral defaults
        result = AbilityAnalysisResult(
            success=True,
            total_score=analysis_data.get("total_score", 0),
            ability_dimensions=[
                AbilityDimension(
                    name=d.get("name", ""),
                    score=d.get("score", 0),
                    feedback=d.get("feedback", "")
                )
                for d in analysis_data.get("ability_dimensions", [])
            ],
            course_recommendations=[
                CourseRecommendation(
                    course_id=c.get("course_id", 0),
                    course_name=c.get("course_name", ""),
                    recommendation_reason=c.get("recommendation_reason", ""),
                    priority=c.get("priority", "medium"),
                    match_score=c.get("match_score", 0)
                )
                for c in analysis_data.get("course_recommendations", [])
            ],
            ai_provider=ai_response.provider,
            ai_model=ai_response.model,
            ai_tokens=ai_response.total_tokens,
            ai_latency_ms=ai_response.latency_ms,
        )

        logger.info(
            f"能力分析完成 - user_id: {user_id}, total_score: {result.total_score}, "
            f"recommendations: {len(result.course_recommendations)}"
        )

        return result

    except Exception as e:
        # Catch-all boundary: callers always get a failed result, never an exception
        logger.error(
            f"能力分析失败 - user_id: {user_id}, error: {e}",
            exc_info=True
        )
        return AbilityAnalysisResult(
            success=False,
            error=str(e)
        )
|
||||
|
||||
async def _get_user_positions(
    self,
    db: AsyncSession,
    user_id: int
) -> List[UserPositionInfo]:
    """
    Load the positions the user belongs to (joined via position_members),
    skipping soft-deleted rows on both sides.

    The ``skills`` column may come back as a JSON string or as an
    already-decoded value depending on the driver; both are handled.
    """
    query = text("""
        SELECT
            p.id as position_id,
            p.name as position_name,
            p.code,
            p.description,
            p.skills,
            p.level,
            p.status
        FROM positions p
        INNER JOIN position_members pm ON p.id = pm.position_id
        WHERE pm.user_id = :user_id
          AND pm.is_deleted = 0
          AND p.is_deleted = 0
    """)

    result = await db.execute(query, {"user_id": user_id})
    rows = result.fetchall()

    positions = []
    for row in rows:
        # Decode the skills JSON; tolerate malformed JSON rather than failing
        skills = None
        if row.skills:
            if isinstance(row.skills, str):
                try:
                    skills = json.loads(row.skills)
                except json.JSONDecodeError:
                    skills = None
            else:
                skills = row.skills

        positions.append(UserPositionInfo(
            position_id=row.position_id,
            position_name=row.position_name,
            code=row.code or "",
            description=row.description or "",
            skills=skills,
            level=row.level or "",
            status=row.status or ""
        ))

    return positions
|
||||
|
||||
async def _get_published_courses(self, db: AsyncSession) -> List[CourseInfo]:
    """
    Load all published, non-deleted courses ordered by sort_order.

    The ``tags`` column may come back as a JSON string or as an
    already-decoded value depending on the driver; both are handled.
    """
    query = text("""
        SELECT
            id,
            name,
            description,
            category,
            tags,
            difficulty_level,
            duration_hours
        FROM courses
        WHERE status = 'published'
          AND is_deleted = FALSE
        ORDER BY sort_order
    """)

    result = await db.execute(query)
    rows = result.fetchall()

    courses = []
    for row in rows:
        # Decode the tags JSON; tolerate malformed JSON rather than failing
        tags = None
        if row.tags:
            if isinstance(row.tags, str):
                try:
                    tags = json.loads(row.tags)
                except json.JSONDecodeError:
                    tags = None
            else:
                tags = row.tags

        courses.append(CourseInfo(
            id=row.id,
            name=row.name,
            description=row.description or "",
            category=row.category or "",
            tags=tags,
            difficulty_level=row.difficulty_level or 3,  # neutral default difficulty
            duration_hours=row.duration_hours or 0
        ))

    return courses
|
||||
|
||||
def _format_user_info(self, positions: List[UserPositionInfo]) -> str:
    """Render the user's positions as a human-readable text block for the prompt."""
    if not positions:
        return "暂无岗位信息"

    rendered = []
    for pos in positions:
        # Assemble optional segments, then join once per position
        parts = [f"- 岗位:{pos.position_name}({pos.code})"]
        if pos.level:
            parts.append(f",级别:{pos.level}")
        if pos.description:
            parts.append(f"\n 描述:{pos.description}")
        if pos.skills:
            parts.append(f"\n 核心技能:{json.dumps(pos.skills, ensure_ascii=False)}")
        rendered.append("".join(parts))

    return "\n".join(rendered)
|
||||
|
||||
def _format_courses(self, courses: List[CourseInfo]) -> str:
    """Render the selectable course list as prompt text, one course per line."""
    if not courses:
        return "暂无可选课程"

    rendered = []
    for course in courses:
        # Assemble optional segments, then join once per course
        parts = [f"- ID: {course.id}, 课程名称: {course.name}"]
        if course.category:
            parts.append(f", 分类: {course.category}")
        if course.difficulty_level:
            parts.append(f", 难度: {course.difficulty_level}")
        if course.duration_hours:
            parts.append(f", 时长: {course.duration_hours}小时")
        if course.description:
            # Cap overly long descriptions to keep the prompt compact
            desc = course.description
            if len(desc) > 100:
                desc = desc[:100] + "..."
            parts.append(f"\n 描述: {desc}")
        rendered.append("".join(parts))

    return "\n".join(rendered)
|
||||
|
||||
async def _call_ai_analysis(
    self,
    dialogue_history: str,
    user_info: str,
    courses: str
) -> AIResponse:
    """Build the analysis prompt and call the AI (4sapi with OpenRouter fallback)."""
    # Fill the user-prompt template with transcript, job context and course pool
    user_message = USER_PROMPT.format(
        dialogue_history=dialogue_history,
        user_info=user_info,
        courses=courses
    )

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message}
    ]

    # AIService transparently falls back from 4sapi to OpenRouter on failure
    response = await self.ai_service.chat(
        messages=messages,
        temperature=0.7,  # keep some creativity in the feedback wording
        prompt_name="ability_analysis"
    )

    return response
|
||||
|
||||
def _parse_analysis_result(
    self,
    ai_output: str,
    courses: List[CourseInfo]
) -> Dict[str, Any]:
    """
    Parse the AI's JSON output into the analysis dict.

    Uses the shared LLM JSON parser (multi-layer fallback), then post-processes:
    - drops recommendations whose course_id does not exist in the DB list
    - backfills any ability dimension missing from the AI output
    """
    # Strip markdown fences / chatter before parsing
    cleaned_output, rules = clean_llm_output(ai_output)
    if rules:
        logger.debug(f"AI 输出已清洗: {rules}")

    # Schema-validated parse; unrecoverable failures yield the default shape
    parsed = parse_with_fallback(
        cleaned_output,
        schema=ABILITY_ANALYSIS_SCHEMA,
        default={"analysis": {}},
        validate_schema=True,
        on_error="default"
    )

    # The payload of interest lives under the "analysis" key
    analysis = parsed.get("analysis", {})

    # Post-check: keep only recommendations pointing at real courses
    valid_course_ids = {c.id for c in courses}
    valid_recommendations = []

    for rec in analysis.get("course_recommendations", []):
        course_id = rec.get("course_id")
        if course_id in valid_course_ids:
            valid_recommendations.append(rec)
        else:
            # The model hallucinated a course id — drop it, keep going
            logger.warning(f"推荐的课程ID不存在: {course_id}")

    analysis["course_recommendations"] = valid_recommendations

    # Ensure every expected dimension is present; missing ones get a neutral 70
    existing_dims = {d.get("name") for d in analysis.get("ability_dimensions", [])}
    for dim_name in ABILITY_DIMENSIONS:
        if dim_name not in existing_dims:
            logger.warning(f"缺少能力维度: {dim_name},使用默认值")
            analysis.setdefault("ability_dimensions", []).append({
                "name": dim_name,
                "score": 70,
                "feedback": "暂无具体评价"
            })

    return analysis
|
||||
|
||||
|
||||
# ==================== Module-level singleton ====================

# Shared instance; constructing AIService here reads config at import time
ability_analysis_service = AbilityAnalysisService()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
747
backend/app/services/ai/ai_service.py
Normal file
747
backend/app/services/ai/ai_service.py
Normal file
@@ -0,0 +1,747 @@
|
||||
"""
|
||||
本地 AI 服务 - 遵循瑞小美 AI 接入规范
|
||||
|
||||
功能:
|
||||
- 支持 4sapi.com(首选)和 OpenRouter(备选)自动降级
|
||||
- 统一的请求/响应格式
|
||||
- 调用日志记录
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, AsyncGenerator, Dict, List, Optional, Union
|
||||
from enum import Enum
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AIProvider(Enum):
    """Which upstream AI vendor a request is routed to."""
    PRIMARY = "4sapi"        # preferred: 4sapi.com
    FALLBACK = "openrouter"  # fallback: OpenRouter
|
||||
|
||||
|
||||
@dataclass
class AIResponse:
    """Normalized AI completion result (same shape for both providers)."""
    content: str                                                 # AI reply text
    model: str = ""                                              # model actually used
    provider: str = ""                                           # provider actually used
    input_tokens: int = 0                                        # prompt token count
    output_tokens: int = 0                                       # completion token count
    total_tokens: int = 0                                        # total token count
    cost: float = 0.0                                            # cost in USD
    latency_ms: int = 0                                          # response latency (ms)
    raw_response: Dict[str, Any] = field(default_factory=dict)   # raw provider payload
    images: List[str] = field(default_factory=list)              # image-generation results
    annotations: Dict[str, Any] = field(default_factory=dict)    # PDF-parse annotations
|
||||
|
||||
|
||||
@dataclass
class AIConfig:
    """Connection and behavior settings for the AI providers."""
    primary_api_key: str          # generic key (Gemini/DeepSeek etc.)
    anthropic_api_key: str = ""   # Claude-only key
    primary_base_url: str = "https://4sapi.com/v1"
    fallback_api_key: str = ""
    fallback_base_url: str = "https://openrouter.ai/api/v1"
    default_model: str = "claude-opus-4-5-20251101-thinking"  # "strongest first" default
    timeout: float = 120.0        # request timeout in seconds
    max_retries: int = 2          # NOTE(review): unused in the visible code — confirm
|
||||
|
||||
|
||||
# Claude model names (calls to these require anthropic_api_key).
# NOTE(review): is_claude_model() below matches by substring rather than
# consulting this list — confirm whether this list is still needed.
CLAUDE_MODELS = [
    "claude-opus-4-5-20251101-thinking",
    "claude-opus-4-5-20251101",
    "claude-sonnet-4-20250514",
    "claude-3-opus",
    "claude-3-sonnet",
    "claude-3-haiku",
]
|
||||
|
||||
|
||||
def is_claude_model(model: str) -> bool:
    """Return True when *model* names an Anthropic/Claude model (case-insensitive substring match)."""
    lowered = model.lower()
    return "claude" in lowered or "anthropic" in lowered
|
||||
|
||||
|
||||
# Model-name mapping: 4sapi (short names) -> OpenRouter (vendor-qualified paths)
MODEL_MAPPING = {
    "gemini-3-flash-preview": "google/gemini-3-flash-preview",
    "gemini-3-pro-preview": "google/gemini-3-pro-preview",
    "claude-opus-4-5-20251101-thinking": "anthropic/claude-opus-4.5",
    "gemini-2.5-flash-image-preview": "google/gemini-2.0-flash-exp:free",
}

# Reverse mapping: OpenRouter -> 4sapi
MODEL_MAPPING_REVERSE = {v: k for k, v in MODEL_MAPPING.items()}
|
||||
|
||||
|
||||
class AIServiceError(Exception):
    """Raised when an AI provider call fails; carries provider and HTTP status."""

    def __init__(self, message: str, provider: str = "", status_code: int = 0):
        # Record which provider failed and the HTTP status (0 = non-HTTP failure)
        self.provider = provider
        self.status_code = status_code
        super().__init__(message)
|
||||
|
||||
|
||||
class AIService:
    """
    Local AI service.

    Follows the in-house AI integration spec:
    - prefers 4sapi.com, automatically falls back to OpenRouter on failure
    - unified response format
    - automatic model-name translation between providers

    Example:
        ```python
        ai = AIService(module_code="knowledge_analysis")
        response = await ai.chat(
            messages=[
                {"role": "system", "content": "你是助手"},
                {"role": "user", "content": "你好"}
            ],
            prompt_name="greeting"
        )
        print(response.content)
        ```
    """

    def __init__(
        self,
        module_code: str = "default",
        config: Optional[AIConfig] = None,
        db_session: Any = None
    ):
        """
        Initialize the AI service.

        Config resolution order:
        1. explicit ``config`` argument
        2. admin DB (tenant_configs) — recommended
        3. environment variables (fallback)

        Args:
            module_code: module tag used for statistics
            config: AI config; None loads from DB/env
            db_session: DB session for call logging and config reads
        """
        self.module_code = module_code
        self.db_session = db_session
        self.config = config or self._load_config(db_session)

        logger.info(f"AIService 初始化: module={module_code}, primary={self.config.primary_base_url}")
|
||||
|
||||
def _load_config(self, db_session: Any) -> AIConfig:
    """
    Load configuration: admin DB (tenant_configs) first, then environment
    variables as fallback.

    Args:
        db_session: optional DB session; not read here — the admin-DB path
            opens its own synchronous connection

    Returns:
        AIConfig
    """
    # Prefer the admin DB (synchronous read at construction time)
    try:
        config = self._load_config_from_admin_db()
        if config:
            logger.info("✅ AI 配置已从管理库(tenant_configs)加载")
            return config
    except Exception as e:
        # Any DB problem silently degrades to env-var config
        logger.debug(f"从管理库加载 AI 配置失败: {e}")

    # Fall back to environment variables
    logger.info("AI 配置从环境变量加载")
    return self._load_config_from_env()
|
||||
|
||||
def _load_config_from_admin_db(self) -> Optional[AIConfig]:
    """
    Load AI config from the admin DB's tenant_configs table.

    Opens a short-lived synchronous SQLAlchemy engine (this runs during
    service construction, outside any async context) against
    kaopeilian_admin.tenant_configs.

    Returns:
        AIConfig, or None when the tenant, config group, or primary key
        is missing — callers then fall back to environment variables.
    """
    import os

    # Current tenant code (one backend deployment per tenant)
    tenant_code = os.getenv("TENANT_CODE", "demo")

    # Admin-DB connection settings
    admin_db_host = os.getenv("ADMIN_DB_HOST", "prod-mysql")
    admin_db_port = int(os.getenv("ADMIN_DB_PORT", "3306"))
    admin_db_user = os.getenv("ADMIN_DB_USER", "root")
    admin_db_password = os.getenv("ADMIN_DB_PASSWORD", "")
    admin_db_name = os.getenv("ADMIN_DB_NAME", "kaopeilian_admin")

    if not admin_db_password:
        # No password means no admin DB in this environment — skip quietly
        logger.debug("ADMIN_DB_PASSWORD 未配置,跳过管理库配置加载")
        return None

    try:
        from sqlalchemy import create_engine, text
        import urllib.parse

        # Build the connection URL; password must be URL-escaped
        encoded_password = urllib.parse.quote_plus(admin_db_password)
        admin_db_url = f"mysql+pymysql://{admin_db_user}:{encoded_password}@{admin_db_host}:{admin_db_port}/{admin_db_name}?charset=utf8mb4"

        engine = create_engine(admin_db_url, pool_pre_ping=True)

        with engine.connect() as conn:
            # 1. Resolve the tenant id from its code
            result = conn.execute(
                text("SELECT id FROM tenants WHERE code = :code AND status = 'active'"),
                {"code": tenant_code}
            )
            row = result.fetchone()
            if not row:
                logger.debug(f"租户 {tenant_code} 不存在或未激活")
                engine.dispose()
                return None

            tenant_id = row[0]

            # 2. Fetch the tenant's 'ai' config group
            result = conn.execute(
                text("""
                    SELECT config_key, config_value
                    FROM tenant_configs
                    WHERE tenant_id = :tenant_id AND config_group = 'ai'
                """),
                {"tenant_id": tenant_id}
            )
            rows = result.fetchall()

        engine.dispose()

        if not rows:
            logger.debug(f"租户 {tenant_code} 无 AI 配置")
            return None

        # Flatten key/value rows into a dict
        config_dict = {row[0]: row[1] for row in rows}

        # The primary key is mandatory — bail out so env fallback can apply
        primary_key = config_dict.get("AI_PRIMARY_API_KEY", "")
        if not primary_key:
            logger.warning(f"租户 {tenant_code} 的 AI_PRIMARY_API_KEY 为空")
            return None

        logger.info(f"✅ 从管理库加载租户 {tenant_code} 的 AI 配置成功")

        return AIConfig(
            primary_api_key=primary_key,
            anthropic_api_key=config_dict.get("AI_ANTHROPIC_API_KEY", ""),
            primary_base_url=config_dict.get("AI_PRIMARY_BASE_URL", "https://4sapi.com/v1"),
            fallback_api_key=config_dict.get("AI_FALLBACK_API_KEY", ""),
            fallback_base_url=config_dict.get("AI_FALLBACK_BASE_URL", "https://openrouter.ai/api/v1"),
            default_model=config_dict.get("AI_DEFAULT_MODEL", "claude-opus-4-5-20251101-thinking"),
            timeout=float(config_dict.get("AI_TIMEOUT", "120")),
        )
    except Exception as e:
        # Best-effort load: any failure just means "no DB config"
        logger.debug(f"从管理库读取 AI 配置异常: {e}")
        return None
|
||||
|
||||
def _load_config_from_env(self) -> AIConfig:
    """
    Load configuration from environment variables.

    Hard requirements (per the integration spec):
    - never hard-code API keys in source
    - keys must come from the environment

    Expected variables:
    - AI_PRIMARY_API_KEY: generic key (Gemini/DeepSeek etc.)
    - AI_ANTHROPIC_API_KEY: Claude-only key
    """
    import os

    primary_api_key = os.getenv("AI_PRIMARY_API_KEY", "")
    anthropic_api_key = os.getenv("AI_ANTHROPIC_API_KEY", "")

    # Warn loudly when required keys are missing — calls will fail later
    if not primary_api_key:
        logger.warning("⚠️ AI_PRIMARY_API_KEY 未配置,AI 服务可能无法正常工作")
    if not anthropic_api_key:
        logger.warning("⚠️ AI_ANTHROPIC_API_KEY 未配置,Claude 模型调用将失败")

    return AIConfig(
        # Generic key (non-Anthropic models: Gemini/DeepSeek etc.)
        primary_api_key=primary_api_key,
        # Claude-only key
        anthropic_api_key=anthropic_api_key,
        primary_base_url=os.getenv("AI_PRIMARY_BASE_URL", "https://4sapi.com/v1"),
        fallback_api_key=os.getenv("AI_FALLBACK_API_KEY", ""),
        fallback_base_url=os.getenv("AI_FALLBACK_BASE_URL", "https://openrouter.ai/api/v1"),
        # Default model: "strongest first" policy → Claude Opus 4.5
        default_model=os.getenv("AI_DEFAULT_MODEL", "claude-opus-4-5-20251101-thinking"),
        timeout=float(os.getenv("AI_TIMEOUT", "120")),
    )
|
||||
|
||||
def _convert_model_name(self, model: str, provider: AIProvider) -> str:
    """
    Translate a model name into the target provider's naming scheme.

    Args:
        model: original model name
        provider: target provider

    Returns:
        The translated model name.
    """
    if provider == AIProvider.FALLBACK:
        # 4sapi -> OpenRouter: explicit mapping first, otherwise prefix
        # "google/" unless the name is already vendor-qualified
        if model in MODEL_MAPPING:
            return MODEL_MAPPING[model]
        return model if "/" in model else f"google/{model}"

    # OpenRouter -> 4sapi: explicit reverse mapping, otherwise strip the
    # vendor prefix if present
    if model in MODEL_MAPPING_REVERSE:
        return MODEL_MAPPING_REVERSE[model]
    return model.split("/")[-1] if "/" in model else model
|
||||
|
||||
async def chat(
    self,
    messages: List[Dict[str, str]],
    model: Optional[str] = None,
    temperature: float = 0.7,
    max_tokens: Optional[int] = None,
    prompt_name: str = "default",
    **kwargs
) -> AIResponse:
    """
    Non-streaming chat completion with automatic provider fallback.

    Args:
        messages: [{"role": "system/user/assistant", "content": "..."}]
        model: model name; None uses the configured default
        temperature: sampling temperature
        max_tokens: output token cap (omitted from the request when falsy)
        prompt_name: label used for call statistics
        **kwargs: extra options (currently not forwarded — confirm intent)

    Returns:
        AIResponse from the first provider that succeeds.

    Raises:
        AIServiceError: when the primary fails and no fallback key is
            configured, or when the fallback also fails.
    """
    model = model or self.config.default_model

    # OpenAI-compatible request body
    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    if max_tokens:
        payload["max_tokens"] = max_tokens

    # Try the primary provider first
    try:
        return await self._call_provider(
            provider=AIProvider.PRIMARY,
            endpoint="/chat/completions",
            payload=payload,
            prompt_name=prompt_name
        )
    except AIServiceError as e:
        logger.warning(f"首选服务商调用失败: {e}, 尝试降级到备选服务商")

        # Without a fallback key there is nothing to degrade to
        if not self.config.fallback_api_key:
            raise

        # Degrade to the fallback provider; model names differ between vendors
        fallback_model = self._convert_model_name(model, AIProvider.FALLBACK)
        payload["model"] = fallback_model

        return await self._call_provider(
            provider=AIProvider.FALLBACK,
            endpoint="/chat/completions",
            payload=payload,
            prompt_name=prompt_name
        )
|
||||
|
||||
async def chat_stream(
    self,
    messages: List[Dict[str, str]],
    model: Optional[str] = None,
    temperature: float = 0.7,
    max_tokens: Optional[int] = None,
    prompt_name: str = "default",
    **kwargs
) -> AsyncGenerator[str, None]:
    """
    Streaming chat completion with automatic provider fallback.

    Same arguments as :meth:`chat`, but content is yielded chunk by chunk.

    NOTE(review): if the primary provider fails after some chunks were
    already yielded, the fallback re-streams from the start without
    retracting them — confirm callers tolerate duplicated text.

    Yields:
        str: content deltas as they arrive
    """
    model = model or self.config.default_model

    # OpenAI-compatible request body, with SSE streaming enabled
    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "stream": True,
    }
    if max_tokens:
        payload["max_tokens"] = max_tokens

    # Try the primary provider first
    try:
        async for chunk in self._call_provider_stream(
            provider=AIProvider.PRIMARY,
            endpoint="/chat/completions",
            payload=payload,
            prompt_name=prompt_name
        ):
            yield chunk
        return
    except AIServiceError as e:
        logger.warning(f"首选服务商流式调用失败: {e}, 尝试降级到备选服务商")

        # Without a fallback key there is nothing to degrade to
        if not self.config.fallback_api_key:
            raise

        # Degrade to the fallback provider; model names differ between vendors
        fallback_model = self._convert_model_name(model, AIProvider.FALLBACK)
        payload["model"] = fallback_model

        async for chunk in self._call_provider_stream(
            provider=AIProvider.FALLBACK,
            endpoint="/chat/completions",
            payload=payload,
            prompt_name=prompt_name
        ):
            yield chunk
|
||||
|
||||
async def _call_provider_stream(
    self,
    provider: AIProvider,
    endpoint: str,
    payload: Dict[str, Any],
    prompt_name: str
) -> AsyncGenerator[str, None]:
    """
    Stream a chat completion from one provider, yielding text deltas.

    Parses the OpenAI-style SSE stream ("data: {...}" lines terminated by
    "data: [DONE]") and yields every non-empty delta content chunk.

    Args:
        provider: which provider to call
        endpoint: API endpoint path (e.g. "/chat/completions")
        payload: request body (must already contain "stream": True)
        prompt_name: label used for call statistics

    Raises:
        AIServiceError: on non-200 status, timeout, or network failure.
    """
    # Resolve base URL and API key for the chosen provider
    if provider == AIProvider.PRIMARY:
        base_url = self.config.primary_base_url
        # Claude models use the dedicated Anthropic key; others the generic key
        model = payload.get("model", "")
        if is_claude_model(model) and self.config.anthropic_api_key:
            api_key = self.config.anthropic_api_key
            logger.debug(f"[Stream] 使用 Claude 专属 Key 调用模型: {model}")
        else:
            api_key = self.config.primary_api_key
    else:
        api_key = self.config.fallback_api_key
        base_url = self.config.fallback_base_url

    url = f"{base_url.rstrip('/')}{endpoint}"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # OpenRouter requires attribution headers
    if provider == AIProvider.FALLBACK:
        headers["HTTP-Referer"] = "https://kaopeilian.ireborn.com.cn"
        headers["X-Title"] = "KaoPeiLian"

    start_time = time.time()

    try:
        timeout = httpx.Timeout(self.config.timeout, connect=10.0)

        async with httpx.AsyncClient(timeout=timeout) as client:
            logger.info(f"流式调用 AI 服务: provider={provider.value}, model={payload.get('model')}")

            async with client.stream("POST", url, json=payload, headers=headers) as response:
                # Non-200: read the body for diagnostics, then fail fast
                if response.status_code != 200:
                    error_text = await response.aread()
                    logger.error(f"AI 服务流式返回错误: status={response.status_code}, body={error_text[:500]}")
                    raise AIServiceError(
                        f"API 流式请求失败: HTTP {response.status_code}",
                        provider=provider.value,
                        status_code=response.status_code
                    )

                # Consume the SSE stream line by line
                async for line in response.aiter_lines():
                    if not line or not line.strip():
                        continue

                    # Only "data: ..." lines carry events
                    if line.startswith("data: "):
                        data_str = line[6:]  # strip the "data: " prefix

                        # End-of-stream sentinel
                        if data_str.strip() == "[DONE]":
                            logger.info(f"流式响应完成: provider={provider.value}")
                            return

                        try:
                            event_data = json.loads(data_str)

                            # Yield the delta text, if any
                            choices = event_data.get("choices", [])
                            if choices:
                                delta = choices[0].get("delta", {})
                                content = delta.get("content", "")
                                if content:
                                    yield content

                        except json.JSONDecodeError as e:
                            # Tolerate malformed events (keep-alives, partial lines)
                            logger.debug(f"解析流式数据失败: {e} - 数据: {data_str[:100]}")
                            continue

        latency_ms = int((time.time() - start_time) * 1000)
        logger.info(f"流式调用完成: provider={provider.value}, latency={latency_ms}ms")

    except httpx.TimeoutException:
        latency_ms = int((time.time() - start_time) * 1000)
        logger.error(f"AI 服务流式超时: provider={provider.value}, latency={latency_ms}ms")
        raise AIServiceError(f"流式请求超时({self.config.timeout}秒)", provider=provider.value)

    except httpx.RequestError as e:
        logger.error(f"AI 服务流式网络错误: provider={provider.value}, error={e}")
        raise AIServiceError(f"流式网络错误: {e}", provider=provider.value)
|
||||
|
||||
async def _call_provider(
    self,
    provider: AIProvider,
    endpoint: str,
    payload: Dict[str, Any],
    prompt_name: str
) -> AIResponse:
    """
    Call one specific AI provider (no fallback logic at this level).

    Args:
        provider: which provider to hit (PRIMARY or FALLBACK).
        endpoint: API endpoint path appended to the provider base URL.
        payload: JSON request body (OpenAI-compatible chat payload).
        prompt_name: label recorded with the call log.

    Returns:
        AIResponse parsed from the provider's JSON reply.

    Raises:
        AIServiceError: on non-200 status, timeout, or network failure.
    """
    # Resolve base URL + API key for the chosen provider.
    if provider == AIProvider.PRIMARY:
        base_url = self.config.primary_base_url
        # Key selection by model: Claude models use a dedicated key,
        # everything else uses the generic primary key.
        model = payload.get("model", "")
        if is_claude_model(model) and self.config.anthropic_api_key:
            api_key = self.config.anthropic_api_key
            logger.debug(f"使用 Claude 专属 Key 调用模型: {model}")
        else:
            api_key = self.config.primary_api_key
    else:
        api_key = self.config.fallback_api_key
        base_url = self.config.fallback_base_url

    url = f"{base_url.rstrip('/')}{endpoint}"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # The fallback provider (OpenRouter) requires extra attribution headers.
    if provider == AIProvider.FALLBACK:
        headers["HTTP-Referer"] = "https://kaopeilian.ireborn.com.cn"
        headers["X-Title"] = "KaoPeiLian"

    start_time = time.time()

    try:
        async with httpx.AsyncClient(timeout=self.config.timeout) as client:
            logger.info(f"调用 AI 服务: provider={provider.value}, model={payload.get('model')}")

            response = await client.post(url, json=payload, headers=headers)

            latency_ms = int((time.time() - start_time) * 1000)

            # Any non-200 status is surfaced as AIServiceError so the
            # caller can decide whether to fall back to another provider.
            if response.status_code != 200:
                error_text = response.text
                logger.error(f"AI 服务返回错误: status={response.status_code}, body={error_text[:500]}")
                raise AIServiceError(
                    f"API 请求失败: HTTP {response.status_code}",
                    provider=provider.value,
                    status_code=response.status_code
                )

            data = response.json()

            # Parse the OpenAI-compatible response body into an AIResponse.
            ai_response = self._parse_response(data, provider, latency_ms)

            logger.info(
                f"AI 调用成功: provider={provider.value}, model={ai_response.model}, "
                f"tokens={ai_response.total_tokens}, latency={latency_ms}ms"
            )

            # Persist a call log when a DB session is attached (no-op otherwise).
            await self._log_call(prompt_name, ai_response)

            return ai_response

    except httpx.TimeoutException:
        latency_ms = int((time.time() - start_time) * 1000)
        logger.error(f"AI 服务超时: provider={provider.value}, latency={latency_ms}ms")
        raise AIServiceError(f"请求超时({self.config.timeout}秒)", provider=provider.value)

    except httpx.RequestError as e:
        logger.error(f"AI 服务网络错误: provider={provider.value}, error={e}")
        raise AIServiceError(f"网络错误: {e}", provider=provider.value)
|
||||
|
||||
def _parse_response(
    self,
    data: Dict[str, Any],
    provider: AIProvider,
    latency_ms: int
) -> AIResponse:
    """Convert an OpenAI-compatible JSON body into an AIResponse.

    Raises:
        AIServiceError: when the body carries no ``choices`` at all.
    """
    choice_list = data.get("choices", [])
    if not choice_list:
        raise AIServiceError("响应中没有 choices")

    # The reply text lives on the first choice's message.
    reply_text = choice_list[0].get("message", {}).get("content", "")

    # Token accounting; fall back to the sum when the total is absent.
    usage = data.get("usage", {})
    prompt_toks = usage.get("prompt_tokens", 0)
    completion_toks = usage.get("completion_tokens", 0)
    all_toks = usage.get("total_tokens", prompt_toks + completion_toks)

    return AIResponse(
        content=reply_text,
        model=data.get("model", ""),
        provider=provider.value,
        input_tokens=prompt_toks,
        output_tokens=completion_toks,
        total_tokens=all_toks,
        cost=usage.get("total_cost", 0.0),  # only some providers report cost
        latency_ms=latency_ms,
        raw_response=data
    )
|
||||
|
||||
async def _log_call(self, prompt_name: str, response: AIResponse) -> None:
|
||||
"""记录调用日志到数据库"""
|
||||
if not self.db_session:
|
||||
return
|
||||
|
||||
try:
|
||||
# TODO: 实现调用日志记录
|
||||
# 可以参考 ai_call_logs 表结构
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"记录 AI 调用日志失败: {e}")
|
||||
|
||||
async def analyze_document(
    self,
    content: str,
    prompt: str,
    model: Optional[str] = None,
    prompt_name: str = "document_analysis"
) -> "AIResponse":
    """Run an analysis *prompt* against a document and return the AI response.

    Args:
        content: raw document text to analyse.
        prompt: instruction describing the analysis to perform.
        model: optional model name override.
        prompt_name: label used for call logging.

    Returns:
        The full AIResponse from the underlying chat call.
    """
    combined = f"{prompt}\n\n文档内容:\n{content}"
    # Low temperature keeps document analysis deterministic.
    return await self.chat(
        messages=[{"role": "user", "content": combined}],
        model=model,
        temperature=0.1,
        prompt_name=prompt_name
    )
|
||||
|
||||
|
||||
# Convenience helpers
async def quick_chat(
    messages: List[Dict[str, str]],
    model: Optional[str] = None,
    module_code: str = "quick"
) -> str:
    """One-shot chat helper: spin up an AIService and return the reply text.

    Args:
        messages: OpenAI-style message dicts.
        model: optional model name override.
        module_code: module tag used for call logging/attribution.

    Returns:
        The assistant's reply content as plain text.
    """
    service = AIService(module_code=module_code)
    reply = await service.chat(messages, model=model)
    return reply.content
|
||||
|
||||
|
||||
# Model constants (per the in-house AI integration spec).
# Ordered by priority: primary > standard > fast.
MODEL_PRIMARY = "claude-opus-4-5-20251101-thinking"  # 1st choice: tried first for every task
MODEL_STANDARD = "gemini-3-pro-preview"  # 2nd choice: fallback after Claude fails
MODEL_FAST = "gemini-3-flash-preview"  # 3rd choice: last-resort fallback
MODEL_IMAGE = "gemini-2.5-flash-image-preview"  # image generation only
MODEL_VIDEO = "veo3.1-pro"  # video generation only

# Aliases kept for backwards compatibility with older code.
DEFAULT_MODEL = MODEL_PRIMARY  # default to the strongest model
MODEL_ANALYSIS = MODEL_PRIMARY
MODEL_CREATIVE = MODEL_STANDARD
MODEL_IMAGE_GEN = MODEL_IMAGE
|
||||
|
||||
197
backend/app/services/ai/answer_judge_service.py
Normal file
197
backend/app/services/ai/answer_judge_service.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
答案判断服务 - Python 原生实现
|
||||
|
||||
功能:
|
||||
- 判断填空题与问答题的答案是否正确
|
||||
- 通过 AI 语义理解比对用户答案与标准答案
|
||||
|
||||
提供稳定可靠的答案判断能力。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
from .ai_service import AIService, AIResponse
|
||||
from .prompts.answer_judge_prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
USER_PROMPT,
|
||||
CORRECT_KEYWORDS,
|
||||
INCORRECT_KEYWORDS,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class JudgeResult:
    """Outcome of a single answer-judging call."""
    is_correct: bool        # final verdict parsed from the AI output
    raw_response: str       # unmodified AI reply (or error text on failure)
    ai_provider: str = ""   # provider that served the call
    ai_model: str = ""      # model name reported by the provider
    ai_tokens: int = 0      # total tokens consumed
    ai_latency_ms: int = 0  # wall-clock latency of the AI call
|
||||
|
||||
|
||||
class AnswerJudgeService:
    """
    Answer-judging service (native Python implementation).

    Sends the question, reference answer and the user's answer to the AI
    and maps its free-text reply onto a boolean verdict via keyword
    matching against the shared prompt module's keyword lists.

    Example:
        ```python
        service = AnswerJudgeService()
        result = await service.judge(
            db=db_session,  # pass db_session so the AI call is logged
            question="玻尿酸的主要作用是什么?",
            correct_answer="补水保湿、填充塑形",
            user_answer="保湿和塑形",
            analysis="玻尿酸具有补水保湿和填充塑形两大功能"
        )
        print(result.is_correct)  # True
        ```
    """

    # Module tag recorded with AI call logs.
    MODULE_CODE = "answer_judge"

    async def judge(
        self,
        question: str,
        correct_answer: str,
        user_answer: str,
        analysis: str = "",
        db: Any = None  # DB session used to record the AI call log
    ) -> JudgeResult:
        """
        Judge whether a user's answer is correct.

        Args:
            question: question text.
            correct_answer: reference answer.
            user_answer: the learner's answer.
            analysis: optional answer explanation (extra context for the AI).
            db: DB session for call logging (per the AI integration spec).

        Returns:
            JudgeResult; on any failure a conservative "incorrect" result
            is returned instead of raising.
        """
        try:
            logger.info(
                f"开始判断答案 - question: {question[:50]}..., "
                f"user_answer: {user_answer[:50]}..."
            )

            # Fresh AIService per call so the request's db session is attached.
            ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)

            # Fill the shared judging prompt template.
            user_prompt = USER_PROMPT.format(
                question=question,
                correct_answer=correct_answer,
                user_answer=user_answer,
                analysis=analysis or "无"
            )

            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ]

            ai_response = await ai_service.chat(
                messages=messages,
                temperature=0.1,  # low temperature for stable, parseable output
                prompt_name="answer_judge"
            )

            logger.info(
                f"AI 判断完成 - provider: {ai_response.provider}, "
                f"response: {ai_response.content}, "
                f"latency: {ai_response.latency_ms}ms"
            )

            # Map the free-text reply onto a boolean verdict.
            is_correct = self._parse_judge_result(ai_response.content)

            logger.info(f"答案判断结果: {is_correct}")

            return JudgeResult(
                is_correct=is_correct,
                raw_response=ai_response.content,
                ai_provider=ai_response.provider,
                ai_model=ai_response.model,
                ai_tokens=ai_response.total_tokens,
                ai_latency_ms=ai_response.latency_ms,
            )

        except Exception as e:
            logger.error(f"答案判断失败: {e}", exc_info=True)
            # Conservative default: treat failures as "incorrect".
            return JudgeResult(
                is_correct=False,
                raw_response=f"判断失败: {e}",
            )

    def _parse_judge_result(self, ai_output: str) -> bool:
        """
        Parse the AI's free-text verdict.

        Args:
            ai_output: raw AI reply text.

        Returns:
            True when a "correct" keyword matches (checked first, so it
            wins over "incorrect" keywords); unrecognisable output
            defaults to False (conservative).
        """
        # Normalise for case-insensitive keyword matching.
        output = ai_output.strip().lower()

        for keyword in CORRECT_KEYWORDS:
            if keyword.lower() in output:
                return True

        for keyword in INCORRECT_KEYWORDS:
            if keyword.lower() in output:
                return False

        logger.warning(f"无法解析判断结果,默认返回错误: {ai_output}")
        return False
|
||||
|
||||
|
||||
# ==================== Global instance ====================

answer_judge_service = AnswerJudgeService()  # module-level singleton shared by callers
|
||||
|
||||
|
||||
# ==================== Convenience helpers ====================

async def judge_answer(
    question: str,
    correct_answer: str,
    user_answer: str,
    analysis: str = ""
) -> bool:
    """Judge an answer and return only the boolean verdict.

    Thin wrapper over the module-level AnswerJudgeService instance.

    Args:
        question: question text.
        correct_answer: reference answer.
        user_answer: the learner's answer.
        analysis: optional answer explanation passed to the AI.

    Returns:
        True when the AI judges the answer correct.
    """
    verdict = await answer_judge_service.judge(
        question=question,
        correct_answer=correct_answer,
        user_answer=user_answer,
        analysis=analysis
    )
    return verdict.is_correct
|
||||
|
||||
757
backend/app/services/ai/course_chat_service.py
Normal file
757
backend/app/services/ai/course_chat_service.py
Normal file
@@ -0,0 +1,757 @@
|
||||
"""
|
||||
课程对话服务 V2 - Python 原生实现
|
||||
|
||||
功能:
|
||||
- 查询课程知识点作为知识库
|
||||
- 调用 AI 进行对话
|
||||
- 支持流式输出
|
||||
- 多轮对话历史管理(Redis 缓存)
|
||||
|
||||
提供稳定可靠的课程对话能力。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.exceptions import ExternalServiceError
|
||||
|
||||
from .ai_service import AIService
|
||||
from .prompts.course_chat_prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
USER_PROMPT,
|
||||
KNOWLEDGE_ITEM_TEMPLATE,
|
||||
CONVERSATION_WINDOW_SIZE,
|
||||
CONVERSATION_TTL,
|
||||
MAX_KNOWLEDGE_POINTS,
|
||||
MAX_KNOWLEDGE_BASE_LENGTH,
|
||||
DEFAULT_CHAT_MODEL,
|
||||
DEFAULT_TEMPERATURE,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Redis key prefix/suffix for the per-user conversation index (sorted set).
CONVERSATION_INDEX_PREFIX = "course_chat:user:"
CONVERSATION_INDEX_SUFFIX = ":conversations"
# Key prefix for per-conversation metadata blobs.
CONVERSATION_META_PREFIX = "course_chat:meta:"
# Index TTL matches the conversation-data TTL.
CONVERSATION_INDEX_TTL = CONVERSATION_TTL
|
||||
|
||||
|
||||
class CourseChatServiceV2:
    """
    Course chat service, v2 (native Python implementation).

    Loads a course's knowledge points as the knowledge base, calls the AI
    (streaming or not), and keeps multi-turn history in Redis.

    Example:
        ```python
        service = CourseChatServiceV2()

        # Non-streaming chat
        response = await service.chat(
            db=db_session,
            course_id=1,
            query="什么是玻尿酸?",
            user_id=1,
            conversation_id=None
        )

        # Streaming chat
        async for chunk in service.chat_stream(
            db=db_session,
            course_id=1,
            query="什么是玻尿酸?",
            user_id=1,
            conversation_id=None
        ):
            print(chunk, end="", flush=True)
        ```
    """

    # Redis key prefix for per-conversation message history.
    CONVERSATION_KEY_PREFIX = "course_chat:conversation:"
    # Module tag recorded with AI call logs.
    MODULE_CODE = "course_chat"

    def __init__(self):
        """No eager setup: AIService instances are created per call so the
        request's db_session can be attached for call logging."""
        pass
|
||||
|
||||
async def chat(
    self,
    db: AsyncSession,
    course_id: int,
    query: str,
    user_id: int,
    conversation_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Chat with a course (non-streaming).

    Args:
        db: database session (also used for AI call logging).
        course_id: course whose knowledge points form the knowledge base.
        query: the user's question.
        user_id: requesting user.
        conversation_id: pass an existing id to continue a conversation.

    Returns:
        Dict with ``answer``, ``conversation_id`` and AI call metadata.

    Raises:
        ExternalServiceError: wraps any failure in the pipeline.
    """
    try:
        logger.info(
            f"开始课程对话 V2 - course_id: {course_id}, user_id: {user_id}, "
            f"conversation_id: {conversation_id}"
        )

        # 1. Load the course's knowledge points as the knowledge base.
        knowledge_base = await self._get_course_knowledge(db, course_id)

        if not knowledge_base:
            logger.warning(f"课程 {course_id} 没有知识点,使用空知识库")
            knowledge_base = "(该课程暂无知识点内容)"

        # 2. Reuse the given conversation id or mint a new one.
        is_new_conversation = False
        if not conversation_id:
            conversation_id = self._generate_conversation_id(user_id, course_id)
            is_new_conversation = True
            logger.info(f"创建新会话: {conversation_id}")

        # 3. Assemble system prompt + history window + current question.
        messages = await self._build_messages(
            knowledge_base=knowledge_base,
            query=query,
            user_id=user_id,
            conversation_id=conversation_id
        )

        # 4. Call the AI (db_session enables call logging).
        ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)
        response = await ai_service.chat(
            messages=messages,
            model=DEFAULT_CHAT_MODEL,
            temperature=DEFAULT_TEMPERATURE,
            prompt_name="course_chat"
        )

        answer = response.content

        # 5. Persist the new exchange into the Redis-backed history.
        await self._save_conversation_history(
            conversation_id=conversation_id,
            user_message=query,
            assistant_message=answer
        )

        # 6. Keep the per-user conversation index in sync.
        if is_new_conversation:
            await self._add_to_conversation_index(user_id, conversation_id, course_id)
        else:
            await self._update_conversation_index(user_id, conversation_id)

        logger.info(
            f"课程对话完成 - course_id: {course_id}, conversation_id: {conversation_id}, "
            f"provider: {response.provider}, tokens: {response.total_tokens}"
        )

        return {
            "success": True,
            "answer": answer,
            "conversation_id": conversation_id,
            "ai_provider": response.provider,
            "ai_model": response.model,
            "ai_tokens": response.total_tokens,
            "ai_latency_ms": response.latency_ms,
        }

    except Exception as e:
        logger.error(
            f"课程对话失败 - course_id: {course_id}, user_id: {user_id}, error: {e}",
            exc_info=True
        )
        raise ExternalServiceError(f"课程对话失败: {e}")
|
||||
|
||||
async def chat_stream(
    self,
    db: AsyncSession,
    course_id: int,
    query: str,
    user_id: int,
    conversation_id: Optional[str] = None
) -> AsyncGenerator[Tuple[str, Optional[str]], None]:
    """
    Chat with a course (streaming output).

    Args:
        db: database session.
        course_id: course whose knowledge points form the knowledge base.
        query: the user's question.
        user_id: requesting user.
        conversation_id: pass an existing id to continue a conversation.

    Yields:
        (event, data) tuples:
        - ("conversation_started", conversation_id): a new conversation began
        - ("chunk", text): incremental answer text
        - ("end", None): stream finished
        - ("error", message): terminal failure (no exception is raised)
    """
    full_answer = ""

    try:
        logger.info(
            f"开始流式课程对话 V2 - course_id: {course_id}, user_id: {user_id}, "
            f"conversation_id: {conversation_id}"
        )

        # 1. Load the course's knowledge points as the knowledge base.
        knowledge_base = await self._get_course_knowledge(db, course_id)

        if not knowledge_base:
            logger.warning(f"课程 {course_id} 没有知识点,使用空知识库")
            knowledge_base = "(该课程暂无知识点内容)"

        # 2. Reuse the given conversation id or mint a new one.
        is_new_conversation = False
        if not conversation_id:
            conversation_id = self._generate_conversation_id(user_id, course_id)
            is_new_conversation = True
            logger.info(f"创建新会话: {conversation_id}")

        # 3. Announce the new conversation before any chunks arrive.
        if is_new_conversation:
            yield ("conversation_started", conversation_id)

        # 4. Assemble system prompt + history window + current question.
        messages = await self._build_messages(
            knowledge_base=knowledge_base,
            query=query,
            user_id=user_id,
            conversation_id=conversation_id
        )

        # 5. Stream the AI reply, accumulating the full answer for storage.
        ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)
        async for chunk in ai_service.chat_stream(
            messages=messages,
            model=DEFAULT_CHAT_MODEL,
            temperature=DEFAULT_TEMPERATURE,
            prompt_name="course_chat"
        ):
            full_answer += chunk
            yield ("chunk", chunk)

        # 6. Signal completion before the (best-effort) persistence steps.
        yield ("end", None)

        # 7. Persist the new exchange into the Redis-backed history.
        await self._save_conversation_history(
            conversation_id=conversation_id,
            user_message=query,
            assistant_message=full_answer
        )

        # 8. Keep the per-user conversation index in sync.
        if is_new_conversation:
            await self._add_to_conversation_index(user_id, conversation_id, course_id)
        else:
            await self._update_conversation_index(user_id, conversation_id)

        logger.info(
            f"流式课程对话完成 - course_id: {course_id}, conversation_id: {conversation_id}, "
            f"answer_length: {len(full_answer)}"
        )

    except Exception as e:
        logger.error(
            f"流式课程对话失败 - course_id: {course_id}, user_id: {user_id}, error: {e}",
            exc_info=True
        )
        yield ("error", str(e))
|
||||
|
||||
async def _get_course_knowledge(
    self,
    db: AsyncSession,
    course_id: int
) -> str:
    """
    Build the knowledge-base text from the course's knowledge points.

    Args:
        db: database session.
        course_id: course to load knowledge points for.

    Returns:
        Concatenated knowledge text, or "" when the course has none.

    Raises:
        Exception: database errors are logged and re-raised.
    """
    try:
        # Knowledge points joined to their (non-deleted) source materials.
        query = text("""
            SELECT kp.name, kp.description
            FROM knowledge_points kp
            INNER JOIN course_materials cm ON kp.material_id = cm.id
            WHERE kp.course_id = :course_id
            AND kp.is_deleted = 0
            AND cm.is_deleted = 0
            ORDER BY kp.id
            LIMIT :limit
        """)

        result = await db.execute(
            query,
            {"course_id": course_id, "limit": MAX_KNOWLEDGE_POINTS}
        )
        rows = result.fetchall()

        if not rows:
            logger.warning(f"课程 {course_id} 没有关联的知识点")
            return ""

        # Render each point through the shared template, stopping once the
        # accumulated text would exceed the hard length cap.
        knowledge_items = []
        total_length = 0

        for row in rows:
            name = row[0] or ""
            description = row[1] or ""

            item = KNOWLEDGE_ITEM_TEMPLATE.format(
                name=name,
                description=description
            )

            # Stop (rather than truncate mid-item) at the length limit.
            if total_length + len(item) > MAX_KNOWLEDGE_BASE_LENGTH:
                logger.warning(
                    f"知识库文本已达到最大长度限制 {MAX_KNOWLEDGE_BASE_LENGTH},"
                    f"停止添加更多知识点"
                )
                break

            knowledge_items.append(item)
            total_length += len(item)

        knowledge_base = "\n".join(knowledge_items)

        logger.info(
            f"获取课程知识点成功 - course_id: {course_id}, "
            f"count: {len(knowledge_items)}, length: {len(knowledge_base)}"
        )

        return knowledge_base

    except Exception as e:
        logger.error(f"获取课程知识点失败: {e}")
        raise
|
||||
|
||||
async def _build_messages(
    self,
    knowledge_base: str,
    query: str,
    user_id: int,
    conversation_id: str
) -> List[Dict[str, str]]:
    """Assemble the chat payload: system prompt + trimmed history + question.

    Args:
        knowledge_base: rendered course knowledge text.
        query: the user's current question.
        user_id: requesting user (kept for interface symmetry).
        conversation_id: conversation whose history is prepended.

    Returns:
        OpenAI-style message list.
    """
    # The system prompt carries the course knowledge base.
    system_msg = {
        "role": "system",
        "content": SYSTEM_PROMPT.format(knowledge_base=knowledge_base),
    }

    # Prior turns, capped to the rolling window
    # (each turn is a user+assistant pair, hence * 2).
    history = await self._get_conversation_history(conversation_id)
    window = CONVERSATION_WINDOW_SIZE * 2
    if len(history) > window:
        history = history[-window:]

    current_msg = {"role": "user", "content": USER_PROMPT.format(query=query)}

    messages = [system_msg, *history, current_msg]

    logger.debug(
        f"构建消息列表 - total: {len(messages)}, history: {len(history)}"
    )

    return messages
|
||||
|
||||
def _generate_conversation_id(self, user_id: int, course_id: int) -> str:
|
||||
"""生成会话ID"""
|
||||
unique_id = uuid.uuid4().hex[:8]
|
||||
return f"conv_{user_id}_{course_id}_{unique_id}"
|
||||
|
||||
async def _get_conversation_history(
|
||||
self,
|
||||
conversation_id: str
|
||||
) -> List[Dict[str, str]]:
|
||||
"""
|
||||
从 Redis 获取会话历史
|
||||
|
||||
Args:
|
||||
conversation_id: 会话ID
|
||||
|
||||
Returns:
|
||||
消息列表 [{"role": "user/assistant", "content": "..."}]
|
||||
"""
|
||||
try:
|
||||
from app.core.redis import get_redis_client
|
||||
|
||||
redis = get_redis_client()
|
||||
key = f"{self.CONVERSATION_KEY_PREFIX}{conversation_id}"
|
||||
|
||||
data = await redis.get(key)
|
||||
if not data:
|
||||
return []
|
||||
|
||||
history = json.loads(data)
|
||||
return history
|
||||
|
||||
except RuntimeError:
|
||||
# Redis 未初始化,返回空历史
|
||||
logger.warning("Redis 未初始化,无法获取会话历史")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.warning(f"获取会话历史失败: {e}")
|
||||
return []
|
||||
|
||||
async def _save_conversation_history(
|
||||
self,
|
||||
conversation_id: str,
|
||||
user_message: str,
|
||||
assistant_message: str
|
||||
) -> None:
|
||||
"""
|
||||
保存对话历史到 Redis
|
||||
|
||||
Args:
|
||||
conversation_id: 会话ID
|
||||
user_message: 用户消息
|
||||
assistant_message: AI 回复
|
||||
"""
|
||||
try:
|
||||
from app.core.redis import get_redis_client
|
||||
|
||||
redis = get_redis_client()
|
||||
key = f"{self.CONVERSATION_KEY_PREFIX}{conversation_id}"
|
||||
|
||||
# 获取现有历史
|
||||
history = await self._get_conversation_history(conversation_id)
|
||||
|
||||
# 添加新消息
|
||||
history.append({"role": "user", "content": user_message})
|
||||
history.append({"role": "assistant", "content": assistant_message})
|
||||
|
||||
# 限制历史长度
|
||||
max_messages = CONVERSATION_WINDOW_SIZE * 2
|
||||
if len(history) > max_messages:
|
||||
history = history[-max_messages:]
|
||||
|
||||
# 保存到 Redis
|
||||
await redis.setex(
|
||||
key,
|
||||
CONVERSATION_TTL,
|
||||
json.dumps(history, ensure_ascii=False)
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"保存会话历史成功 - conversation_id: {conversation_id}, "
|
||||
f"messages: {len(history)}"
|
||||
)
|
||||
|
||||
except RuntimeError:
|
||||
# Redis 未初始化,跳过保存
|
||||
logger.warning("Redis 未初始化,无法保存会话历史")
|
||||
except Exception as e:
|
||||
logger.warning(f"保存会话历史失败: {e}")
|
||||
|
||||
async def get_conversation_messages(
    self,
    conversation_id: str,
    user_id: int
) -> List[Dict[str, Any]]:
    """Return the stored history of *conversation_id* as API-shaped dicts.

    Ownership is enforced by checking the ``conv_<user_id>_`` prefix that
    `_generate_conversation_id` embeds; foreign conversations yield [].

    Args:
        conversation_id: conversation to read.
        user_id: requesting user (for the ownership check).

    Returns:
        List of ``{"id", "role", "content"}`` dicts, oldest first.
    """
    owner_prefix = f"conv_{user_id}_"
    if not conversation_id.startswith(owner_prefix):
        logging.getLogger(__name__).warning(
            f"用户 {user_id} 尝试访问不属于自己的会话: {conversation_id}"
        )
        return []

    history = await self._get_conversation_history(conversation_id)

    # Number messages sequentially so the client has stable row ids.
    return [
        {"id": idx, "role": item["role"], "content": item["content"]}
        for idx, item in enumerate(history)
    ]
|
||||
|
||||
async def _add_to_conversation_index(
|
||||
self,
|
||||
user_id: int,
|
||||
conversation_id: str,
|
||||
course_id: int
|
||||
) -> None:
|
||||
"""
|
||||
将会话添加到用户索引
|
||||
|
||||
Args:
|
||||
user_id: 用户ID
|
||||
conversation_id: 会话ID
|
||||
course_id: 课程ID
|
||||
"""
|
||||
try:
|
||||
from app.core.redis import get_redis_client
|
||||
|
||||
redis = get_redis_client()
|
||||
|
||||
# 1. 添加到用户的会话索引(Sorted Set,score 为时间戳)
|
||||
index_key = f"{CONVERSATION_INDEX_PREFIX}{user_id}{CONVERSATION_INDEX_SUFFIX}"
|
||||
timestamp = time.time()
|
||||
await redis.zadd(index_key, {conversation_id: timestamp})
|
||||
await redis.expire(index_key, CONVERSATION_INDEX_TTL)
|
||||
|
||||
# 2. 保存会话元数据
|
||||
meta_key = f"{CONVERSATION_META_PREFIX}{conversation_id}"
|
||||
meta_data = {
|
||||
"conversation_id": conversation_id,
|
||||
"user_id": user_id,
|
||||
"course_id": course_id,
|
||||
"created_at": timestamp,
|
||||
"updated_at": timestamp,
|
||||
}
|
||||
await redis.setex(
|
||||
meta_key,
|
||||
CONVERSATION_INDEX_TTL,
|
||||
json.dumps(meta_data, ensure_ascii=False)
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"会话已添加到索引 - user_id: {user_id}, conversation_id: {conversation_id}"
|
||||
)
|
||||
|
||||
except RuntimeError:
|
||||
logger.warning("Redis 未初始化,无法添加会话索引")
|
||||
except Exception as e:
|
||||
logger.warning(f"添加会话索引失败: {e}")
|
||||
|
||||
async def _update_conversation_index(
|
||||
self,
|
||||
user_id: int,
|
||||
conversation_id: str
|
||||
) -> None:
|
||||
"""
|
||||
更新会话的最后活跃时间
|
||||
|
||||
Args:
|
||||
user_id: 用户ID
|
||||
conversation_id: 会话ID
|
||||
"""
|
||||
try:
|
||||
from app.core.redis import get_redis_client
|
||||
|
||||
redis = get_redis_client()
|
||||
|
||||
# 更新索引中的时间戳
|
||||
index_key = f"{CONVERSATION_INDEX_PREFIX}{user_id}{CONVERSATION_INDEX_SUFFIX}"
|
||||
timestamp = time.time()
|
||||
await redis.zadd(index_key, {conversation_id: timestamp})
|
||||
await redis.expire(index_key, CONVERSATION_INDEX_TTL)
|
||||
|
||||
# 更新元数据中的 updated_at
|
||||
meta_key = f"{CONVERSATION_META_PREFIX}{conversation_id}"
|
||||
meta_data = await redis.get(meta_key)
|
||||
if meta_data:
|
||||
meta = json.loads(meta_data)
|
||||
meta["updated_at"] = timestamp
|
||||
await redis.setex(
|
||||
meta_key,
|
||||
CONVERSATION_INDEX_TTL,
|
||||
json.dumps(meta, ensure_ascii=False)
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"会话索引已更新 - user_id: {user_id}, conversation_id: {conversation_id}"
|
||||
)
|
||||
|
||||
except RuntimeError:
|
||||
logger.warning("Redis 未初始化,无法更新会话索引")
|
||||
except Exception as e:
|
||||
logger.warning(f"更新会话索引失败: {e}")
|
||||
|
||||
async def list_user_conversations(
    self,
    user_id: int,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    List a user's conversations, most recently active first.

    Args:
        user_id: owner of the conversations.
        limit: maximum number of entries to return.

    Returns:
        List of dicts with id, course_id, timestamps, a preview of the
        last assistant message and the message count; [] on any failure.
    """
    try:
        from app.core.redis import get_redis_client

        redis = get_redis_client()

        # 1. Most recent conversation ids from the sorted-set index.
        index_key = f"{CONVERSATION_INDEX_PREFIX}{user_id}{CONVERSATION_INDEX_SUFFIX}"
        conversation_ids = await redis.zrevrange(index_key, 0, limit - 1)

        if not conversation_ids:
            logger.debug(f"用户 {user_id} 没有会话记录")
            return []

        # 2. Hydrate each id with metadata and a last-message preview.
        conversations = []
        for conv_id in conversation_ids:
            # Redis clients may return bytes; normalise to str.
            if isinstance(conv_id, bytes):
                conv_id = conv_id.decode('utf-8')

            # Load the metadata blob, if it hasn't expired.
            meta_key = f"{CONVERSATION_META_PREFIX}{conv_id}"
            meta_data = await redis.get(meta_key)

            if meta_data:
                if isinstance(meta_data, bytes):
                    meta_data = meta_data.decode('utf-8')
                meta = json.loads(meta_data)
            else:
                # Metadata expired: reconstruct course_id from the id.
                # Format: conv_{user_id}_{course_id}_{uuid}
                # NOTE(review): int(parts[2]) raises for malformed ids and
                # the outer except then discards the whole listing — confirm
                # ids always come from _generate_conversation_id.
                parts = conv_id.split('_')
                course_id = int(parts[2]) if len(parts) >= 3 else 0
                meta = {
                    "conversation_id": conv_id,
                    "user_id": user_id,
                    "course_id": course_id,
                    "created_at": time.time(),
                    "updated_at": time.time(),
                }

            # Preview: last assistant message, truncated to 100 chars.
            history = await self._get_conversation_history(conv_id)
            last_message = ""
            if history:
                for msg in reversed(history):
                    if msg["role"] == "assistant":
                        last_message = msg["content"][:100]
                        if len(msg["content"]) > 100:
                            last_message += "..."
                        break

            conversations.append({
                "id": conv_id,
                "course_id": meta.get("course_id"),
                "created_at": meta.get("created_at"),
                "updated_at": meta.get("updated_at"),
                "last_message": last_message,
                "message_count": len(history),
            })

        logger.info(f"获取用户会话列表 - user_id: {user_id}, count: {len(conversations)}")
        return conversations

    except RuntimeError:
        logger.warning("Redis 未初始化,无法获取会话列表")
        return []
    except Exception as e:
        logger.warning(f"获取会话列表失败: {e}")
        return []
|
||||
|
||||
# Alias methods consumed by the API layer.
async def get_conversations(
    self,
    user_id: int,
    course_id: Optional[int] = None,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """List a user's conversations, optionally filtered to one course.

    Args:
        user_id: owner of the conversations.
        course_id: when given, keep only this course's conversations.
        limit: maximum number of entries fetched from the index.

    Returns:
        Conversation summary dicts (see list_user_conversations).
    """
    results = await self.list_user_conversations(user_id, limit)

    if course_id is None:
        return results

    return [entry for entry in results if entry.get("course_id") == course_id]
|
||||
|
||||
async def get_messages(
|
||||
self,
|
||||
conversation_id: str,
|
||||
user_id: int,
|
||||
limit: int = 50
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
获取会话历史消息(别名方法)
|
||||
|
||||
Args:
|
||||
conversation_id: 会话ID
|
||||
user_id: 用户ID(用于权限验证)
|
||||
limit: 返回数量限制
|
||||
|
||||
Returns:
|
||||
消息列表
|
||||
"""
|
||||
messages = await self.get_conversation_messages(conversation_id, limit)
|
||||
return messages
|
||||
|
||||
|
||||
# Module-level singleton instance shared by the API layer.
course_chat_service_v2 = CourseChatServiceV2()
|
||||
|
||||
61
backend/app/services/ai/coze/__init__.py
Normal file
61
backend/app/services/ai/coze/__init__.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
Coze AI 服务模块
|
||||
"""
|
||||
|
||||
from .client import get_coze_client, get_auth_manager, get_bot_config, get_workspace_id
|
||||
from .service import get_coze_service, CozeService
|
||||
from .models import (
|
||||
SessionType,
|
||||
MessageRole,
|
||||
ContentType,
|
||||
StreamEventType,
|
||||
CozeSession,
|
||||
CozeMessage,
|
||||
StreamEvent,
|
||||
CreateSessionRequest,
|
||||
CreateSessionResponse,
|
||||
SendMessageRequest,
|
||||
EndSessionRequest,
|
||||
EndSessionResponse,
|
||||
)
|
||||
from .exceptions import (
|
||||
CozeException,
|
||||
CozeAuthError,
|
||||
CozeAPIError,
|
||||
CozeRateLimitError,
|
||||
CozeTimeoutError,
|
||||
CozeStreamError,
|
||||
map_coze_error_to_exception,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Client
|
||||
"get_coze_client",
|
||||
"get_auth_manager",
|
||||
"get_bot_config",
|
||||
"get_workspace_id",
|
||||
# Service
|
||||
"get_coze_service",
|
||||
"CozeService",
|
||||
# Models
|
||||
"SessionType",
|
||||
"MessageRole",
|
||||
"ContentType",
|
||||
"StreamEventType",
|
||||
"CozeSession",
|
||||
"CozeMessage",
|
||||
"StreamEvent",
|
||||
"CreateSessionRequest",
|
||||
"CreateSessionResponse",
|
||||
"SendMessageRequest",
|
||||
"EndSessionRequest",
|
||||
"EndSessionResponse",
|
||||
# Exceptions
|
||||
"CozeException",
|
||||
"CozeAuthError",
|
||||
"CozeAPIError",
|
||||
"CozeRateLimitError",
|
||||
"CozeTimeoutError",
|
||||
"CozeStreamError",
|
||||
"map_coze_error_to_exception",
|
||||
]
|
||||
203
backend/app/services/ai/coze/client.py
Normal file
203
backend/app/services/ai/coze/client.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
Coze AI 客户端管理
|
||||
负责管理 Coze API 的认证和客户端实例
|
||||
"""
|
||||
from functools import lru_cache
|
||||
from typing import Optional, Dict, Any
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from cozepy import Coze, TokenAuth, JWTAuth, COZE_CN_BASE_URL
|
||||
|
||||
from app.core.config import get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CozeAuthManager:
    """Coze authentication manager.

    Builds authenticated Coze clients using either OAuth (JWT, preferred,
    auto-refreshing) or a personal access token (PAT, which expires).

    Refactor: the OAuth-completeness check and the private-key/JWTAuth
    construction were duplicated across ``_create_oauth_auth``,
    ``get_client`` and ``get_oauth_token``; they now live in the private
    helpers ``_oauth_configured``, ``_load_private_key`` and
    ``_build_jwt_auth``.
    """

    def __init__(self):
        self.settings = get_settings()
        # Cached client; bypassed/rebuilt when get_client(force_new=True).
        self._client: Optional[Coze] = None

    def _oauth_configured(self) -> bool:
        """Return True when every setting required for JWT auth is present."""
        return all(
            [
                self.settings.COZE_OAUTH_CLIENT_ID,
                self.settings.COZE_OAUTH_PUBLIC_KEY_ID,
                self.settings.COZE_OAUTH_PRIVATE_KEY_PATH,
            ]
        )

    def _load_private_key(self) -> str:
        """Read the OAuth private key file.

        Returns:
            The PEM-encoded private key text.

        Raises:
            FileNotFoundError: if the configured key file is missing.
        """
        private_key_path = Path(self.settings.COZE_OAUTH_PRIVATE_KEY_PATH)
        if not private_key_path.exists():
            raise FileNotFoundError(f"私钥文件不存在: {private_key_path}")
        with open(private_key_path, "r") as f:
            return f.read()

    def _build_jwt_auth(self, private_key: str) -> JWTAuth:
        """Construct a JWTAuth bound to the configured (CN) base URL."""
        return JWTAuth(
            client_id=self.settings.COZE_OAUTH_CLIENT_ID,
            private_key=private_key,
            public_key_id=self.settings.COZE_OAUTH_PUBLIC_KEY_ID,
            base_url=self.settings.COZE_API_BASE or COZE_CN_BASE_URL,
        )

    def _create_pat_auth(self) -> TokenAuth:
        """Create personal-access-token auth.

        Raises:
            ValueError: if COZE_API_TOKEN is not configured.
        """
        if not self.settings.COZE_API_TOKEN:
            raise ValueError("COZE_API_TOKEN 未配置")
        return TokenAuth(token=self.settings.COZE_API_TOKEN)

    def _create_oauth_auth(self) -> JWTAuth:
        """Create OAuth (JWT) auth.

        Raises:
            ValueError: if the OAuth settings are incomplete.
            FileNotFoundError: if the private key file is missing.
        """
        if not self._oauth_configured():
            raise ValueError("OAuth 配置不完整")

        # Key-file errors surface as-is; only JWTAuth construction is logged.
        private_key = self._load_private_key()
        try:
            return self._build_jwt_auth(private_key)
        except Exception as e:
            logger.error(f"创建 OAuth 认证失败: {e}")
            raise

    def get_client(self, force_new: bool = False) -> Coze:
        """
        Return a Coze client instance.

        Args:
            force_new: Always build a fresh client (useful for long-running
                requests so a cached token cannot expire mid-flight).

        Auth priority:
            1. OAuth (recommended) when fully configured — auto-refreshes.
            2. PAT only when OAuth is not configured (PAT tokens expire).

        Raises:
            ValueError: when no auth scheme is usable.
        """
        if self._client is not None and not force_new:
            return self._client

        if self._oauth_configured():
            # OAuth fully configured: use it; never fall back to a
            # possibly-expired PAT.
            try:
                auth = self._create_oauth_auth()
                auth_type = "OAuth"
                logger.info("使用 OAuth 认证")
            except Exception as e:
                logger.error(f"OAuth 认证创建失败: {e}")
                raise ValueError(f"OAuth 认证失败,请检查私钥文件和配置: {e}") from e
        elif self.settings.COZE_API_TOKEN:
            auth = self._create_pat_auth()
            auth_type = "PAT"
            logger.warning("使用 PAT 认证(注意:PAT会过期,建议配置OAuth)")
        else:
            raise ValueError("Coze 认证未配置:需要配置 OAuth 或 PAT Token")

        client = Coze(
            auth=auth, base_url=self.settings.COZE_API_BASE or COZE_CN_BASE_URL
        )
        logger.debug(f"Coze客户端创建成功,认证方式: {auth_type}, force_new: {force_new}")

        # Cache only the default client; forced clients are throwaway.
        if not force_new:
            self._client = client

        return client

    def reset(self):
        """Drop the cached client so the next get_client() rebuilds it."""
        self._client = None

    def get_oauth_token(self) -> str:
        """
        Return an OAuth JWT token for direct front-end use.

        Returns:
            The signed JWT string (JWTAuth generates it internally).

        Raises:
            ValueError: if the OAuth settings are incomplete.
            FileNotFoundError: if the private key file is missing.
        """
        if not self._oauth_configured():
            raise ValueError("OAuth 配置不完整")
        return self._build_jwt_auth(self._load_private_key()).token
|
||||
|
||||
|
||||
@lru_cache()
def get_auth_manager() -> CozeAuthManager:
    """Return the process-wide :class:`CozeAuthManager` singleton.

    ``lru_cache`` with no arguments caches the single no-arg call, so the
    manager is constructed exactly once per process.
    """
    return CozeAuthManager()
|
||||
|
||||
|
||||
def get_coze_client(force_new: bool = False) -> Coze:
    """
    Return a Coze client from the shared auth manager.

    Args:
        force_new: Build a brand-new client instead of the cached one
            (useful for workflows and other long-running requests).
    """
    manager = get_auth_manager()
    return manager.get_client(force_new=force_new)
|
||||
|
||||
|
||||
def get_workspace_id() -> str:
    """Return the configured Coze workspace ID.

    Raises:
        ValueError: if COZE_WORKSPACE_ID is not set.
    """
    workspace_id = get_settings().COZE_WORKSPACE_ID
    if not workspace_id:
        raise ValueError("COZE_WORKSPACE_ID 未配置")
    return workspace_id
|
||||
|
||||
|
||||
def get_bot_config(session_type: str) -> Dict[str, Any]:
    """
    Resolve the Bot configuration for a session type.

    Args:
        session_type: Either ``"course_chat"`` or ``"training"``.

    Returns:
        Dict with ``bot_id`` and ``workspace_id``.

    Raises:
        ValueError: for an unknown session type or a missing setting.
    """
    settings = get_settings()

    # Map session type -> settings attribute holding its bot id.
    setting_names = {
        "course_chat": "COZE_CHAT_BOT_ID",
        "training": "COZE_TRAINING_BOT_ID",
    }
    if session_type not in setting_names:
        raise ValueError(f"不支持的会话类型: {session_type}")

    setting_name = setting_names[session_type]
    bot_id = getattr(settings, setting_name)
    if not bot_id:
        raise ValueError(f"{setting_name} 未配置")

    return {"bot_id": bot_id, "workspace_id": settings.COZE_WORKSPACE_ID}
|
||||
44
backend/app/services/ai/coze/client_backup.py
Normal file
44
backend/app/services/ai/coze/client_backup.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Coze客户端(临时模拟,等Agent-Coze实现后替换)"""
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CozeClient:
    """
    Mock Coze client.

    TODO: replace this class with the real Coze gateway client once the
    Agent-Coze module is implemented.
    """

    async def create_conversation(
        self, bot_id: str, user_id: str, meta_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Create a conversation (mocked): returns a synthetic id."""
        logger.info(f"模拟创建Coze会话: bot_id={bot_id}, user_id={user_id}")

        mock_id = f"mock_conversation_{user_id}_{bot_id[:8]}"
        return {
            "conversation_id": mock_id,
            "bot_id": bot_id,
            "status": "active",
        }

    async def send_message(
        self, conversation_id: str, content: str, message_type: str = "text"
    ) -> Dict[str, Any]:
        """Send a message (mocked): echoes a canned assistant reply."""
        logger.info(f"模拟发送消息到会话 {conversation_id}: {content[:50]}...")

        return {
            "message_id": f"mock_msg_{conversation_id[:8]}",
            "content": f"这是对'{content[:30]}...'的模拟回复",
            "role": "assistant",
        }

    async def end_conversation(self, conversation_id: str) -> Dict[str, Any]:
        """End a conversation (mocked)."""
        logger.info(f"模拟结束会话: {conversation_id}")
        return {"status": "completed", "conversation_id": conversation_id}
|
||||
101
backend/app/services/ai/coze/exceptions.py
Normal file
101
backend/app/services/ai/coze/exceptions.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
Coze 服务异常定义
|
||||
"""
|
||||
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
|
||||
class CozeException(Exception):
    """Base exception for all Coze service errors.

    Attributes:
        message: Human-readable error description.
        code: Machine-readable error code, if any.
        status_code: Suggested HTTP status code, if any.
        details: Extra context about the failure (always a dict).
    """

    def __init__(
        self,
        message: str,
        code: Optional[str] = None,
        status_code: Optional[int] = None,
        details: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(message)
        self.message = message
        self.code = code
        self.status_code = status_code
        # Normalise a missing details argument to an empty dict.
        self.details = {} if details is None else details
|
||||
|
||||
|
||||
class CozeAuthError(CozeException):
    """Authentication failure (invalid or expired credentials)."""

    pass


class CozeAPIError(CozeException):
    """Generic Coze API call failure."""

    pass


class CozeRateLimitError(CozeException):
    """Coze API rate limit exceeded."""

    pass


class CozeTimeoutError(CozeException):
    """Coze API request timed out."""

    pass


class CozeStreamError(CozeException):
    """Failure while consuming a streaming response."""

    pass
|
||||
|
||||
|
||||
def map_coze_error_to_exception(error: Exception) -> CozeException:
    """
    Translate a raw Coze SDK error into the unified exception hierarchy.

    Classification is keyword-based on the error message; the original
    text is always preserved under ``details["original_error"]``.

    Args:
        error: The original exception.

    Returns:
        CozeException: The mapped exception (CozeAPIError as fallback).
    """
    error_message = str(error)
    lowered = error_message.lower()
    details = {"original_error": error_message}

    if "authentication" in lowered or "unauthorized" in lowered:
        return CozeAuthError(
            message="Coze 认证失败",
            code="COZE_AUTH_ERROR",
            status_code=401,
            details=details,
        )

    if "rate limit" in lowered:
        return CozeRateLimitError(
            message="Coze API 速率限制",
            code="COZE_RATE_LIMIT",
            status_code=429,
            details=details,
        )

    if "timeout" in lowered:
        return CozeTimeoutError(
            message="Coze API 调用超时",
            code="COZE_TIMEOUT",
            status_code=504,
            details=details,
        )

    # Anything unrecognised maps to a generic API error.
    return CozeAPIError(
        message="Coze API 调用失败",
        code="COZE_API_ERROR",
        status_code=500,
        details=details,
    )
|
||||
136
backend/app/services/ai/coze/models.py
Normal file
136
backend/app/services/ai/coze/models.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Coze 服务数据模型
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Any, Literal
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class SessionType(str, Enum):
    """Session type."""

    COURSE_CHAT = "course_chat"  # course Q&A chat
    TRAINING = "training"  # practice / coaching session
    EXAM = "exam"  # exam session


class MessageRole(str, Enum):
    """Author role of a message."""

    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"


class ContentType(str, Enum):
    """Payload type carried by a message."""

    TEXT = "text"
    CARD = "card"
    IMAGE = "image"
    FILE = "file"


class StreamEventType(str, Enum):
    """Event names emitted while streaming a reply."""

    MESSAGE_START = "conversation.message.start"
    MESSAGE_DELTA = "conversation.message.delta"
    MESSAGE_COMPLETED = "conversation.message.completed"
    ERROR = "error"
    DONE = "done"
|
||||
|
||||
|
||||
class CozeSession(BaseModel):
    """A Coze conversation session tracked by this service."""

    session_id: str = Field(..., description="会话ID")
    conversation_id: str = Field(..., description="Coze对话ID")
    session_type: SessionType = Field(..., description="会话类型")
    user_id: str = Field(..., description="用户ID")
    bot_id: str = Field(..., description="Bot ID")
    created_at: datetime = Field(default_factory=datetime.now, description="创建时间")
    ended_at: Optional[datetime] = Field(None, description="结束时间")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")

    class Config:
        # NOTE(review): `json_encoders` is pydantic-v1 style config; if the
        # project runs pydantic v2 this is deprecated — confirm the version.
        json_encoders = {datetime: lambda v: v.isoformat()}


class CozeMessage(BaseModel):
    """A single message exchanged within a session."""

    message_id: str = Field(..., description="消息ID")
    session_id: str = Field(..., description="会话ID")
    role: MessageRole = Field(..., description="消息角色")
    content: str = Field(..., description="消息内容")
    content_type: ContentType = Field(ContentType.TEXT, description="内容类型")
    created_at: datetime = Field(default_factory=datetime.now, description="创建时间")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")

    class Config:
        # NOTE(review): pydantic-v1 style serialization config — see above.
        json_encoders = {datetime: lambda v: v.isoformat()}


class StreamEvent(BaseModel):
    """One event of a streamed reply (delta, completion, error or done)."""

    event: StreamEventType = Field(..., description="事件类型")
    data: Dict[str, Any] = Field(..., description="事件数据")
    message_id: Optional[str] = Field(None, description="消息ID")
    content: Optional[str] = Field(None, description="内容")
    content_type: Optional[ContentType] = Field(None, description="内容类型")
    role: Optional[MessageRole] = Field(None, description="角色")
    error: Optional[str] = Field(None, description="错误信息")


class CreateSessionRequest(BaseModel):
    """Request payload for creating a session."""

    session_type: SessionType = Field(..., description="会话类型")
    user_id: str = Field(..., description="用户ID")
    course_id: Optional[str] = Field(None, description="课程ID (课程对话时必需)")
    training_topic: Optional[str] = Field(None, description="陪练主题 (陪练时可选)")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="额外元数据")


class CreateSessionResponse(BaseModel):
    """Response payload after a session is created."""

    session_id: str = Field(..., description="会话ID")
    conversation_id: str = Field(..., description="Coze对话ID")
    bot_id: str = Field(..., description="Bot ID")
    created_at: datetime = Field(..., description="创建时间")

    class Config:
        # NOTE(review): pydantic-v1 style serialization config — see above.
        json_encoders = {datetime: lambda v: v.isoformat()}


class SendMessageRequest(BaseModel):
    """Request payload for sending a message into a session."""

    session_id: str = Field(..., description="会话ID")
    content: str = Field(..., description="消息内容")
    file_ids: List[str] = Field(default_factory=list, description="附件ID列表")
    stream: bool = Field(True, description="是否流式响应")


class EndSessionRequest(BaseModel):
    """Request payload for ending a session."""

    reason: Optional[str] = Field(None, description="结束原因")
    feedback: Optional[Dict[str, Any]] = Field(None, description="用户反馈")


class EndSessionResponse(BaseModel):
    """Response payload returned when a session ends."""

    session_id: str = Field(..., description="会话ID")
    ended_at: datetime = Field(..., description="结束时间")
    duration_seconds: int = Field(..., description="会话时长(秒)")
    message_count: int = Field(..., description="消息数量")

    class Config:
        # NOTE(review): pydantic-v1 style serialization config — see above.
        json_encoders = {datetime: lambda v: v.isoformat()}
|
||||
335
backend/app/services/ai/coze/service.py
Normal file
335
backend/app/services/ai/coze/service.py
Normal file
@@ -0,0 +1,335 @@
|
||||
"""
|
||||
Coze 服务层实现
|
||||
处理会话管理、消息发送、流式响应等核心功能
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from typing import AsyncIterator, Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from cozepy import ChatEventType, Message, MessageContentType
|
||||
|
||||
from .client import get_coze_client, get_bot_config, get_workspace_id
|
||||
from .models import (
|
||||
CozeSession,
|
||||
CozeMessage,
|
||||
StreamEvent,
|
||||
SessionType,
|
||||
MessageRole,
|
||||
ContentType,
|
||||
StreamEventType,
|
||||
CreateSessionRequest,
|
||||
CreateSessionResponse,
|
||||
SendMessageRequest,
|
||||
EndSessionRequest,
|
||||
EndSessionResponse,
|
||||
)
|
||||
from .exceptions import (
|
||||
CozeAPIError,
|
||||
CozeStreamError,
|
||||
CozeTimeoutError,
|
||||
map_coze_error_to_exception,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CozeService:
    """Coze service layer.

    Manages session lifecycle, sends messages, and adapts the SDK's
    streaming replies into :class:`StreamEvent` objects.

    BUGFIX: the original ``__init__`` called ``get_bot_config()`` with no
    argument, but that helper requires a ``session_type`` parameter — every
    construction raised TypeError. It also cached a dict and indexed keys
    (``"course_chat"``/``"training"``/``"exam"``) that ``get_bot_config``
    never returns. Bot IDs are now resolved lazily per session type in
    ``_get_bot_id_by_type``.
    """

    def __init__(self):
        self.client = get_coze_client()
        self.workspace_id = get_workspace_id()

        # In-memory session storage (production should use Redis).
        self._sessions: Dict[str, CozeSession] = {}
        self._messages: Dict[str, List[CozeMessage]] = {}

    async def create_session(
        self, request: CreateSessionRequest
    ) -> CreateSessionResponse:
        """
        Create a new session.

        Args:
            request: Session creation request.

        Returns:
            CreateSessionResponse: Identifiers of the created session.

        Raises:
            CozeException: mapped from any underlying SDK failure.
        """
        try:
            # Pick the bot matching the session type.
            bot_id = self._get_bot_id_by_type(request.session_type)

            # The SDK call blocks, so run it in a worker thread.
            conversation = await asyncio.to_thread(
                self.client.conversations.create, bot_id=bot_id
            )

            # Local bookkeeping record for the new conversation.
            session = CozeSession(
                session_id=str(uuid.uuid4()),
                conversation_id=conversation.id,
                session_type=request.session_type,
                user_id=request.user_id,
                bot_id=bot_id,
                metadata=request.metadata,
            )

            self._sessions[session.session_id] = session
            self._messages[session.session_id] = []

            logger.info(
                "创建会话成功",
                extra={
                    "session_id": session.session_id,
                    "conversation_id": conversation.id,
                    "session_type": request.session_type.value,
                    "user_id": request.user_id,
                },
            )

            return CreateSessionResponse(
                session_id=session.session_id,
                conversation_id=session.conversation_id,
                bot_id=session.bot_id,
                created_at=session.created_at,
            )

        except Exception as e:
            logger.error(f"创建会话失败: {e}", exc_info=True)
            raise map_coze_error_to_exception(e)

    async def send_message(
        self, request: SendMessageRequest
    ) -> AsyncIterator[StreamEvent]:
        """
        Send a message and stream the reply.

        Args:
            request: Message request.

        Yields:
            StreamEvent: Incremental reply events.

        Raises:
            CozeAPIError: when the session does not exist.
            CozeTimeoutError: on processing timeout.
        """
        session = self._get_session(request.session_id)
        if not session:
            raise CozeAPIError(f"会话不存在: {request.session_id}")

        # Record the outbound user message.
        user_message = CozeMessage(
            message_id=str(uuid.uuid4()),
            session_id=session.session_id,
            role=MessageRole.USER,
            content=request.content,
        )
        self._messages[session.session_id].append(user_message)

        try:
            # Recent history is re-sent as context on every turn.
            messages = self._build_message_history(session.session_id)

            stream = await asyncio.to_thread(
                self.client.chat.stream,
                bot_id=session.bot_id,
                conversation_id=session.conversation_id,
                additional_messages=messages,
                auto_save_history=True,
            )

            async for event in self._process_stream(stream, session.session_id):
                yield event

        except asyncio.TimeoutError:
            logger.error(f"消息发送超时: session_id={request.session_id}")
            raise CozeTimeoutError("消息处理超时")
        except Exception as e:
            logger.error(f"发送消息失败: {e}", exc_info=True)
            raise map_coze_error_to_exception(e)

    async def end_session(
        self, session_id: str, request: EndSessionRequest
    ) -> EndSessionResponse:
        """
        End a session and compute its summary statistics.

        Args:
            session_id: Session identifier.
            request: End-session request (reason, feedback).

        Returns:
            EndSessionResponse: Duration and message count.

        Raises:
            CozeAPIError: when the session does not exist.
        """
        session = self._get_session(session_id)
        if not session:
            raise CozeAPIError(f"会话不存在: {session_id}")

        session.ended_at = datetime.now()

        duration_seconds = int((session.ended_at - session.created_at).total_seconds())
        message_count = len(self._messages.get(session_id, []))

        # Keep the end reason / feedback with the session for later review.
        if request.reason:
            session.metadata["end_reason"] = request.reason
        if request.feedback:
            session.metadata["feedback"] = request.feedback

        logger.info(
            "会话结束",
            extra={
                "session_id": session_id,
                "duration_seconds": duration_seconds,
                "message_count": message_count,
                "reason": request.reason,
            },
        )

        return EndSessionResponse(
            session_id=session_id,
            ended_at=session.ended_at,
            duration_seconds=duration_seconds,
            message_count=message_count,
        )

    async def get_session_messages(
        self, session_id: str, limit: int = 50, offset: int = 0
    ) -> List[CozeMessage]:
        """Return a page of the session's message history."""
        messages = self._messages.get(session_id, [])
        return messages[offset : offset + limit]

    def _get_bot_id_by_type(self, session_type: SessionType) -> str:
        """Resolve the Bot ID for a session type (lazily, per request).

        Falls back to the training bot for types without a dedicated bot
        (e.g. EXAM), mirroring the original mapping's default.
        """
        try:
            return get_bot_config(session_type.value)["bot_id"]
        except ValueError:
            return get_bot_config(SessionType.TRAINING.value)["bot_id"]

    def _get_session(self, session_id: str) -> Optional[CozeSession]:
        """Look up a session record, or None if unknown."""
        return self._sessions.get(session_id)

    def _build_message_history(self, session_id: str) -> List[Message]:
        """Convert recent local messages into SDK Message objects.

        Only the last 10 messages are sent as conversational context.
        """
        messages = self._messages.get(session_id, [])
        history = []

        for msg in messages[-10:]:
            history.append(
                Message(
                    role=msg.role.value,
                    content=msg.content,
                    content_type=MessageContentType.TEXT,
                )
            )

        return history

    async def _process_stream(
        self, stream, session_id: str
    ) -> AsyncIterator[StreamEvent]:
        """Adapt the SDK's event stream into StreamEvent objects.

        NOTE(review): the SDK stream is iterated synchronously here, which
        can block the event loop while waiting for chunks — consider moving
        reads to a worker thread. Behavior left unchanged.
        """
        assistant_message_id = str(uuid.uuid4())
        accumulated_content = []
        content_type = ContentType.TEXT

        try:
            for event in stream:
                if event.event == ChatEventType.CONVERSATION_MESSAGE_DELTA:
                    # Partial chunk of the assistant reply.
                    content = event.message.content
                    accumulated_content.append(content)

                    # Detect card payloads (switches the whole reply type).
                    if (
                        hasattr(event.message, "content_type")
                        and event.message.content_type == "card"
                    ):
                        content_type = ContentType.CARD

                    yield StreamEvent(
                        event=StreamEventType.MESSAGE_DELTA,
                        data={
                            "conversation_id": event.conversation_id,
                            "message_id": assistant_message_id,
                            "content": content,
                            "content_type": content_type.value,
                        },
                        message_id=assistant_message_id,
                        content=content,
                        content_type=content_type,
                        role=MessageRole.ASSISTANT,
                    )

                elif event.event == ChatEventType.CONVERSATION_MESSAGE_COMPLETED:
                    # Reply finished: persist the full assistant message.
                    full_content = "".join(accumulated_content)

                    assistant_message = CozeMessage(
                        message_id=assistant_message_id,
                        session_id=session_id,
                        role=MessageRole.ASSISTANT,
                        content=full_content,
                        content_type=content_type,
                    )
                    self._messages[session_id].append(assistant_message)

                    yield StreamEvent(
                        event=StreamEventType.MESSAGE_COMPLETED,
                        data={
                            "conversation_id": event.conversation_id,
                            "message_id": assistant_message_id,
                            "content": full_content,
                            "content_type": content_type.value,
                            "usage": getattr(event, "usage", {}),
                        },
                        message_id=assistant_message_id,
                        content=full_content,
                        content_type=content_type,
                        role=MessageRole.ASSISTANT,
                    )

                elif event.event == ChatEventType.ERROR:
                    yield StreamEvent(
                        event=StreamEventType.ERROR,
                        data={"error": str(event)},
                        error=str(event),
                    )

        except Exception as e:
            logger.error(f"流式处理错误: {e}", exc_info=True)
            yield StreamEvent(
                event=StreamEventType.ERROR, data={"error": str(e)}, error=str(e)
            )
        finally:
            # Always terminate the stream with a DONE event.
            yield StreamEvent(
                event=StreamEventType.DONE, data={"session_id": session_id}
            )
|
||||
|
||||
|
||||
# Lazily-created process-wide service instance.
_service: Optional[CozeService] = None


def get_coze_service() -> CozeService:
    """Return the shared :class:`CozeService` singleton, creating it on
    first use."""
    global _service
    if _service is None:
        _service = CozeService()
    return _service
|
||||
512
backend/app/services/ai/exam_generator_service.py
Normal file
512
backend/app/services/ai/exam_generator_service.py
Normal file
@@ -0,0 +1,512 @@
|
||||
"""
|
||||
试题生成服务 V2 - Python 原生实现
|
||||
|
||||
功能:
|
||||
- 根据岗位和知识点动态生成考试题目
|
||||
- 支持错题重出模式
|
||||
- 调用 AI 生成并解析 JSON 结果
|
||||
|
||||
提供稳定可靠的试题生成能力。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.exceptions import ExternalServiceError
|
||||
|
||||
from .ai_service import AIService, AIResponse
|
||||
from .llm_json_parser import parse_with_fallback, clean_llm_output
|
||||
from .prompts.exam_generator_prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
USER_PROMPT,
|
||||
MISTAKE_REGEN_SYSTEM_PROMPT,
|
||||
MISTAKE_REGEN_USER_PROMPT,
|
||||
QUESTION_SCHEMA,
|
||||
DEFAULT_QUESTION_COUNTS,
|
||||
DEFAULT_DIFFICULTY_LEVEL,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ExamGeneratorConfig:
    """Exam generation configuration.

    Holds per-type question counts, the target difficulty, and an
    optional blob of past-mistake records that switches generation into
    regeneration mode.
    """
    course_id: int
    position_id: int
    single_choice_count: int = DEFAULT_QUESTION_COUNTS["single_choice_count"]
    multiple_choice_count: int = DEFAULT_QUESTION_COUNTS["multiple_choice_count"]
    true_false_count: int = DEFAULT_QUESTION_COUNTS["true_false_count"]
    fill_blank_count: int = DEFAULT_QUESTION_COUNTS["fill_blank_count"]
    essay_count: int = DEFAULT_QUESTION_COUNTS["essay_count"]
    difficulty_level: int = DEFAULT_DIFFICULTY_LEVEL
    mistake_records: str = ""

    @property
    def total_count(self) -> int:
        """Total number of questions across every question type."""
        return sum(
            (
                self.single_choice_count,
                self.multiple_choice_count,
                self.true_false_count,
                self.fill_blank_count,
                self.essay_count,
            )
        )

    @property
    def has_mistakes(self) -> bool:
        """True when a non-blank mistake-record blob was supplied."""
        if not self.mistake_records:
            return False
        return bool(self.mistake_records.strip())
|
||||
|
||||
|
||||
class ExamGeneratorService:
|
||||
"""
|
||||
试题生成服务 V2
|
||||
|
||||
使用 Python 原生实现。
|
||||
|
||||
使用示例:
|
||||
```python
|
||||
service = ExamGeneratorService()
|
||||
result = await service.generate_exam(
|
||||
db=db_session,
|
||||
config=ExamGeneratorConfig(
|
||||
course_id=1,
|
||||
position_id=1,
|
||||
single_choice_count=5,
|
||||
multiple_choice_count=3,
|
||||
difficulty_level=3
|
||||
)
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""初始化服务"""
|
||||
self.ai_service = AIService(module_code="exam_generator")
|
||||
|
||||
async def generate_exam(
|
||||
self,
|
||||
db: AsyncSession,
|
||||
config: ExamGeneratorConfig
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
生成考试题目(主入口)
|
||||
|
||||
Args:
|
||||
db: 数据库会话
|
||||
config: 考试生成配置
|
||||
|
||||
Returns:
|
||||
生成结果,包含 success、questions、total_count 等字段
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
f"开始生成试题 - course_id: {config.course_id}, position_id: {config.position_id}, "
|
||||
f"total_count: {config.total_count}, has_mistakes: {config.has_mistakes}"
|
||||
)
|
||||
|
||||
# 根据是否有错题记录,走不同分支
|
||||
if config.has_mistakes:
|
||||
return await self._regenerate_from_mistakes(db, config)
|
||||
else:
|
||||
return await self._generate_from_knowledge(db, config)
|
||||
|
||||
except ExternalServiceError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"试题生成失败 - course_id: {config.course_id}, error: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
raise ExternalServiceError(f"试题生成失败: {e}")
|
||||
|
||||
async def _generate_from_knowledge(
|
||||
self,
|
||||
db: AsyncSession,
|
||||
config: ExamGeneratorConfig
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
基于知识点生成题目(无错题模式)
|
||||
|
||||
流程:
|
||||
1. 查询岗位信息
|
||||
2. 随机查询知识点
|
||||
3. 调用 AI 生成题目
|
||||
4. 解析并返回结果
|
||||
"""
|
||||
# 1. 查询岗位信息
|
||||
position_info = await self._query_position(db, config.position_id)
|
||||
if not position_info:
|
||||
raise ExternalServiceError(f"岗位不存在: position_id={config.position_id}")
|
||||
|
||||
logger.info(f"岗位信息: {position_info.get('name', 'unknown')}")
|
||||
|
||||
# 2. 随机查询知识点
|
||||
knowledge_points = await self._query_knowledge_points(
|
||||
db,
|
||||
config.course_id,
|
||||
config.total_count
|
||||
)
|
||||
if not knowledge_points:
|
||||
raise ExternalServiceError(
|
||||
f"课程没有可用的知识点: course_id={config.course_id}"
|
||||
)
|
||||
|
||||
logger.info(f"查询到 {len(knowledge_points)} 个知识点")
|
||||
|
||||
# 3. 构建提示词
|
||||
system_prompt = SYSTEM_PROMPT.format(
|
||||
total_count=config.total_count,
|
||||
single_choice_count=config.single_choice_count,
|
||||
multiple_choice_count=config.multiple_choice_count,
|
||||
true_false_count=config.true_false_count,
|
||||
fill_blank_count=config.fill_blank_count,
|
||||
essay_count=config.essay_count,
|
||||
difficulty_level=config.difficulty_level,
|
||||
)
|
||||
|
||||
user_prompt = USER_PROMPT.format(
|
||||
position_info=self._format_position_info(position_info),
|
||||
knowledge_points=self._format_knowledge_points(knowledge_points),
|
||||
)
|
||||
|
||||
# 4. 调用 AI 生成
|
||||
ai_response = await self._call_ai_generate(system_prompt, user_prompt)
|
||||
|
||||
logger.info(
|
||||
f"AI 生成完成 - provider: {ai_response.provider}, "
|
||||
f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
|
||||
)
|
||||
|
||||
# 5. 解析题目
|
||||
questions = self._parse_questions(ai_response.content)
|
||||
|
||||
logger.info(f"试题解析成功,数量: {len(questions)}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"questions": questions,
|
||||
"total_count": len(questions),
|
||||
"mode": "knowledge_based",
|
||||
"ai_provider": ai_response.provider,
|
||||
"ai_model": ai_response.model,
|
||||
"ai_tokens": ai_response.total_tokens,
|
||||
"ai_latency_ms": ai_response.latency_ms,
|
||||
}
|
||||
|
||||
    async def _regenerate_from_mistakes(
        self,
        db: AsyncSession,
        config: ExamGeneratorConfig
    ) -> Dict[str, Any]:
        """
        Mistake-regeneration mode: produce fresh questions from past mistakes.

        Flow:
            1. Build the regeneration prompts from the mistake records.
            2. Call the AI to generate new questions.
            3. Parse the output and return the result payload.

        Args:
            db: Async database session (unused here, kept for a uniform
                signature with the knowledge-based mode).
            config: Generator configuration; ``mistake_records`` carries the
                prior wrong answers as a JSON string.

        Returns:
            Result dict with the parsed questions plus AI call metadata.
        """
        logger.info("进入错题重出模式")

        # 1. Build system/user prompts from the templates.
        system_prompt = MISTAKE_REGEN_SYSTEM_PROMPT.format(
            difficulty_level=config.difficulty_level,
        )

        user_prompt = MISTAKE_REGEN_USER_PROMPT.format(
            mistake_records=config.mistake_records,
        )

        # 2. Invoke the AI to generate replacement questions.
        ai_response = await self._call_ai_generate(system_prompt, user_prompt)

        logger.info(
            f"错题重出完成 - provider: {ai_response.provider}, "
            f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
        )

        # 3. Parse the model output into normalized question dicts.
        questions = self._parse_questions(ai_response.content)

        logger.info(f"错题重出解析成功,数量: {len(questions)}")

        return {
            "success": True,
            "questions": questions,
            "total_count": len(questions),
            "mode": "mistake_regen",
            "ai_provider": ai_response.provider,
            "ai_model": ai_response.model,
            "ai_tokens": ai_response.total_tokens,
            "ai_latency_ms": ai_response.latency_ms,
        }
    async def _query_position(
        self,
        db: AsyncSession,
        position_id: int
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch a position record by id, excluding soft-deleted rows.

        Args:
            db: Async database session.
            position_id: Primary key of the position.

        Returns:
            Dict with id/name/description/skills/level, or None when the
            position does not exist (or is soft-deleted).

        Raises:
            ExternalServiceError: Wraps any database failure.
        """
        try:
            result = await db.execute(
                text("""
                    SELECT id, name, description, skills, level
                    FROM positions
                    WHERE id = :position_id AND is_deleted = FALSE
                """),
                {"position_id": position_id}
            )
            row = result.fetchone()

            if not row:
                return None

            # Convert the Row to a plain dict for downstream formatting.
            return {
                "id": row[0],
                "name": row[1],
                "description": row[2],
                "skills": row[3],  # JSON column; may be a string or a list
                "level": row[4],
            }

        except Exception as e:
            logger.error(f"查询岗位信息失败: {e}")
            raise ExternalServiceError(f"查询岗位信息失败: {e}")
    async def _query_knowledge_points(
        self,
        db: AsyncSession,
        course_id: int,
        limit: int
    ) -> List[Dict[str, Any]]:
        """
        Randomly sample knowledge points belonging to a course.

        Joins with course_materials so points whose parent material was
        soft-deleted are excluded.

        Args:
            db: Async database session.
            course_id: Course to sample from.
            limit: Maximum number of points to return.

        Returns:
            List of dicts with id/name/description/topic_relation.

        Raises:
            ExternalServiceError: Wraps any database failure.

        NOTE(review): ``ORDER BY RAND()`` is MySQL-specific and sorts the
        whole candidate set per call — fine for small tables, revisit if
        the knowledge_points table grows large.
        """
        try:
            result = await db.execute(
                text("""
                    SELECT kp.id, kp.name, kp.description, kp.topic_relation
                    FROM knowledge_points kp
                    INNER JOIN course_materials cm ON kp.material_id = cm.id
                    WHERE kp.course_id = :course_id
                      AND kp.is_deleted = FALSE
                      AND cm.is_deleted = FALSE
                    ORDER BY RAND()
                    LIMIT :limit
                """),
                {"course_id": course_id, "limit": limit}
            )
            rows = result.fetchall()

            return [
                {
                    "id": row[0],
                    "name": row[1],
                    "description": row[2],
                    "topic_relation": row[3],
                }
                for row in rows
            ]

        except Exception as e:
            logger.error(f"查询知识点失败: {e}")
            raise ExternalServiceError(f"查询知识点失败: {e}")
async def _call_ai_generate(
|
||||
self,
|
||||
system_prompt: str,
|
||||
user_prompt: str
|
||||
) -> AIResponse:
|
||||
"""调用 AI 生成题目"""
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
]
|
||||
|
||||
response = await self.ai_service.chat(
|
||||
messages=messages,
|
||||
temperature=0.7, # 适当的创造性
|
||||
prompt_name="exam_generator"
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _parse_questions(self, ai_output: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
解析 AI 输出的题目 JSON
|
||||
|
||||
使用 LLM JSON Parser 进行多层兜底解析
|
||||
"""
|
||||
# 先清洗输出
|
||||
cleaned_output, rules = clean_llm_output(ai_output)
|
||||
if rules:
|
||||
logger.debug(f"AI 输出已清洗: {rules}")
|
||||
|
||||
# 使用带 Schema 校验的解析
|
||||
questions = parse_with_fallback(
|
||||
cleaned_output,
|
||||
schema=QUESTION_SCHEMA,
|
||||
default=[],
|
||||
validate_schema=True,
|
||||
on_error="default"
|
||||
)
|
||||
|
||||
# 后处理:确保每个题目有必要字段
|
||||
processed_questions = []
|
||||
for i, q in enumerate(questions):
|
||||
if isinstance(q, dict):
|
||||
# 确保有 num 字段
|
||||
if "num" not in q:
|
||||
q["num"] = i + 1
|
||||
|
||||
# 确保 num 是整数
|
||||
try:
|
||||
q["num"] = int(q["num"])
|
||||
except (ValueError, TypeError):
|
||||
q["num"] = i + 1
|
||||
|
||||
# 确保有 type 字段
|
||||
if "type" not in q:
|
||||
# 根据是否有 options 推断类型
|
||||
if q.get("topic", {}).get("options"):
|
||||
q["type"] = "single_choice"
|
||||
else:
|
||||
q["type"] = "essay"
|
||||
|
||||
# 确保 knowledge_point_id 是整数或 None
|
||||
kp_id = q.get("knowledge_point_id")
|
||||
if kp_id is not None:
|
||||
try:
|
||||
q["knowledge_point_id"] = int(kp_id)
|
||||
except (ValueError, TypeError):
|
||||
q["knowledge_point_id"] = None
|
||||
|
||||
# 验证必要字段
|
||||
if q.get("topic") and q.get("correct"):
|
||||
processed_questions.append(q)
|
||||
else:
|
||||
logger.warning(f"题目缺少必要字段,已跳过: {q}")
|
||||
|
||||
if not processed_questions:
|
||||
logger.warning("未能解析出有效的题目")
|
||||
|
||||
return processed_questions
|
||||
|
||||
def _format_position_info(self, position: Dict[str, Any]) -> str:
|
||||
"""格式化岗位信息为文本"""
|
||||
lines = [
|
||||
f"岗位名称: {position.get('name', '未知')}",
|
||||
f"岗位等级: {position.get('level', '未设置')}",
|
||||
]
|
||||
|
||||
if position.get('description'):
|
||||
lines.append(f"岗位描述: {position['description']}")
|
||||
|
||||
skills = position.get('skills')
|
||||
if skills:
|
||||
# skills 可能是 JSON 字符串或列表
|
||||
if isinstance(skills, str):
|
||||
try:
|
||||
skills = json.loads(skills)
|
||||
except json.JSONDecodeError:
|
||||
skills = [skills]
|
||||
|
||||
if isinstance(skills, list) and skills:
|
||||
lines.append(f"核心技能: {', '.join(str(s) for s in skills)}")
|
||||
|
||||
return '\n'.join(lines)
|
||||
|
||||
def _format_knowledge_points(self, knowledge_points: List[Dict[str, Any]]) -> str:
|
||||
"""格式化知识点列表为文本"""
|
||||
lines = []
|
||||
for kp in knowledge_points:
|
||||
kp_text = f"【知识点 ID: {kp['id']}】{kp['name']}"
|
||||
if kp.get('description'):
|
||||
kp_text += f"\n{kp['description']}"
|
||||
if kp.get('topic_relation'):
|
||||
kp_text += f"\n关系描述: {kp['topic_relation']}"
|
||||
lines.append(kp_text)
|
||||
|
||||
return '\n\n'.join(lines)
|
||||
|
||||
|
||||
# Module-level singleton shared by the convenience functions below and by
# callers importing it from the package __init__.
exam_generator_service = ExamGeneratorService()


# ==================== Convenience functions ====================
async def generate_exam(
    db: AsyncSession,
    course_id: int,
    position_id: int,
    single_choice_count: int = 4,
    multiple_choice_count: int = 2,
    true_false_count: int = 1,
    fill_blank_count: int = 2,
    essay_count: int = 1,
    difficulty_level: int = 3,
    mistake_records: str = ""
) -> Dict[str, Any]:
    """
    Convenience wrapper: generate exam questions via the shared service.

    Builds an ExamGeneratorConfig from keyword-friendly arguments and
    delegates to the module-level ``exam_generator_service`` singleton.

    Args:
        db: Async database session.
        course_id: Course ID to draw knowledge points from.
        position_id: Position ID the exam targets.
        single_choice_count: Number of single-choice questions.
        multiple_choice_count: Number of multiple-choice questions.
        true_false_count: Number of true/false questions.
        fill_blank_count: Number of fill-in-the-blank questions.
        essay_count: Number of essay questions.
        difficulty_level: Difficulty level (1-5).
        mistake_records: JSON string of past mistakes; when non-empty the
            service runs in mistake-regeneration mode.

    Returns:
        Generation result dict (questions plus AI metadata).
    """
    config = ExamGeneratorConfig(
        course_id=course_id,
        position_id=position_id,
        single_choice_count=single_choice_count,
        multiple_choice_count=multiple_choice_count,
        true_false_count=true_false_count,
        fill_blank_count=fill_blank_count,
        essay_count=essay_count,
        difficulty_level=difficulty_level,
        mistake_records=mistake_records,
    )

    return await exam_generator_service.generate_exam(db, config)
548
backend/app/services/ai/knowledge_analysis_v2.py
Normal file
548
backend/app/services/ai/knowledge_analysis_v2.py
Normal file
@@ -0,0 +1,548 @@
|
||||
"""
|
||||
知识点分析服务 V2 - Python 原生实现
|
||||
|
||||
功能:
|
||||
- 读取文档内容(PDF/Word/TXT)
|
||||
- 调用 AI 分析提取知识点
|
||||
- 解析 JSON 结果
|
||||
- 写入数据库
|
||||
|
||||
提供稳定可靠的知识点分析能力。
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.exceptions import ExternalServiceError
|
||||
from app.schemas.course import KnowledgePointCreate
|
||||
|
||||
from .ai_service import AIService, AIResponse
|
||||
from .llm_json_parser import parse_with_fallback, clean_llm_output
|
||||
from .prompts.knowledge_analysis_prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
USER_PROMPT,
|
||||
KNOWLEDGE_POINT_SCHEMA,
|
||||
DEFAULT_KNOWLEDGE_TYPE,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 配置常量
|
||||
STATIC_UPLOADS_PREFIX = '/static/uploads/'
|
||||
MAX_CONTENT_LENGTH = 100000 # 最大文档内容长度(字符)
|
||||
MAX_KNOWLEDGE_POINTS = 20 # 最大知识点数量
|
||||
|
||||
|
||||
class KnowledgeAnalysisServiceV2:
    """
    Knowledge-point analysis service V2 (native Python implementation).

    Reads a course material (PDF/Word/text), asks the AI to extract
    knowledge points, parses the JSON answer and persists the points.

    Usage:
        ```python
        service = KnowledgeAnalysisServiceV2()
        result = await service.analyze_course_material(
            db=db_session,
            course_id=1,
            material_id=10,
            file_url="/static/uploads/courses/1/doc.pdf",
            course_title="医美产品知识",
            user_id=1
        )
        ```
    """

    def __init__(self):
        """Initialize the AI client and resolve the upload root directory."""
        # Dedicated AI service instance tagged with this module's code.
        self.ai_service = AIService(module_code="knowledge_analysis")
        # Root directory for uploaded files; defaults to 'uploads' when the
        # setting is absent.
        self.upload_path = getattr(settings, 'UPLOAD_PATH', 'uploads')
    async def analyze_course_material(
        self,
        db: AsyncSession,
        course_id: int,
        material_id: int,
        file_url: str,
        course_title: str,
        user_id: int
    ) -> Dict[str, Any]:
        """
        Analyze one course material and extract its knowledge points.

        Pipeline: resolve path -> extract text -> AI analysis -> parse JSON
        -> delete the material's old points -> save the new ones.

        Args:
            db: Database session.
            course_id: Course ID.
            material_id: Material ID.
            file_url: File URL (relative or absolute path).
            course_title: Course title, injected into the prompt.
            user_id: User recorded as the creator of the points.

        Returns:
            Result dict with success/status/knowledge_points_count plus AI
            call metadata.

        Raises:
            ExternalServiceError: Any failure is logged and wrapped.
        """
        try:
            logger.info(
                f"开始知识点分析 V2 - course_id: {course_id}, material_id: {material_id}, "
                f"file_url: {file_url}"
            )

            # 1. Map the URL to a local filesystem path.
            file_path = self._resolve_file_path(file_url)
            if not file_path.exists():
                raise FileNotFoundError(f"文件不存在: {file_path}")

            logger.info(f"文件路径解析成功: {file_path}")

            # 2. Extract the document's text content.
            content = await self._extract_document_content(file_path)
            if not content or not content.strip():
                raise ValueError("文档内容为空")

            logger.info(f"文档内容提取成功,长度: {len(content)} 字符")

            # 3. Ask the AI to extract knowledge points.
            ai_response = await self._call_ai_analysis(content, course_title)

            logger.info(
                f"AI 分析完成 - provider: {ai_response.provider}, "
                f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
            )

            # 4. Parse the JSON answer into normalized point dicts.
            knowledge_points = self._parse_knowledge_points(ai_response.content)

            logger.info(f"知识点解析成功,数量: {len(knowledge_points)}")

            # 5. Remove the points from any previous analysis of this material.
            # NOTE(review): this deletion commits before step 6 — a failure
            # while saving leaves the material with no points; confirm
            # whether a single transaction is wanted here.
            await self._delete_old_knowledge_points(db, material_id)

            # 6. Persist the new points.
            saved_count = await self._save_knowledge_points(
                db=db,
                course_id=course_id,
                material_id=material_id,
                knowledge_points=knowledge_points,
                user_id=user_id
            )

            logger.info(
                f"知识点分析完成 - course_id: {course_id}, material_id: {material_id}, "
                f"saved_count: {saved_count}"
            )

            return {
                "success": True,
                "status": "completed",
                "knowledge_points_count": saved_count,
                "ai_provider": ai_response.provider,
                "ai_model": ai_response.model,
                "ai_tokens": ai_response.total_tokens,
                "ai_latency_ms": ai_response.latency_ms,
            }

        except FileNotFoundError as e:
            logger.error(f"文件不存在: {e}")
            raise ExternalServiceError(f"分析文件不存在: {e}")
        except ValueError as e:
            logger.error(f"参数错误: {e}")
            raise ExternalServiceError(f"分析参数错误: {e}")
        except Exception as e:
            logger.error(
                f"知识点分析失败 - course_id: {course_id}, material_id: {material_id}, "
                f"error: {e}",
                exc_info=True
            )
            raise ExternalServiceError(f"知识点分析失败: {e}")
def _resolve_file_path(self, file_url: str) -> Path:
|
||||
"""解析文件 URL 为本地路径"""
|
||||
if file_url.startswith(STATIC_UPLOADS_PREFIX):
|
||||
relative_path = file_url.replace(STATIC_UPLOADS_PREFIX, '')
|
||||
return Path(self.upload_path) / relative_path
|
||||
elif file_url.startswith('/'):
|
||||
# 绝对路径
|
||||
return Path(file_url)
|
||||
else:
|
||||
# 相对路径
|
||||
return Path(self.upload_path) / file_url
|
||||
|
||||
    async def _extract_document_content(self, file_path: Path) -> str:
        """
        Extract text content from a document, dispatching on file extension.

        Supports PDF, Word (.docx/.doc) and text/markdown; unknown
        extensions fall back to a best-effort plain-text read.

        Raises:
            ValueError: When the content cannot be read.
        """
        suffix = file_path.suffix.lower()

        try:
            if suffix == '.pdf':
                return await self._extract_pdf_content(file_path)
            elif suffix in ['.docx', '.doc']:
                # NOTE(review): python-docx cannot open legacy binary .doc
                # files — such inputs will surface as a read error below.
                return await self._extract_docx_content(file_path)
            elif suffix in ['.txt', '.md', '.text']:
                return await self._extract_text_content(file_path)
            else:
                # Unknown extension: attempt to read it as text anyway.
                return await self._extract_text_content(file_path)
        except Exception as e:
            logger.error(f"文档内容提取失败: {file_path}, error: {e}")
            raise ValueError(f"无法读取文档内容: {e}")
    async def _extract_pdf_content(self, file_path: Path) -> str:
        """
        Extract text from a PDF with PyPDF2, page by page.

        Raises:
            ValueError: When PyPDF2 is not installed or the file fails to read.
        """
        try:
            # Imported lazily so the service loads even without PyPDF2.
            from PyPDF2 import PdfReader

            reader = PdfReader(str(file_path))
            text_parts = []

            for page in reader.pages:
                text = page.extract_text()
                if text:  # skip pages with no extractable text (e.g. scans)
                    text_parts.append(text)

            content = '\n'.join(text_parts)

            # Normalize whitespace and enforce the length cap.
            content = self._clean_content(content)

            return content

        except ImportError:
            logger.error("PyPDF2 未安装,无法读取 PDF")
            raise ValueError("服务器未安装 PDF 读取组件")
        except Exception as e:
            logger.error(f"PDF 读取失败: {e}")
            raise ValueError(f"PDF 读取失败: {e}")
    async def _extract_docx_content(self, file_path: Path) -> str:
        """
        Extract text from a Word document (paragraphs plus table cells).

        Raises:
            ValueError: When python-docx is not installed or reading fails.
        """
        try:
            # Imported lazily so the service loads even without python-docx.
            from docx import Document

            doc = Document(str(file_path))
            text_parts = []

            for para in doc.paragraphs:
                if para.text.strip():
                    text_parts.append(para.text)

            # Tables are not covered by doc.paragraphs; collect their cells too.
            for table in doc.tables:
                for row in table.rows:
                    for cell in row.cells:
                        if cell.text.strip():
                            text_parts.append(cell.text)

            content = '\n'.join(text_parts)
            content = self._clean_content(content)

            return content

        except ImportError:
            logger.error("python-docx 未安装,无法读取 Word 文档")
            raise ValueError("服务器未安装 Word 读取组件")
        except Exception as e:
            logger.error(f"Word 文档读取失败: {e}")
            raise ValueError(f"Word 文档读取失败: {e}")
async def _extract_text_content(self, file_path: Path) -> str:
|
||||
"""提取文本文件内容"""
|
||||
try:
|
||||
# 尝试多种编码
|
||||
encodings = ['utf-8', 'gbk', 'gb2312', 'latin-1']
|
||||
|
||||
for encoding in encodings:
|
||||
try:
|
||||
with open(file_path, 'r', encoding=encoding) as f:
|
||||
content = f.read()
|
||||
return self._clean_content(content)
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
|
||||
raise ValueError("无法识别文件编码")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"文本文件读取失败: {e}")
|
||||
raise ValueError(f"文本文件读取失败: {e}")
|
||||
|
||||
def _clean_content(self, content: str) -> str:
|
||||
"""清理和截断内容"""
|
||||
# 移除多余空白
|
||||
import re
|
||||
content = re.sub(r'\n{3,}', '\n\n', content)
|
||||
content = re.sub(r' {2,}', ' ', content)
|
||||
|
||||
# 截断过长内容
|
||||
if len(content) > MAX_CONTENT_LENGTH:
|
||||
logger.warning(f"文档内容过长,截断至 {MAX_CONTENT_LENGTH} 字符")
|
||||
content = content[:MAX_CONTENT_LENGTH] + "\n\n[内容已截断...]"
|
||||
|
||||
return content.strip()
|
||||
|
||||
    async def _call_ai_analysis(
        self,
        content: str,
        course_title: str
    ) -> AIResponse:
        """
        Send the document to the AI for knowledge-point extraction.

        Args:
            content: Cleaned document text.
            course_title: Course name injected into the user prompt.

        Returns:
            The raw AIResponse from the chat call.
        """
        # Fill the user prompt template with course name and document body.
        user_message = USER_PROMPT.format(
            course_name=course_title,
            content=content
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_message}
        ]

        # Low temperature keeps the JSON output stable.
        response = await self.ai_service.chat(
            messages=messages,
            temperature=0.1,
            prompt_name="knowledge_analysis"
        )

        return response
    def _parse_knowledge_points(self, ai_output: str) -> List[Dict[str, Any]]:
        """
        Parse the knowledge-point JSON emitted by the AI.

        Uses the LLM JSON parser's multi-layer fallback, accepts several
        alternative field spellings (the model sometimes answers with
        Chinese keys), caps the list at MAX_KNOWLEDGE_POINTS, and drops
        entries without usable content.
        """
        # Clean fences/noise from the raw output first.
        cleaned_output, rules = clean_llm_output(ai_output)
        if rules:
            logger.debug(f"AI 输出已清洗: {rules}")

        # Parse with schema validation, defaulting to an empty list.
        knowledge_points = parse_with_fallback(
            cleaned_output,
            schema=KNOWLEDGE_POINT_SCHEMA,
            default=[],
            validate_schema=True,
            on_error="default"
        )

        # Post-process: normalize fields and enforce the count limit.
        processed_points = []
        for i, kp in enumerate(knowledge_points):
            if i >= MAX_KNOWLEDGE_POINTS:
                logger.warning(f"知识点数量超过限制 {MAX_KNOWLEDGE_POINTS},截断")
                break

            if isinstance(kp, dict):
                # Accept multiple field spellings (English and Chinese keys).
                title = (
                    kp.get('title') or
                    kp.get('name') or
                    kp.get('知识点名称') or
                    f"知识点 {i + 1}"
                )
                content = (
                    kp.get('content') or
                    kp.get('description') or
                    kp.get('知识点描述') or
                    ''
                )
                kp_type = (
                    kp.get('type') or
                    kp.get('知识点类型') or
                    DEFAULT_KNOWLEDGE_TYPE
                )
                topic_relation = (
                    kp.get('topic_relation') or
                    kp.get('关系描述') or
                    ''
                )

                # Keep only points that have a title plus some substance.
                if title and (content or topic_relation):
                    processed_points.append({
                        'title': title[:200],  # cap the title length
                        'content': content,
                        'type': kp_type,
                        'topic_relation': topic_relation,
                    })

        if not processed_points:
            logger.warning("未能解析出有效的知识点")

        return processed_points
    async def _delete_old_knowledge_points(
        self,
        db: AsyncSession,
        material_id: int
    ) -> int:
        """
        Delete the knowledge points previously generated for a material.

        Returns:
            Number of deleted rows.

        Raises:
            Re-raises any database error after rolling back.

        NOTE(review): this commits immediately; if the subsequent save step
        fails the material ends up with no points at all — consider one
        transaction spanning delete + save.
        """
        try:
            from sqlalchemy import text

            result = await db.execute(
                text("DELETE FROM knowledge_points WHERE material_id = :material_id"),
                {"material_id": material_id}
            )
            await db.commit()

            deleted_count = result.rowcount
            if deleted_count > 0:
                logger.info(f"已删除旧知识点: material_id={material_id}, count={deleted_count}")

            return deleted_count

        except Exception as e:
            logger.error(f"删除旧知识点失败: {e}")
            await db.rollback()
            raise
    async def _save_knowledge_points(
        self,
        db: AsyncSession,
        course_id: int,
        material_id: int,
        knowledge_points: List[Dict[str, Any]],
        user_id: int
    ) -> int:
        """
        Persist parsed knowledge points via the course service.

        Failures on individual points are logged and skipped so one bad
        record does not abort the whole batch.

        Returns:
            Number of points actually saved.
        """
        # Local import — presumably to avoid a circular import at module
        # load time; confirm before moving it to the top of the file.
        from app.services.course_service import knowledge_point_service

        saved_count = 0

        for kp_data in knowledge_points:
            try:
                kp_create = KnowledgePointCreate(
                    name=kp_data['title'],
                    description=kp_data.get('content', ''),
                    type=kp_data.get('type', DEFAULT_KNOWLEDGE_TYPE),
                    source=1,  # 1 = produced by AI analysis
                    topic_relation=kp_data.get('topic_relation'),
                    material_id=material_id
                )

                await knowledge_point_service.create_knowledge_point(
                    db=db,
                    course_id=course_id,
                    point_in=kp_create,
                    created_by=user_id
                )
                saved_count += 1

            except Exception as e:
                logger.warning(
                    f"保存单个知识点失败: title={kp_data.get('title')}, error={e}"
                )
                continue

        return saved_count
    async def reanalyze_course_materials(
        self,
        db: AsyncSession,
        course_id: int,
        course_title: str,
        user_id: int
    ) -> Dict[str, Any]:
        """
        Re-run knowledge-point analysis over every material of a course.

        Per-material failures are recorded in the result list but do not
        stop the remaining materials from being processed.

        Args:
            db: Database session.
            course_id: Course ID.
            course_title: Course title, forwarded to each analysis.
            user_id: User recorded as the creator of the points.

        Returns:
            Summary dict with material counts, success count, total points
            and the per-material result list.

        Raises:
            ExternalServiceError: On failures outside the per-material loop.
        """
        try:
            # Local import — presumably to avoid a circular import; confirm.
            from app.services.course_service import course_service

            # Fetch every material attached to the course.
            materials = await course_service.get_course_materials(db, course_id=course_id)

            if not materials:
                return {
                    "success": True,
                    "message": "该课程暂无资料需要分析",
                    "materials_count": 0,
                    "knowledge_points_count": 0
                }

            total_knowledge_points = 0
            analysis_results = []

            for material in materials:
                try:
                    result = await self.analyze_course_material(
                        db=db,
                        course_id=course_id,
                        material_id=material.id,
                        file_url=material.file_url,
                        course_title=course_title,
                        user_id=user_id
                    )

                    kp_count = result.get('knowledge_points_count', 0)
                    total_knowledge_points += kp_count

                    analysis_results.append({
                        "material_id": material.id,
                        "material_name": material.name,
                        "success": True,
                        "knowledge_points_count": kp_count
                    })

                except Exception as e:
                    # Record the failure and keep processing the rest.
                    logger.error(
                        f"资料分析失败: material_id={material.id}, error={e}"
                    )
                    analysis_results.append({
                        "material_id": material.id,
                        "material_name": material.name,
                        "success": False,
                        "error": str(e)
                    })

            success_count = sum(1 for r in analysis_results if r['success'])

            logger.info(
                f"课程资料重新分析完成 - course_id: {course_id}, "
                f"materials: {len(materials)}, success: {success_count}, "
                f"total_knowledge_points: {total_knowledge_points}"
            )

            return {
                "success": True,
                "materials_count": len(materials),
                "success_count": success_count,
                "knowledge_points_count": total_knowledge_points,
                "analysis_results": analysis_results
            }

        except Exception as e:
            logger.error(
                f"课程资料重新分析失败 - course_id: {course_id}, error: {e}",
                exc_info=True
            )
            raise ExternalServiceError(f"重新分析失败: {e}")
|
||||
# Module-level singleton exported via the package __init__.
knowledge_analysis_service_v2 = KnowledgeAnalysisServiceV2()
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
707
backend/app/services/ai/llm_json_parser.py
Normal file
707
backend/app/services/ai/llm_json_parser.py
Normal file
@@ -0,0 +1,707 @@
|
||||
"""
|
||||
LLM JSON Parser - 大模型 JSON 输出解析器
|
||||
|
||||
功能:
|
||||
- 使用 json-repair 库修复 AI 输出的 JSON
|
||||
- 处理中文标点、尾部逗号、Python 风格等问题
|
||||
- Schema 校验确保数据完整性
|
||||
|
||||
使用示例:
|
||||
```python
|
||||
from app.services.ai.llm_json_parser import parse_llm_json, parse_with_fallback
|
||||
|
||||
# 简单解析
|
||||
result = parse_llm_json(ai_response)
|
||||
|
||||
# 带 Schema 校验和默认值
|
||||
result = parse_with_fallback(
|
||||
ai_response,
|
||||
schema=MY_SCHEMA,
|
||||
default=[]
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 尝试导入 json-repair
|
||||
try:
|
||||
from json_repair import loads as json_repair_loads
|
||||
from json_repair import repair_json
|
||||
HAS_JSON_REPAIR = True
|
||||
except ImportError:
|
||||
HAS_JSON_REPAIR = False
|
||||
logger.warning("json-repair 未安装,将使用内置修复逻辑")
|
||||
|
||||
# 尝试导入 jsonschema
|
||||
try:
|
||||
from jsonschema import validate, ValidationError, Draft7Validator
|
||||
HAS_JSONSCHEMA = True
|
||||
except ImportError:
|
||||
HAS_JSONSCHEMA = False
|
||||
logger.warning("jsonschema 未安装,将跳过 Schema 校验")
|
||||
|
||||
|
||||
# ==================== 异常类 ====================
|
||||
|
||||
class JSONParseError(Exception):
    """Base class for JSON parse failures, carrying context for diagnostics."""

    def __init__(self, message: str, raw_text: str = "", issues: List[dict] = None):
        super().__init__(message)
        # The original text that failed to parse.
        self.raw_text = raw_text
        # Issue records accumulated during repair attempts (never None).
        self.issues = issues or []
||||
|
||||
class JSONUnrecoverableError(JSONParseError):
    """Raised when every parsing/repair layer has failed on the input."""
    pass
|
||||
|
||||
# ==================== 解析结果 ====================
|
||||
|
||||
@dataclass
class ParseResult:
    """Structured outcome of a JSON parse attempt."""
    # Whether any parsing layer produced usable data.
    success: bool
    # The parsed payload (None on failure).
    data: Any = None
    # Which layer succeeded: direct / json_repair / preprocessed / fixed / completed / default
    method: str = ""
    # Repair actions and problems recorded along the way.
    issues: List[dict] = field(default_factory=list)
    # The original input text, kept for diagnostics.
    raw_text: str = ""
    # Human-readable failure description (empty on success).
    error: str = ""
|
||||
|
||||
# ==================== 核心解析函数 ====================
|
||||
|
||||
def parse_llm_json(
    text: str,
    *,
    strict: bool = False,
    return_result: bool = False
) -> Union[Any, ParseResult]:
    """
    Parse LLM-emitted JSON through a cascade of repair layers.

    Layers, in order: direct json.loads -> json-repair library ->
    preprocessing (code-fence extraction, boundary trimming) ->
    rule-based format fixes -> truncation completion.

    Args:
        text: Raw model output.
        strict: When True, only the direct parse is attempted.
        return_result: Return a ParseResult instead of the bare data.

    Returns:
        The parsed JSON object, or a ParseResult when return_result=True.

    Raises:
        JSONUnrecoverableError: When every layer fails (and
            return_result is False).
    """
    if not text or not text.strip():
        if return_result:
            return ParseResult(success=False, error="Empty input")
        raise JSONUnrecoverableError("Empty input", text)

    text = text.strip()
    issues = []

    # Layer 1: direct parse — the happy path.
    try:
        data = json.loads(text)
        result = ParseResult(success=True, data=data, method="direct", raw_text=text)
        return result if return_result else data
    except json.JSONDecodeError:
        pass

    if strict:
        if return_result:
            return ParseResult(success=False, error="Strict mode: direct parse failed", raw_text=text)
        raise JSONUnrecoverableError("Strict mode: direct parse failed", text)

    # Layer 2: json-repair library (preferred when installed).
    if HAS_JSON_REPAIR:
        try:
            data = json_repair_loads(text)
            issues.append({"type": "json_repair", "action": "Auto-repaired by json-repair library"})
            result = ParseResult(success=True, data=data, method="json_repair", issues=issues, raw_text=text)
            return result if return_result else data
        except Exception as e:
            logger.debug(f"json-repair 修复失败: {e}")

    # Layer 3: preprocessing — unwrap code fences, strip surrounding prose.
    preprocessed = _preprocess_text(text)
    if preprocessed != text:
        try:
            data = json.loads(preprocessed)
            issues.append({"type": "preprocessed", "action": "Extracted JSON from text"})
            result = ParseResult(success=True, data=data, method="preprocessed", issues=issues, raw_text=text)
            return result if return_result else data
        except json.JSONDecodeError:
            pass

    # Retry json-repair on the preprocessed text.
    if HAS_JSON_REPAIR:
        try:
            data = json_repair_loads(preprocessed)
            issues.append({"type": "json_repair_preprocessed", "action": "Repaired after preprocessing"})
            result = ParseResult(success=True, data=data, method="json_repair", issues=issues, raw_text=text)
            return result if return_result else data
        except Exception:
            pass

    # Layer 4: rule-based format fixes (punctuation, comments, etc.).
    fixed, fix_issues = _fix_json_format(preprocessed)
    issues.extend(fix_issues)

    if fixed != preprocessed:
        try:
            data = json.loads(fixed)
            result = ParseResult(success=True, data=data, method="fixed", issues=issues, raw_text=text)
            return result if return_result else data
        except json.JSONDecodeError:
            pass

    # Layer 5: attempt to complete truncated JSON.
    completed = _try_complete_json(fixed)
    if completed:
        try:
            data = json.loads(completed)
            issues.append({"type": "completed", "action": "Auto-completed truncated JSON"})
            result = ParseResult(success=True, data=data, method="completed", issues=issues, raw_text=text)
            return result if return_result else data
        except json.JSONDecodeError:
            pass

    # Every layer failed: diagnose and report.
    diagnosis = diagnose_json_error(fixed)
    if return_result:
        return ParseResult(
            success=False,
            method="failed",
            issues=issues + diagnosis.get("issues", []),
            raw_text=text,
            error=f"All parse attempts failed. Issues: {diagnosis}"
        )
    raise JSONUnrecoverableError(f"All parse attempts failed: {diagnosis}", text, issues)
||||
|
||||
def parse_with_fallback(
    raw_text: str,
    schema: dict = None,
    default: Any = None,
    *,
    validate_schema: bool = True,
    on_error: str = "default"  # "default" / "raise" / "none"
) -> Any:
    """
    Parse LLM JSON output with a configurable fallback policy.

    Args:
        raw_text: Raw model output.
        schema: Optional JSON Schema to validate the result against.
        default: Value returned when parsing/validation fails and
            on_error == "default".
        validate_schema: Whether to run Schema validation (silently skipped
            when the jsonschema package is unavailable).
        on_error: Failure policy — "default" returns *default*, "raise"
            raises/re-raises, "none" returns None.

    Returns:
        Parsed data, *default*, or None depending on the outcome.
    """
    try:
        result = parse_llm_json(raw_text, return_result=True)

        if not result.success:
            logger.warning(f"JSON 解析失败: {result.error}")
            if on_error == "raise":
                raise JSONUnrecoverableError(result.error, raw_text, result.issues)
            elif on_error == "none":
                return None
            return default

        data = result.data

        # Schema validation (only when jsonschema is installed).
        if validate_schema and schema and HAS_JSONSCHEMA:
            is_valid, errors = validate_json_schema(data, schema)
            if not is_valid:
                logger.warning(f"Schema 校验失败: {errors}")
                if on_error == "raise":
                    raise JSONUnrecoverableError(f"Schema validation failed: {errors}", raw_text)
                elif on_error == "none":
                    return None
                return default

        # Log when a non-trivial repair path was needed.
        if result.method != "direct":
            logger.info(f"JSON 解析成功: method={result.method}, issues={result.issues}")

        return data

    except Exception as e:
        logger.error(f"JSON 解析异常: {e}")
        if on_error == "raise":
            raise
        elif on_error == "none":
            return None
        return default
||||
|
||||
# ==================== 预处理函数 ====================
|
||||
|
||||
def _preprocess_text(text: str) -> str:
    """Strip invisible characters, unwrap code fences, trim to the JSON span."""
    # Drop a leading BOM and any zero-width characters anywhere in the text.
    cleaned = text.lstrip('\ufeff')
    cleaned = re.sub(r'[\u200b\u200c\u200d\ufeff]', '', cleaned)

    # Prefer the first fenced/backticked block whose payload looks like JSON.
    for fence in (
        r'```json\s*([\s\S]*?)\s*```',
        r'```\s*([\s\S]*?)\s*```',
        r'`([^`]+)`',
    ):
        hit = re.search(fence, cleaned, re.IGNORECASE)
        if hit:
            candidate = hit.group(1).strip()
            if candidate.startswith(('{', '[')):
                cleaned = candidate
                break

    # Narrow to the outermost JSON object/array boundaries.
    cleaned = _find_json_boundaries(cleaned)

    return cleaned.strip()
||||
|
||||
def _find_json_boundaries(text: str) -> str:
|
||||
"""找到 JSON 的起止位置"""
|
||||
# 找第一个 { 或 [
|
||||
start = -1
|
||||
for i, c in enumerate(text):
|
||||
if c in '{[':
|
||||
start = i
|
||||
break
|
||||
|
||||
if start == -1:
|
||||
return text
|
||||
|
||||
# 找最后一个匹配的 } 或 ]
|
||||
depth = 0
|
||||
end = -1
|
||||
in_string = False
|
||||
escape = False
|
||||
|
||||
for i in range(start, len(text)):
|
||||
c = text[i]
|
||||
|
||||
if escape:
|
||||
escape = False
|
||||
continue
|
||||
|
||||
if c == '\\':
|
||||
escape = True
|
||||
continue
|
||||
|
||||
if c == '"':
|
||||
in_string = not in_string
|
||||
continue
|
||||
|
||||
if in_string:
|
||||
continue
|
||||
|
||||
if c in '{[':
|
||||
depth += 1
|
||||
elif c in '}]':
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
end = i + 1
|
||||
break
|
||||
|
||||
if end == -1:
|
||||
# 找最后一个 } 或 ]
|
||||
for i in range(len(text) - 1, start, -1):
|
||||
if text[i] in '}]':
|
||||
end = i + 1
|
||||
break
|
||||
|
||||
if end > start:
|
||||
return text[start:end]
|
||||
|
||||
return text[start:]
|
||||
|
||||
|
||||
# ==================== 修复函数 ====================
|
||||
|
||||
def _fix_json_format(text: str) -> Tuple[str, List[dict]]:
|
||||
"""修复常见 JSON 格式问题"""
|
||||
issues = []
|
||||
|
||||
# 1. 中文标点转英文
|
||||
cn_punctuation = {
|
||||
',': ',', '。': '.', ':': ':', ';': ';',
|
||||
'"': '"', '"': '"', ''': "'", ''': "'",
|
||||
'【': '[', '】': ']', '(': '(', ')': ')',
|
||||
'{': '{', '}': '}',
|
||||
}
|
||||
for cn, en in cn_punctuation.items():
|
||||
if cn in text:
|
||||
text = text.replace(cn, en)
|
||||
issues.append({"type": "chinese_punctuation", "from": cn, "to": en})
|
||||
|
||||
# 2. 移除注释
|
||||
if '//' in text:
|
||||
text = re.sub(r'//[^\n]*', '', text)
|
||||
issues.append({"type": "removed_comments", "style": "single-line"})
|
||||
|
||||
if '/*' in text:
|
||||
text = re.sub(r'/\*[\s\S]*?\*/', '', text)
|
||||
issues.append({"type": "removed_comments", "style": "multi-line"})
|
||||
|
||||
# 3. Python 风格转 JSON
|
||||
python_replacements = [
|
||||
(r'\bTrue\b', 'true'),
|
||||
(r'\bFalse\b', 'false'),
|
||||
(r'\bNone\b', 'null'),
|
||||
]
|
||||
for pattern, replacement in python_replacements:
|
||||
if re.search(pattern, text):
|
||||
text = re.sub(pattern, replacement, text)
|
||||
issues.append({"type": "python_style", "from": pattern, "to": replacement})
|
||||
|
||||
# 4. 移除尾部逗号
|
||||
trailing_comma_patterns = [
|
||||
(r',(\s*})', r'\1'),
|
||||
(r',(\s*\])', r'\1'),
|
||||
]
|
||||
for pattern, replacement in trailing_comma_patterns:
|
||||
if re.search(pattern, text):
|
||||
text = re.sub(pattern, replacement, text)
|
||||
issues.append({"type": "trailing_comma", "action": "removed"})
|
||||
|
||||
# 5. 修复单引号(谨慎处理)
|
||||
if text.count("'") > text.count('"') and re.match(r"^\s*\{?\s*'", text):
|
||||
text = re.sub(r"'([^']*)'(\s*:)", r'"\1"\2', text)
|
||||
text = re.sub(r":\s*'([^']*)'", r': "\1"', text)
|
||||
issues.append({"type": "single_quotes", "action": "replaced"})
|
||||
|
||||
return text, issues
|
||||
|
||||
|
||||
def _try_complete_json(text: str) -> Optional[str]:
|
||||
"""尝试补全截断的 JSON"""
|
||||
if not text:
|
||||
return None
|
||||
|
||||
# 统计括号
|
||||
stack = []
|
||||
in_string = False
|
||||
escape = False
|
||||
|
||||
for c in text:
|
||||
if escape:
|
||||
escape = False
|
||||
continue
|
||||
|
||||
if c == '\\':
|
||||
escape = True
|
||||
continue
|
||||
|
||||
if c == '"':
|
||||
in_string = not in_string
|
||||
continue
|
||||
|
||||
if in_string:
|
||||
continue
|
||||
|
||||
if c in '{[':
|
||||
stack.append(c)
|
||||
elif c == '}':
|
||||
if stack and stack[-1] == '{':
|
||||
stack.pop()
|
||||
elif c == ']':
|
||||
if stack and stack[-1] == '[':
|
||||
stack.pop()
|
||||
|
||||
if not stack:
|
||||
return None # 已经平衡了
|
||||
|
||||
# 如果在字符串中,先闭合字符串
|
||||
if in_string:
|
||||
text += '"'
|
||||
|
||||
# 补全括号
|
||||
completion = ""
|
||||
for bracket in reversed(stack):
|
||||
if bracket == '{':
|
||||
completion += '}'
|
||||
elif bracket == '[':
|
||||
completion += ']'
|
||||
|
||||
return text + completion
|
||||
|
||||
|
||||
# ==================== Schema 校验 ====================
|
||||
|
||||
def validate_json_schema(data: Any, schema: dict) -> Tuple[bool, List[dict]]:
    """Validate *data* against a JSON Schema (Draft 7).

    When the optional ``jsonschema`` dependency is missing the check is
    skipped and reported as valid.

    Returns:
        ``(is_valid, errors)`` — ``errors`` is a list of dicts with
        ``path``/``message``/``validator`` keys describing each failure.
    """
    if not HAS_JSONSCHEMA:
        logger.warning("jsonschema 未安装,跳过校验")
        return True, []

    try:
        found = list(Draft7Validator(schema).iter_errors(data))
        if not found:
            return True, []
        return False, [
            {
                "path": list(err.absolute_path),
                "message": err.message,
                "validator": err.validator,
            }
            for err in found
        ]
    except Exception as e:
        # Bad schema / validator failure — surface as a single error.
        return False, [{"message": str(e)}]
|
||||
|
||||
|
||||
# ==================== 诊断函数 ====================
|
||||
|
||||
def diagnose_json_error(text: str) -> dict:
    """Diagnose likely JSON problems in *text* without parsing it.

    Performs cheap lexical checks (full-width punctuation, Markdown
    fences, comments, Python literals, trailing commas, bracket/quote
    balance) and reports each finding with a severity and a suggestion.

    Returns:
        Dict with ``issues`` (list of findings), ``issue_count``,
        ``fixable`` (True when every finding is auto-repairable) and the
        overall ``severity`` (highest among findings, ``"low"`` default).
        Empty input short-circuits to ``{"issues": [...], "fixable": False}``.
    """
    issues = []

    # Empty input is unrecoverable — report and bail out early.
    if not text or not text.strip():
        issues.append({
            "type": "empty_input",
            "severity": "critical",
            "suggestion": "输入为空"
        })
        return {"issues": issues, "fixable": False}

    # Full-width (Chinese) punctuation JSON cannot contain outside
    # string literals.  Written as Unicode escapes so the curly-quote
    # entries cannot be mangled into duplicate ASCII quotes.
    cn_punctuation = [
        '\uff0c',  # fullwidth comma
        '\u3002',  # ideographic full stop
        '\uff1a',  # fullwidth colon
        '\uff1b',  # fullwidth semicolon
        '\u201c',  # left double quotation mark
        '\u201d',  # right double quotation mark
        '\u2018',  # left single quotation mark
        '\u2019',  # right single quotation mark
    ]
    for p in cn_punctuation:
        if p in text:
            issues.append({
                "type": "chinese_punctuation",
                "char": p,
                "severity": "low",
                "suggestion": f"将 {p} 替换为对应英文标点"
            })

    # Markdown fences mean the JSON still needs unwrapping.
    if '```' in text:
        issues.append({
            "type": "markdown_wrapped",
            "severity": "low",
            "suggestion": "需要提取代码块内容"
        })

    # JS-style comments are illegal in JSON.
    if '//' in text or '/*' in text:
        issues.append({
            "type": "has_comments",
            "severity": "low",
            "suggestion": "需要移除注释"
        })

    # Python literals instead of JSON ones.
    if re.search(r'\b(True|False|None)\b', text):
        issues.append({
            "type": "python_style",
            "severity": "low",
            "suggestion": "将 True/False/None 转为 true/false/null"
        })

    # Trailing commas before closers.
    if re.search(r',\s*[}\]]', text):
        issues.append({
            "type": "trailing_comma",
            "severity": "low",
            "suggestion": "移除 } 或 ] 前的逗号"
        })

    # Bracket balance (raw counts — brackets inside strings may skew this).
    open_braces = text.count('{') - text.count('}')
    open_brackets = text.count('[') - text.count(']')

    if open_braces > 0:
        issues.append({
            "type": "unclosed_brace",
            "count": open_braces,
            "severity": "medium",
            "suggestion": f"缺少 {open_braces} 个 }}"
        })
    elif open_braces < 0:
        issues.append({
            "type": "extra_brace",
            "count": -open_braces,
            "severity": "medium",
            "suggestion": f"多余 {-open_braces} 个 }}"
        })

    if open_brackets > 0:
        issues.append({
            "type": "unclosed_bracket",
            "count": open_brackets,
            "severity": "medium",
            "suggestion": f"缺少 {open_brackets} 个 ]"
        })
    elif open_brackets < 0:
        issues.append({
            "type": "extra_bracket",
            "count": -open_brackets,
            "severity": "medium",
            "suggestion": f"多余 {-open_brackets} 个 ]"
        })

    # Odd number of double quotes implies an unterminated string.
    quote_count = text.count('"')
    if quote_count % 2 != 0:
        issues.append({
            "type": "unbalanced_quotes",
            "severity": "high",
            "suggestion": "引号数量不平衡,可能有未闭合的字符串"
        })

    # Fixable iff every finding is one the repair pipeline can handle.
    fixable_types = {
        "chinese_punctuation", "markdown_wrapped", "has_comments",
        "python_style", "trailing_comma", "unclosed_brace", "unclosed_bracket"
    }
    fixable = all(i["type"] in fixable_types for i in issues)

    return {
        "issues": issues,
        "issue_count": len(issues),
        "fixable": fixable,
        "severity": max(
            (i.get("severity", "low") for i in issues),
            key=lambda x: {"low": 1, "medium": 2, "high": 3, "critical": 4}.get(x, 0),
            default="low"
        )
    }
|
||||
|
||||
|
||||
# ==================== 便捷函数 ====================
|
||||
|
||||
def safe_json_loads(text: str, default: Any = None) -> Any:
    """Parse possibly-messy JSON text; return *default* on any failure."""
    try:
        parsed = parse_llm_json(text)
    except Exception:
        return default
    return parsed
|
||||
|
||||
|
||||
def extract_json_from_text(text: str) -> Optional[str]:
    """Extract a syntactically valid JSON string from *text*.

    Pipeline: preprocess (unwrap fences, find boundaries), repair common
    format issues, then — only if still invalid — try completing a
    truncated fragment.  Returns the first candidate that parses, or
    ``None`` when no valid JSON can be recovered.
    """
    repaired, _ = _fix_json_format(_preprocess_text(text))

    try:
        json.loads(repaired)
        return repaired
    except Exception:
        pass

    completed = _try_complete_json(repaired)
    if completed:
        try:
            json.loads(completed)
            return completed
        except Exception:
            pass

    return None
|
||||
|
||||
|
||||
def clean_llm_output(text: str) -> Tuple[str, List[str]]:
    """Clean raw LLM output ahead of JSON parsing.

    Steps, in order: BOM removal, ANSI escape stripping, whitespace
    trimming, removal of a leading politeness phrase, Markdown
    code-block extraction and zero-width character removal.

    Args:
        text: Raw model output.

    Returns:
        ``(cleaned_text, applied_rules)`` — ``applied_rules`` names each
        cleaning step that actually fired.
    """
    if not text:
        return "", ["empty_input"]

    applied_rules = []

    # 1. Strip a UTF-8 BOM.
    if text.startswith('\ufeff'):
        text = text.lstrip('\ufeff')
        applied_rules.append("removed_bom")

    # 2. Strip ANSI colour/style escape sequences.
    ansi_pattern = re.compile(r'\x1b\[[0-9;]*m')
    if ansi_pattern.search(text):
        text = ansi_pattern.sub('', text)
        applied_rules.append("removed_ansi")

    # 3. Trim surrounding whitespace.
    text = text.strip()

    # 4. Drop a leading politeness phrase ("好的," / "以下是…:" etc.).
    #    Full-width punctuation is written as escapes (\uff0c = fullwidth
    #    comma, \u3002 = ideographic stop, \uff1a = fullwidth colon) so the
    #    character classes survive encoding round-trips.
    polite_patterns = [
        r'^好的[\uff0c,\u3002.]?\s*',
        r'^当然[\uff0c,\u3002.]?\s*',
        r'^没问题[\uff0c,\u3002.]?\s*',
        r'^根据您的要求[\uff0c,\u3002.]?\s*',
        r'^以下是.*?[\uff1a:]\s*',
        r'^分析结果如下[\uff1a:]\s*',
        r'^我来为您.*?[\uff1a:]\s*',
        r'^这是.*?结果[\uff1a:]\s*',
    ]
    for pattern in polite_patterns:
        if re.match(pattern, text, re.IGNORECASE):
            text = re.sub(pattern, '', text, flags=re.IGNORECASE)
            applied_rules.append("removed_polite_prefix")
            break

    # 5. Unwrap a Markdown-fenced JSON block when present.
    json_block_patterns = [
        r'```json\s*([\s\S]*?)\s*```',
        r'```\s*([\s\S]*?)\s*```',
    ]
    for pattern in json_block_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            extracted = match.group(1).strip()
            if extracted.startswith(('{', '[')):
                text = extracted
                applied_rules.append("extracted_code_block")
                break

    # 6. Remove zero-width characters.
    zero_width = re.compile(r'[\u200b\u200c\u200d\ufeff]')
    if zero_width.search(text):
        text = zero_width.sub('', text)
        applied_rules.append("removed_zero_width")

    return text.strip(), applied_rules
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
377
backend/app/services/ai/practice_analysis_service.py
Normal file
377
backend/app/services/ai/practice_analysis_service.py
Normal file
@@ -0,0 +1,377 @@
|
||||
"""
|
||||
陪练分析报告服务 - Python 原生实现
|
||||
|
||||
功能:
|
||||
- 分析陪练对话历史
|
||||
- 生成综合评分、能力维度评估
|
||||
- 提供对话标注和改进建议
|
||||
|
||||
提供稳定可靠的陪练分析报告生成能力。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .ai_service import AIService, AIResponse
|
||||
from .llm_json_parser import parse_with_fallback, clean_llm_output
|
||||
from .prompts.practice_analysis_prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
USER_PROMPT,
|
||||
PRACTICE_ANALYSIS_SCHEMA,
|
||||
SCORE_BREAKDOWN_ITEMS,
|
||||
ABILITY_DIMENSIONS,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ==================== 数据结构 ====================
|
||||
|
||||
@dataclass
class ScoreBreakdownItem:
    """One entry of the overall-score breakdown."""
    name: str          # dimension name
    score: float       # score awarded for this dimension
    description: str   # short textual evaluation
|
||||
|
||||
|
||||
@dataclass
class AbilityDimensionItem:
    """One evaluated ability dimension."""
    name: str       # dimension name
    score: float    # score for this ability
    feedback: str   # textual feedback
|
||||
|
||||
|
||||
@dataclass
class DialogueAnnotation:
    """Per-turn annotation of the dialogue."""
    sequence: int     # 1-based turn number being annotated
    tags: List[str]   # labels attached to this turn
    comment: str      # reviewer comment
|
||||
|
||||
|
||||
@dataclass
class Suggestion:
    """One improvement suggestion."""
    title: str     # short suggestion title
    content: str   # explanation of what to improve
    example: str   # concrete example of the improvement
|
||||
|
||||
|
||||
@dataclass
class PracticeAnalysisResult:
    """Outcome of a practice-session analysis."""
    success: bool
    total_score: float = 0.0
    score_breakdown: List[ScoreBreakdownItem] = field(default_factory=list)
    ability_dimensions: List[AbilityDimensionItem] = field(default_factory=list)
    dialogue_annotations: List[DialogueAnnotation] = field(default_factory=list)
    suggestions: List[Suggestion] = field(default_factory=list)
    ai_provider: str = ""          # which AI backend answered
    ai_model: str = ""             # model identifier
    ai_tokens: int = 0             # total tokens consumed
    ai_latency_ms: int = 0         # end-to-end AI latency
    error: str = ""                # failure description when success is False

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to the legacy API-response shape."""
        breakdown = [
            {"name": s.name, "score": s.score, "description": s.description}
            for s in self.score_breakdown
        ]
        dimensions = [
            {"name": d.name, "score": d.score, "feedback": d.feedback}
            for d in self.ability_dimensions
        ]
        annotations = [
            {"sequence": a.sequence, "tags": a.tags, "comment": a.comment}
            for a in self.dialogue_annotations
        ]
        advice = [
            {"title": s.title, "content": s.content, "example": s.example}
            for s in self.suggestions
        ]
        return {
            "analysis": {
                "total_score": self.total_score,
                "score_breakdown": breakdown,
                "ability_dimensions": dimensions,
                "dialogue_annotations": annotations,
                "suggestions": advice,
            },
            "ai_provider": self.ai_provider,
            "ai_model": self.ai_model,
            "ai_tokens": self.ai_tokens,
            "ai_latency_ms": self.ai_latency_ms,
        }

    def to_db_format(self) -> Dict[str, Any]:
        """Serialise for DB storage (PracticeReport model shape).

        Note: annotations are stored under ``dialogue_review`` (not
        ``dialogue_annotations``) and ``total_score`` is truncated to int.
        """
        return {
            "total_score": int(self.total_score),
            "score_breakdown": [
                {"name": s.name, "score": s.score, "description": s.description}
                for s in self.score_breakdown
            ],
            "ability_dimensions": [
                {"name": d.name, "score": d.score, "feedback": d.feedback}
                for d in self.ability_dimensions
            ],
            "dialogue_review": [
                {"sequence": a.sequence, "tags": a.tags, "comment": a.comment}
                for a in self.dialogue_annotations
            ],
            "suggestions": [
                {"title": s.title, "content": s.content, "example": s.example}
                for s in self.suggestions
            ],
        }
|
||||
|
||||
|
||||
# ==================== 服务类 ====================
|
||||
|
||||
class PracticeAnalysisService:
|
||||
"""
|
||||
陪练分析报告服务
|
||||
|
||||
使用 Python 原生实现。
|
||||
|
||||
使用示例:
|
||||
```python
|
||||
service = PracticeAnalysisService()
|
||||
result = await service.analyze(
|
||||
db=db_session, # 传入 db_session 用于记录调用日志
|
||||
dialogue_history=[
|
||||
{"speaker": "user", "content": "您好,我想咨询一下..."},
|
||||
{"speaker": "ai", "content": "您好!很高兴为您服务..."}
|
||||
]
|
||||
)
|
||||
print(result.total_score)
|
||||
print(result.suggestions)
|
||||
```
|
||||
"""
|
||||
|
||||
MODULE_CODE = "practice_analysis"
|
||||
|
||||
async def analyze(
|
||||
self,
|
||||
dialogue_history: List[Dict[str, Any]],
|
||||
db: Any = None # 数据库会话,用于记录 AI 调用日志
|
||||
) -> PracticeAnalysisResult:
|
||||
"""
|
||||
分析陪练对话
|
||||
|
||||
Args:
|
||||
dialogue_history: 对话历史列表,每项包含 speaker, content, timestamp 等字段
|
||||
db: 数据库会话,用于记录调用日志(符合 AI 接入规范)
|
||||
|
||||
Returns:
|
||||
PracticeAnalysisResult 分析结果
|
||||
"""
|
||||
try:
|
||||
logger.info(f"开始分析陪练对话 - 对话轮次: {len(dialogue_history)}")
|
||||
|
||||
# 1. 验证输入
|
||||
if not dialogue_history or len(dialogue_history) < 2:
|
||||
return PracticeAnalysisResult(
|
||||
success=False,
|
||||
error="对话记录太少,无法生成分析报告(至少需要2轮对话)"
|
||||
)
|
||||
|
||||
# 2. 格式化对话历史
|
||||
dialogue_text = self._format_dialogue_history(dialogue_history)
|
||||
|
||||
# 3. 创建 AIService 实例(传入 db_session 用于记录调用日志)
|
||||
self._ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)
|
||||
|
||||
# 4. 调用 AI 分析
|
||||
ai_response = await self._call_ai_analysis(dialogue_text)
|
||||
|
||||
logger.info(
|
||||
f"AI 分析完成 - provider: {ai_response.provider}, "
|
||||
f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
|
||||
)
|
||||
|
||||
# 4. 解析 JSON 结果
|
||||
analysis_data = self._parse_analysis_result(ai_response.content)
|
||||
|
||||
# 5. 构建返回结果
|
||||
result = PracticeAnalysisResult(
|
||||
success=True,
|
||||
total_score=analysis_data.get("total_score", 0),
|
||||
score_breakdown=[
|
||||
ScoreBreakdownItem(
|
||||
name=s.get("name", ""),
|
||||
score=s.get("score", 0),
|
||||
description=s.get("description", "")
|
||||
)
|
||||
for s in analysis_data.get("score_breakdown", [])
|
||||
],
|
||||
ability_dimensions=[
|
||||
AbilityDimensionItem(
|
||||
name=d.get("name", ""),
|
||||
score=d.get("score", 0),
|
||||
feedback=d.get("feedback", "")
|
||||
)
|
||||
for d in analysis_data.get("ability_dimensions", [])
|
||||
],
|
||||
dialogue_annotations=[
|
||||
DialogueAnnotation(
|
||||
sequence=a.get("sequence", 0),
|
||||
tags=a.get("tags", []),
|
||||
comment=a.get("comment", "")
|
||||
)
|
||||
for a in analysis_data.get("dialogue_annotations", [])
|
||||
],
|
||||
suggestions=[
|
||||
Suggestion(
|
||||
title=s.get("title", ""),
|
||||
content=s.get("content", ""),
|
||||
example=s.get("example", "")
|
||||
)
|
||||
for s in analysis_data.get("suggestions", [])
|
||||
],
|
||||
ai_provider=ai_response.provider,
|
||||
ai_model=ai_response.model,
|
||||
ai_tokens=ai_response.total_tokens,
|
||||
ai_latency_ms=ai_response.latency_ms,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"陪练分析完成 - total_score: {result.total_score}, "
|
||||
f"annotations: {len(result.dialogue_annotations)}, "
|
||||
f"suggestions: {len(result.suggestions)}"
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"陪练分析失败: {e}", exc_info=True)
|
||||
return PracticeAnalysisResult(
|
||||
success=False,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
def _format_dialogue_history(self, dialogue_history: List[Dict[str, Any]]) -> str:
|
||||
"""
|
||||
格式化对话历史为文本
|
||||
|
||||
Args:
|
||||
dialogue_history: 对话历史列表
|
||||
|
||||
Returns:
|
||||
格式化后的对话文本
|
||||
"""
|
||||
lines = []
|
||||
for i, d in enumerate(dialogue_history, 1):
|
||||
speaker = d.get('speaker', 'unknown')
|
||||
content = d.get('content', '')
|
||||
|
||||
# 统一说话者标识
|
||||
if speaker in ['user', 'employee', 'consultant', '员工', '用户']:
|
||||
speaker_label = '员工'
|
||||
elif speaker in ['ai', 'customer', 'client', '顾客', '客户', 'AI']:
|
||||
speaker_label = '顾客'
|
||||
else:
|
||||
speaker_label = speaker
|
||||
|
||||
lines.append(f"[{i}] {speaker_label}: {content}")
|
||||
|
||||
return '\n'.join(lines)
|
||||
|
||||
async def _call_ai_analysis(self, dialogue_text: str) -> AIResponse:
|
||||
"""调用 AI 进行分析"""
|
||||
user_message = USER_PROMPT.format(dialogue_history=dialogue_text)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": SYSTEM_PROMPT},
|
||||
{"role": "user", "content": user_message}
|
||||
]
|
||||
|
||||
response = await self._ai_service.chat(
|
||||
messages=messages,
|
||||
temperature=0.7,
|
||||
prompt_name="practice_analysis"
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _parse_analysis_result(self, ai_output: str) -> Dict[str, Any]:
|
||||
"""
|
||||
解析 AI 输出的分析结果 JSON
|
||||
|
||||
使用 LLM JSON Parser 进行多层兜底解析
|
||||
"""
|
||||
# 先清洗输出
|
||||
cleaned_output, rules = clean_llm_output(ai_output)
|
||||
if rules:
|
||||
logger.debug(f"AI 输出已清洗: {rules}")
|
||||
|
||||
# 使用带 Schema 校验的解析
|
||||
parsed = parse_with_fallback(
|
||||
cleaned_output,
|
||||
schema=PRACTICE_ANALYSIS_SCHEMA,
|
||||
default={"analysis": {}},
|
||||
validate_schema=True,
|
||||
on_error="default"
|
||||
)
|
||||
|
||||
# 提取 analysis 部分
|
||||
analysis = parsed.get("analysis", {})
|
||||
|
||||
# 确保 score_breakdown 完整
|
||||
existing_breakdown = {s.get("name") for s in analysis.get("score_breakdown", [])}
|
||||
for item_name in SCORE_BREAKDOWN_ITEMS:
|
||||
if item_name not in existing_breakdown:
|
||||
logger.warning(f"缺少分数维度: {item_name},使用默认值")
|
||||
analysis.setdefault("score_breakdown", []).append({
|
||||
"name": item_name,
|
||||
"score": 75,
|
||||
"description": "暂无详细评价"
|
||||
})
|
||||
|
||||
# 确保 ability_dimensions 完整
|
||||
existing_dims = {d.get("name") for d in analysis.get("ability_dimensions", [])}
|
||||
for dim_name in ABILITY_DIMENSIONS:
|
||||
if dim_name not in existing_dims:
|
||||
logger.warning(f"缺少能力维度: {dim_name},使用默认值")
|
||||
analysis.setdefault("ability_dimensions", []).append({
|
||||
"name": dim_name,
|
||||
"score": 75,
|
||||
"feedback": "暂无详细评价"
|
||||
})
|
||||
|
||||
# 确保有建议
|
||||
if not analysis.get("suggestions"):
|
||||
analysis["suggestions"] = [
|
||||
{
|
||||
"title": "持续练习",
|
||||
"content": "建议继续进行陪练练习,提升整体表现",
|
||||
"example": "每周进行2-3次陪练,针对薄弱环节重点练习"
|
||||
}
|
||||
]
|
||||
|
||||
return analysis
|
||||
|
||||
|
||||
# ==================== 全局实例 ====================
|
||||
|
||||
practice_analysis_service = PracticeAnalysisService()
|
||||
|
||||
|
||||
# ==================== 便捷函数 ====================
|
||||
|
||||
async def analyze_practice_session(
    dialogue_history: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Convenience wrapper: analyse a practice session via the shared service.

    Args:
        dialogue_history: Dialogue turns to analyse.

    Returns:
        The analysis result as a dict (legacy-compatible shape).
    """
    analysis = await practice_analysis_service.analyze(dialogue_history)
    return analysis.to_dict()
|
||||
|
||||
379
backend/app/services/ai/practice_scene_service.py
Normal file
379
backend/app/services/ai/practice_scene_service.py
Normal file
@@ -0,0 +1,379 @@
|
||||
"""
|
||||
陪练场景准备服务 - Python 原生实现
|
||||
|
||||
功能:
|
||||
- 根据课程ID获取知识点
|
||||
- 调用 AI 生成陪练场景配置
|
||||
- 解析并返回结构化场景数据
|
||||
|
||||
提供稳定可靠的陪练场景提取能力。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.exceptions import ExternalServiceError
|
||||
|
||||
from .ai_service import AIService, AIResponse
|
||||
from .llm_json_parser import parse_with_fallback, clean_llm_output
|
||||
from .prompts.practice_scene_prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
USER_PROMPT,
|
||||
PRACTICE_SCENE_SCHEMA,
|
||||
DEFAULT_SCENE_TYPE,
|
||||
DEFAULT_DIFFICULTY,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ==================== 数据结构 ====================
|
||||
|
||||
@dataclass
class PracticeScene:
    """Structured practice-scene configuration produced by the AI."""
    name: str               # scene title
    description: str        # short scene summary
    background: str         # situational backstory
    ai_role: str            # role the AI plays during practice
    objectives: List[str]   # training objectives to accomplish
    keywords: List[str]     # key phrases the trainee should cover
    type: str = DEFAULT_SCENE_TYPE          # scene category
    difficulty: str = DEFAULT_DIFFICULTY    # difficulty level
|
||||
|
||||
|
||||
@dataclass
class PracticeSceneResult:
    """Outcome of practice-scene generation."""
    success: bool
    scene: Optional[PracticeScene] = None            # parsed scene (None on failure)
    raw_response: Dict[str, Any] = field(default_factory=dict)  # raw AI payload
    ai_provider: str = ""          # which AI backend answered
    ai_model: str = ""             # model identifier
    ai_tokens: int = 0             # total tokens consumed
    ai_latency_ms: int = 0         # end-to-end AI latency
    knowledge_points_count: int = 0  # knowledge points fed to the AI
    error: str = ""                # failure description when success is False
|
||||
|
||||
|
||||
# ==================== 服务类 ====================
|
||||
|
||||
class PracticeSceneService:
|
||||
"""
|
||||
陪练场景准备服务
|
||||
|
||||
使用 Python 原生实现。
|
||||
|
||||
使用示例:
|
||||
```python
|
||||
service = PracticeSceneService()
|
||||
result = await service.prepare_practice_knowledge(
|
||||
db=db_session,
|
||||
course_id=1
|
||||
)
|
||||
if result.success:
|
||||
print(result.scene.name)
|
||||
print(result.scene.objectives)
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""初始化服务"""
|
||||
self.ai_service = AIService(module_code="practice_scene")
|
||||
|
||||
async def prepare_practice_knowledge(
|
||||
self,
|
||||
db: AsyncSession,
|
||||
course_id: int
|
||||
) -> PracticeSceneResult:
|
||||
"""
|
||||
准备陪练所需的知识内容并生成场景
|
||||
|
||||
陪练知识准备的 Python 实现。
|
||||
|
||||
Args:
|
||||
db: 数据库会话(支持多租户,由调用方传入对应租户的数据库连接)
|
||||
course_id: 课程ID
|
||||
|
||||
Returns:
|
||||
PracticeSceneResult: 包含场景配置和元信息的结果对象
|
||||
"""
|
||||
try:
|
||||
logger.info(f"开始陪练知识准备 - course_id: {course_id}")
|
||||
|
||||
# 1. 查询知识点
|
||||
knowledge_points = await self._fetch_knowledge_points(db, course_id)
|
||||
|
||||
if not knowledge_points:
|
||||
logger.warning(f"课程没有知识点 - course_id: {course_id}")
|
||||
return PracticeSceneResult(
|
||||
success=False,
|
||||
error=f"课程 {course_id} 没有可用的知识点"
|
||||
)
|
||||
|
||||
logger.info(f"获取到 {len(knowledge_points)} 个知识点 - course_id: {course_id}")
|
||||
|
||||
# 2. 格式化知识点为文本
|
||||
knowledge_text = self._format_knowledge_points(knowledge_points)
|
||||
|
||||
# 3. 调用 AI 生成场景
|
||||
ai_response = await self._call_ai_generation(knowledge_text)
|
||||
|
||||
logger.info(
|
||||
f"AI 生成完成 - provider: {ai_response.provider}, "
|
||||
f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
|
||||
)
|
||||
|
||||
# 4. 解析 JSON 结果
|
||||
scene_data = self._parse_scene_response(ai_response.content)
|
||||
|
||||
if not scene_data:
|
||||
logger.error(f"场景解析失败 - course_id: {course_id}")
|
||||
return PracticeSceneResult(
|
||||
success=False,
|
||||
raw_response={"ai_output": ai_response.content},
|
||||
ai_provider=ai_response.provider,
|
||||
ai_model=ai_response.model,
|
||||
ai_tokens=ai_response.total_tokens,
|
||||
ai_latency_ms=ai_response.latency_ms,
|
||||
knowledge_points_count=len(knowledge_points),
|
||||
error="AI 输出解析失败"
|
||||
)
|
||||
|
||||
# 5. 构建场景对象
|
||||
scene = self._build_scene_object(scene_data)
|
||||
|
||||
logger.info(
|
||||
f"陪练场景生成成功 - course_id: {course_id}, "
|
||||
f"scene_name: {scene.name}, type: {scene.type}"
|
||||
)
|
||||
|
||||
return PracticeSceneResult(
|
||||
success=True,
|
||||
scene=scene,
|
||||
raw_response=scene_data,
|
||||
ai_provider=ai_response.provider,
|
||||
ai_model=ai_response.model,
|
||||
ai_tokens=ai_response.total_tokens,
|
||||
ai_latency_ms=ai_response.latency_ms,
|
||||
knowledge_points_count=len(knowledge_points)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"陪练知识准备失败 - course_id: {course_id}, error: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
return PracticeSceneResult(
|
||||
success=False,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
async def _fetch_knowledge_points(
|
||||
self,
|
||||
db: AsyncSession,
|
||||
course_id: int
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
从数据库获取课程知识点
|
||||
|
||||
获取课程知识点
|
||||
"""
|
||||
# 知识点查询 SQL:
|
||||
# SELECT kp.name, kp.description
|
||||
# FROM knowledge_points kp
|
||||
# INNER JOIN course_materials cm ON kp.material_id = cm.id
|
||||
# WHERE kp.course_id = {course_id}
|
||||
# AND kp.is_deleted = 0
|
||||
# AND cm.is_deleted = 0
|
||||
# ORDER BY kp.id;
|
||||
|
||||
sql = text("""
|
||||
SELECT kp.name, kp.description
|
||||
FROM knowledge_points kp
|
||||
INNER JOIN course_materials cm ON kp.material_id = cm.id
|
||||
WHERE kp.course_id = :course_id
|
||||
AND kp.is_deleted = 0
|
||||
AND cm.is_deleted = 0
|
||||
ORDER BY kp.id
|
||||
""")
|
||||
|
||||
try:
|
||||
result = await db.execute(sql, {"course_id": course_id})
|
||||
rows = result.fetchall()
|
||||
|
||||
knowledge_points = []
|
||||
for row in rows:
|
||||
knowledge_points.append({
|
||||
"name": row[0],
|
||||
"description": row[1] or ""
|
||||
})
|
||||
|
||||
return knowledge_points
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"查询知识点失败: {e}")
|
||||
raise ExternalServiceError(f"数据库查询失败: {e}")
|
||||
|
||||
def _format_knowledge_points(self, knowledge_points: List[Dict[str, Any]]) -> str:
|
||||
"""
|
||||
将知识点列表格式化为文本
|
||||
|
||||
Args:
|
||||
knowledge_points: 知识点列表
|
||||
|
||||
Returns:
|
||||
格式化后的文本
|
||||
"""
|
||||
lines = []
|
||||
for i, kp in enumerate(knowledge_points, 1):
|
||||
name = kp.get("name", "")
|
||||
description = kp.get("description", "")
|
||||
|
||||
if description:
|
||||
lines.append(f"{i}. {name}\n {description}")
|
||||
else:
|
||||
lines.append(f"{i}. {name}")
|
||||
|
||||
return "\n\n".join(lines)
|
||||
|
||||
async def _call_ai_generation(self, knowledge_text: str) -> AIResponse:
|
||||
"""
|
||||
调用 AI 生成陪练场景
|
||||
|
||||
Args:
|
||||
knowledge_text: 格式化后的知识点文本
|
||||
|
||||
Returns:
|
||||
AI 响应对象
|
||||
"""
|
||||
# 构建用户消息
|
||||
user_message = USER_PROMPT.format(knowledge_points=knowledge_text)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": SYSTEM_PROMPT},
|
||||
{"role": "user", "content": user_message}
|
||||
]
|
||||
|
||||
# 调用 AI(自动降级:4sapi.com → OpenRouter)
|
||||
response = await self.ai_service.chat(
|
||||
messages=messages,
|
||||
temperature=0.7, # 适中的创意性
|
||||
prompt_name="practice_scene_generation"
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _parse_scene_response(self, ai_output: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
解析 AI 输出的场景 JSON
|
||||
|
||||
使用 LLM JSON Parser 进行多层兜底解析
|
||||
|
||||
Args:
|
||||
ai_output: AI 原始输出
|
||||
|
||||
Returns:
|
||||
解析后的字典,失败返回 None
|
||||
"""
|
||||
# 先清洗输出
|
||||
cleaned_output, rules = clean_llm_output(ai_output)
|
||||
if rules:
|
||||
logger.debug(f"AI 输出已清洗: {rules}")
|
||||
|
||||
# 使用带 Schema 校验的解析
|
||||
result = parse_with_fallback(
|
||||
cleaned_output,
|
||||
schema=PRACTICE_SCENE_SCHEMA,
|
||||
default=None,
|
||||
validate_schema=True,
|
||||
on_error="none"
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def _build_scene_object(self, scene_data: Dict[str, Any]) -> PracticeScene:
|
||||
"""
|
||||
从解析的字典构建场景对象
|
||||
|
||||
Args:
|
||||
scene_data: 解析后的场景数据
|
||||
|
||||
Returns:
|
||||
PracticeScene 对象
|
||||
"""
|
||||
# 提取 scene 字段(JSON 格式为 {"scene": {...}})
|
||||
scene = scene_data.get("scene", scene_data)
|
||||
|
||||
return PracticeScene(
|
||||
name=scene.get("name", "陪练场景"),
|
||||
description=scene.get("description", ""),
|
||||
background=scene.get("background", ""),
|
||||
ai_role=scene.get("ai_role", "AI扮演客户"),
|
||||
objectives=scene.get("objectives", []),
|
||||
keywords=scene.get("keywords", []),
|
||||
type=scene.get("type", DEFAULT_SCENE_TYPE),
|
||||
difficulty=scene.get("difficulty", DEFAULT_DIFFICULTY)
|
||||
)
|
||||
|
||||
def scene_to_dict(self, scene: PracticeScene) -> Dict[str, Any]:
|
||||
"""
|
||||
将场景对象转换为字典
|
||||
|
||||
便于 API 响应序列化
|
||||
|
||||
Args:
|
||||
scene: PracticeScene 对象
|
||||
|
||||
Returns:
|
||||
字典格式的场景数据
|
||||
"""
|
||||
return {
|
||||
"scene": {
|
||||
"name": scene.name,
|
||||
"description": scene.description,
|
||||
"background": scene.background,
|
||||
"ai_role": scene.ai_role,
|
||||
"objectives": scene.objectives,
|
||||
"keywords": scene.keywords,
|
||||
"type": scene.type,
|
||||
"difficulty": scene.difficulty
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ==================== 全局实例 ====================
|
||||
|
||||
practice_scene_service = PracticeSceneService()
|
||||
|
||||
|
||||
# ==================== 便捷函数 ====================
|
||||
|
||||
async def prepare_practice_knowledge(
    db: AsyncSession,
    course_id: int
) -> PracticeSceneResult:
    """Convenience wrapper around the shared PracticeSceneService.

    Args:
        db: Database session.
        course_id: Course ID.

    Returns:
        PracticeSceneResult from the shared service instance.
    """
    return await practice_scene_service.prepare_practice_knowledge(db, course_id)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
57
backend/app/services/ai/prompts/__init__.py
Normal file
57
backend/app/services/ai/prompts/__init__.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""
|
||||
提示词模板模块
|
||||
|
||||
遵循瑞小美提示词规范
|
||||
"""
|
||||
|
||||
from .knowledge_analysis_prompts import (
|
||||
PROMPT_META as KNOWLEDGE_ANALYSIS_PROMPT_META,
|
||||
SYSTEM_PROMPT as KNOWLEDGE_ANALYSIS_SYSTEM_PROMPT,
|
||||
USER_PROMPT as KNOWLEDGE_ANALYSIS_USER_PROMPT,
|
||||
KNOWLEDGE_POINT_SCHEMA,
|
||||
)
|
||||
|
||||
from .exam_generator_prompts import (
|
||||
PROMPT_META as EXAM_GENERATOR_PROMPT_META,
|
||||
SYSTEM_PROMPT as EXAM_GENERATOR_SYSTEM_PROMPT,
|
||||
USER_PROMPT as EXAM_GENERATOR_USER_PROMPT,
|
||||
MISTAKE_REGEN_SYSTEM_PROMPT,
|
||||
MISTAKE_REGEN_USER_PROMPT,
|
||||
QUESTION_SCHEMA,
|
||||
QUESTION_TYPES,
|
||||
DEFAULT_QUESTION_COUNTS,
|
||||
DEFAULT_DIFFICULTY_LEVEL,
|
||||
)
|
||||
|
||||
from .ability_analysis_prompts import (
|
||||
PROMPT_META as ABILITY_ANALYSIS_PROMPT_META,
|
||||
SYSTEM_PROMPT as ABILITY_ANALYSIS_SYSTEM_PROMPT,
|
||||
USER_PROMPT as ABILITY_ANALYSIS_USER_PROMPT,
|
||||
ABILITY_ANALYSIS_SCHEMA,
|
||||
ABILITY_DIMENSIONS,
|
||||
)
|
||||
|
||||
# Public API of the prompts package: names re-exported from the
# submodules imported above, grouped per source module.
__all__ = [
    # Knowledge Analysis Prompts
    "KNOWLEDGE_ANALYSIS_PROMPT_META",
    "KNOWLEDGE_ANALYSIS_SYSTEM_PROMPT",
    "KNOWLEDGE_ANALYSIS_USER_PROMPT",
    "KNOWLEDGE_POINT_SCHEMA",
    # Exam Generator Prompts
    "EXAM_GENERATOR_PROMPT_META",
    "EXAM_GENERATOR_SYSTEM_PROMPT",
    "EXAM_GENERATOR_USER_PROMPT",
    "MISTAKE_REGEN_SYSTEM_PROMPT",
    "MISTAKE_REGEN_USER_PROMPT",
    "QUESTION_SCHEMA",
    "QUESTION_TYPES",
    "DEFAULT_QUESTION_COUNTS",
    "DEFAULT_DIFFICULTY_LEVEL",
    # Ability Analysis Prompts
    "ABILITY_ANALYSIS_PROMPT_META",
    "ABILITY_ANALYSIS_SYSTEM_PROMPT",
    "ABILITY_ANALYSIS_USER_PROMPT",
    "ABILITY_ANALYSIS_SCHEMA",
    "ABILITY_DIMENSIONS",
]
|
||||
|
||||
215
backend/app/services/ai/prompts/ability_analysis_prompts.py
Normal file
215
backend/app/services/ai/prompts/ability_analysis_prompts.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
智能工牌能力分析与课程推荐提示词模板
|
||||
|
||||
功能:分析员工与顾客的对话记录,评估能力维度得分,并推荐适合的课程
|
||||
"""
|
||||
|
||||
# ==================== Metadata ====================

# Descriptor for this prompt template ("smart badge" ability analysis).
# `variables` lists the placeholders that must be supplied when
# formatting USER_PROMPT.
PROMPT_META = {
    "name": "ability_analysis",
    "display_name": "智能工牌能力分析",
    "description": "分析员工与顾客对话,评估多维度能力得分,推荐个性化课程",
    "module": "kaopeilian",
    "variables": ["dialogue_history", "user_info", "courses"],
    "version": "1.0.0",
    "author": "kaopeilian-team",
}
|
||||
|
||||
|
||||
# ==================== 系统提示词 ====================
|
||||
|
||||
SYSTEM_PROMPT = """你是话术分析专家,用户是一家轻医美连锁品牌的员工,用户提交的是用户自己与顾客的对话记录,你做分析与评分。并严格按照以下格式输出。并根据课程列表,为该用户提供选课建议。
|
||||
|
||||
输出标准:
|
||||
{
|
||||
"analysis": {
|
||||
"total_score": 82,
|
||||
"ability_dimensions": [
|
||||
{
|
||||
"name": "专业知识",
|
||||
"score": 88,
|
||||
"feedback": "产品知识扎实,能准确回答客户问题。建议:继续深化对新产品的了解。"
|
||||
},
|
||||
{
|
||||
"name": "沟通技巧",
|
||||
"score": 92,
|
||||
"feedback": "语言表达清晰流畅,善于倾听客户需求。建议:可以多使用开放式问题引导。"
|
||||
},
|
||||
{
|
||||
"name": "操作技能",
|
||||
"score": 85,
|
||||
"feedback": "基本操作熟练,流程规范。建议:提升复杂场景的应对速度。"
|
||||
},
|
||||
{
|
||||
"name": "客户服务",
|
||||
"score": 90,
|
||||
"feedback": "服务态度优秀,客户体验良好。建议:进一步提升个性化服务能力。"
|
||||
},
|
||||
{
|
||||
"name": "安全意识",
|
||||
"score": 79,
|
||||
"feedback": "基本安全规范掌握,但在细节提醒上还可加强。"
|
||||
},
|
||||
{
|
||||
"name": "应变能力",
|
||||
"score": 76,
|
||||
"feedback": "面对突发情况反应较快,但处理方式可以更灵活多样。"
|
||||
}
|
||||
],
|
||||
"course_recommendations": [
|
||||
{
|
||||
"course_id": 5,
|
||||
"course_name": "应变能力提升训练营",
|
||||
"recommendation_reason": "该课程专注于提升应变能力,包含大量实战案例分析和模拟演练,针对您当前的薄弱环节(应变能力76分)设计。通过学习可提升15分左右。",
|
||||
"priority": "high",
|
||||
"match_score": 95
|
||||
},
|
||||
{
|
||||
"course_id": 3,
|
||||
"course_name": "安全规范与操作标准",
|
||||
"recommendation_reason": "系统讲解安全规范和操作标准,通过案例教学帮助建立安全意识。当前您的安全意识得分为79分,通过本课程学习预计可提升12分。",
|
||||
"priority": "high",
|
||||
"match_score": 88
|
||||
},
|
||||
{
|
||||
"course_id": 7,
|
||||
"course_name": "高级销售技巧",
|
||||
"recommendation_reason": "进阶课程,帮助您将已有的沟通优势(92分)转化为更高级的销售技能,进一步巩固客户服务能力(90分)。",
|
||||
"priority": "medium",
|
||||
"match_score": 82
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
## 输出要求(严格执行)
|
||||
1. 直接输出纯净的 JSON,不要包含 Markdown 标记(如 ```json)
|
||||
2. 不要包含任何解释性文字
|
||||
3. 能力维度必须包含:专业知识、沟通技巧、操作技能、客户服务、安全意识、应变能力
|
||||
4. 课程推荐必须来自提供的课程列表,使用真实的 course_id
|
||||
5. 推荐课程数量:1-5个,优先推荐能补齐短板的课程
|
||||
6. priority 取值:high(得分<80的薄弱项)、medium(得分80-85)、low(锦上添花)
|
||||
|
||||
## 评分标准
|
||||
- 90-100:优秀
|
||||
- 80-89:良好
|
||||
- 70-79:一般
|
||||
- 60-69:需改进
|
||||
- <60:亟需提升"""
|
||||
|
||||
|
||||
# ==================== User prompt template ====================

# Placeholders (filled via str.format): dialogue_history — the raw
# staff/customer transcript; user_info — the employee's profile and
# position; courses — the selectable course list the model must draw
# recommendations from.
USER_PROMPT = """对话记录:{dialogue_history}

---

用户的信息和岗位:{user_info}

---

所有可选课程:{courses}"""
|
||||
|
||||
|
||||
# ==================== JSON Schema ====================
|
||||
|
||||
ABILITY_ANALYSIS_SCHEMA = {
|
||||
"type": "object",
|
||||
"required": ["analysis"],
|
||||
"properties": {
|
||||
"analysis": {
|
||||
"type": "object",
|
||||
"required": ["total_score", "ability_dimensions", "course_recommendations"],
|
||||
"properties": {
|
||||
"total_score": {
|
||||
"type": "number",
|
||||
"description": "总体评分(0-100)",
|
||||
"minimum": 0,
|
||||
"maximum": 100
|
||||
},
|
||||
"ability_dimensions": {
|
||||
"type": "array",
|
||||
"description": "能力维度评分列表",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["name", "score", "feedback"],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "能力维度名称"
|
||||
},
|
||||
"score": {
|
||||
"type": "number",
|
||||
"description": "该维度得分(0-100)",
|
||||
"minimum": 0,
|
||||
"maximum": 100
|
||||
},
|
||||
"feedback": {
|
||||
"type": "string",
|
||||
"description": "该维度的反馈和建议"
|
||||
}
|
||||
}
|
||||
},
|
||||
"minItems": 1
|
||||
},
|
||||
"course_recommendations": {
|
||||
"type": "array",
|
||||
"description": "课程推荐列表",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["course_id", "course_name", "recommendation_reason", "priority", "match_score"],
|
||||
"properties": {
|
||||
"course_id": {
|
||||
"type": "integer",
|
||||
"description": "课程ID"
|
||||
},
|
||||
"course_name": {
|
||||
"type": "string",
|
||||
"description": "课程名称"
|
||||
},
|
||||
"recommendation_reason": {
|
||||
"type": "string",
|
||||
"description": "推荐理由"
|
||||
},
|
||||
"priority": {
|
||||
"type": "string",
|
||||
"description": "推荐优先级",
|
||||
"enum": ["high", "medium", "low"]
|
||||
},
|
||||
"match_score": {
|
||||
"type": "number",
|
||||
"description": "匹配度得分(0-100)",
|
||||
"minimum": 0,
|
||||
"maximum": 100
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ==================== Ability dimension constants ====================

# The six dimensions the system prompt requires in every analysis
# result (must stay in sync with output requirement 3 of SYSTEM_PROMPT).
ABILITY_DIMENSIONS = [
    "专业知识",
    "沟通技巧",
    "操作技能",
    "客户服务",
    "安全意识",
    "应变能力",
]

# Allowed values for the `priority` field of a course recommendation
# (matches the enum in ABILITY_ANALYSIS_SCHEMA).
PRIORITY_LEVELS = ["high", "medium", "low"]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
48
backend/app/services/ai/prompts/answer_judge_prompts.py
Normal file
48
backend/app/services/ai/prompts/answer_judge_prompts.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
答案判断器提示词模板
|
||||
|
||||
功能:判断填空题与问答题是否回答正确
|
||||
"""
|
||||
|
||||
# ==================== Metadata ====================

# Descriptor for this prompt template (fill-in-the-blank / essay answer
# judge). `variables` lists the USER_PROMPT placeholders.
PROMPT_META = {
    "name": "answer_judge",
    "display_name": "答案判断器",
    "description": "判断填空题与问答题的答案是否正确",
    "module": "kaopeilian",
    "variables": ["question", "correct_answer", "user_answer", "analysis"],
    "version": "1.0.0",
    "author": "kaopeilian-team",
}


# ==================== System prompt ====================

# Instructs the model to output only "正确" (correct) or "错误" (wrong),
# with no extra characters — see the keyword constants below that
# callers use to classify the verdict.
SYSTEM_PROMPT = """你是一个答案判断器,根据用户提交的答案,比对题目、答案、解析。给出正确或错误的判断。

注意:仅输出"正确"或"错误",无需更多字符和说明。"""


# ==================== User prompt template ====================

# Placeholders: question, correct_answer, analysis (the reference
# explanation), user_answer (the examinee's submission).
USER_PROMPT = """题目:{question}。
正确答案:{correct_answer}。
解析:{analysis}。

考生的回答:{user_answer}。"""


# ==================== Verdict keyword constants ====================

# Keyword lists for classifying the model's verdict string.
# NOTE(review): several entries overlap as substrings ("不正确" contains
# "正确"; "错" is a prefix of "错误") — if the consumer matches by
# substring it should test INCORRECT_KEYWORDS before CORRECT_KEYWORDS,
# or match exactly; verify against the service that parses the verdict.
CORRECT_KEYWORDS = ["正确", "correct", "true", "yes", "对", "是"]
INCORRECT_KEYWORDS = ["错误", "incorrect", "false", "no", "wrong", "不正确", "错"]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
74
backend/app/services/ai/prompts/course_chat_prompts.py
Normal file
74
backend/app/services/ai/prompts/course_chat_prompts.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""
|
||||
课程对话提示词模板
|
||||
|
||||
功能:基于课程知识点进行智能问答
|
||||
"""
|
||||
|
||||
# ==================== Metadata ====================

# Descriptor for this prompt template (chat-with-course Q&A).
# `variables`: knowledge_base is interpolated into SYSTEM_PROMPT,
# query into USER_PROMPT.
PROMPT_META = {
    "name": "course_chat",
    "display_name": "与课程对话",
    "description": "基于课程知识点内容,为用户提供智能问答服务",
    "module": "kaopeilian",
    "variables": ["knowledge_base", "query"],
    "version": "2.0.0",
    "author": "kaopeilian-team",
}


# ==================== System prompt ====================

# Note: unlike the other templates, the {knowledge_base} placeholder
# lives in the SYSTEM prompt; USER_PROMPT carries only the user query.
SYSTEM_PROMPT = """你是知识拆解专家,精通以下知识库(课程)内容。请根据用户的问题,从知识库中找到最相关的信息,进行深入分析后,用简洁清晰的语言回答用户。为用户提供与课程对话的服务。

回答要求:

1. 直接针对问题核心,避免冗长铺垫
2. 使用通俗易懂的语言,必要时举例说明
3. 突出关键要点,帮助用户快速理解
4. 如果知识库中没有相关内容,请如实告知

知识库:
{knowledge_base}"""


# ==================== User prompt template ====================

# The user's question is passed through unchanged.
USER_PROMPT = """{query}"""


# ==================== Knowledge-base formatting template ====================

# One entry per knowledge point when building the {knowledge_base} text.
KNOWLEDGE_ITEM_TEMPLATE = """【{name}】
{description}
"""


# ==================== Configuration constants ====================

# Conversation history window (keep the most recent N turns)
CONVERSATION_WINDOW_SIZE = 10

# Session TTL in seconds — 30 minutes
CONVERSATION_TTL = 1800

# Maximum number of knowledge points loaded into the knowledge base
MAX_KNOWLEDGE_POINTS = 50

# Maximum knowledge-base size in characters
MAX_KNOWLEDGE_BASE_LENGTH = 50000

# Default chat model.
# NOTE(review): "gemini-3-flash-preview" looks unusual as a model id —
# confirm against the AI provider's supported model list.
DEFAULT_CHAT_MODEL = "gemini-3-flash-preview"

# Sampling temperature (chat scenarios use a relatively high value)
DEFAULT_TEMPERATURE = 0.7
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
300
backend/app/services/ai/prompts/exam_generator_prompts.py
Normal file
300
backend/app/services/ai/prompts/exam_generator_prompts.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""
|
||||
试题生成器提示词模板
|
||||
|
||||
功能:根据岗位和知识点动态生成考试题目
|
||||
"""
|
||||
|
||||
# ==================== Metadata ====================

# Descriptor for the exam-generator prompt template. `variables` lists
# every placeholder across SYSTEM_PROMPT (question counts, difficulty)
# and USER_PROMPT (position_info, knowledge_points).
PROMPT_META = {
    "name": "exam_generator",
    "display_name": "试题生成器",
    "description": "根据课程知识点和岗位特征,动态生成考试题目(单选、多选、判断、填空、问答)",
    "module": "kaopeilian",
    "variables": [
        "total_count",
        "single_choice_count",
        "multiple_choice_count",
        "true_false_count",
        "fill_blank_count",
        "essay_count",
        "difficulty_level",
        "position_info",
        "knowledge_points",
    ],
    "version": "2.0.0",
    "author": "kaopeilian-team",
}
|
||||
|
||||
|
||||
# ==================== 系统提示词(第一轮出题) ====================
|
||||
|
||||
SYSTEM_PROMPT = """## 角色
|
||||
你是一位经验丰富的考试出题专家,能够依据用户提供的知识内容,结合用户的岗位特征,随机地生成{total_count}题考题。你会以专业、严谨且清晰的方式出题。
|
||||
|
||||
## 输出{single_choice_count}道单选题
|
||||
1、每道题目只能有 1 个正确答案。
|
||||
2、干扰项要具有合理性和迷惑性,且所有选项必须与主题相关。
|
||||
3、答案解析要简明扼要,说明选择理由。
|
||||
4、为每道题记录出题来源的知识点 id。
|
||||
5、请以 JSON 格式输出。
|
||||
6、为每道题输出一个序号。
|
||||
|
||||
### 输出结构:
|
||||
{{
|
||||
"num": "题号",
|
||||
"type": "single_choice",
|
||||
"topic": {{
|
||||
"title": "清晰完整的题目描述",
|
||||
"options": {{
|
||||
"opt1": "A:符合语境的选项",
|
||||
"opt2": "B:符合语境的选项",
|
||||
"opt3": "C:符合语境的选项",
|
||||
"opt4": "D:符合语境的选项"
|
||||
}}
|
||||
}},
|
||||
"knowledge_point_id": "出题来源知识点的id",
|
||||
"correct": "其中一个选项的全部原文",
|
||||
"analysis": "准确的答案解析,包含选择原因和知识点说明"
|
||||
}}
|
||||
|
||||
- 严格按照以上格式输出
|
||||
|
||||
## 输出{multiple_choice_count}道多选题
|
||||
1、每道题目有多个正确答案。
|
||||
2、"type": "multiple_choice"
|
||||
3、其它事项同单选题。
|
||||
|
||||
## 输出{true_false_count}道判断题
|
||||
1、每道题目只有 "正确" 或 "错误" 两种答案。
|
||||
2、题目表述应明确清晰,避免歧义。
|
||||
3、题目应直接陈述事实或观点,便于做出是非判断。
|
||||
4、其它事项同单选题。
|
||||
|
||||
### 输出结构:
|
||||
{{
|
||||
"num": "题号",
|
||||
"type": "true_false",
|
||||
"topic": {{
|
||||
"title": "清晰完整的题目描述"
|
||||
}},
|
||||
"knowledge_point_id": " 出题来源知识点的id",
|
||||
"correct": "正确",
|
||||
"analysis": "准确的答案解析,包含判断原因和知识点说明"
|
||||
}}
|
||||
|
||||
- 严格按照以上格式输出
|
||||
|
||||
## 输出{fill_blank_count}道填空题
|
||||
1. 题干应明确完整,空缺处需用横线"___"标示,且只能有一处空缺
|
||||
2. 答案应唯一且明确,避免开放性表述
|
||||
3. 空缺长度应与答案长度大致匹配
|
||||
4. 解析需说明答案依据及相关知识点
|
||||
5. 其余要求与单选题一致
|
||||
|
||||
### 输出结构:
|
||||
{{
|
||||
"num": "题号",
|
||||
"type": "fill_blank",
|
||||
"topic": {{
|
||||
"title": "包含___空缺的题目描述"
|
||||
}},
|
||||
"knowledge_point_id": "出题来源知识点的id",
|
||||
"correct": "准确的填空答案",
|
||||
"analysis": "解析答案的依据和相关知识点说明"
|
||||
}}
|
||||
|
||||
- 严格按照以上格式输出
|
||||
|
||||
### 输出{essay_count}道问答题
|
||||
1. 问题应具体明确,限定回答范围
|
||||
2. 答案需条理清晰,突出核心要点
|
||||
3. 解析可补充扩展说明或评分要点
|
||||
4. 避免过于宽泛或需要主观发挥的问题
|
||||
5. 其余要求同单选题
|
||||
|
||||
### 输出结构:
|
||||
{{
|
||||
"num": "题号",
|
||||
"type": "essay",
|
||||
"topic": {{
|
||||
"title": "需要详细回答的问题描述"
|
||||
}},
|
||||
"knowledge_point_id": "出题来源知识点的id",
|
||||
"correct": "完整准确的参考答案(分点或连贯表述)",
|
||||
"analysis": "对答案的补充说明、评分要点或相关知识点扩展"
|
||||
}}
|
||||
|
||||
## 特殊要求
|
||||
1. 题目难度:{difficulty_level}级(5 级为最难)
|
||||
2. 避免使用模棱两可的表述
|
||||
3. 选项内容要互斥,不能有重叠
|
||||
4. 每个选项长度尽量均衡
|
||||
5. 正确答案(A、B、C、D)分布要合理,避免规律性
|
||||
6. 正确答案必须使用其中一个选项中的全部原文,严禁修改
|
||||
7. knowledge_point_id 必须是唯一的,即每道题的知识点来源只允许填一个 id。
|
||||
|
||||
## 输出格式要求
|
||||
请直接输出一个纯净的 JSON 数组(Array),不要包含 Markdown 标记(如 ```json),也不要包含任何解释性文字。
|
||||
|
||||
请按以上要求生成题目,确保每道题目质量。"""
|
||||
|
||||
|
||||
# ==================== User prompt template (first-round generation) ====================

# Placeholders: position_info — the job/role characteristics to tailor
# questions to; knowledge_points — the source material to draw from.
USER_PROMPT = """# 请针对岗位特征、待出题的知识点内容进行出题。

## 岗位信息:

{position_info}

---

## 知识点:

{knowledge_points}"""
|
||||
|
||||
|
||||
# ==================== 错题重出系统提示词 ====================
|
||||
|
||||
MISTAKE_REGEN_SYSTEM_PROMPT = """## 角色
|
||||
你是一位经验丰富的考试出题专家,能够依据用户提供的错题记录,重新为用户出题。你会为每道错题重新出一题,你会以专业、严谨且清晰的方式出题。
|
||||
|
||||
## 输出单选题
|
||||
1、每道题目只能有 1 个正确答案。
|
||||
2、干扰项要具有合理性和迷惑性,且所有选项必须与主题相关。
|
||||
3、答案解析要简明扼要,说明选择理由。
|
||||
4、为每道题记录出题来源的知识点 id。
|
||||
5、请以 JSON 格式输出。
|
||||
6、为每道题输出一个序号。
|
||||
|
||||
### 输出结构:
|
||||
{{
|
||||
"num": "题号",
|
||||
"type": "single_choice",
|
||||
"topic": {{
|
||||
"title": "清晰完整的题目描述",
|
||||
"options": {{
|
||||
"opt1": "A:符合语境的选项",
|
||||
"opt2": "B:符合语境的选项",
|
||||
"opt3": "C:符合语境的选项",
|
||||
"opt4": "D:符合语境的选项"
|
||||
}}
|
||||
}},
|
||||
"knowledge_point_id": "出题来源知识点的id",
|
||||
"correct": "其中一个选项的全部原文",
|
||||
"analysis": "准确的答案解析,包含选择原因和知识点说明"
|
||||
}}
|
||||
|
||||
- 严格按照以上格式输出
|
||||
|
||||
|
||||
## 特殊要求
|
||||
1. 题目难度:{difficulty_level}级(5 级为最难)
|
||||
2. 避免使用模棱两可的表述
|
||||
3. 选项内容要互斥,不能有重叠
|
||||
4. 每个选项长度尽量均衡
|
||||
5. 正确答案(A、B、C、D)分布要合理,避免规律性
|
||||
6. 正确答案必须使用其中一个选项中的全部原文,严禁修改
|
||||
7. knowledge_point_id 必须是唯一的,即每道题的知识点来源只允许填一个 id。
|
||||
|
||||
## 输出格式要求
|
||||
请直接输出一个纯净的 JSON 数组(Array),不要包含 Markdown 标记(如 ```json),也不要包含任何解释性文字。
|
||||
|
||||
请按以上要求生成题目,确保每道题目质量。"""
|
||||
|
||||
|
||||
# ==================== Mistake-regeneration user prompt ====================

# Placeholder: mistake_records — the learner's wrong-answer records;
# one new question is regenerated per record (see
# MISTAKE_REGEN_SYSTEM_PROMPT).
MISTAKE_REGEN_USER_PROMPT = """## 错题记录:

{mistake_records}"""
|
||||
|
||||
|
||||
# ==================== JSON Schema ====================
|
||||
|
||||
QUESTION_SCHEMA = {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["num", "type", "topic", "correct"],
|
||||
"properties": {
|
||||
"num": {
|
||||
"oneOf": [
|
||||
{"type": "integer"},
|
||||
{"type": "string"}
|
||||
],
|
||||
"description": "题号"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": ["single_choice", "multiple_choice", "true_false", "fill_blank", "essay"],
|
||||
"description": "题目类型"
|
||||
},
|
||||
"topic": {
|
||||
"type": "object",
|
||||
"required": ["title"],
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "题目标题"
|
||||
},
|
||||
"options": {
|
||||
"type": "object",
|
||||
"description": "选项(选择题必填)"
|
||||
}
|
||||
}
|
||||
},
|
||||
"knowledge_point_id": {
|
||||
"oneOf": [
|
||||
{"type": "integer"},
|
||||
{"type": "string"},
|
||||
{"type": "null"}
|
||||
],
|
||||
"description": "知识点ID"
|
||||
},
|
||||
"correct": {
|
||||
"type": "string",
|
||||
"description": "正确答案"
|
||||
},
|
||||
"analysis": {
|
||||
"type": "string",
|
||||
"description": "答案解析"
|
||||
}
|
||||
}
|
||||
},
|
||||
"minItems": 1,
|
||||
"maxItems": 50
|
||||
}
|
||||
|
||||
|
||||
# ==================== Question type constants ====================

# Machine-readable `type` values emitted by the LLM mapped to
# human-readable Chinese labels (must match the enum in QUESTION_SCHEMA).
QUESTION_TYPES = {
    "single_choice": "单选题",
    "multiple_choice": "多选题",
    "true_false": "判断题",
    "fill_blank": "填空题",
    "essay": "问答题",
}

# Default number of questions per type (10 questions total); keys match
# the count placeholders declared in PROMPT_META["variables"].
DEFAULT_QUESTION_COUNTS = {
    "single_choice_count": 4,
    "multiple_choice_count": 2,
    "true_false_count": 1,
    "fill_blank_count": 2,
    "essay_count": 1,
}

# Difficulty is a 1-5 scale; 5 is the hardest (per SYSTEM_PROMPT).
DEFAULT_DIFFICULTY_LEVEL = 3
MAX_DIFFICULTY_LEVEL = 5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
148
backend/app/services/ai/prompts/knowledge_analysis_prompts.py
Normal file
148
backend/app/services/ai/prompts/knowledge_analysis_prompts.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
知识点分析提示词模板
|
||||
|
||||
功能:从课程资料中提取知识点
|
||||
"""
|
||||
|
||||
# ==================== Metadata ====================

# Descriptor for the knowledge-point extraction prompt template.
# `variables` lists the USER_PROMPT placeholders.
PROMPT_META = {
    "name": "knowledge_analysis",
    "display_name": "知识点分析",
    "description": "从课程资料中提取和分析知识点,支持PDF/Word/文本等格式",
    "module": "kaopeilian",
    "variables": ["course_name", "content"],
    "version": "2.0.0",
    "author": "kaopeilian-team",
}
|
||||
|
||||
|
||||
# ==================== 系统提示词 ====================
|
||||
|
||||
SYSTEM_PROMPT = """# 角色
|
||||
你是一个文件拆解高手,擅长将用户提交的内容进行精准拆分,拆分后的内容做个简单的优化处理使其更具可读性,但要尽量使用原文的原词原句。
|
||||
|
||||
## 技能
|
||||
### 技能 1: 内容拆分
|
||||
1. 当用户提交内容后,拆分为多段。
|
||||
2. 对拆分后的内容做简单优化,使其更具可读性,比如去掉奇怪符号(如换行符、乱码),若语句不通顺,或格式原因导致错位,则重新表达。用户可能会提交录音转文字的内容,因此可能是有错字的,注意修复这些小瑕疵。
|
||||
3. 优化过程中,尽量使用原文的原词原句,特别是话术类,必须保持原有的句式、保持原词原句,而不是重构。
|
||||
4. 注意是拆分而不是重写,不需要润色,尽量不做任何处理。
|
||||
5. 输出到 content。
|
||||
|
||||
### 技能 2: 为每一个选段概括一个标题
|
||||
1. 为每个拆分出来的选段概括一个标题,并输出到 title。
|
||||
|
||||
### 技能 3: 为每一个选段说明与主题的关联
|
||||
1. 详细说明这一段与全文核心主题的关联,并输出到 topic_relation。
|
||||
|
||||
### 技能 4: 为每一个选段打上一个类型标签
|
||||
1. 用户提交的内容很有可能是一个课程、一篇讲义、一个产品的说明书,通常是用户希望他公司的员工或高管学习的知识。
|
||||
2. 用户通常是医疗美容机构或轻医美、生活美容连锁品牌。
|
||||
3. 你要为每个选段打上一个知识类型的标签,最好是这几个类型中的一个:"理论知识", "诊断设计", "操作步骤", "沟通话术", "案例分析", "注意事项", "技巧方法", "客诉处理"。当然你也可以为这个选段匹配一个更适合的。
|
||||
|
||||
## 输出要求(严格按要求输出)
|
||||
请直接输出一个纯净的 JSON 数组(Array),不要包含 Markdown 标记(如 ```json),也不要包含任何解释性文字。格式如下:
|
||||
|
||||
[
|
||||
{
|
||||
"title": "知识点标题",
|
||||
"content": "知识点内容",
|
||||
"topic_relation": "知识点与主题的关系",
|
||||
"type": "知识点类型"
|
||||
},
|
||||
{
|
||||
"title": "第二个知识点标题",
|
||||
"content": "第二个知识点内容...",
|
||||
"topic_relation": "...",
|
||||
"type": "..."
|
||||
}
|
||||
]
|
||||
|
||||
## 限制
|
||||
- 仅围绕用户提交的内容进行拆分和关联标注,不涉及其他无关内容。
|
||||
- 拆分后的内容必须最大程度保持与原文一致。
|
||||
- 关联说明需清晰合理。
|
||||
- 不论如何,不要拆分超过 20 段!"""
|
||||
|
||||
|
||||
# ==================== User prompt template ====================

# Placeholders: course_name — the course topic; content — the raw
# course material to split into at most 20 knowledge points.
USER_PROMPT = """课程主题:{course_name}

## 用户提交的内容:

{content}

## 注意

- 以json的格式输出
- 不论如何,不要拆分超过20 段!"""
|
||||
|
||||
|
||||
# ==================== JSON Schema ====================
|
||||
|
||||
KNOWLEDGE_POINT_SCHEMA = {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["title", "content", "type"],
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "知识点标题",
|
||||
"maxLength": 200
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "知识点内容"
|
||||
},
|
||||
"topic_relation": {
|
||||
"type": "string",
|
||||
"description": "与主题的关系描述"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "知识点类型",
|
||||
"enum": [
|
||||
"理论知识",
|
||||
"诊断设计",
|
||||
"操作步骤",
|
||||
"沟通话术",
|
||||
"案例分析",
|
||||
"注意事项",
|
||||
"技巧方法",
|
||||
"客诉处理",
|
||||
"其他"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"minItems": 1,
|
||||
"maxItems": 20
|
||||
}
|
||||
|
||||
|
||||
# ==================== Knowledge-point type constants ====================

# Canonical type tags the analyzer may assign (mirrors skill 4 of
# SYSTEM_PROMPT; KNOWLEDGE_POINT_SCHEMA additionally allows "其他").
KNOWLEDGE_POINT_TYPES = [
    "理论知识",
    "诊断设计",
    "操作步骤",
    "沟通话术",
    "案例分析",
    "注意事项",
    "技巧方法",
    "客诉处理",
]

# Fallback type when the model returns no/unknown type.
DEFAULT_KNOWLEDGE_TYPE = "理论知识"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
193
backend/app/services/ai/prompts/practice_analysis_prompts.py
Normal file
193
backend/app/services/ai/prompts/practice_analysis_prompts.py
Normal file
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
陪练分析报告提示词模板
|
||||
|
||||
功能:分析陪练对话,生成综合评分和改进建议
|
||||
"""
|
||||
|
||||
# ==================== Metadata ====================

# Descriptor for the practice-analysis report prompt template.
# `variables` lists the USER_PROMPT placeholders.
PROMPT_META = {
    "name": "practice_analysis",
    "display_name": "陪练分析报告",
    "description": "分析陪练对话,生成综合评分、能力维度评估、对话标注和改进建议",
    "module": "kaopeilian",
    "variables": ["dialogue_history"],
    "version": "1.0.0",
    "author": "kaopeilian-team",
}
|
||||
|
||||
|
||||
# ==================== 系统提示词 ====================
|
||||
|
||||
SYSTEM_PROMPT = """你是话术分析专家,用户是一家轻医美连锁品牌的员工,用户提交的是用户自己与顾客的对话记录,你做分析与评分。并严格按照以下格式输出。
|
||||
|
||||
输出标准:
|
||||
{
|
||||
"analysis": {
|
||||
"total_score": 88,
|
||||
"score_breakdown": [
|
||||
{"name": "开场技巧", "score": 92, "description": "开场自然,快速建立信任"},
|
||||
{"name": "需求挖掘", "score": 90, "description": "能够有效识别客户需求"},
|
||||
{"name": "产品介绍", "score": 88, "description": "产品介绍清晰,重点突出"},
|
||||
{"name": "异议处理", "score": 85, "description": "处理客户异议还需加强"},
|
||||
{"name": "成交技巧", "score": 86, "description": "成交话术运用良好"}
|
||||
],
|
||||
"ability_dimensions": [
|
||||
{"name": "沟通表达", "score": 90, "feedback": "语言流畅,表达清晰,语调富有亲和力"},
|
||||
{"name": "倾听理解", "score": 92, "feedback": "能够准确理解客户意图,给予恰当回应"},
|
||||
{"name": "情绪控制", "score": 88, "feedback": "整体情绪稳定,面对异议时保持专业"},
|
||||
{"name": "专业知识", "score": 93, "feedback": "对医美项目知识掌握扎实"},
|
||||
{"name": "销售技巧", "score": 87, "feedback": "销售流程把控良好"},
|
||||
{"name": "应变能力", "score": 85, "feedback": "面对突发问题能够快速反应"}
|
||||
],
|
||||
"dialogue_annotations": [
|
||||
{"sequence": 1, "tags": ["亮点话术"], "comment": "开场专业,身份介绍清晰"},
|
||||
{"sequence": 3, "tags": ["金牌话术"], "comment": "巧妙引导,从客户角度出发"},
|
||||
{"sequence": 5, "tags": ["亮点话术"], "comment": "类比生动,让客户容易理解"},
|
||||
{"sequence": 7, "tags": ["金牌话术"], "comment": "专业解答,打消客户疑虑"}
|
||||
],
|
||||
"suggestions": [
|
||||
{"title": "控制语速", "content": "您的语速偏快,建议适当放慢,给客户更多思考时间", "example": "说完产品优势后,停顿2-3秒,观察客户反应"},
|
||||
{"title": "多用开放式问题", "content": "增加开放式问题的使用,更深入了解客户需求", "example": "您对未来的保障有什么期望?而不是您需要保险吗?"},
|
||||
{"title": "强化成交信号识别", "content": "客户已经表现出兴趣时,要及时推进成交", "example": "当客户问费用多少时,这是购买信号,应该立即报价并促成"}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
## 输出要求(严格执行)
|
||||
1. 直接输出纯净的 JSON,不要包含 Markdown 标记(如 ```json)
|
||||
2. 不要包含任何解释性文字
|
||||
3. score_breakdown 必须包含 5 项:开场技巧、需求挖掘、产品介绍、异议处理、成交技巧
|
||||
4. ability_dimensions 必须包含 6 项:沟通表达、倾听理解、情绪控制、专业知识、销售技巧、应变能力
|
||||
5. dialogue_annotations 标注有亮点或问题的对话轮次,tags 可选:亮点话术、金牌话术、待改进、问题话术
|
||||
6. suggestions 提供 2-4 条具体可操作的改进建议,每条包含 title、content、example
|
||||
|
||||
## 评分标准
|
||||
- 90-100:优秀
|
||||
- 80-89:良好
|
||||
- 70-79:一般
|
||||
- 60-69:需改进
|
||||
- <60:亟需提升"""
|
||||
|
||||
|
||||
# ==================== User prompt template ====================

# The raw practice-dialogue transcript is passed through unchanged.
USER_PROMPT = """{dialogue_history}"""
|
||||
|
||||
|
||||
# ==================== JSON Schema ====================
|
||||
|
||||
PRACTICE_ANALYSIS_SCHEMA = {
|
||||
"type": "object",
|
||||
"required": ["analysis"],
|
||||
"properties": {
|
||||
"analysis": {
|
||||
"type": "object",
|
||||
"required": ["total_score", "score_breakdown", "ability_dimensions", "dialogue_annotations", "suggestions"],
|
||||
"properties": {
|
||||
"total_score": {
|
||||
"type": "number",
|
||||
"description": "总体评分(0-100)",
|
||||
"minimum": 0,
|
||||
"maximum": 100
|
||||
},
|
||||
"score_breakdown": {
|
||||
"type": "array",
|
||||
"description": "分数细分(5项)",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["name", "score", "description"],
|
||||
"properties": {
|
||||
"name": {"type": "string", "description": "维度名称"},
|
||||
"score": {"type": "number", "description": "得分(0-100)"},
|
||||
"description": {"type": "string", "description": "评价描述"}
|
||||
}
|
||||
},
|
||||
"minItems": 5
|
||||
},
|
||||
"ability_dimensions": {
|
||||
"type": "array",
|
||||
"description": "能力维度评分(6项)",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["name", "score", "feedback"],
|
||||
"properties": {
|
||||
"name": {"type": "string", "description": "能力维度名称"},
|
||||
"score": {"type": "number", "description": "得分(0-100)"},
|
||||
"feedback": {"type": "string", "description": "反馈评语"}
|
||||
}
|
||||
},
|
||||
"minItems": 6
|
||||
},
|
||||
"dialogue_annotations": {
|
||||
"type": "array",
|
||||
"description": "对话标注",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["sequence", "tags", "comment"],
|
||||
"properties": {
|
||||
"sequence": {"type": "integer", "description": "对话轮次序号"},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"description": "标签列表",
|
||||
"items": {"type": "string"}
|
||||
},
|
||||
"comment": {"type": "string", "description": "点评内容"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"suggestions": {
|
||||
"type": "array",
|
||||
"description": "改进建议",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["title", "content", "example"],
|
||||
"properties": {
|
||||
"title": {"type": "string", "description": "建议标题"},
|
||||
"content": {"type": "string", "description": "建议内容"},
|
||||
"example": {"type": "string", "description": "示例"}
|
||||
}
|
||||
},
|
||||
"minItems": 2,
|
||||
"maxItems": 5
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ==================== Constant definitions ====================

# The five score-breakdown items SYSTEM_PROMPT requires
# (output requirement 3).
SCORE_BREAKDOWN_ITEMS = [
    "开场技巧",
    "需求挖掘",
    "产品介绍",
    "异议处理",
    "成交技巧",
]

# The six ability dimensions SYSTEM_PROMPT requires
# (output requirement 4).
ABILITY_DIMENSIONS = [
    "沟通表达",
    "倾听理解",
    "情绪控制",
    "专业知识",
    "销售技巧",
    "应变能力",
]

# Allowed dialogue-annotation tags (output requirement 5).
ANNOTATION_TAGS = [
    "亮点话术",
    "金牌话术",
    "待改进",
    "问题话术",
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
192
backend/app/services/ai/prompts/practice_scene_prompts.py
Normal file
192
backend/app/services/ai/prompts/practice_scene_prompts.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""
|
||||
陪练场景生成提示词模板
|
||||
|
||||
功能:根据课程知识点生成陪练场景配置
|
||||
"""
|
||||
|
||||
# ==================== Metadata ====================

# Descriptor for the practice-scene generation prompt template.
# `variables` lists the USER_PROMPT placeholders.
PROMPT_META = {
    "name": "practice_scene_generation",
    "display_name": "陪练场景生成",
    "description": "根据课程知识点生成 AI 陪练场景配置,包含场景名称、背景、AI 角色、练习目标等",
    "module": "kaopeilian",
    "variables": ["knowledge_points"],
    "version": "1.0.0",
    "author": "kaopeilian-team",
}
|
||||
|
||||
|
||||
# ==================== 系统提示词 ====================
|
||||
|
||||
SYSTEM_PROMPT = """你是一个训练场景研究专家,能将用户提交的知识点,转变为一个模拟陪练的场景,并严格按照以下格式输出。
|
||||
|
||||
输出标准:
|
||||
|
||||
{
|
||||
"scene": {
|
||||
"name": "轻医美产品咨询陪练",
|
||||
"description": "模拟客户咨询轻医美产品的场景",
|
||||
"background": "客户对脸部抗衰项目感兴趣。",
|
||||
"ai_role": "AI扮演一位30岁女性客户",
|
||||
"objectives": ["了解客户需求", "介绍产品优势", "处理价格异议"],
|
||||
"keywords": ["抗衰", "玻尿酸", "价格"],
|
||||
"type": "product-intro",
|
||||
"difficulty": "intermediate"
|
||||
}
|
||||
}
|
||||
|
||||
## 字段说明
|
||||
|
||||
- **name**: 场景名称,简洁明了,体现陪练主题
|
||||
- **description**: 场景描述,说明这是什么样的模拟场景
|
||||
- **background**: 场景背景设定,描述客户的情况和需求
|
||||
- **ai_role**: AI 角色描述,说明 AI 扮演什么角色(通常是客户)
|
||||
- **objectives**: 练习目标数组,列出学员需要达成的目标
|
||||
- **keywords**: 关键词数组,从知识点中提取的核心关键词
|
||||
- **type**: 场景类型,可选值:
|
||||
- phone: 电话销售
|
||||
- face: 面对面销售
|
||||
- complaint: 客户投诉
|
||||
- after-sales: 售后服务
|
||||
- product-intro: 产品介绍
|
||||
- **difficulty**: 难度等级,可选值:
|
||||
- beginner: 入门
|
||||
- junior: 初级
|
||||
- intermediate: 中级
|
||||
- senior: 高级
|
||||
- expert: 专家
|
||||
|
||||
## 输出要求
|
||||
|
||||
1. 直接输出纯净的 JSON 对象,不要包含 Markdown 标记(如 ```json)
|
||||
2. 不要包含任何解释性文字
|
||||
3. 根据知识点内容合理设计场景,确保场景与知识点紧密相关
|
||||
4. objectives 至少包含 2-3 个具体可操作的目标
|
||||
5. keywords 提取 3-5 个核心关键词
|
||||
6. 根据知识点的复杂程度选择合适的 difficulty
|
||||
7. 根据知识点的应用场景选择合适的 type"""
|
||||
|
||||
|
||||
# ==================== User prompt template ====================

# Placeholder: knowledge_points — the course knowledge content the
# generated scene must be grounded in.
USER_PROMPT = """请根据以下知识点内容,生成一个模拟陪练场景:

## 知识点内容

{knowledge_points}

## 要求

- 以 JSON 格式输出
- 场景要贴合知识点的实际应用场景
- AI 角色要符合轻医美行业的客户特征
- 练习目标要具体、可评估"""
|
||||
|
||||
|
||||
# ==================== JSON Schema ====================
|
||||
|
||||
PRACTICE_SCENE_SCHEMA = {
|
||||
"type": "object",
|
||||
"required": ["scene"],
|
||||
"properties": {
|
||||
"scene": {
|
||||
"type": "object",
|
||||
"required": ["name", "description", "background", "ai_role", "objectives", "keywords", "type", "difficulty"],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "场景名称",
|
||||
"maxLength": 100
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "场景描述",
|
||||
"maxLength": 500
|
||||
},
|
||||
"background": {
|
||||
"type": "string",
|
||||
"description": "场景背景设定",
|
||||
"maxLength": 500
|
||||
},
|
||||
"ai_role": {
|
||||
"type": "string",
|
||||
"description": "AI 角色描述",
|
||||
"maxLength": 200
|
||||
},
|
||||
"objectives": {
|
||||
"type": "array",
|
||||
"description": "练习目标",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 2,
|
||||
"maxItems": 5
|
||||
},
|
||||
"keywords": {
|
||||
"type": "array",
|
||||
"description": "关键词",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 2,
|
||||
"maxItems": 8
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "场景类型",
|
||||
"enum": [
|
||||
"phone",
|
||||
"face",
|
||||
"complaint",
|
||||
"after-sales",
|
||||
"product-intro"
|
||||
]
|
||||
},
|
||||
"difficulty": {
|
||||
"type": "string",
|
||||
"description": "难度等级",
|
||||
"enum": [
|
||||
"beginner",
|
||||
"junior",
|
||||
"intermediate",
|
||||
"senior",
|
||||
"expert"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ==================== Scene type constants ====================

# Machine value -> Chinese label; keys mirror the `type` enum in
# SYSTEM_PROMPT and PRACTICE_SCENE_SCHEMA.
SCENE_TYPES = {
    "phone": "电话销售",
    "face": "面对面销售",
    "complaint": "客户投诉",
    "after-sales": "售后服务",
    "product-intro": "产品介绍",
}

# Machine value -> Chinese label for the `difficulty` enum.
DIFFICULTY_LEVELS = {
    "beginner": "入门",
    "junior": "初级",
    "intermediate": "中级",
    "senior": "高级",
    "expert": "专家",
}

# Fallbacks applied when the model omits or returns an invalid value
# (used by the scene parser in practice_scene_service).
DEFAULT_SCENE_TYPE = "product-intro"
DEFAULT_DIFFICULTY = "intermediate"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user