feat: 初始化考培练系统项目

- 从服务器拉取完整代码
- 按框架规范整理项目结构
- 配置 Drone CI 测试环境部署
- 包含后端(FastAPI)、前端(Vue3)、管理端

技术栈: Vue3 + TypeScript + FastAPI + MySQL
This commit is contained in:
111
2026-01-24 19:33:28 +08:00
commit 998211c483
1197 changed files with 228429 additions and 0 deletions

View File

@@ -0,0 +1,151 @@
"""
AI 服务模块
包含:
- AIService: 本地 AI 服务(支持 4sapi + OpenRouter 降级)
- LLM JSON Parser: 大模型 JSON 输出解析器
- KnowledgeAnalysisServiceV2: 知识点分析服务Python 原生实现)
- ExamGeneratorService: 试题生成服务Python 原生实现)
- CourseChatServiceV2: 课程对话服务Python 原生实现)
- PracticeSceneService: 陪练场景准备服务Python 原生实现)
- AbilityAnalysisService: 智能工牌能力分析服务Python 原生实现)
- AnswerJudgeService: 答案判断服务Python 原生实现)
- PracticeAnalysisService: 陪练分析报告服务Python 原生实现)
"""
from .ai_service import (
AIService,
AIResponse,
AIConfig,
AIServiceError,
AIProvider,
DEFAULT_MODEL,
MODEL_ANALYSIS,
MODEL_CREATIVE,
MODEL_IMAGE_GEN,
quick_chat,
)
from .llm_json_parser import (
parse_llm_json,
parse_with_fallback,
safe_json_loads,
clean_llm_output,
diagnose_json_error,
validate_json_schema,
ParseResult,
JSONParseError,
JSONUnrecoverableError,
)
from .knowledge_analysis_v2 import (
KnowledgeAnalysisServiceV2,
knowledge_analysis_service_v2,
)
from .exam_generator_service import (
ExamGeneratorService,
ExamGeneratorConfig,
exam_generator_service,
generate_exam,
)
from .course_chat_service import (
CourseChatServiceV2,
course_chat_service_v2,
)
from .practice_scene_service import (
PracticeSceneService,
PracticeScene,
PracticeSceneResult,
practice_scene_service,
prepare_practice_knowledge,
)
from .ability_analysis_service import (
AbilityAnalysisService,
AbilityAnalysisResult,
AbilityDimension,
CourseRecommendation,
ability_analysis_service,
)
from .answer_judge_service import (
AnswerJudgeService,
JudgeResult,
answer_judge_service,
judge_answer,
)
from .practice_analysis_service import (
PracticeAnalysisService,
PracticeAnalysisResult,
ScoreBreakdownItem,
AbilityDimensionItem,
DialogueAnnotation,
Suggestion,
practice_analysis_service,
analyze_practice_session,
)
__all__ = [
# AI Service
"AIService",
"AIResponse",
"AIConfig",
"AIServiceError",
"AIProvider",
"DEFAULT_MODEL",
"MODEL_ANALYSIS",
"MODEL_CREATIVE",
"MODEL_IMAGE_GEN",
"quick_chat",
# JSON Parser
"parse_llm_json",
"parse_with_fallback",
"safe_json_loads",
"clean_llm_output",
"diagnose_json_error",
"validate_json_schema",
"ParseResult",
"JSONParseError",
"JSONUnrecoverableError",
# Knowledge Analysis V2
"KnowledgeAnalysisServiceV2",
"knowledge_analysis_service_v2",
# Exam Generator V2
"ExamGeneratorService",
"ExamGeneratorConfig",
"exam_generator_service",
"generate_exam",
# Course Chat V2
"CourseChatServiceV2",
"course_chat_service_v2",
# Practice Scene V2
"PracticeSceneService",
"PracticeScene",
"PracticeSceneResult",
"practice_scene_service",
"prepare_practice_knowledge",
# Ability Analysis V2
"AbilityAnalysisService",
"AbilityAnalysisResult",
"AbilityDimension",
"CourseRecommendation",
"ability_analysis_service",
# Answer Judge V2
"AnswerJudgeService",
"JudgeResult",
"answer_judge_service",
"judge_answer",
# Practice Analysis V2
"PracticeAnalysisService",
"PracticeAnalysisResult",
"ScoreBreakdownItem",
"AbilityDimensionItem",
"DialogueAnnotation",
"Suggestion",
"practice_analysis_service",
"analyze_practice_session",
]

View File

@@ -0,0 +1,479 @@
"""
智能工牌能力分析与课程推荐服务 - Python 原生实现
功能:
- 分析员工与顾客的对话记录
- 评估多维度能力得分
- 基于能力短板推荐课程
提供稳定可靠的能力分析和课程推荐能力。
"""
import json
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.exceptions import ExternalServiceError
from .ai_service import AIService, AIResponse
from .llm_json_parser import parse_with_fallback, clean_llm_output
from .prompts.ability_analysis_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
ABILITY_ANALYSIS_SCHEMA,
ABILITY_DIMENSIONS,
)
logger = logging.getLogger(__name__)
# ==================== 数据结构 ====================
@dataclass
class AbilityDimension:
    """Score for a single ability dimension."""
    name: str      # dimension name
    score: float   # score value
    feedback: str  # textual feedback for this dimension
@dataclass
class CourseRecommendation:
    """One recommended course with the reasoning behind it."""
    course_id: int
    course_name: str
    recommendation_reason: str
    priority: str       # high, medium, low
    match_score: float  # how well the course matches the detected gap
@dataclass
class AbilityAnalysisResult:
    """Outcome of one ability-analysis run, including AI call metadata."""
    success: bool
    total_score: float = 0.0
    ability_dimensions: List[AbilityDimension] = field(default_factory=list)
    course_recommendations: List[CourseRecommendation] = field(default_factory=list)
    ai_provider: str = ""
    ai_model: str = ""
    ai_tokens: int = 0
    ai_latency_ms: int = 0
    error: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result (nested items included) into a plain dict."""
        dimension_items = [
            {"name": dim.name, "score": dim.score, "feedback": dim.feedback}
            for dim in self.ability_dimensions
        ]
        recommendation_items = [
            {
                "course_id": rec.course_id,
                "course_name": rec.course_name,
                "recommendation_reason": rec.recommendation_reason,
                "priority": rec.priority,
                "match_score": rec.match_score,
            }
            for rec in self.course_recommendations
        ]
        return {
            "success": self.success,
            "total_score": self.total_score,
            "ability_dimensions": dimension_items,
            "course_recommendations": recommendation_items,
            "ai_provider": self.ai_provider,
            "ai_model": self.ai_model,
            "ai_tokens": self.ai_tokens,
            "ai_latency_ms": self.ai_latency_ms,
            "error": self.error,
        }
@dataclass
class UserPositionInfo:
    """Position record attached to a user (joined positions/position_members row)."""
    position_id: int
    position_name: str
    code: str
    description: str
    skills: Optional[Dict[str, Any]]  # decoded skills JSON column; None when absent or unparsable
    level: str
    status: str
@dataclass
class CourseInfo:
    """Published-course record used to build the recommendation catalogue."""
    id: int
    name: str
    description: str
    category: str
    tags: Optional[List[str]]  # decoded tags JSON column; None when absent or unparsable
    difficulty_level: int
    duration_hours: float

# ==================== Service class ====================
class AbilityAnalysisService:
    """
    Smart-badge ability analysis service.

    Native Python implementation.

    Example:
    ```python
    service = AbilityAnalysisService()
    result = await service.analyze(
        db=db_session,
        user_id=1,
        dialogue_history="顾客:你好,我想了解一下你们的服务..."
    )
    print(result.total_score)
    print(result.course_recommendations)
    ```
    """

    def __init__(self):
        """Create the service with its own AIService tagged for this module."""
        self.ai_service = AIService(module_code="ability_analysis")
    async def analyze(
        self,
        db: AsyncSession,
        user_id: int,
        dialogue_history: str
    ) -> AbilityAnalysisResult:
        """
        Analyze an employee's abilities and recommend courses.

        Pipeline: validate input -> load the user's positions -> load published
        courses -> run the AI analysis -> parse and validate the JSON result.

        Args:
            db: database session (multi-tenant: each tenant passes its own session)
            user_id: user ID
            dialogue_history: transcript of the employee/customer dialogue

        Returns:
            AbilityAnalysisResult; on any failure `success` is False and
            `error` carries the message — this method never raises.
        """
        try:
            logger.info(f"开始能力分析 - user_id: {user_id}")
            # 1. Validate input.
            if not dialogue_history or not dialogue_history.strip():
                return AbilityAnalysisResult(
                    success=False,
                    error="对话记录不能为空"
                )
            # 2. Load the user's position info.
            user_positions = await self._get_user_positions(db, user_id)
            user_info_str = self._format_user_info(user_positions)
            logger.info(f"用户岗位信息: {len(user_positions)} 个岗位")
            # 3. Load all selectable (published) courses.
            courses = await self._get_published_courses(db)
            courses_str = self._format_courses(courses)
            logger.info(f"可选课程: {len(courses)}")
            # 4. Run the AI analysis.
            ai_response = await self._call_ai_analysis(
                dialogue_history=dialogue_history,
                user_info=user_info_str,
                courses=courses_str
            )
            logger.info(
                f"AI 分析完成 - provider: {ai_response.provider}, "
                f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
            )
            # 5. Parse the JSON result (with fallback and validation).
            analysis_data = self._parse_analysis_result(ai_response.content, courses)
            # 6. Build the result object.
            result = AbilityAnalysisResult(
                success=True,
                total_score=analysis_data.get("total_score", 0),
                ability_dimensions=[
                    AbilityDimension(
                        name=d.get("name", ""),
                        score=d.get("score", 0),
                        feedback=d.get("feedback", "")
                    )
                    for d in analysis_data.get("ability_dimensions", [])
                ],
                course_recommendations=[
                    CourseRecommendation(
                        course_id=c.get("course_id", 0),
                        course_name=c.get("course_name", ""),
                        recommendation_reason=c.get("recommendation_reason", ""),
                        priority=c.get("priority", "medium"),
                        match_score=c.get("match_score", 0)
                    )
                    for c in analysis_data.get("course_recommendations", [])
                ],
                ai_provider=ai_response.provider,
                ai_model=ai_response.model,
                ai_tokens=ai_response.total_tokens,
                ai_latency_ms=ai_response.latency_ms,
            )
            logger.info(
                f"能力分析完成 - user_id: {user_id}, total_score: {result.total_score}, "
                f"recommendations: {len(result.course_recommendations)}"
            )
            return result
        except Exception as e:
            logger.error(
                f"能力分析失败 - user_id: {user_id}, error: {e}",
                exc_info=True
            )
            return AbilityAnalysisResult(
                success=False,
                error=str(e)
            )
async def _get_user_positions(
self,
db: AsyncSession,
user_id: int
) -> List[UserPositionInfo]:
"""
查询用户的岗位信息
获取用户基本信息
"""
query = text("""
SELECT
p.id as position_id,
p.name as position_name,
p.code,
p.description,
p.skills,
p.level,
p.status
FROM positions p
INNER JOIN position_members pm ON p.id = pm.position_id
WHERE pm.user_id = :user_id
AND pm.is_deleted = 0
AND p.is_deleted = 0
""")
result = await db.execute(query, {"user_id": user_id})
rows = result.fetchall()
positions = []
for row in rows:
# 解析 skills JSON
skills = None
if row.skills:
if isinstance(row.skills, str):
try:
skills = json.loads(row.skills)
except json.JSONDecodeError:
skills = None
else:
skills = row.skills
positions.append(UserPositionInfo(
position_id=row.position_id,
position_name=row.position_name,
code=row.code or "",
description=row.description or "",
skills=skills,
level=row.level or "",
status=row.status or ""
))
return positions
async def _get_published_courses(self, db: AsyncSession) -> List[CourseInfo]:
"""
查询所有已发布的课程
获取所有课程列表
"""
query = text("""
SELECT
id,
name,
description,
category,
tags,
difficulty_level,
duration_hours
FROM courses
WHERE status = 'published'
AND is_deleted = FALSE
ORDER BY sort_order
""")
result = await db.execute(query)
rows = result.fetchall()
courses = []
for row in rows:
# 解析 tags JSON
tags = None
if row.tags:
if isinstance(row.tags, str):
try:
tags = json.loads(row.tags)
except json.JSONDecodeError:
tags = None
else:
tags = row.tags
courses.append(CourseInfo(
id=row.id,
name=row.name,
description=row.description or "",
category=row.category or "",
tags=tags,
difficulty_level=row.difficulty_level or 3,
duration_hours=row.duration_hours or 0
))
return courses
def _format_user_info(self, positions: List[UserPositionInfo]) -> str:
"""格式化用户岗位信息为文本"""
if not positions:
return "暂无岗位信息"
lines = []
for p in positions:
info = f"- 岗位:{p.position_name}{p.code}"
if p.level:
info += f",级别:{p.level}"
if p.description:
info += f"\n 描述:{p.description}"
if p.skills:
skills_str = json.dumps(p.skills, ensure_ascii=False)
info += f"\n 核心技能:{skills_str}"
lines.append(info)
return "\n".join(lines)
def _format_courses(self, courses: List[CourseInfo]) -> str:
"""格式化课程列表为文本"""
if not courses:
return "暂无可选课程"
lines = []
for c in courses:
info = f"- ID: {c.id}, 课程名称: {c.name}"
if c.category:
info += f", 分类: {c.category}"
if c.difficulty_level:
info += f", 难度: {c.difficulty_level}"
if c.duration_hours:
info += f", 时长: {c.duration_hours}小时"
if c.description:
# 截断过长的描述
desc = c.description[:100] + "..." if len(c.description) > 100 else c.description
info += f"\n 描述: {desc}"
lines.append(info)
return "\n".join(lines)
    async def _call_ai_analysis(
        self,
        dialogue_history: str,
        user_info: str,
        courses: str
    ) -> AIResponse:
        """Run the AI ability analysis for the given dialogue/user/course context.

        Returns the raw AIResponse; JSON parsing happens in _parse_analysis_result.
        """
        # Build the user message from the shared prompt template.
        user_message = USER_PROMPT.format(
            dialogue_history=dialogue_history,
            user_info=user_info,
            courses=courses
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_message}
        ]
        # Call the AI (automatic 4sapi -> OpenRouter degradation).
        response = await self.ai_service.chat(
            messages=messages,
            temperature=0.7,  # keep some creativity
            prompt_name="ability_analysis"
        )
        return response
def _parse_analysis_result(
self,
ai_output: str,
courses: List[CourseInfo]
) -> Dict[str, Any]:
"""
解析 AI 输出的分析结果 JSON
使用 LLM JSON Parser 进行多层兜底解析
"""
# 先清洗输出
cleaned_output, rules = clean_llm_output(ai_output)
if rules:
logger.debug(f"AI 输出已清洗: {rules}")
# 使用带 Schema 校验的解析
parsed = parse_with_fallback(
cleaned_output,
schema=ABILITY_ANALYSIS_SCHEMA,
default={"analysis": {}},
validate_schema=True,
on_error="default"
)
# 提取 analysis 部分
analysis = parsed.get("analysis", {})
# 后处理:验证课程推荐的有效性
valid_course_ids = {c.id for c in courses}
valid_recommendations = []
for rec in analysis.get("course_recommendations", []):
course_id = rec.get("course_id")
if course_id in valid_course_ids:
valid_recommendations.append(rec)
else:
logger.warning(f"推荐的课程ID不存在: {course_id}")
analysis["course_recommendations"] = valid_recommendations
# 确保能力维度完整
existing_dims = {d.get("name") for d in analysis.get("ability_dimensions", [])}
for dim_name in ABILITY_DIMENSIONS:
if dim_name not in existing_dims:
logger.warning(f"缺少能力维度: {dim_name},使用默认值")
analysis.setdefault("ability_dimensions", []).append({
"name": dim_name,
"score": 70,
"feedback": "暂无具体评价"
})
return analysis
# ==================== Global instance ====================
ability_analysis_service = AbilityAnalysisService()

View File

@@ -0,0 +1,747 @@
"""
本地 AI 服务 - 遵循瑞小美 AI 接入规范
功能:
- 支持 4sapi.com首选和 OpenRouter备选自动降级
- 统一的请求/响应格式
- 调用日志记录
"""
import json
import logging
import time
from dataclasses import dataclass, field
from typing import Any, AsyncGenerator, Dict, List, Optional, Union
from enum import Enum
import httpx
logger = logging.getLogger(__name__)
class AIProvider(Enum):
    """AI vendor identifiers."""
    PRIMARY = "4sapi"        # preferred: 4sapi.com
    FALLBACK = "openrouter"  # fallback: OpenRouter
@dataclass
class AIResponse:
    """Unified AI response result."""
    content: str                                                # reply text
    model: str = ""                                             # model actually used
    provider: str = ""                                          # provider actually used
    input_tokens: int = 0                                       # input token count
    output_tokens: int = 0                                      # output token count
    total_tokens: int = 0                                       # total token count
    cost: float = 0.0                                           # cost in USD
    latency_ms: int = 0                                         # response latency in milliseconds
    raw_response: Dict[str, Any] = field(default_factory=dict)  # raw provider payload
    images: List[str] = field(default_factory=list)             # image-generation results
    annotations: Dict[str, Any] = field(default_factory=dict)   # PDF parsing annotations
@dataclass
class AIConfig:
    """AI service configuration."""
    primary_api_key: str                 # general key (Gemini/DeepSeek etc.)
    anthropic_api_key: str = ""          # dedicated Claude key
    primary_base_url: str = "https://4sapi.com/v1"
    fallback_api_key: str = ""
    fallback_base_url: str = "https://openrouter.ai/api/v1"
    default_model: str = "claude-opus-4-5-20251101-thinking"  # default to the strongest model
    timeout: float = 120.0               # request timeout in seconds
    max_retries: int = 2                 # NOTE(review): not referenced in this file — confirm usage elsewhere
# Claude model list (these require anthropic_api_key).
# NOTE(review): this list is not consulted by is_claude_model(), which matches
# by substring instead — kept as reference; confirm whether it is still needed.
CLAUDE_MODELS = [
    "claude-opus-4-5-20251101-thinking",
    "claude-opus-4-5-20251101",
    "claude-sonnet-4-20250514",
    "claude-3-opus",
    "claude-3-sonnet",
    "claude-3-haiku",
]
def is_claude_model(model: str) -> bool:
    """Return True when *model* names a Claude/Anthropic model (case-insensitive substring match)."""
    lowered = model.lower()
    return "claude" in lowered or "anthropic" in lowered
# Model name mapping: 4sapi -> OpenRouter
MODEL_MAPPING = {
    # 4sapi uses short names; OpenRouter uses full vendor paths
    "gemini-3-flash-preview": "google/gemini-3-flash-preview",
    "gemini-3-pro-preview": "google/gemini-3-pro-preview",
    "claude-opus-4-5-20251101-thinking": "anthropic/claude-opus-4.5",
    "gemini-2.5-flash-image-preview": "google/gemini-2.0-flash-exp:free",
}
# Reverse mapping: OpenRouter -> 4sapi
MODEL_MAPPING_REVERSE = {v: k for k, v in MODEL_MAPPING.items()}
class AIServiceError(Exception):
    """Raised when an AI provider call fails.

    Attributes:
        provider: name of the provider that failed ("" if unknown).
        status_code: HTTP status of the failed call (0 if not applicable).
    """

    def __init__(self, message: str, provider: str = "", status_code: int = 0):
        super().__init__(message)
        self.status_code = status_code
        self.provider = provider
class AIService:
    """
    Local AI service.

    Per the Ruixiaomei AI integration guidelines:
    - prefer 4sapi.com, automatically degrade to OpenRouter on failure
    - unified response format
    - automatic model-name translation between providers

    Example:
    ```python
    ai = AIService(module_code="knowledge_analysis")
    response = await ai.chat(
        messages=[
            {"role": "system", "content": "你是助手"},
            {"role": "user", "content": "你好"}
        ],
        prompt_name="greeting"
    )
    print(response.content)
    ```
    """

    def __init__(
        self,
        module_code: str = "default",
        config: Optional[AIConfig] = None,
        db_session: Any = None
    ):
        """
        Initialize the AI service.

        Config precedence (per the AI integration guidelines):
        1. explicitly passed `config`
        2. database ai_config table (recommended)
        3. environment variables (fallback)

        Args:
            module_code: module tag used for usage statistics
            config: AI config; None -> load from database / environment
            db_session: DB session used for call logging and config reads
        """
        self.module_code = module_code
        self.db_session = db_session
        self.config = config or self._load_config(db_session)
        logger.info(f"AIService 初始化: module={module_code}, primary={self.config.primary_base_url}")
    def _load_config(self, db_session: Any) -> AIConfig:
        """
        Load configuration.

        Precedence (per the AI integration guidelines):
        1. admin DB tenant_configs table (recommended, via DynamicConfig)
        2. environment variables (fallback)

        Args:
            db_session: database session (optional, used for logging)

        Returns:
            AIConfig configuration object
        """
        # Prefer the admin database (synchronous read).
        try:
            config = self._load_config_from_admin_db()
            if config:
                logger.info("✅ AI 配置已从管理库tenant_configs加载")
                return config
        except Exception as e:
            logger.debug(f"从管理库加载 AI 配置失败: {e}")
        # Fall back to environment variables.
        logger.info("AI 配置从环境变量加载")
        return self._load_config_from_env()
def _load_config_from_admin_db(self) -> Optional[AIConfig]:
"""
从管理库 tenant_configs 表加载配置
使用同步方式直接查询 kaopeilian_admin.tenant_configs 表
Returns:
AIConfig 配置对象,如果无数据则返回 None
"""
import os
# 获取当前租户编码
tenant_code = os.getenv("TENANT_CODE", "demo")
# 获取管理库连接信息
admin_db_host = os.getenv("ADMIN_DB_HOST", "prod-mysql")
admin_db_port = int(os.getenv("ADMIN_DB_PORT", "3306"))
admin_db_user = os.getenv("ADMIN_DB_USER", "root")
admin_db_password = os.getenv("ADMIN_DB_PASSWORD", "")
admin_db_name = os.getenv("ADMIN_DB_NAME", "kaopeilian_admin")
if not admin_db_password:
logger.debug("ADMIN_DB_PASSWORD 未配置,跳过管理库配置加载")
return None
try:
from sqlalchemy import create_engine, text
import urllib.parse
# 构建连接 URL
encoded_password = urllib.parse.quote_plus(admin_db_password)
admin_db_url = f"mysql+pymysql://{admin_db_user}:{encoded_password}@{admin_db_host}:{admin_db_port}/{admin_db_name}?charset=utf8mb4"
engine = create_engine(admin_db_url, pool_pre_ping=True)
with engine.connect() as conn:
# 1. 获取租户 ID
result = conn.execute(
text("SELECT id FROM tenants WHERE code = :code AND status = 'active'"),
{"code": tenant_code}
)
row = result.fetchone()
if not row:
logger.debug(f"租户 {tenant_code} 不存在或未激活")
engine.dispose()
return None
tenant_id = row[0]
# 2. 获取 AI 配置
result = conn.execute(
text("""
SELECT config_key, config_value
FROM tenant_configs
WHERE tenant_id = :tenant_id AND config_group = 'ai'
"""),
{"tenant_id": tenant_id}
)
rows = result.fetchall()
engine.dispose()
if not rows:
logger.debug(f"租户 {tenant_code} 无 AI 配置")
return None
# 转换为字典
config_dict = {row[0]: row[1] for row in rows}
# 检查必要的配置是否存在
primary_key = config_dict.get("AI_PRIMARY_API_KEY", "")
if not primary_key:
logger.warning(f"租户 {tenant_code} 的 AI_PRIMARY_API_KEY 为空")
return None
logger.info(f"✅ 从管理库加载租户 {tenant_code} 的 AI 配置成功")
return AIConfig(
primary_api_key=primary_key,
anthropic_api_key=config_dict.get("AI_ANTHROPIC_API_KEY", ""),
primary_base_url=config_dict.get("AI_PRIMARY_BASE_URL", "https://4sapi.com/v1"),
fallback_api_key=config_dict.get("AI_FALLBACK_API_KEY", ""),
fallback_base_url=config_dict.get("AI_FALLBACK_BASE_URL", "https://openrouter.ai/api/v1"),
default_model=config_dict.get("AI_DEFAULT_MODEL", "claude-opus-4-5-20251101-thinking"),
timeout=float(config_dict.get("AI_TIMEOUT", "120")),
)
except Exception as e:
logger.debug(f"从管理库读取 AI 配置异常: {e}")
return None
def _load_config_from_env(self) -> AIConfig:
"""
从环境变量加载配置
⚠️ 强制要求(遵循瑞小美 AI 接入规范):
- 禁止在代码中硬编码 API Key
- 必须通过环境变量配置 Key
必须配置的环境变量:
- AI_PRIMARY_API_KEY: 通用 Key用于 Gemini/DeepSeek 等)
- AI_ANTHROPIC_API_KEY: Claude 专属 Key
"""
import os
primary_api_key = os.getenv("AI_PRIMARY_API_KEY", "")
anthropic_api_key = os.getenv("AI_ANTHROPIC_API_KEY", "")
# 检查必要的 Key 是否已配置
if not primary_api_key:
logger.warning("⚠️ AI_PRIMARY_API_KEY 未配置AI 服务可能无法正常工作")
if not anthropic_api_key:
logger.warning("⚠️ AI_ANTHROPIC_API_KEY 未配置Claude 模型调用将失败")
return AIConfig(
# 通用 KeyGemini/DeepSeek 等非 Anthropic 模型)
primary_api_key=primary_api_key,
# Claude 专属 Key
anthropic_api_key=anthropic_api_key,
primary_base_url=os.getenv("AI_PRIMARY_BASE_URL", "https://4sapi.com/v1"),
fallback_api_key=os.getenv("AI_FALLBACK_API_KEY", ""),
fallback_base_url=os.getenv("AI_FALLBACK_BASE_URL", "https://openrouter.ai/api/v1"),
# 默认模型:遵循"优先最强"原则,使用 Claude Opus 4.5
default_model=os.getenv("AI_DEFAULT_MODEL", "claude-opus-4-5-20251101-thinking"),
timeout=float(os.getenv("AI_TIMEOUT", "120")),
)
def _convert_model_name(self, model: str, provider: AIProvider) -> str:
"""
转换模型名称以匹配服务商格式
Args:
model: 原始模型名称
provider: 目标服务商
Returns:
转换后的模型名称
"""
if provider == AIProvider.FALLBACK:
# 4sapi -> OpenRouter
return MODEL_MAPPING.get(model, f"google/{model}" if "/" not in model else model)
else:
# OpenRouter -> 4sapi
return MODEL_MAPPING_REVERSE.get(model, model.split("/")[-1] if "/" in model else model)
    async def chat(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        prompt_name: str = "default",
        **kwargs
    ) -> AIResponse:
        """
        Non-streaming chat completion.

        Args:
            messages: message list [{"role": "system/user/assistant", "content": "..."}]
            model: model name; None -> default model
            temperature: sampling temperature
            max_tokens: maximum output tokens
            prompt_name: prompt tag used for statistics
            **kwargs: extra parameters

        Returns:
            AIResponse

        Raises:
            AIServiceError: when the primary call fails and no fallback key is
                configured, or when the fallback call also fails.
        """
        model = model or self.config.default_model
        # Build the request body.
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }
        if max_tokens:
            payload["max_tokens"] = max_tokens
        # Try the primary provider first.
        try:
            return await self._call_provider(
                provider=AIProvider.PRIMARY,
                endpoint="/chat/completions",
                payload=payload,
                prompt_name=prompt_name
            )
        except AIServiceError as e:
            logger.warning(f"首选服务商调用失败: {e}, 尝试降级到备选服务商")
            # Without a fallback API key there is nothing to degrade to.
            if not self.config.fallback_api_key:
                raise
            # Degrade to the fallback provider.
            # Translate the model name to the fallback's naming scheme.
            fallback_model = self._convert_model_name(model, AIProvider.FALLBACK)
            payload["model"] = fallback_model
            return await self._call_provider(
                provider=AIProvider.FALLBACK,
                endpoint="/chat/completions",
                payload=payload,
                prompt_name=prompt_name
            )
async def chat_stream(
self,
messages: List[Dict[str, str]],
model: Optional[str] = None,
temperature: float = 0.7,
max_tokens: Optional[int] = None,
prompt_name: str = "default",
**kwargs
) -> AsyncGenerator[str, None]:
"""
流式文本聊天
Args:
messages: 消息列表 [{"role": "system/user/assistant", "content": "..."}]
model: 模型名称None 使用默认模型
temperature: 温度参数
max_tokens: 最大输出 token 数
prompt_name: 提示词名称,用于统计
**kwargs: 其他参数
Yields:
str: 文本块(逐字返回)
"""
model = model or self.config.default_model
# 构建请求体
payload = {
"model": model,
"messages": messages,
"temperature": temperature,
"stream": True,
}
if max_tokens:
payload["max_tokens"] = max_tokens
# 首选服务商
try:
async for chunk in self._call_provider_stream(
provider=AIProvider.PRIMARY,
endpoint="/chat/completions",
payload=payload,
prompt_name=prompt_name
):
yield chunk
return
except AIServiceError as e:
logger.warning(f"首选服务商流式调用失败: {e}, 尝试降级到备选服务商")
# 如果没有备选 API Key直接抛出异常
if not self.config.fallback_api_key:
raise
# 降级到备选服务商
# 转换模型名称
fallback_model = self._convert_model_name(model, AIProvider.FALLBACK)
payload["model"] = fallback_model
async for chunk in self._call_provider_stream(
provider=AIProvider.FALLBACK,
endpoint="/chat/completions",
payload=payload,
prompt_name=prompt_name
):
yield chunk
    async def _call_provider_stream(
        self,
        provider: AIProvider,
        endpoint: str,
        payload: Dict[str, Any],
        prompt_name: str
    ) -> AsyncGenerator[str, None]:
        """
        Call one provider in streaming (SSE) mode.

        Args:
            provider: target provider
            endpoint: API endpoint
            payload: request body
            prompt_name: prompt tag

        Yields:
            str: text chunks extracted from the SSE delta events

        Raises:
            AIServiceError: on non-200 status, timeout, or network error.
        """
        # Resolve base URL and API key for the chosen provider.
        if provider == AIProvider.PRIMARY:
            base_url = self.config.primary_base_url
            # Pick the key by model: Claude uses its dedicated key, others the general one.
            model = payload.get("model", "")
            if is_claude_model(model) and self.config.anthropic_api_key:
                api_key = self.config.anthropic_api_key
                logger.debug(f"[Stream] 使用 Claude 专属 Key 调用模型: {model}")
            else:
                api_key = self.config.primary_api_key
        else:
            api_key = self.config.fallback_api_key
            base_url = self.config.fallback_base_url
        url = f"{base_url.rstrip('/')}{endpoint}"
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        # OpenRouter requires extra identification headers.
        if provider == AIProvider.FALLBACK:
            headers["HTTP-Referer"] = "https://kaopeilian.ireborn.com.cn"
            headers["X-Title"] = "KaoPeiLian"
        start_time = time.time()
        try:
            timeout = httpx.Timeout(self.config.timeout, connect=10.0)
            async with httpx.AsyncClient(timeout=timeout) as client:
                logger.info(f"流式调用 AI 服务: provider={provider.value}, model={payload.get('model')}")
                async with client.stream("POST", url, json=payload, headers=headers) as response:
                    # Check the response status before consuming the stream.
                    if response.status_code != 200:
                        error_text = await response.aread()
                        logger.error(f"AI 服务流式返回错误: status={response.status_code}, body={error_text[:500]}")
                        raise AIServiceError(
                            f"API 流式请求失败: HTTP {response.status_code}",
                            provider=provider.value,
                            status_code=response.status_code
                        )
                    # Consume the SSE stream line by line.
                    async for line in response.aiter_lines():
                        if not line or not line.strip():
                            continue
                        # SSE data lines are prefixed with "data: ".
                        if line.startswith("data: "):
                            data_str = line[6:]  # strip the "data: " prefix
                            # "[DONE]" marks the end of the stream.
                            if data_str.strip() == "[DONE]":
                                logger.info(f"流式响应完成: provider={provider.value}")
                                return
                            try:
                                event_data = json.loads(data_str)
                                # Extract the delta content, if any.
                                choices = event_data.get("choices", [])
                                if choices:
                                    delta = choices[0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        yield content
                            except json.JSONDecodeError as e:
                                # Malformed event: skip it rather than abort the stream.
                                logger.debug(f"解析流式数据失败: {e} - 数据: {data_str[:100]}")
                                continue
            # Reached only when the stream ends without a [DONE] marker.
            latency_ms = int((time.time() - start_time) * 1000)
            logger.info(f"流式调用完成: provider={provider.value}, latency={latency_ms}ms")
        except httpx.TimeoutException:
            latency_ms = int((time.time() - start_time) * 1000)
            logger.error(f"AI 服务流式超时: provider={provider.value}, latency={latency_ms}ms")
            raise AIServiceError(f"流式请求超时({self.config.timeout}秒)", provider=provider.value)
        except httpx.RequestError as e:
            logger.error(f"AI 服务流式网络错误: provider={provider.value}, error={e}")
            raise AIServiceError(f"流式网络错误: {e}", provider=provider.value)
    async def _call_provider(
        self,
        provider: AIProvider,
        endpoint: str,
        payload: Dict[str, Any],
        prompt_name: str
    ) -> AIResponse:
        """
        Call one provider (non-streaming).

        Args:
            provider: target provider
            endpoint: API endpoint
            payload: request body
            prompt_name: prompt tag

        Returns:
            AIResponse

        Raises:
            AIServiceError: on non-200 status, timeout, or network error.
        """
        # Resolve base URL and API key for the chosen provider.
        if provider == AIProvider.PRIMARY:
            base_url = self.config.primary_base_url
            # Pick the key by model: Claude uses its dedicated key, others the general one.
            model = payload.get("model", "")
            if is_claude_model(model) and self.config.anthropic_api_key:
                api_key = self.config.anthropic_api_key
                logger.debug(f"使用 Claude 专属 Key 调用模型: {model}")
            else:
                api_key = self.config.primary_api_key
        else:
            api_key = self.config.fallback_api_key
            base_url = self.config.fallback_base_url
        url = f"{base_url.rstrip('/')}{endpoint}"
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        # OpenRouter requires extra identification headers.
        if provider == AIProvider.FALLBACK:
            headers["HTTP-Referer"] = "https://kaopeilian.ireborn.com.cn"
            headers["X-Title"] = "KaoPeiLian"
        start_time = time.time()
        try:
            async with httpx.AsyncClient(timeout=self.config.timeout) as client:
                logger.info(f"调用 AI 服务: provider={provider.value}, model={payload.get('model')}")
                response = await client.post(url, json=payload, headers=headers)
                latency_ms = int((time.time() - start_time) * 1000)
                # Check the response status.
                if response.status_code != 200:
                    error_text = response.text
                    logger.error(f"AI 服务返回错误: status={response.status_code}, body={error_text[:500]}")
                    raise AIServiceError(
                        f"API 请求失败: HTTP {response.status_code}",
                        provider=provider.value,
                        status_code=response.status_code
                    )
                data = response.json()
                # Parse the payload into a unified AIResponse.
                ai_response = self._parse_response(data, provider, latency_ms)
                logger.info(
                    f"AI 调用成功: provider={provider.value}, model={ai_response.model}, "
                    f"tokens={ai_response.total_tokens}, latency={latency_ms}ms"
                )
                # Persist a call-log record (no-op without a db session).
                await self._log_call(prompt_name, ai_response)
                return ai_response
        except httpx.TimeoutException:
            latency_ms = int((time.time() - start_time) * 1000)
            logger.error(f"AI 服务超时: provider={provider.value}, latency={latency_ms}ms")
            raise AIServiceError(f"请求超时({self.config.timeout}秒)", provider=provider.value)
        except httpx.RequestError as e:
            logger.error(f"AI 服务网络错误: provider={provider.value}, error={e}")
            raise AIServiceError(f"网络错误: {e}", provider=provider.value)
def _parse_response(
self,
data: Dict[str, Any],
provider: AIProvider,
latency_ms: int
) -> AIResponse:
"""解析 API 响应"""
# 提取内容
choices = data.get("choices", [])
if not choices:
raise AIServiceError("响应中没有 choices")
message = choices[0].get("message", {})
content = message.get("content", "")
# 提取 usage
usage = data.get("usage", {})
input_tokens = usage.get("prompt_tokens", 0)
output_tokens = usage.get("completion_tokens", 0)
total_tokens = usage.get("total_tokens", input_tokens + output_tokens)
# 提取费用(如果有)
cost = usage.get("total_cost", 0.0)
return AIResponse(
content=content,
model=data.get("model", ""),
provider=provider.value,
input_tokens=input_tokens,
output_tokens=output_tokens,
total_tokens=total_tokens,
cost=cost,
latency_ms=latency_ms,
raw_response=data
)
    async def _log_call(self, prompt_name: str, response: AIResponse) -> None:
        """Persist a call-log record (no-op when no db_session was provided)."""
        if not self.db_session:
            return
        try:
            # TODO: implement call-log persistence
            # see the ai_call_logs table structure for reference
            pass
        except Exception as e:
            logger.warning(f"记录 AI 调用日志失败: {e}")
async def analyze_document(
self,
content: str,
prompt: str,
model: Optional[str] = None,
prompt_name: str = "document_analysis"
) -> AIResponse:
"""
分析文档内容
Args:
content: 文档内容
prompt: 分析提示词
model: 模型名称
prompt_name: 提示词名称
Returns:
AIResponse 响应对象
"""
messages = [
{"role": "user", "content": f"{prompt}\n\n文档内容:\n{content}"}
]
return await self.chat(
messages=messages,
model=model,
temperature=0.1, # 文档分析使用低温度
prompt_name=prompt_name
)
# Convenience helper
async def quick_chat(
    messages: List[Dict[str, str]],
    model: Optional[str] = None,
    module_code: str = "quick"
) -> str:
    """
    One-shot chat helper that returns only the reply text.

    Args:
        messages: message list
        model: model name (None -> service default)
        module_code: module tag used for usage accounting

    Returns:
        The assistant reply content as plain text.
    """
    service = AIService(module_code=module_code)
    reply = await service.chat(messages, model=model)
    return reply.content
# Model constants (per the Ruixiaomei AI integration guidelines)
# Ordered by preference: primary > standard > fast
MODEL_PRIMARY = "claude-opus-4-5-20251101-thinking"  # 🥇 primary: try first for every task
MODEL_STANDARD = "gemini-3-pro-preview"              # 🥈 standard: degrade here when Claude fails
MODEL_FAST = "gemini-3-flash-preview"                # 🥉 fast: final safety net
MODEL_IMAGE = "gemini-2.5-flash-image-preview"       # 🖼️ dedicated image-generation model
MODEL_VIDEO = "veo3.1-pro"                           # 🎬 dedicated video-generation model
# Aliases kept for backward compatibility with older code
DEFAULT_MODEL = MODEL_PRIMARY  # default to the strongest model
MODEL_ANALYSIS = MODEL_PRIMARY
MODEL_CREATIVE = MODEL_STANDARD
MODEL_IMAGE_GEN = MODEL_IMAGE

View File

@@ -0,0 +1,197 @@
"""
答案判断服务 - Python 原生实现
功能:
- 判断填空题与问答题的答案是否正确
- 通过 AI 语义理解比对用户答案与标准答案
提供稳定可靠的答案判断能力。
"""
import logging
from dataclasses import dataclass
from typing import Any, Optional
from .ai_service import AIService, AIResponse
from .prompts.answer_judge_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
CORRECT_KEYWORDS,
INCORRECT_KEYWORDS,
)
logger = logging.getLogger(__name__)
@dataclass
class JudgeResult:
    """Outcome of one answer judgement."""
    is_correct: bool   # final verdict
    raw_response: str  # raw AI reply text (or the failure message)
    ai_provider: str = ""
    ai_model: str = ""
    ai_tokens: int = 0
    ai_latency_ms: int = 0
class AnswerJudgeService:
    """
    Answer-judging service.

    Native Python implementation.

    Example:
    ```python
    service = AnswerJudgeService()
    result = await service.judge(
        db=db_session,  # pass db_session so the AI call is logged
        question="玻尿酸的主要作用是什么?",
        correct_answer="补水保湿、填充塑形",
        user_answer="保湿和塑形",
        analysis="玻尿酸具有补水保湿和填充塑形两大功能"
    )
    print(result.is_correct)  # True
    ```
    """

    # Module tag used when creating the per-call AIService.
    MODULE_CODE = "answer_judge"
    async def judge(
        self,
        question: str,
        correct_answer: str,
        user_answer: str,
        analysis: str = "",
        db: Any = None  # database session, used for AI call logging
    ) -> JudgeResult:
        """
        Decide whether the user's answer is correct.

        Args:
            question: question text
            correct_answer: reference answer
            user_answer: the user's answer
            analysis: answer explanation (optional)
            db: database session for call logging (per the AI integration guidelines)

        Returns:
            JudgeResult; this method never raises — on failure `is_correct`
            is conservatively False.
        """
        try:
            logger.info(
                f"开始判断答案 - question: {question[:50]}..., "
                f"user_answer: {user_answer[:50]}..."
            )
            # Create an AIService instance (db_session enables call logging).
            ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)
            # Build the prompt from the shared template.
            user_prompt = USER_PROMPT.format(
                question=question,
                correct_answer=correct_answer,
                user_answer=user_answer,
                analysis=analysis or ""
            )
            # Call the AI.
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ]
            ai_response = await ai_service.chat(
                messages=messages,
                temperature=0.1,  # low temperature for stable output
                prompt_name="answer_judge"
            )
            logger.info(
                f"AI 判断完成 - provider: {ai_response.provider}, "
                f"response: {ai_response.content}, "
                f"latency: {ai_response.latency_ms}ms"
            )
            # Parse the AI verdict text.
            is_correct = self._parse_judge_result(ai_response.content)
            logger.info(f"答案判断结果: {is_correct}")
            return JudgeResult(
                is_correct=is_correct,
                raw_response=ai_response.content,
                ai_provider=ai_response.provider,
                ai_model=ai_response.model,
                ai_tokens=ai_response.total_tokens,
                ai_latency_ms=ai_response.latency_ms,
            )
        except Exception as e:
            logger.error(f"答案判断失败: {e}", exc_info=True)
            # Be conservative on errors: treat the answer as wrong.
            return JudgeResult(
                is_correct=False,
                raw_response=f"判断失败: {e}",
            )
def _parse_judge_result(self, ai_output: str) -> bool:
"""
解析 AI 输出的判断结果
Args:
ai_output: AI 返回的文本
Returns:
bool: True 表示正确False 表示错误
"""
# 清洗输出
output = ai_output.strip().lower()
# 检查是否包含正确关键词
for keyword in CORRECT_KEYWORDS:
if keyword.lower() in output:
return True
# 检查是否包含错误关键词
for keyword in INCORRECT_KEYWORDS:
if keyword.lower() in output:
return False
# 无法识别时,默认返回错误(保守处理)
logger.warning(f"无法解析判断结果,默认返回错误: {ai_output}")
return False
# ==================== 全局实例 ====================
answer_judge_service = AnswerJudgeService()
# ==================== 便捷函数 ====================
async def judge_answer(
    question: str,
    correct_answer: str,
    user_answer: str,
    analysis: str = ""
) -> bool:
    """Convenience wrapper around the global AnswerJudgeService instance.

    Args:
        question: Question text.
        correct_answer: Reference answer.
        user_answer: The answer to evaluate.
        analysis: Optional answer explanation.

    Returns:
        True when the answer is judged correct, False otherwise.
    """
    verdict = await answer_judge_service.judge(
        question=question,
        correct_answer=correct_answer,
        user_answer=user_answer,
        analysis=analysis,
    )
    return verdict.is_correct

View File

@@ -0,0 +1,757 @@
"""
课程对话服务 V2 - Python 原生实现
功能:
- 查询课程知识点作为知识库
- 调用 AI 进行对话
- 支持流式输出
- 多轮对话历史管理Redis 缓存)
提供稳定可靠的课程对话能力。
"""
import json
import logging
import time
import uuid
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.exceptions import ExternalServiceError
from .ai_service import AIService
from .prompts.course_chat_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
KNOWLEDGE_ITEM_TEMPLATE,
CONVERSATION_WINDOW_SIZE,
CONVERSATION_TTL,
MAX_KNOWLEDGE_POINTS,
MAX_KNOWLEDGE_BASE_LENGTH,
DEFAULT_CHAT_MODEL,
DEFAULT_TEMPERATURE,
)
logger = logging.getLogger(__name__)
# 会话索引 Redis key 前缀/后缀
CONVERSATION_INDEX_PREFIX = "course_chat:user:"
CONVERSATION_INDEX_SUFFIX = ":conversations"
# 会话元数据 key 前缀
CONVERSATION_META_PREFIX = "course_chat:meta:"
# 会话索引过期时间(与会话数据一致)
CONVERSATION_INDEX_TTL = CONVERSATION_TTL
class CourseChatServiceV2:
    """
    Course chat service V2 (native Python implementation).

    Loads a course's knowledge points as the knowledge base, calls the AI
    (non-streaming or streaming), and keeps multi-turn history plus a
    per-user conversation index in Redis.

    Usage example:
    ```python
    service = CourseChatServiceV2()
    # Non-streaming chat
    response = await service.chat(
        db=db_session,
        course_id=1,
        query="什么是玻尿酸?",
        user_id=1,
        conversation_id=None
    )
    # Streaming chat
    async for chunk in service.chat_stream(
        db=db_session,
        course_id=1,
        query="什么是玻尿酸?",
        user_id=1,
        conversation_id=None
    ):
        print(chunk, end="", flush=True)
    ```
    """
    # Redis key prefix for per-conversation message history.
    CONVERSATION_KEY_PREFIX = "course_chat:conversation:"
    # Module identifier attached to every AI call log entry.
    MODULE_CODE = "course_chat"

    def __init__(self):
        """No eager setup; AIService is created per call so a db_session can be attached."""
        pass
async def chat(
    self,
    db: AsyncSession,
    course_id: int,
    query: str,
    user_id: int,
    conversation_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Chat with a course (non-streaming).

    Args:
        db: Database session (knowledge lookup + AI call logging).
        course_id: Course whose knowledge points form the knowledge base.
        query: The user's question.
        user_id: Id of the asking user.
        conversation_id: Pass an existing id to continue a conversation;
            None starts a new one.

    Returns:
        Dict with success flag, answer, conversation_id and AI call metadata.

    Raises:
        ExternalServiceError: if any step of the chat pipeline fails.
    """
    try:
        logger.info(
            f"开始课程对话 V2 - course_id: {course_id}, user_id: {user_id}, "
            f"conversation_id: {conversation_id}"
        )
        # 1. Load the course knowledge points as the knowledge base.
        knowledge_base = await self._get_course_knowledge(db, course_id)
        if not knowledge_base:
            logger.warning(f"课程 {course_id} 没有知识点,使用空知识库")
            knowledge_base = "(该课程暂无知识点内容)"
        # 2. Reuse the given conversation id or create a fresh one.
        is_new_conversation = False
        if not conversation_id:
            conversation_id = self._generate_conversation_id(user_id, course_id)
            is_new_conversation = True
            logger.info(f"创建新会话: {conversation_id}")
        # 3. Assemble system prompt + trimmed history + current question.
        messages = await self._build_messages(
            knowledge_base=knowledge_base,
            query=query,
            user_id=user_id,
            conversation_id=conversation_id
        )
        # 4. Create an AIService and call it (db_session enables call logging).
        ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)
        response = await ai_service.chat(
            messages=messages,
            model=DEFAULT_CHAT_MODEL,
            temperature=DEFAULT_TEMPERATURE,
            prompt_name="course_chat"
        )
        answer = response.content
        # 5. Persist this turn into the Redis-backed history (best-effort).
        await self._save_conversation_history(
            conversation_id=conversation_id,
            user_message=query,
            assistant_message=answer
        )
        # 6. Keep the per-user conversation index in sync.
        if is_new_conversation:
            await self._add_to_conversation_index(user_id, conversation_id, course_id)
        else:
            await self._update_conversation_index(user_id, conversation_id)
        logger.info(
            f"课程对话完成 - course_id: {course_id}, conversation_id: {conversation_id}, "
            f"provider: {response.provider}, tokens: {response.total_tokens}"
        )
        return {
            "success": True,
            "answer": answer,
            "conversation_id": conversation_id,
            "ai_provider": response.provider,
            "ai_model": response.model,
            "ai_tokens": response.total_tokens,
            "ai_latency_ms": response.latency_ms,
        }
    except Exception as e:
        logger.error(
            f"课程对话失败 - course_id: {course_id}, user_id: {user_id}, error: {e}",
            exc_info=True
        )
        raise ExternalServiceError(f"课程对话失败: {e}")
async def chat_stream(
    self,
    db: AsyncSession,
    course_id: int,
    query: str,
    user_id: int,
    conversation_id: Optional[str] = None
) -> AsyncGenerator[Tuple[str, Optional[str]], None]:
    """
    Chat with a course (streaming output).

    Args:
        db: Database session (knowledge lookup + AI call logging).
        course_id: Course whose knowledge points form the knowledge base.
        query: The user's question.
        user_id: Id of the asking user.
        conversation_id: Pass an existing id to continue a conversation.

    Yields:
        Tuple[str, Optional[str]]: (event type, payload)
        - ("conversation_started", conversation_id): new conversation created
        - ("chunk", text): one text fragment of the answer
        - ("end", None): stream finished
        - ("error", message): failure — errors are yielded, never raised
    """
    full_answer = ""
    try:
        logger.info(
            f"开始流式课程对话 V2 - course_id: {course_id}, user_id: {user_id}, "
            f"conversation_id: {conversation_id}"
        )
        # 1. Load the course knowledge points as the knowledge base.
        knowledge_base = await self._get_course_knowledge(db, course_id)
        if not knowledge_base:
            logger.warning(f"课程 {course_id} 没有知识点,使用空知识库")
            knowledge_base = "(该课程暂无知识点内容)"
        # 2. Reuse the given conversation id or create a fresh one.
        is_new_conversation = False
        if not conversation_id:
            conversation_id = self._generate_conversation_id(user_id, course_id)
            is_new_conversation = True
            logger.info(f"创建新会话: {conversation_id}")
        # 3. Tell the client the new conversation id before any chunks.
        if is_new_conversation:
            yield ("conversation_started", conversation_id)
        # 4. Assemble system prompt + trimmed history + current question.
        messages = await self._build_messages(
            knowledge_base=knowledge_base,
            query=query,
            user_id=user_id,
            conversation_id=conversation_id
        )
        # 5. Stream from the AI, forwarding chunks as they arrive.
        ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)
        async for chunk in ai_service.chat_stream(
            messages=messages,
            model=DEFAULT_CHAT_MODEL,
            temperature=DEFAULT_TEMPERATURE,
            prompt_name="course_chat"
        ):
            full_answer += chunk
            yield ("chunk", chunk)
        # 6. Signal end-of-stream before the (best-effort) persistence below.
        yield ("end", None)
        # 7. Persist the accumulated answer into history.
        await self._save_conversation_history(
            conversation_id=conversation_id,
            user_message=query,
            assistant_message=full_answer
        )
        # 8. Keep the per-user conversation index in sync.
        if is_new_conversation:
            await self._add_to_conversation_index(user_id, conversation_id, course_id)
        else:
            await self._update_conversation_index(user_id, conversation_id)
        logger.info(
            f"流式课程对话完成 - course_id: {course_id}, conversation_id: {conversation_id}, "
            f"answer_length: {len(full_answer)}"
        )
    except Exception as e:
        logger.error(
            f"流式课程对话失败 - course_id: {course_id}, user_id: {user_id}, error: {e}",
            exc_info=True
        )
        yield ("error", str(e))
async def _get_course_knowledge(
    self,
    db: AsyncSession,
    course_id: int
) -> str:
    """
    Fetch the course's knowledge points and build the knowledge-base text.

    Args:
        db: Database session.
        course_id: Course id.

    Returns:
        Concatenated knowledge-base text, capped at MAX_KNOWLEDGE_BASE_LENGTH;
        empty string when the course has no knowledge points.

    Raises:
        Re-raises any database error after logging it.
    """
    try:
        # Fetch up to MAX_KNOWLEDGE_POINTS non-deleted points whose source
        # material is also non-deleted.
        query = text("""
            SELECT kp.name, kp.description
            FROM knowledge_points kp
            INNER JOIN course_materials cm ON kp.material_id = cm.id
            WHERE kp.course_id = :course_id
            AND kp.is_deleted = 0
            AND cm.is_deleted = 0
            ORDER BY kp.id
            LIMIT :limit
        """)
        result = await db.execute(
            query,
            {"course_id": course_id, "limit": MAX_KNOWLEDGE_POINTS}
        )
        rows = result.fetchall()
        if not rows:
            logger.warning(f"课程 {course_id} 没有关联的知识点")
            return ""
        # Render each point through the item template, stopping once the
        # total text would exceed the length budget.
        knowledge_items = []
        total_length = 0
        for row in rows:
            name = row[0] or ""
            description = row[1] or ""
            item = KNOWLEDGE_ITEM_TEMPLATE.format(
                name=name,
                description=description
            )
            # Enforce the overall knowledge-base length limit.
            if total_length + len(item) > MAX_KNOWLEDGE_BASE_LENGTH:
                logger.warning(
                    f"知识库文本已达到最大长度限制 {MAX_KNOWLEDGE_BASE_LENGTH}"
                    f"停止添加更多知识点"
                )
                break
            knowledge_items.append(item)
            total_length += len(item)
        knowledge_base = "\n".join(knowledge_items)
        logger.info(
            f"获取课程知识点成功 - course_id: {course_id}, "
            f"count: {len(knowledge_items)}, length: {len(knowledge_base)}"
        )
        return knowledge_base
    except Exception as e:
        logger.error(f"获取课程知识点失败: {e}")
        raise
async def _build_messages(
    self,
    knowledge_base: str,
    query: str,
    user_id: int,
    conversation_id: str
) -> List[Dict[str, str]]:
    """
    Assemble the chat message list: system prompt + trimmed history + query.

    Args:
        knowledge_base: Knowledge-base text injected into the system prompt.
        query: The current user question.
        user_id: User id (kept for interface symmetry; not used here).
        conversation_id: Conversation id whose history is loaded.

    Returns:
        Message dicts ready to send to the AI.
    """
    # Conversation history, trimmed to the most recent window
    # (CONVERSATION_WINDOW_SIZE turns = twice that many messages).
    history = await self._get_conversation_history(conversation_id)
    window = CONVERSATION_WINDOW_SIZE * 2
    if len(history) > window:
        history = history[-window:]
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT.format(knowledge_base=knowledge_base)},
        *history,
        {"role": "user", "content": USER_PROMPT.format(query=query)},
    ]
    logger.debug(
        f"构建消息列表 - total: {len(messages)}, history: {len(history)}"
    )
    return messages
def _generate_conversation_id(self, user_id: int, course_id: int) -> str:
    """Build a unique conversation id of the form conv_{user_id}_{course_id}_{8 hex chars}."""
    suffix = uuid.uuid4().hex[:8]
    return "_".join(["conv", str(user_id), str(course_id), suffix])
async def _get_conversation_history(
    self,
    conversation_id: str
) -> List[Dict[str, str]]:
    """
    Load the conversation history from Redis.

    Args:
        conversation_id: Conversation id.

    Returns:
        Message list [{"role": "user/assistant", "content": "..."}];
        empty list when Redis is unavailable or no history exists.
    """
    try:
        from app.core.redis import get_redis_client
        redis = get_redis_client()
        key = f"{self.CONVERSATION_KEY_PREFIX}{conversation_id}"
        data = await redis.get(key)
        if not data:
            return []
        history = json.loads(data)
        return history
    except RuntimeError:
        # Redis not initialized — degrade gracefully to an empty history.
        logger.warning("Redis 未初始化,无法获取会话历史")
        return []
    except Exception as e:
        # History is best-effort; never let a cache failure break the chat.
        logger.warning(f"获取会话历史失败: {e}")
        return []
async def _save_conversation_history(
    self,
    conversation_id: str,
    user_message: str,
    assistant_message: str
) -> None:
    """
    Append one user/assistant turn to the Redis-backed history (best-effort).

    Args:
        conversation_id: Conversation id.
        user_message: The user's message.
        assistant_message: The AI's reply.
    """
    try:
        from app.core.redis import get_redis_client
        redis = get_redis_client()
        key = f"{self.CONVERSATION_KEY_PREFIX}{conversation_id}"
        # Read-modify-write: load current history, append the new turn.
        history = await self._get_conversation_history(conversation_id)
        history.append({"role": "user", "content": user_message})
        history.append({"role": "assistant", "content": assistant_message})
        # Cap the stored history to the conversation window.
        max_messages = CONVERSATION_WINDOW_SIZE * 2
        if len(history) > max_messages:
            history = history[-max_messages:]
        # Write back with a TTL so abandoned conversations expire.
        await redis.setex(
            key,
            CONVERSATION_TTL,
            json.dumps(history, ensure_ascii=False)
        )
        logger.debug(
            f"保存会话历史成功 - conversation_id: {conversation_id}, "
            f"messages: {len(history)}"
        )
    except RuntimeError:
        # Redis not initialized — skip persistence rather than fail the chat.
        logger.warning("Redis 未初始化,无法保存会话历史")
    except Exception as e:
        logger.warning(f"保存会话历史失败: {e}")
async def get_conversation_messages(
    self,
    conversation_id: str,
    user_id: int
) -> List[Dict[str, Any]]:
    """
    Return a conversation's message history, enforcing ownership.

    Args:
        conversation_id: Conversation id (encodes the owner's user id).
        user_id: Requesting user's id, checked against the id prefix.

    Returns:
        List of {"id", "role", "content"} dicts; empty when the conversation
        is not owned by the user or has no history.
    """
    # Ownership check: conversation ids are formatted conv_{user_id}_...
    if not conversation_id.startswith(f"conv_{user_id}_"):
        logger.warning(
            f"用户 {user_id} 尝试访问不属于自己的会话: {conversation_id}"
        )
        return []
    history = await self._get_conversation_history(conversation_id)
    # Expose each stored message with a positional id.
    return [
        {"id": index, "role": entry["role"], "content": entry["content"]}
        for index, entry in enumerate(history)
    ]
async def _add_to_conversation_index(
    self,
    user_id: int,
    conversation_id: str,
    course_id: int
) -> None:
    """
    Register a new conversation in the user's index (best-effort).

    Args:
        user_id: User id.
        conversation_id: Conversation id.
        course_id: Course id stored in the metadata.
    """
    try:
        from app.core.redis import get_redis_client
        redis = get_redis_client()
        # 1. Sorted-set index keyed by user; score is the creation timestamp
        #    so the newest conversation sorts last (zrevrange → first).
        index_key = f"{CONVERSATION_INDEX_PREFIX}{user_id}{CONVERSATION_INDEX_SUFFIX}"
        timestamp = time.time()
        await redis.zadd(index_key, {conversation_id: timestamp})
        await redis.expire(index_key, CONVERSATION_INDEX_TTL)
        # 2. Per-conversation metadata blob with the same TTL.
        meta_key = f"{CONVERSATION_META_PREFIX}{conversation_id}"
        meta_data = {
            "conversation_id": conversation_id,
            "user_id": user_id,
            "course_id": course_id,
            "created_at": timestamp,
            "updated_at": timestamp,
        }
        await redis.setex(
            meta_key,
            CONVERSATION_INDEX_TTL,
            json.dumps(meta_data, ensure_ascii=False)
        )
        logger.debug(
            f"会话已添加到索引 - user_id: {user_id}, conversation_id: {conversation_id}"
        )
    except RuntimeError:
        logger.warning("Redis 未初始化,无法添加会话索引")
    except Exception as e:
        # Index maintenance is best-effort; never fail the chat for it.
        logger.warning(f"添加会话索引失败: {e}")
async def _update_conversation_index(
    self,
    user_id: int,
    conversation_id: str
) -> None:
    """
    Refresh a conversation's last-active timestamp in the index (best-effort).

    Args:
        user_id: User id.
        conversation_id: Conversation id.
    """
    try:
        from app.core.redis import get_redis_client
        redis = get_redis_client()
        # Bump the score in the per-user sorted-set index.
        index_key = f"{CONVERSATION_INDEX_PREFIX}{user_id}{CONVERSATION_INDEX_SUFFIX}"
        timestamp = time.time()
        await redis.zadd(index_key, {conversation_id: timestamp})
        await redis.expire(index_key, CONVERSATION_INDEX_TTL)
        # Mirror the timestamp into the metadata blob, if it still exists.
        meta_key = f"{CONVERSATION_META_PREFIX}{conversation_id}"
        meta_data = await redis.get(meta_key)
        if meta_data:
            meta = json.loads(meta_data)
            meta["updated_at"] = timestamp
            await redis.setex(
                meta_key,
                CONVERSATION_INDEX_TTL,
                json.dumps(meta, ensure_ascii=False)
            )
        logger.debug(
            f"会话索引已更新 - user_id: {user_id}, conversation_id: {conversation_id}"
        )
    except RuntimeError:
        logger.warning("Redis 未初始化,无法更新会话索引")
    except Exception as e:
        # Index maintenance is best-effort; never fail the chat for it.
        logger.warning(f"更新会话索引失败: {e}")
async def list_user_conversations(
    self,
    user_id: int,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    List a user's conversations, newest first.

    Args:
        user_id: User id.
        limit: Maximum number of conversations to return.

    Returns:
        List of conversation summaries (id, course_id, timestamps, a
        last-message preview, message count); empty on any Redis failure.
    """
    try:
        from app.core.redis import get_redis_client
        redis = get_redis_client()
        # 1. Newest conversation ids first (highest score = most recent).
        index_key = f"{CONVERSATION_INDEX_PREFIX}{user_id}{CONVERSATION_INDEX_SUFFIX}"
        conversation_ids = await redis.zrevrange(index_key, 0, limit - 1)
        if not conversation_ids:
            logger.debug(f"用户 {user_id} 没有会话记录")
            return []
        # 2. Resolve metadata and a last-message preview per conversation.
        conversations = []
        for conv_id in conversation_ids:
            # redis clients may return bytes; normalize to str.
            if isinstance(conv_id, bytes):
                conv_id = conv_id.decode('utf-8')
            # Metadata blob, if it hasn't expired.
            meta_key = f"{CONVERSATION_META_PREFIX}{conv_id}"
            meta_data = await redis.get(meta_key)
            if meta_data:
                if isinstance(meta_data, bytes):
                    meta_data = meta_data.decode('utf-8')
                meta = json.loads(meta_data)
            else:
                # Fallback: reconstruct course_id from the id itself.
                # Format: conv_{user_id}_{course_id}_{uuid}
                parts = conv_id.split('_')
                course_id = int(parts[2]) if len(parts) >= 3 else 0
                meta = {
                    "conversation_id": conv_id,
                    "user_id": user_id,
                    "course_id": course_id,
                    "created_at": time.time(),
                    "updated_at": time.time(),
                }
            # Preview: last assistant message, truncated to 100 chars.
            history = await self._get_conversation_history(conv_id)
            last_message = ""
            if history:
                # Walk backwards to the most recent assistant message.
                for msg in reversed(history):
                    if msg["role"] == "assistant":
                        last_message = msg["content"][:100]  # first 100 chars
                        if len(msg["content"]) > 100:
                            last_message += "..."
                        break
            conversations.append({
                "id": conv_id,
                "course_id": meta.get("course_id"),
                "created_at": meta.get("created_at"),
                "updated_at": meta.get("updated_at"),
                "last_message": last_message,
                "message_count": len(history),
            })
        logger.info(f"获取用户会话列表 - user_id: {user_id}, count: {len(conversations)}")
        return conversations
    except RuntimeError:
        logger.warning("Redis 未初始化,无法获取会话列表")
        return []
    except Exception as e:
        logger.warning(f"获取会话列表失败: {e}")
        return []
# 别名方法,供 API 层调用
async def get_conversations(
    self,
    user_id: int,
    course_id: Optional[int] = None,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    List the user's conversations, optionally filtered by course (alias for
    list_user_conversations used by the API layer).

    Args:
        user_id: User id.
        course_id: When given, keep only conversations of this course.
        limit: Maximum number of conversations to return.

    Returns:
        Conversation summaries, newest first.
    """
    conversations = await self.list_user_conversations(user_id, limit)
    if course_id is None:
        return conversations
    # Filter down to the requested course.
    return [item for item in conversations if item.get("course_id") == course_id]
async def get_messages(
    self,
    conversation_id: str,
    user_id: int,
    limit: int = 50
) -> List[Dict[str, Any]]:
    """
    Get a conversation's history messages (alias method for the API layer).

    Args:
        conversation_id: Conversation id.
        user_id: User id, used for the ownership check.
        limit: Maximum number of messages to return.

    Returns:
        Up to `limit` messages; empty if the conversation is not owned by
        the user or has no history.
    """
    # BUG FIX: the original called get_conversation_messages(conversation_id, limit),
    # passing `limit` where `user_id` is expected — the ownership check then
    # compared against the limit and `limit` itself was silently ignored.
    messages = await self.get_conversation_messages(conversation_id, user_id)
    # Honor the limit (the underlying method returns the full history).
    return messages[:limit]
# 创建全局实例
course_chat_service_v2 = CourseChatServiceV2()

View File

@@ -0,0 +1,61 @@
"""
Coze AI 服务模块
"""
from .client import get_coze_client, get_auth_manager, get_bot_config, get_workspace_id
from .service import get_coze_service, CozeService
from .models import (
SessionType,
MessageRole,
ContentType,
StreamEventType,
CozeSession,
CozeMessage,
StreamEvent,
CreateSessionRequest,
CreateSessionResponse,
SendMessageRequest,
EndSessionRequest,
EndSessionResponse,
)
from .exceptions import (
CozeException,
CozeAuthError,
CozeAPIError,
CozeRateLimitError,
CozeTimeoutError,
CozeStreamError,
map_coze_error_to_exception,
)
__all__ = [
# Client
"get_coze_client",
"get_auth_manager",
"get_bot_config",
"get_workspace_id",
# Service
"get_coze_service",
"CozeService",
# Models
"SessionType",
"MessageRole",
"ContentType",
"StreamEventType",
"CozeSession",
"CozeMessage",
"StreamEvent",
"CreateSessionRequest",
"CreateSessionResponse",
"SendMessageRequest",
"EndSessionRequest",
"EndSessionResponse",
# Exceptions
"CozeException",
"CozeAuthError",
"CozeAPIError",
"CozeRateLimitError",
"CozeTimeoutError",
"CozeStreamError",
"map_coze_error_to_exception",
]

View File

@@ -0,0 +1,203 @@
"""
Coze AI 客户端管理
负责管理 Coze API 的认证和客户端实例
"""
from functools import lru_cache
from typing import Optional, Dict, Any
import logging
from pathlib import Path
from cozepy import Coze, TokenAuth, JWTAuth, COZE_CN_BASE_URL
from app.core.config import get_settings
logger = logging.getLogger(__name__)
class CozeAuthManager:
    """Coze authentication manager.

    Chooses between OAuth (JWT, auto-refreshing) and PAT (personal access
    token) authentication based on which settings are configured, and caches
    the resulting Coze client.
    """

    def __init__(self):
        self.settings = get_settings()
        # Cached client; populated lazily by get_client().
        self._client: Optional[Coze] = None

    def _create_pat_auth(self) -> TokenAuth:
        """Create personal-access-token auth; raises if the token is unset."""
        if not self.settings.COZE_API_TOKEN:
            raise ValueError("COZE_API_TOKEN 未配置")
        return TokenAuth(token=self.settings.COZE_API_TOKEN)

    def _create_oauth_auth(self) -> JWTAuth:
        """Create OAuth (JWT) auth from client id, public key id and a private-key file.

        Raises:
            ValueError: when any OAuth setting is missing.
            FileNotFoundError: when the private-key file does not exist.
        """
        if not all(
            [
                self.settings.COZE_OAUTH_CLIENT_ID,
                self.settings.COZE_OAUTH_PUBLIC_KEY_ID,
                self.settings.COZE_OAUTH_PRIVATE_KEY_PATH,
            ]
        ):
            raise ValueError("OAuth 配置不完整")
        # Read the private key from disk.
        private_key_path = Path(self.settings.COZE_OAUTH_PRIVATE_KEY_PATH)
        if not private_key_path.exists():
            raise FileNotFoundError(f"私钥文件不存在: {private_key_path}")
        with open(private_key_path, "r") as f:
            private_key = f.read()
        try:
            return JWTAuth(
                client_id=self.settings.COZE_OAUTH_CLIENT_ID,
                private_key=private_key,
                public_key_id=self.settings.COZE_OAUTH_PUBLIC_KEY_ID,
                base_url=self.settings.COZE_API_BASE or COZE_CN_BASE_URL,  # default to the China-region API
            )
        except Exception as e:
            logger.error(f"创建 OAuth 认证失败: {e}")
            raise

    def get_client(self, force_new: bool = False) -> Coze:
        """
        Get a Coze client instance.

        Args:
            force_new: Force a brand-new (uncached) client — used for
                long-running requests where a cached token could expire.

        Auth priority:
            1. OAuth (preferred when fully configured; tokens auto-refresh)
            2. PAT (only when OAuth is NOT configured; PATs expire)

        Raises:
            ValueError: when OAuth is configured but fails to initialize, or
                when neither OAuth nor a PAT token is configured.
        """
        # Serve the cached client unless a fresh one was requested.
        if self._client is not None and not force_new:
            return self._client
        auth = None
        auth_type = None
        # Is the OAuth configuration complete?
        oauth_configured = all([
            self.settings.COZE_OAUTH_CLIENT_ID,
            self.settings.COZE_OAUTH_PUBLIC_KEY_ID,
            self.settings.COZE_OAUTH_PRIVATE_KEY_PATH,
        ])
        if oauth_configured:
            # OAuth fully configured: it MUST be used (no PAT fallback).
            try:
                auth = self._create_oauth_auth()
                auth_type = "OAuth"
                logger.info("使用 OAuth 认证")
            except Exception as e:
                # Deliberately do NOT fall back to a possibly-expired PAT.
                logger.error(f"OAuth 认证创建失败: {e}")
                raise ValueError(f"OAuth 认证失败,请检查私钥文件和配置: {e}")
        else:
            # OAuth not configured: fall back to PAT, if present.
            if self.settings.COZE_API_TOKEN:
                auth = self._create_pat_auth()
                auth_type = "PAT"
                logger.warning("使用 PAT 认证注意PAT会过期建议配置OAuth")
            else:
                raise ValueError("Coze 认证未配置:需要配置 OAuth 或 PAT Token")
        # Build the client against the configured (or China-region) API base.
        client = Coze(
            auth=auth, base_url=self.settings.COZE_API_BASE or COZE_CN_BASE_URL
        )
        logger.debug(f"Coze客户端创建成功认证方式: {auth_type}, force_new: {force_new}")
        # Only cache clients that were not forced fresh.
        if not force_new:
            self._client = client
        return client

    def reset(self):
        """Drop the cached client so the next get_client() builds a new one."""
        self._client = None

    def get_oauth_token(self) -> str:
        """
        Get an OAuth JWT token (for direct front-end use).

        Returns:
            The signed JWT token string.

        Raises:
            ValueError: when the OAuth configuration is incomplete.
            FileNotFoundError: when the private-key file does not exist.
        """
        if not all([
            self.settings.COZE_OAUTH_CLIENT_ID,
            self.settings.COZE_OAUTH_PUBLIC_KEY_ID,
            self.settings.COZE_OAUTH_PRIVATE_KEY_PATH,
        ]):
            raise ValueError("OAuth 配置不完整")
        # Read the private key from disk.
        private_key_path = Path(self.settings.COZE_OAUTH_PRIVATE_KEY_PATH)
        if not private_key_path.exists():
            raise FileNotFoundError(f"私钥文件不存在: {private_key_path}")
        with open(private_key_path, "r") as f:
            private_key = f.read()
        # Build a JWTAuth (base_url must point at the China-region API).
        jwt_auth = JWTAuth(
            client_id=self.settings.COZE_OAUTH_CLIENT_ID,
            private_key=private_key,
            public_key_id=self.settings.COZE_OAUTH_PUBLIC_KEY_ID,
            base_url=self.settings.COZE_API_BASE or COZE_CN_BASE_URL,  # China-region API
        )
        # JWTAuth signs the JWT internally; its .token property returns it.
        return jwt_auth.token
@lru_cache()
def get_auth_manager() -> CozeAuthManager:
    """Return the process-wide CozeAuthManager singleton (memoized by lru_cache)."""
    return CozeAuthManager()
def get_coze_client(force_new: bool = False) -> Coze:
    """
    Get a Coze client via the singleton auth manager.

    Args:
        force_new: Force a brand-new client (for long-running requests such
            as workflows, where a cached token could expire).
    """
    manager = get_auth_manager()
    return manager.get_client(force_new=force_new)
def get_workspace_id() -> str:
    """Return the configured Coze workspace id; raise if it is unset."""
    workspace_id = get_settings().COZE_WORKSPACE_ID
    if workspace_id:
        return workspace_id
    raise ValueError("COZE_WORKSPACE_ID 未配置")
def get_bot_config(session_type: str) -> Dict[str, Any]:
    """
    Resolve the Bot configuration for a session type.

    Args:
        session_type: Session type ("course_chat" or "training").

    Returns:
        Dict with the resolved bot_id and the workspace_id.

    Raises:
        ValueError: for an unknown session type or an unset bot id setting.
    """
    settings = get_settings()
    # Map each supported session type to its settings attribute.
    setting_names = {
        "course_chat": "COZE_CHAT_BOT_ID",
        "training": "COZE_TRAINING_BOT_ID",
    }
    if session_type not in setting_names:
        raise ValueError(f"不支持的会话类型: {session_type}")
    setting_name = setting_names[session_type]
    bot_id = getattr(settings, setting_name)
    if not bot_id:
        raise ValueError(f"{setting_name} 未配置")
    return {"bot_id": bot_id, "workspace_id": settings.COZE_WORKSPACE_ID}

View File

@@ -0,0 +1,44 @@
"""Coze客户端临时模拟等Agent-Coze实现后替换"""
import logging
from typing import Dict, Any, Optional
logger = logging.getLogger(__name__)
class CozeClient:
    """
    Mock Coze client.

    TODO: replace this class with the real Coze gateway client once the
    Agent-Coze module is implemented.
    """

    async def create_conversation(
        self, bot_id: str, user_id: str, meta_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Create a conversation (mocked): returns a synthetic active conversation."""
        logger.info(f"模拟创建Coze会话: bot_id={bot_id}, user_id={user_id}")
        conversation_id = f"mock_conversation_{user_id}_{bot_id[:8]}"
        return {
            "conversation_id": conversation_id,
            "bot_id": bot_id,
            "status": "active",
        }

    async def send_message(
        self, conversation_id: str, content: str, message_type: str = "text"
    ) -> Dict[str, Any]:
        """Send a message (mocked): echoes a canned assistant reply."""
        logger.info(f"模拟发送消息到会话 {conversation_id}: {content[:50]}...")
        reply = {
            "message_id": f"mock_msg_{conversation_id[:8]}",
            "content": f"这是对'{content[:30]}...'的模拟回复",
            "role": "assistant",
        }
        return reply

    async def end_conversation(self, conversation_id: str) -> Dict[str, Any]:
        """End a conversation (mocked): marks it completed."""
        logger.info(f"模拟结束会话: {conversation_id}")
        return {"status": "completed", "conversation_id": conversation_id}

View File

@@ -0,0 +1,101 @@
"""
Coze 服务异常定义
"""
from typing import Optional, Dict, Any
class CozeException(Exception):
    """Base exception for the Coze service layer.

    Carries a human-readable message plus an optional machine error code,
    HTTP status code and a details dict.
    """

    def __init__(
        self,
        message: str,
        code: Optional[str] = None,
        status_code: Optional[int] = None,
        details: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(message)
        self.message = message
        self.code = code
        self.status_code = status_code
        # Always expose a dict, even when no details were supplied.
        self.details = details or {}
class CozeAuthError(CozeException):
    """Authentication failure (invalid or expired credentials)."""
    pass
class CozeAPIError(CozeException):
    """Generic Coze API call failure."""
    pass
class CozeRateLimitError(CozeException):
    """Coze API rate limit exceeded."""
    pass
class CozeTimeoutError(CozeException):
    """Coze API call timed out."""
    pass
class CozeStreamError(CozeException):
    """Error while consuming a streaming response."""
    pass
def map_coze_error_to_exception(error: Exception) -> CozeException:
    """
    Map a raw Coze SDK error to the unified exception hierarchy.

    Classification is keyword-based on the error text; anything that does not
    match a known pattern falls back to CozeAPIError.

    Args:
        error: The original exception.

    Returns:
        CozeException: the mapped exception (never raises itself).
    """
    original = str(error)
    lowered = original.lower()
    details = {"original_error": original}
    # Auth problems first: either keyword counts.
    if "authentication" in lowered or "unauthorized" in lowered:
        return CozeAuthError(
            message="Coze 认证失败",
            code="COZE_AUTH_ERROR",
            status_code=401,
            details=details,
        )
    if "rate limit" in lowered:
        return CozeRateLimitError(
            message="Coze API 速率限制",
            code="COZE_RATE_LIMIT",
            status_code=429,
            details=details,
        )
    if "timeout" in lowered:
        return CozeTimeoutError(
            message="Coze API 调用超时",
            code="COZE_TIMEOUT",
            status_code=504,
            details=details,
        )
    # Default bucket: generic API error.
    return CozeAPIError(
        message="Coze API 调用失败",
        code="COZE_API_ERROR",
        status_code=500,
        details=details,
    )

View File

@@ -0,0 +1,136 @@
"""
Coze 服务数据模型
"""
from typing import Optional, List, Dict, Any, Literal
from datetime import datetime
from pydantic import BaseModel, Field
from enum import Enum
class SessionType(str, Enum):
    """Session type."""
    COURSE_CHAT = "course_chat"  # course Q&A chat
    TRAINING = "training"        # sales-training (sparring) session
    EXAM = "exam"                # exam session
class MessageRole(str, Enum):
    """Message author role."""
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
class ContentType(str, Enum):
    """Message content type."""
    TEXT = "text"
    CARD = "card"
    IMAGE = "image"
    FILE = "file"
class StreamEventType(str, Enum):
    """Streaming event type (values match the upstream event names)."""
    MESSAGE_START = "conversation.message.start"
    MESSAGE_DELTA = "conversation.message.delta"
    MESSAGE_COMPLETED = "conversation.message.completed"
    ERROR = "error"
    DONE = "done"
class CozeSession(BaseModel):
    """Coze session record (one per conversation)."""
    session_id: str = Field(..., description="会话ID")
    conversation_id: str = Field(..., description="Coze对话ID")
    session_type: SessionType = Field(..., description="会话类型")
    user_id: str = Field(..., description="用户ID")
    bot_id: str = Field(..., description="Bot ID")
    created_at: datetime = Field(default_factory=datetime.now, description="创建时间")
    ended_at: Optional[datetime] = Field(None, description="结束时间")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")
    class Config:
        # Serialize datetimes as ISO-8601 strings.
        json_encoders = {datetime: lambda v: v.isoformat()}
class CozeMessage(BaseModel):
    """A single message within a Coze session."""
    message_id: str = Field(..., description="消息ID")
    session_id: str = Field(..., description="会话ID")
    role: MessageRole = Field(..., description="消息角色")
    content: str = Field(..., description="消息内容")
    content_type: ContentType = Field(ContentType.TEXT, description="内容类型")
    created_at: datetime = Field(default_factory=datetime.now, description="创建时间")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")
    class Config:
        # Serialize datetimes as ISO-8601 strings.
        json_encoders = {datetime: lambda v: v.isoformat()}
class StreamEvent(BaseModel):
    """One event emitted while streaming a reply."""
    event: StreamEventType = Field(..., description="事件类型")
    data: Dict[str, Any] = Field(..., description="事件数据")
    message_id: Optional[str] = Field(None, description="消息ID")
    content: Optional[str] = Field(None, description="内容")
    content_type: Optional[ContentType] = Field(None, description="内容类型")
    role: Optional[MessageRole] = Field(None, description="角色")
    error: Optional[str] = Field(None, description="错误信息")
class CreateSessionRequest(BaseModel):
    """Request body for creating a session."""
    session_type: SessionType = Field(..., description="会话类型")
    user_id: str = Field(..., description="用户ID")
    course_id: Optional[str] = Field(None, description="课程ID (课程对话时必需)")
    training_topic: Optional[str] = Field(None, description="陪练主题 (陪练时可选)")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="额外元数据")
class CreateSessionResponse(BaseModel):
    """Response body for a created session."""
    session_id: str = Field(..., description="会话ID")
    conversation_id: str = Field(..., description="Coze对话ID")
    bot_id: str = Field(..., description="Bot ID")
    created_at: datetime = Field(..., description="创建时间")
    class Config:
        # Serialize datetimes as ISO-8601 strings.
        json_encoders = {datetime: lambda v: v.isoformat()}
class SendMessageRequest(BaseModel):
    """Request body for sending a message."""
    session_id: str = Field(..., description="会话ID")
    content: str = Field(..., description="消息内容")
    file_ids: List[str] = Field(default_factory=list, description="附件ID列表")
    stream: bool = Field(True, description="是否流式响应")
class EndSessionRequest(BaseModel):
    """Request body for ending a session."""
    reason: Optional[str] = Field(None, description="结束原因")
    feedback: Optional[Dict[str, Any]] = Field(None, description="用户反馈")
class EndSessionResponse(BaseModel):
    """Response body for an ended session."""
    session_id: str = Field(..., description="会话ID")
    ended_at: datetime = Field(..., description="结束时间")
    duration_seconds: int = Field(..., description="会话时长(秒)")
    message_count: int = Field(..., description="消息数量")
    class Config:
        # Serialize datetimes as ISO-8601 strings.
        json_encoders = {datetime: lambda v: v.isoformat()}

View File

@@ -0,0 +1,335 @@
"""
Coze 服务层实现
处理会话管理、消息发送、流式响应等核心功能
"""
import asyncio
import json
import logging
import uuid
from typing import AsyncIterator, Dict, Any, List, Optional
from datetime import datetime
from cozepy import ChatEventType, Message, MessageContentType
from .client import get_coze_client, get_bot_config, get_workspace_id
from .models import (
CozeSession,
CozeMessage,
StreamEvent,
SessionType,
MessageRole,
ContentType,
StreamEventType,
CreateSessionRequest,
CreateSessionResponse,
SendMessageRequest,
EndSessionRequest,
EndSessionResponse,
)
from .exceptions import (
CozeAPIError,
CozeStreamError,
CozeTimeoutError,
map_coze_error_to_exception,
)
logger = logging.getLogger(__name__)
class CozeService:
"""Coze 服务类"""
def __init__(self):
self.client = get_coze_client()
self.bot_config = get_bot_config()
self.workspace_id = get_workspace_id()
# 内存中的会话存储(生产环境应使用 Redis
self._sessions: Dict[str, CozeSession] = {}
self._messages: Dict[str, List[CozeMessage]] = {}
async def create_session(
self, request: CreateSessionRequest
) -> CreateSessionResponse:
"""
创建新会话
Args:
request: 创建会话请求
Returns:
CreateSessionResponse: 会话信息
"""
try:
# 根据会话类型选择 Bot
bot_id = self._get_bot_id_by_type(request.session_type)
# 创建 Coze 对话
conversation = await asyncio.to_thread(
self.client.conversations.create, bot_id=bot_id
)
# 创建本地会话记录
session = CozeSession(
session_id=str(uuid.uuid4()),
conversation_id=conversation.id,
session_type=request.session_type,
user_id=request.user_id,
bot_id=bot_id,
metadata=request.metadata,
)
# 保存会话
self._sessions[session.session_id] = session
self._messages[session.session_id] = []
logger.info(
f"创建会话成功",
extra={
"session_id": session.session_id,
"conversation_id": conversation.id,
"session_type": request.session_type.value,
"user_id": request.user_id,
},
)
return CreateSessionResponse(
session_id=session.session_id,
conversation_id=session.conversation_id,
bot_id=session.bot_id,
created_at=session.created_at,
)
except Exception as e:
logger.error(f"创建会话失败: {e}", exc_info=True)
raise map_coze_error_to_exception(e)
async def send_message(
self, request: SendMessageRequest
) -> AsyncIterator[StreamEvent]:
"""
发送消息并处理流式响应
Args:
request: 发送消息请求
Yields:
StreamEvent: 流式事件
"""
session = self._get_session(request.session_id)
if not session:
raise CozeAPIError(f"会话不存在: {request.session_id}")
# 记录用户消息
user_message = CozeMessage(
message_id=str(uuid.uuid4()),
session_id=session.session_id,
role=MessageRole.USER,
content=request.content,
)
self._messages[session.session_id].append(user_message)
try:
# 构建消息历史
messages = self._build_message_history(session.session_id)
# 调用 Coze API
stream = await asyncio.to_thread(
self.client.chat.stream,
bot_id=session.bot_id,
conversation_id=session.conversation_id,
additional_messages=messages,
auto_save_history=True,
)
# 处理流式响应
async for event in self._process_stream(stream, session.session_id):
yield event
except asyncio.TimeoutError:
logger.error(f"消息发送超时: session_id={request.session_id}")
raise CozeTimeoutError("消息处理超时")
except Exception as e:
logger.error(f"发送消息失败: {e}", exc_info=True)
raise map_coze_error_to_exception(e)
async def end_session(
self, session_id: str, request: EndSessionRequest
) -> EndSessionResponse:
"""
结束会话
Args:
session_id: 会话ID
request: 结束会话请求
Returns:
EndSessionResponse: 结束会话响应
"""
session = self._get_session(session_id)
if not session:
raise CozeAPIError(f"会话不存在: {session_id}")
# 更新会话状态
session.ended_at = datetime.now()
# 计算会话统计
duration_seconds = int((session.ended_at - session.created_at).total_seconds())
message_count = len(self._messages.get(session_id, []))
# 记录结束原因和反馈
if request.reason:
session.metadata["end_reason"] = request.reason
if request.feedback:
session.metadata["feedback"] = request.feedback
logger.info(
f"会话结束",
extra={
"session_id": session_id,
"duration_seconds": duration_seconds,
"message_count": message_count,
"reason": request.reason,
},
)
return EndSessionResponse(
session_id=session_id,
ended_at=session.ended_at,
duration_seconds=duration_seconds,
message_count=message_count,
)
async def get_session_messages(
self, session_id: str, limit: int = 50, offset: int = 0
) -> List[CozeMessage]:
"""获取会话消息历史"""
messages = self._messages.get(session_id, [])
return messages[offset : offset + limit]
def _get_bot_id_by_type(self, session_type: SessionType) -> str:
"""根据会话类型获取 Bot ID"""
mapping = {
SessionType.COURSE_CHAT: self.bot_config["course_chat"],
SessionType.TRAINING: self.bot_config["training"],
SessionType.EXAM: self.bot_config["exam"],
}
return mapping.get(session_type, self.bot_config["training"])
    def _get_session(self, session_id: str) -> Optional[CozeSession]:
        """Look up an in-memory session by ID; returns None when absent."""
        return self._sessions.get(session_id)
def _build_message_history(self, session_id: str) -> List[Message]:
"""构建消息历史"""
messages = self._messages.get(session_id, [])
history = []
for msg in messages[-10:]: # 只发送最近10条消息作为上下文
history.append(
Message(
role=msg.role.value,
content=msg.content,
content_type=MessageContentType.TEXT,
)
)
return history
    async def _process_stream(
        self, stream, session_id: str
    ) -> AsyncIterator[StreamEvent]:
        """Translate the Coze SDK event stream into StreamEvent objects.

        Accumulates delta fragments into the complete assistant reply,
        stores the finished message in the in-memory history, converts
        SDK errors into ERROR events, and always terminates with a DONE
        event (even after an exception).

        Args:
            stream: Event iterator returned by the Coze SDK.
            session_id: Session the assistant reply belongs to.

        Yields:
            StreamEvent: MESSAGE_DELTA / MESSAGE_COMPLETED / ERROR / DONE.
        """
        assistant_message_id = str(uuid.uuid4())
        accumulated_content = []
        content_type = ContentType.TEXT
        try:
            for event in stream:
                if event.event == ChatEventType.CONVERSATION_MESSAGE_DELTA:
                    # Partial fragment of the assistant reply.
                    content = event.message.content
                    accumulated_content.append(content)
                    # Once any fragment reports "card", render the whole
                    # message as a card from then on.
                    if (
                        hasattr(event.message, "content_type")
                        and event.message.content_type == "card"
                    ):
                        content_type = ContentType.CARD
                    yield StreamEvent(
                        event=StreamEventType.MESSAGE_DELTA,
                        data={
                            "conversation_id": event.conversation_id,
                            "message_id": assistant_message_id,
                            "content": content,
                            "content_type": content_type.value,
                        },
                        message_id=assistant_message_id,
                        content=content,
                        content_type=content_type,
                        role=MessageRole.ASSISTANT,
                    )
                elif event.event == ChatEventType.CONVERSATION_MESSAGE_COMPLETED:
                    # Reply finished: join the fragments into the full text.
                    full_content = "".join(accumulated_content)
                    # Persist the assistant message into the session history.
                    assistant_message = CozeMessage(
                        message_id=assistant_message_id,
                        session_id=session_id,
                        role=MessageRole.ASSISTANT,
                        content=full_content,
                        content_type=content_type,
                    )
                    self._messages[session_id].append(assistant_message)
                    yield StreamEvent(
                        event=StreamEventType.MESSAGE_COMPLETED,
                        data={
                            "conversation_id": event.conversation_id,
                            "message_id": assistant_message_id,
                            "content": full_content,
                            "content_type": content_type.value,
                            "usage": getattr(event, "usage", {}),
                        },
                        message_id=assistant_message_id,
                        content=full_content,
                        content_type=content_type,
                        role=MessageRole.ASSISTANT,
                    )
                elif event.event == ChatEventType.ERROR:
                    # SDK-reported error event; surface it to the consumer.
                    yield StreamEvent(
                        event=StreamEventType.ERROR,
                        data={"error": str(event)},
                        error=str(event),
                    )
        except Exception as e:
            logger.error(f"流式处理错误: {e}", exc_info=True)
            yield StreamEvent(
                event=StreamEventType.ERROR, data={"error": str(e)}, error=str(e)
            )
        finally:
            # Always emit the terminating DONE event.
            yield StreamEvent(
                event=StreamEventType.DONE, data={"session_id": session_id}
            )
# Module-level singleton instance, created lazily by get_coze_service().
_service: Optional[CozeService] = None
def get_coze_service() -> CozeService:
    """Return the process-wide CozeService, creating it on first call."""
    global _service
    if _service is None:
        _service = CozeService()
    return _service

View File

@@ -0,0 +1,512 @@
"""
试题生成服务 V2 - Python 原生实现
功能:
- 根据岗位和知识点动态生成考试题目
- 支持错题重出模式
- 调用 AI 生成并解析 JSON 结果
提供稳定可靠的试题生成能力。
"""
import json
import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.exceptions import ExternalServiceError
from .ai_service import AIService, AIResponse
from .llm_json_parser import parse_with_fallback, clean_llm_output
from .prompts.exam_generator_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
MISTAKE_REGEN_SYSTEM_PROMPT,
MISTAKE_REGEN_USER_PROMPT,
QUESTION_SCHEMA,
DEFAULT_QUESTION_COUNTS,
DEFAULT_DIFFICULTY_LEVEL,
)
logger = logging.getLogger(__name__)
@dataclass
class ExamGeneratorConfig:
    """Parameters controlling a single exam-generation run."""
    course_id: int
    position_id: int
    single_choice_count: int = DEFAULT_QUESTION_COUNTS["single_choice_count"]
    multiple_choice_count: int = DEFAULT_QUESTION_COUNTS["multiple_choice_count"]
    true_false_count: int = DEFAULT_QUESTION_COUNTS["true_false_count"]
    fill_blank_count: int = DEFAULT_QUESTION_COUNTS["fill_blank_count"]
    essay_count: int = DEFAULT_QUESTION_COUNTS["essay_count"]
    difficulty_level: int = DEFAULT_DIFFICULTY_LEVEL
    mistake_records: str = ""

    @property
    def total_count(self) -> int:
        """Total number of questions requested across all question types."""
        return sum(
            (
                self.single_choice_count,
                self.multiple_choice_count,
                self.true_false_count,
                self.fill_blank_count,
                self.essay_count,
            )
        )

    @property
    def has_mistakes(self) -> bool:
        """Whether a non-blank mistake record was supplied (enables regen mode)."""
        return bool(self.mistake_records and self.mistake_records.strip())
class ExamGeneratorService:
"""
试题生成服务 V2
使用 Python 原生实现。
使用示例:
```python
service = ExamGeneratorService()
result = await service.generate_exam(
db=db_session,
config=ExamGeneratorConfig(
course_id=1,
position_id=1,
single_choice_count=5,
multiple_choice_count=3,
difficulty_level=3
)
)
```
"""
    def __init__(self):
        """Bind an AIService client scoped to the exam-generator module."""
        self.ai_service = AIService(module_code="exam_generator")
async def generate_exam(
self,
db: AsyncSession,
config: ExamGeneratorConfig
) -> Dict[str, Any]:
"""
生成考试题目(主入口)
Args:
db: 数据库会话
config: 考试生成配置
Returns:
生成结果,包含 success、questions、total_count 等字段
"""
try:
logger.info(
f"开始生成试题 - course_id: {config.course_id}, position_id: {config.position_id}, "
f"total_count: {config.total_count}, has_mistakes: {config.has_mistakes}"
)
# 根据是否有错题记录,走不同分支
if config.has_mistakes:
return await self._regenerate_from_mistakes(db, config)
else:
return await self._generate_from_knowledge(db, config)
except ExternalServiceError:
raise
except Exception as e:
logger.error(
f"试题生成失败 - course_id: {config.course_id}, error: {e}",
exc_info=True
)
raise ExternalServiceError(f"试题生成失败: {e}")
    async def _generate_from_knowledge(
        self,
        db: AsyncSession,
        config: ExamGeneratorConfig
    ) -> Dict[str, Any]:
        """
        Generate questions from course knowledge points (no-mistake mode).

        Steps:
        1. Load the position record
        2. Randomly sample knowledge points (one candidate per question)
        3. Ask the AI to generate the questions
        4. Parse and return the result

        Raises:
            ExternalServiceError: When the position or knowledge points
                are missing.
        """
        # 1. Load position info
        position_info = await self._query_position(db, config.position_id)
        if not position_info:
            raise ExternalServiceError(f"岗位不存在: position_id={config.position_id}")
        logger.info(f"岗位信息: {position_info.get('name', 'unknown')}")
        # 2. Randomly sample knowledge points
        knowledge_points = await self._query_knowledge_points(
            db,
            config.course_id,
            config.total_count
        )
        if not knowledge_points:
            raise ExternalServiceError(
                f"课程没有可用的知识点: course_id={config.course_id}"
            )
        logger.info(f"查询到 {len(knowledge_points)} 个知识点")
        # 3. Build the prompts from the requested question mix
        system_prompt = SYSTEM_PROMPT.format(
            total_count=config.total_count,
            single_choice_count=config.single_choice_count,
            multiple_choice_count=config.multiple_choice_count,
            true_false_count=config.true_false_count,
            fill_blank_count=config.fill_blank_count,
            essay_count=config.essay_count,
            difficulty_level=config.difficulty_level,
        )
        user_prompt = USER_PROMPT.format(
            position_info=self._format_position_info(position_info),
            knowledge_points=self._format_knowledge_points(knowledge_points),
        )
        # 4. Call the AI
        ai_response = await self._call_ai_generate(system_prompt, user_prompt)
        logger.info(
            f"AI 生成完成 - provider: {ai_response.provider}, "
            f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
        )
        # 5. Parse the generated questions
        questions = self._parse_questions(ai_response.content)
        logger.info(f"试题解析成功,数量: {len(questions)}")
        return {
            "success": True,
            "questions": questions,
            "total_count": len(questions),
            "mode": "knowledge_based",
            "ai_provider": ai_response.provider,
            "ai_model": ai_response.model,
            "ai_tokens": ai_response.total_tokens,
            "ai_latency_ms": ai_response.latency_ms,
        }
    async def _regenerate_from_mistakes(
        self,
        db: AsyncSession,
        config: ExamGeneratorConfig
    ) -> Dict[str, Any]:
        """
        Regenerate fresh questions from the learner's past mistakes.

        Steps:
        1. Build the mistake-regeneration prompts
        2. Ask the AI for new questions
        3. Parse and return the result

        Note: ``db`` is unused in this mode; it is kept so both generation
        branches share one signature.
        """
        logger.info("进入错题重出模式")
        # 1. Build the prompts
        system_prompt = MISTAKE_REGEN_SYSTEM_PROMPT.format(
            difficulty_level=config.difficulty_level,
        )
        user_prompt = MISTAKE_REGEN_USER_PROMPT.format(
            mistake_records=config.mistake_records,
        )
        # 2. Call the AI
        ai_response = await self._call_ai_generate(system_prompt, user_prompt)
        logger.info(
            f"错题重出完成 - provider: {ai_response.provider}, "
            f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
        )
        # 3. Parse the generated questions
        questions = self._parse_questions(ai_response.content)
        logger.info(f"错题重出解析成功,数量: {len(questions)}")
        return {
            "success": True,
            "questions": questions,
            "total_count": len(questions),
            "mode": "mistake_regen",
            "ai_provider": ai_response.provider,
            "ai_model": ai_response.model,
            "ai_tokens": ai_response.total_tokens,
            "ai_latency_ms": ai_response.latency_ms,
        }
    async def _query_position(
        self,
        db: AsyncSession,
        position_id: int
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch a (non-deleted) position row as a dict, or None if absent.

        SQL: SELECT id, name, description, skills, level FROM positions
             WHERE id = :id AND is_deleted = FALSE

        Raises:
            ExternalServiceError: On any database failure.
        """
        try:
            result = await db.execute(
                text("""
                    SELECT id, name, description, skills, level
                    FROM positions
                    WHERE id = :position_id AND is_deleted = FALSE
                """),
                {"position_id": position_id}
            )
            row = result.fetchone()
            if not row:
                return None
            # Map the positional Row onto named keys
            return {
                "id": row[0],
                "name": row[1],
                "description": row[2],
                "skills": row[3],  # JSON column; may arrive as str or parsed — driver-dependent, confirm
                "level": row[4],
            }
        except Exception as e:
            logger.error(f"查询岗位信息失败: {e}")
            raise ExternalServiceError(f"查询岗位信息失败: {e}")
    async def _query_knowledge_points(
        self,
        db: AsyncSession,
        course_id: int,
        limit: int
    ) -> List[Dict[str, Any]]:
        """
        Sample up to ``limit`` random knowledge points for a course.

        Joins course_materials so that points belonging to deleted
        materials are excluded. Uses MySQL's ORDER BY RAND() to sample.

        Raises:
            ExternalServiceError: On any database failure.
        """
        try:
            result = await db.execute(
                text("""
                    SELECT kp.id, kp.name, kp.description, kp.topic_relation
                    FROM knowledge_points kp
                    INNER JOIN course_materials cm ON kp.material_id = cm.id
                    WHERE kp.course_id = :course_id
                    AND kp.is_deleted = FALSE
                    AND cm.is_deleted = FALSE
                    ORDER BY RAND()
                    LIMIT :limit
                """),
                {"course_id": course_id, "limit": limit}
            )
            rows = result.fetchall()
            # Map positional rows onto named keys
            return [
                {
                    "id": row[0],
                    "name": row[1],
                    "description": row[2],
                    "topic_relation": row[3],
                }
                for row in rows
            ]
        except Exception as e:
            logger.error(f"查询知识点失败: {e}")
            raise ExternalServiceError(f"查询知识点失败: {e}")
async def _call_ai_generate(
self,
system_prompt: str,
user_prompt: str
) -> AIResponse:
"""调用 AI 生成题目"""
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
response = await self.ai_service.chat(
messages=messages,
temperature=0.7, # 适当的创造性
prompt_name="exam_generator"
)
return response
def _parse_questions(self, ai_output: str) -> List[Dict[str, Any]]:
"""
解析 AI 输出的题目 JSON
使用 LLM JSON Parser 进行多层兜底解析
"""
# 先清洗输出
cleaned_output, rules = clean_llm_output(ai_output)
if rules:
logger.debug(f"AI 输出已清洗: {rules}")
# 使用带 Schema 校验的解析
questions = parse_with_fallback(
cleaned_output,
schema=QUESTION_SCHEMA,
default=[],
validate_schema=True,
on_error="default"
)
# 后处理:确保每个题目有必要字段
processed_questions = []
for i, q in enumerate(questions):
if isinstance(q, dict):
# 确保有 num 字段
if "num" not in q:
q["num"] = i + 1
# 确保 num 是整数
try:
q["num"] = int(q["num"])
except (ValueError, TypeError):
q["num"] = i + 1
# 确保有 type 字段
if "type" not in q:
# 根据是否有 options 推断类型
if q.get("topic", {}).get("options"):
q["type"] = "single_choice"
else:
q["type"] = "essay"
# 确保 knowledge_point_id 是整数或 None
kp_id = q.get("knowledge_point_id")
if kp_id is not None:
try:
q["knowledge_point_id"] = int(kp_id)
except (ValueError, TypeError):
q["knowledge_point_id"] = None
# 验证必要字段
if q.get("topic") and q.get("correct"):
processed_questions.append(q)
else:
logger.warning(f"题目缺少必要字段,已跳过: {q}")
if not processed_questions:
logger.warning("未能解析出有效的题目")
return processed_questions
def _format_position_info(self, position: Dict[str, Any]) -> str:
"""格式化岗位信息为文本"""
lines = [
f"岗位名称: {position.get('name', '未知')}",
f"岗位等级: {position.get('level', '未设置')}",
]
if position.get('description'):
lines.append(f"岗位描述: {position['description']}")
skills = position.get('skills')
if skills:
# skills 可能是 JSON 字符串或列表
if isinstance(skills, str):
try:
skills = json.loads(skills)
except json.JSONDecodeError:
skills = [skills]
if isinstance(skills, list) and skills:
lines.append(f"核心技能: {', '.join(str(s) for s in skills)}")
return '\n'.join(lines)
def _format_knowledge_points(self, knowledge_points: List[Dict[str, Any]]) -> str:
"""格式化知识点列表为文本"""
lines = []
for kp in knowledge_points:
kp_text = f"【知识点 ID: {kp['id']}{kp['name']}"
if kp.get('description'):
kp_text += f"\n{kp['description']}"
if kp.get('topic_relation'):
kp_text += f"\n关系描述: {kp['topic_relation']}"
lines.append(kp_text)
return '\n\n'.join(lines)
# Module-level singleton instance.
exam_generator_service = ExamGeneratorService()
# ==================== Convenience helpers ====================
async def generate_exam(
    db: AsyncSession,
    course_id: int,
    position_id: int,
    single_choice_count: int = 4,
    multiple_choice_count: int = 2,
    true_false_count: int = 1,
    fill_blank_count: int = 2,
    essay_count: int = 1,
    difficulty_level: int = 3,
    mistake_records: str = ""
) -> Dict[str, Any]:
    """
    Module-level shortcut around ExamGeneratorService.generate_exam.

    Args:
        db: Async database session
        course_id: Course the exam belongs to
        position_id: Target position
        single_choice_count: Number of single-choice questions
        multiple_choice_count: Number of multiple-choice questions
        true_false_count: Number of true/false questions
        fill_blank_count: Number of fill-in-the-blank questions
        essay_count: Number of essay questions
        difficulty_level: Difficulty on a 1-5 scale
        mistake_records: JSON string of past mistakes (enables regen mode)

    Returns:
        The generation result dict.
    """
    return await exam_generator_service.generate_exam(
        db,
        ExamGeneratorConfig(
            course_id=course_id,
            position_id=position_id,
            single_choice_count=single_choice_count,
            multiple_choice_count=multiple_choice_count,
            true_false_count=true_false_count,
            fill_blank_count=fill_blank_count,
            essay_count=essay_count,
            difficulty_level=difficulty_level,
            mistake_records=mistake_records,
        ),
    )

View File

@@ -0,0 +1,548 @@
"""
知识点分析服务 V2 - Python 原生实现
功能:
- 读取文档内容PDF/Word/TXT
- 调用 AI 分析提取知识点
- 解析 JSON 结果
- 写入数据库
提供稳定可靠的知识点分析能力。
"""
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.exceptions import ExternalServiceError
from app.schemas.course import KnowledgePointCreate
from .ai_service import AIService, AIResponse
from .llm_json_parser import parse_with_fallback, clean_llm_output
from .prompts.knowledge_analysis_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
KNOWLEDGE_POINT_SCHEMA,
DEFAULT_KNOWLEDGE_TYPE,
)
logger = logging.getLogger(__name__)
# 配置常量
STATIC_UPLOADS_PREFIX = '/static/uploads/'
MAX_CONTENT_LENGTH = 100000 # 最大文档内容长度(字符)
MAX_KNOWLEDGE_POINTS = 20 # 最大知识点数量
class KnowledgeAnalysisServiceV2:
"""
知识点分析服务 V2
使用 Python 原生实现。
使用示例:
```python
service = KnowledgeAnalysisServiceV2()
result = await service.analyze_course_material(
db=db_session,
course_id=1,
material_id=10,
file_url="/static/uploads/courses/1/doc.pdf",
course_title="医美产品知识",
user_id=1
)
```
"""
    def __init__(self):
        """Bind the AI client for this module and resolve the upload root."""
        self.ai_service = AIService(module_code="knowledge_analysis")
        # Fall back to a relative "uploads" dir when settings omit UPLOAD_PATH.
        self.upload_path = getattr(settings, 'UPLOAD_PATH', 'uploads')
    async def analyze_course_material(
        self,
        db: AsyncSession,
        course_id: int,
        material_id: int,
        file_url: str,
        course_title: str,
        user_id: int
    ) -> Dict[str, Any]:
        """
        Analyze one course material and persist the extracted knowledge points.

        Pipeline: resolve path -> extract text -> AI analysis -> parse JSON
        -> delete the material's old points -> save the new ones.

        Args:
            db: Async database session
            course_id: Course ID
            material_id: Material ID
            file_url: File URL (relative path under the upload root)
            course_title: Course title (used in the prompt)
            user_id: ID of the user recorded as creator

        Returns:
            Result dict with success, knowledge_points_count and AI metadata.

        Raises:
            ExternalServiceError: Wraps every failure (missing file, empty
                content, AI or database errors).
        """
        try:
            logger.info(
                f"开始知识点分析 V2 - course_id: {course_id}, material_id: {material_id}, "
                f"file_url: {file_url}"
            )
            # 1. Resolve the stored URL to a local path
            file_path = self._resolve_file_path(file_url)
            if not file_path.exists():
                raise FileNotFoundError(f"文件不存在: {file_path}")
            logger.info(f"文件路径解析成功: {file_path}")
            # 2. Extract the document text
            content = await self._extract_document_content(file_path)
            if not content or not content.strip():
                raise ValueError("文档内容为空")
            logger.info(f"文档内容提取成功,长度: {len(content)} 字符")
            # 3. Run the AI analysis
            ai_response = await self._call_ai_analysis(content, course_title)
            logger.info(
                f"AI 分析完成 - provider: {ai_response.provider}, "
                f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
            )
            # 4. Parse the JSON result into normalized knowledge points
            knowledge_points = self._parse_knowledge_points(ai_response.content)
            logger.info(f"知识点解析成功,数量: {len(knowledge_points)}")
            # 5. Drop this material's previous knowledge points
            await self._delete_old_knowledge_points(db, material_id)
            # 6. Save the new ones
            saved_count = await self._save_knowledge_points(
                db=db,
                course_id=course_id,
                material_id=material_id,
                knowledge_points=knowledge_points,
                user_id=user_id
            )
            logger.info(
                f"知识点分析完成 - course_id: {course_id}, material_id: {material_id}, "
                f"saved_count: {saved_count}"
            )
            return {
                "success": True,
                "status": "completed",
                "knowledge_points_count": saved_count,
                "ai_provider": ai_response.provider,
                "ai_model": ai_response.model,
                "ai_tokens": ai_response.total_tokens,
                "ai_latency_ms": ai_response.latency_ms,
            }
        except FileNotFoundError as e:
            logger.error(f"文件不存在: {e}")
            raise ExternalServiceError(f"分析文件不存在: {e}")
        except ValueError as e:
            logger.error(f"参数错误: {e}")
            raise ExternalServiceError(f"分析参数错误: {e}")
        except Exception as e:
            logger.error(
                f"知识点分析失败 - course_id: {course_id}, material_id: {material_id}, "
                f"error: {e}",
                exc_info=True
            )
            raise ExternalServiceError(f"知识点分析失败: {e}")
def _resolve_file_path(self, file_url: str) -> Path:
"""解析文件 URL 为本地路径"""
if file_url.startswith(STATIC_UPLOADS_PREFIX):
relative_path = file_url.replace(STATIC_UPLOADS_PREFIX, '')
return Path(self.upload_path) / relative_path
elif file_url.startswith('/'):
# 绝对路径
return Path(file_url)
else:
# 相对路径
return Path(self.upload_path) / file_url
async def _extract_document_content(self, file_path: Path) -> str:
"""
提取文档内容
支持PDF、Worddocx、文本文件
"""
suffix = file_path.suffix.lower()
try:
if suffix == '.pdf':
return await self._extract_pdf_content(file_path)
elif suffix in ['.docx', '.doc']:
return await self._extract_docx_content(file_path)
elif suffix in ['.txt', '.md', '.text']:
return await self._extract_text_content(file_path)
else:
# 尝试作为文本读取
return await self._extract_text_content(file_path)
except Exception as e:
logger.error(f"文档内容提取失败: {file_path}, error: {e}")
raise ValueError(f"无法读取文档内容: {e}")
async def _extract_pdf_content(self, file_path: Path) -> str:
"""提取 PDF 内容"""
try:
from PyPDF2 import PdfReader
reader = PdfReader(str(file_path))
text_parts = []
for page in reader.pages:
text = page.extract_text()
if text:
text_parts.append(text)
content = '\n'.join(text_parts)
# 清理和截断
content = self._clean_content(content)
return content
except ImportError:
logger.error("PyPDF2 未安装,无法读取 PDF")
raise ValueError("服务器未安装 PDF 读取组件")
except Exception as e:
logger.error(f"PDF 读取失败: {e}")
raise ValueError(f"PDF 读取失败: {e}")
    async def _extract_docx_content(self, file_path: Path) -> str:
        """Read a Word document: paragraph text plus table cell text.

        NOTE(review): legacy binary .doc files are routed here too, but
        python-docx only parses .docx — likely fails for .doc; confirm.

        Raises:
            ValueError: When python-docx is missing or the file cannot be read.
        """
        try:
            from docx import Document
            doc = Document(str(file_path))
            text_parts = []
            for para in doc.paragraphs:
                if para.text.strip():
                    text_parts.append(para.text)
            # Table cells carry content too, so harvest them as well
            for table in doc.tables:
                for row in table.rows:
                    for cell in row.cells:
                        if cell.text.strip():
                            text_parts.append(cell.text)
            content = '\n'.join(text_parts)
            content = self._clean_content(content)
            return content
        except ImportError:
            logger.error("python-docx 未安装,无法读取 Word 文档")
            raise ValueError("服务器未安装 Word 读取组件")
        except Exception as e:
            logger.error(f"Word 文档读取失败: {e}")
            raise ValueError(f"Word 文档读取失败: {e}")
    async def _extract_text_content(self, file_path: Path) -> str:
        """Read a plain-text file, trying several encodings in order.

        Raises:
            ValueError: When the file cannot be read.
        """
        try:
            # Try encodings from most to least specific
            encodings = ['utf-8', 'gbk', 'gb2312', 'latin-1']
            for encoding in encodings:
                try:
                    with open(file_path, 'r', encoding=encoding) as f:
                        content = f.read()
                        return self._clean_content(content)
                except UnicodeDecodeError:
                    continue
            # NOTE(review): latin-1 decodes any byte sequence, so this
            # fallback is effectively unreachable — confirm intent.
            raise ValueError("无法识别文件编码")
        except Exception as e:
            logger.error(f"文本文件读取失败: {e}")
            raise ValueError(f"文本文件读取失败: {e}")
def _clean_content(self, content: str) -> str:
"""清理和截断内容"""
# 移除多余空白
import re
content = re.sub(r'\n{3,}', '\n\n', content)
content = re.sub(r' {2,}', ' ', content)
# 截断过长内容
if len(content) > MAX_CONTENT_LENGTH:
logger.warning(f"文档内容过长,截断至 {MAX_CONTENT_LENGTH} 字符")
content = content[:MAX_CONTENT_LENGTH] + "\n\n[内容已截断...]"
return content.strip()
async def _call_ai_analysis(
self,
content: str,
course_title: str
) -> AIResponse:
"""调用 AI 进行知识点分析"""
# 构建消息
user_message = USER_PROMPT.format(
course_name=course_title,
content=content
)
messages = [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_message}
]
# 调用 AI
response = await self.ai_service.chat(
messages=messages,
temperature=0.1, # 低温度,保持输出稳定
prompt_name="knowledge_analysis"
)
return response
    def _parse_knowledge_points(self, ai_output: str) -> List[Dict[str, Any]]:
        """
        Parse the knowledge-point JSON emitted by the model.

        Cleans the raw output, parses it with the multi-layer LLM JSON
        parser (schema-validated, empty-list fallback), then normalizes
        field names — accepting both English and Chinese key spellings —
        and truncates to MAX_KNOWLEDGE_POINTS entries.

        Returns:
            List of dicts with title/content/type/topic_relation keys.
        """
        # Strip markdown fences / surrounding prose first
        cleaned_output, rules = clean_llm_output(ai_output)
        if rules:
            logger.debug(f"AI 输出已清洗: {rules}")
        # Schema-validated parse with an empty-list fallback
        knowledge_points = parse_with_fallback(
            cleaned_output,
            schema=KNOWLEDGE_POINT_SCHEMA,
            default=[],
            validate_schema=True,
            on_error="default"
        )
        # Post-process: normalize fields and enforce the size cap
        processed_points = []
        for i, kp in enumerate(knowledge_points):
            if i >= MAX_KNOWLEDGE_POINTS:
                logger.warning(f"知识点数量超过限制 {MAX_KNOWLEDGE_POINTS},截断")
                break
            if isinstance(kp, dict):
                # Accept several key spellings the model may emit
                title = (
                    kp.get('title') or
                    kp.get('name') or
                    kp.get('知识点名称') or
                    f"知识点 {i + 1}"
                )
                content = (
                    kp.get('content') or
                    kp.get('description') or
                    kp.get('知识点描述') or
                    ''
                )
                kp_type = (
                    kp.get('type') or
                    kp.get('知识点类型') or
                    DEFAULT_KNOWLEDGE_TYPE
                )
                topic_relation = (
                    kp.get('topic_relation') or
                    kp.get('关系描述') or
                    ''
                )
                # Keep only points that carry some body text
                if title and (content or topic_relation):
                    processed_points.append({
                        'title': title[:200],  # guard against oversized titles
                        'content': content,
                        'type': kp_type,
                        'topic_relation': topic_relation,
                    })
        if not processed_points:
            logger.warning("未能解析出有效的知识点")
        return processed_points
    async def _delete_old_knowledge_points(
        self,
        db: AsyncSession,
        material_id: int
    ) -> int:
        """Hard-delete the material's existing knowledge points.

        Commits immediately so the subsequent insert starts clean.
        NOTE(review): if the later save fails, the old points are already
        gone (no surrounding transaction) — confirm this is acceptable.

        Returns:
            Number of rows deleted.
        """
        try:
            from sqlalchemy import text
            result = await db.execute(
                text("DELETE FROM knowledge_points WHERE material_id = :material_id"),
                {"material_id": material_id}
            )
            await db.commit()
            deleted_count = result.rowcount
            if deleted_count > 0:
                logger.info(f"已删除旧知识点: material_id={material_id}, count={deleted_count}")
            return deleted_count
        except Exception as e:
            logger.error(f"删除旧知识点失败: {e}")
            await db.rollback()
            raise
    async def _save_knowledge_points(
        self,
        db: AsyncSession,
        course_id: int,
        material_id: int,
        knowledge_points: List[Dict[str, Any]],
        user_id: int
    ) -> int:
        """Persist parsed knowledge points via the course service.

        Failures on individual points are logged and skipped so one bad
        record does not abort the whole batch.

        Returns:
            Number of points actually saved.
        """
        from app.services.course_service import knowledge_point_service
        saved_count = 0
        for kp_data in knowledge_points:
            try:
                kp_create = KnowledgePointCreate(
                    name=kp_data['title'],
                    description=kp_data.get('content', ''),
                    type=kp_data.get('type', DEFAULT_KNOWLEDGE_TYPE),
                    source=1,  # 1 = produced by AI analysis
                    topic_relation=kp_data.get('topic_relation'),
                    material_id=material_id
                )
                await knowledge_point_service.create_knowledge_point(
                    db=db,
                    course_id=course_id,
                    point_in=kp_create,
                    created_by=user_id
                )
                saved_count += 1
            except Exception as e:
                logger.warning(
                    f"保存单个知识点失败: title={kp_data.get('title')}, error={e}"
                )
                continue
        return saved_count
    async def reanalyze_course_materials(
        self,
        db: AsyncSession,
        course_id: int,
        course_title: str,
        user_id: int
    ) -> Dict[str, Any]:
        """
        Re-run knowledge-point analysis over every material of a course.

        Per-material failures are recorded in the result instead of
        aborting the batch.

        Args:
            db: Async database session
            course_id: Course ID
            course_title: Course title (used in the prompts)
            user_id: ID of the user recorded as creator

        Returns:
            Summary dict with per-material results and aggregate counts.

        Raises:
            ExternalServiceError: When the batch itself cannot run.
        """
        try:
            from app.services.course_service import course_service
            # Fetch every material attached to the course
            materials = await course_service.get_course_materials(db, course_id=course_id)
            if not materials:
                return {
                    "success": True,
                    "message": "该课程暂无资料需要分析",
                    "materials_count": 0,
                    "knowledge_points_count": 0
                }
            total_knowledge_points = 0
            analysis_results = []
            for material in materials:
                try:
                    result = await self.analyze_course_material(
                        db=db,
                        course_id=course_id,
                        material_id=material.id,
                        file_url=material.file_url,
                        course_title=course_title,
                        user_id=user_id
                    )
                    kp_count = result.get('knowledge_points_count', 0)
                    total_knowledge_points += kp_count
                    analysis_results.append({
                        "material_id": material.id,
                        "material_name": material.name,
                        "success": True,
                        "knowledge_points_count": kp_count
                    })
                except Exception as e:
                    # Record the failure and continue with the next material
                    logger.error(
                        f"资料分析失败: material_id={material.id}, error={e}"
                    )
                    analysis_results.append({
                        "material_id": material.id,
                        "material_name": material.name,
                        "success": False,
                        "error": str(e)
                    })
            success_count = sum(1 for r in analysis_results if r['success'])
            logger.info(
                f"课程资料重新分析完成 - course_id: {course_id}, "
                f"materials: {len(materials)}, success: {success_count}, "
                f"total_knowledge_points: {total_knowledge_points}"
            )
            return {
                "success": True,
                "materials_count": len(materials),
                "success_count": success_count,
                "knowledge_points_count": total_knowledge_points,
                "analysis_results": analysis_results
            }
        except Exception as e:
            logger.error(
                f"课程资料重新分析失败 - course_id: {course_id}, error: {e}",
                exc_info=True
            )
            raise ExternalServiceError(f"重新分析失败: {e}")
# Module-level singleton instance.
knowledge_analysis_service_v2 = KnowledgeAnalysisServiceV2()

View File

@@ -0,0 +1,707 @@
"""
LLM JSON Parser - 大模型 JSON 输出解析器
功能:
- 使用 json-repair 库修复 AI 输出的 JSON
- 处理中文标点、尾部逗号、Python 风格等问题
- Schema 校验确保数据完整性
使用示例:
```python
from app.services.ai.llm_json_parser import parse_llm_json, parse_with_fallback
# 简单解析
result = parse_llm_json(ai_response)
# 带 Schema 校验和默认值
result = parse_with_fallback(
ai_response,
schema=MY_SCHEMA,
default=[]
)
```
"""
import json
import re
import logging
from typing import Any, Dict, List, Optional, Tuple, Union
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
# 尝试导入 json-repair
try:
from json_repair import loads as json_repair_loads
from json_repair import repair_json
HAS_JSON_REPAIR = True
except ImportError:
HAS_JSON_REPAIR = False
logger.warning("json-repair 未安装,将使用内置修复逻辑")
# 尝试导入 jsonschema
try:
from jsonschema import validate, ValidationError, Draft7Validator
HAS_JSONSCHEMA = True
except ImportError:
HAS_JSONSCHEMA = False
logger.warning("jsonschema 未安装,将跳过 Schema 校验")
# ==================== 异常类 ====================
class JSONParseError(Exception):
    """Base exception for failures while parsing LLM-produced JSON.

    Carries the raw model output and the list of repair actions that
    were attempted before giving up, for diagnostics.
    """

    def __init__(self, message: str, raw_text: str = "", issues: List[dict] = None):
        super().__init__(message)
        # Original model output, kept verbatim for debugging.
        self.raw_text = raw_text
        # Repair steps attempted before the failure; empty when none given.
        self.issues = [] if issues is None else issues
class JSONUnrecoverableError(JSONParseError):
    """Raised when every parse/repair strategy has been exhausted."""
    pass
# ==================== 解析结果 ====================
@dataclass
class ParseResult:
    """Outcome of a single LLM-JSON parse attempt."""
    success: bool  # True when some strategy produced usable data
    data: Any = None  # parsed JSON value (meaningful only when success)
    method: str = ""  # direct / json_repair / preprocessed / fixed / completed / default
    issues: List[dict] = field(default_factory=list)  # repair actions applied, in order
    raw_text: str = ""  # original input text, kept for diagnostics
    error: str = ""  # human-readable failure reason when success is False
# ==================== 核心解析函数 ====================
def parse_llm_json(
    text: str,
    *,
    strict: bool = False,
    return_result: bool = False
) -> Union[Any, ParseResult]:
    """
    Parse JSON from raw LLM output, applying repair layers in order.

    Layers: direct json.loads -> json-repair library -> preprocessing
    (code-fence extraction, boundary trimming) -> rule-based format fixes
    (punctuation, comments, Python literals, trailing commas) ->
    truncation completion.

    Args:
        text: Raw model output.
        strict: Fail immediately if direct parsing fails (no repairs).
        return_result: Return a ParseResult instead of the bare data.

    Returns:
        The parsed JSON value, or a ParseResult when return_result=True.

    Raises:
        JSONUnrecoverableError: When every layer fails and
            return_result is False.
    """
    if not text or not text.strip():
        if return_result:
            return ParseResult(success=False, error="Empty input")
        raise JSONUnrecoverableError("Empty input", text)
    text = text.strip()
    issues = []
    # Layer 1: plain json.loads
    try:
        data = json.loads(text)
        result = ParseResult(success=True, data=data, method="direct", raw_text=text)
        return result if return_result else data
    except json.JSONDecodeError:
        pass
    if strict:
        if return_result:
            return ParseResult(success=False, error="Strict mode: direct parse failed", raw_text=text)
        raise JSONUnrecoverableError("Strict mode: direct parse failed", text)
    # Layer 2: the json-repair library (preferred when installed)
    if HAS_JSON_REPAIR:
        try:
            data = json_repair_loads(text)
            issues.append({"type": "json_repair", "action": "Auto-repaired by json-repair library"})
            result = ParseResult(success=True, data=data, method="json_repair", issues=issues, raw_text=text)
            return result if return_result else data
        except Exception as e:
            logger.debug(f"json-repair 修复失败: {e}")
    # Layer 3: preprocessing (extract code fences, trim to JSON boundaries)
    preprocessed = _preprocess_text(text)
    if preprocessed != text:
        try:
            data = json.loads(preprocessed)
            issues.append({"type": "preprocessed", "action": "Extracted JSON from text"})
            result = ParseResult(success=True, data=data, method="preprocessed", issues=issues, raw_text=text)
            return result if return_result else data
        except json.JSONDecodeError:
            pass
        # Give json-repair another chance on the preprocessed text
        if HAS_JSON_REPAIR:
            try:
                data = json_repair_loads(preprocessed)
                issues.append({"type": "json_repair_preprocessed", "action": "Repaired after preprocessing"})
                result = ParseResult(success=True, data=data, method="json_repair", issues=issues, raw_text=text)
                return result if return_result else data
            except Exception:
                pass
    # Layer 4: rule-based format fixes
    fixed, fix_issues = _fix_json_format(preprocessed)
    issues.extend(fix_issues)
    if fixed != preprocessed:
        try:
            data = json.loads(fixed)
            result = ParseResult(success=True, data=data, method="fixed", issues=issues, raw_text=text)
            return result if return_result else data
        except json.JSONDecodeError:
            pass
    # Layer 5: try to complete a truncated JSON document
    completed = _try_complete_json(fixed)
    if completed:
        try:
            data = json.loads(completed)
            issues.append({"type": "completed", "action": "Auto-completed truncated JSON"})
            result = ParseResult(success=True, data=data, method="completed", issues=issues, raw_text=text)
            return result if return_result else data
        except json.JSONDecodeError:
            pass
    # Every layer failed: diagnose and report
    diagnosis = diagnose_json_error(fixed)
    if return_result:
        return ParseResult(
            success=False,
            method="failed",
            issues=issues + diagnosis.get("issues", []),
            raw_text=text,
            error=f"All parse attempts failed. Issues: {diagnosis}"
        )
    raise JSONUnrecoverableError(f"All parse attempts failed: {diagnosis}", text, issues)
def parse_with_fallback(
    raw_text: str,
    schema: dict = None,
    default: Any = None,
    *,
    validate_schema: bool = True,
    on_error: str = "default"  # "default" / "raise" / "none"
) -> Any:
    """
    Parse LLM JSON with a guaranteed fallback value.

    Wraps parse_llm_json and optionally validates the result against a
    JSON Schema. Validation is skipped silently when the jsonschema
    package is not installed (a warning is logged once at import time).

    Args:
        raw_text: Raw model output.
        schema: Optional JSON Schema for validation.
        default: Value returned on failure when on_error="default".
        validate_schema: Whether to run Schema validation at all.
        on_error: "default" returns ``default``; "raise" re-raises;
            "none" returns None.

    Returns:
        Parsed data, ``default``, or None depending on on_error.
    """
    try:
        result = parse_llm_json(raw_text, return_result=True)
        if not result.success:
            logger.warning(f"JSON 解析失败: {result.error}")
            if on_error == "raise":
                raise JSONUnrecoverableError(result.error, raw_text, result.issues)
            elif on_error == "none":
                return None
            return default
        data = result.data
        # Optional Schema validation
        if validate_schema and schema and HAS_JSONSCHEMA:
            is_valid, errors = validate_json_schema(data, schema)
            if not is_valid:
                logger.warning(f"Schema 校验失败: {errors}")
                if on_error == "raise":
                    raise JSONUnrecoverableError(f"Schema validation failed: {errors}", raw_text)
                elif on_error == "none":
                    return None
                return default
        # Surface non-trivial repair paths in the logs
        if result.method != "direct":
            logger.info(f"JSON 解析成功: method={result.method}, issues={result.issues}")
        return data
    except Exception as e:
        logger.error(f"JSON 解析异常: {e}")
        if on_error == "raise":
            raise
        elif on_error == "none":
            return None
        return default
# ==================== 预处理函数 ====================
def _preprocess_text(text: str) -> str:
    """Prepare raw model output for parsing.

    Strips BOM/zero-width characters, unwraps the first markdown code
    fence whose payload looks like JSON, then trims to the JSON value's
    boundaries.
    """
    # Drop the BOM and invisible zero-width characters.
    text = text.lstrip('\ufeff')
    text = re.sub(r'[\u200b\u200c\u200d\ufeff]', '', text)
    # Unwrap a fenced ```json / ``` / `...` block when it contains JSON.
    for fence in (
        r'```json\s*([\s\S]*?)\s*```',
        r'```\s*([\s\S]*?)\s*```',
        r'`([^`]+)`',
    ):
        found = re.search(fence, text, re.IGNORECASE)
        if found:
            candidate = found.group(1).strip()
            if candidate.startswith(('{', '[')):
                text = candidate
                break
    # Finally trim to the balanced JSON span.
    return _find_json_boundaries(text).strip()
def _find_json_boundaries(text: str) -> str:
"""找到 JSON 的起止位置"""
# 找第一个 { 或 [
start = -1
for i, c in enumerate(text):
if c in '{[':
start = i
break
if start == -1:
return text
# 找最后一个匹配的 } 或 ]
depth = 0
end = -1
in_string = False
escape = False
for i in range(start, len(text)):
c = text[i]
if escape:
escape = False
continue
if c == '\\':
escape = True
continue
if c == '"':
in_string = not in_string
continue
if in_string:
continue
if c in '{[':
depth += 1
elif c in '}]':
depth -= 1
if depth == 0:
end = i + 1
break
if end == -1:
# 找最后一个 } 或 ]
for i in range(len(text) - 1, start, -1):
if text[i] in '}]':
end = i + 1
break
if end > start:
return text[start:end]
return text[start:]
# ==================== 修复函数 ====================
def _fix_json_format(text: str) -> Tuple[str, List[dict]]:
"""修复常见 JSON 格式问题"""
issues = []
# 1. 中文标点转英文
cn_punctuation = {
'': ',', '': '.', '': ':', '': ';',
'"': '"', '"': '"', ''': "'", ''': "'",
'': '[', '': ']', '': '(', '': ')',
'': '{', '': '}',
}
for cn, en in cn_punctuation.items():
if cn in text:
text = text.replace(cn, en)
issues.append({"type": "chinese_punctuation", "from": cn, "to": en})
# 2. 移除注释
if '//' in text:
text = re.sub(r'//[^\n]*', '', text)
issues.append({"type": "removed_comments", "style": "single-line"})
if '/*' in text:
text = re.sub(r'/\*[\s\S]*?\*/', '', text)
issues.append({"type": "removed_comments", "style": "multi-line"})
# 3. Python 风格转 JSON
python_replacements = [
(r'\bTrue\b', 'true'),
(r'\bFalse\b', 'false'),
(r'\bNone\b', 'null'),
]
for pattern, replacement in python_replacements:
if re.search(pattern, text):
text = re.sub(pattern, replacement, text)
issues.append({"type": "python_style", "from": pattern, "to": replacement})
# 4. 移除尾部逗号
trailing_comma_patterns = [
(r',(\s*})', r'\1'),
(r',(\s*\])', r'\1'),
]
for pattern, replacement in trailing_comma_patterns:
if re.search(pattern, text):
text = re.sub(pattern, replacement, text)
issues.append({"type": "trailing_comma", "action": "removed"})
# 5. 修复单引号(谨慎处理)
if text.count("'") > text.count('"') and re.match(r"^\s*\{?\s*'", text):
text = re.sub(r"'([^']*)'(\s*:)", r'"\1"\2', text)
text = re.sub(r":\s*'([^']*)'", r': "\1"', text)
issues.append({"type": "single_quotes", "action": "replaced"})
return text, issues
def _try_complete_json(text: str) -> Optional[str]:
"""尝试补全截断的 JSON"""
if not text:
return None
# 统计括号
stack = []
in_string = False
escape = False
for c in text:
if escape:
escape = False
continue
if c == '\\':
escape = True
continue
if c == '"':
in_string = not in_string
continue
if in_string:
continue
if c in '{[':
stack.append(c)
elif c == '}':
if stack and stack[-1] == '{':
stack.pop()
elif c == ']':
if stack and stack[-1] == '[':
stack.pop()
if not stack:
return None # 已经平衡了
# 如果在字符串中,先闭合字符串
if in_string:
text += '"'
# 补全括号
completion = ""
for bracket in reversed(stack):
if bracket == '{':
completion += '}'
elif bracket == '[':
completion += ']'
return text + completion
# ==================== Schema validation ====================
def validate_json_schema(data: Any, schema: dict) -> Tuple[bool, List[dict]]:
    """Validate *data* against a JSON Schema (Draft 7).

    Returns (is_valid, errors). When the optional jsonschema dependency is
    missing, validation is skipped and the data is treated as valid.
    """
    if not HAS_JSONSCHEMA:
        logger.warning("jsonschema 未安装,跳过校验")
        return True, []
    try:
        found = list(Draft7Validator(schema).iter_errors(data))
    except Exception as exc:
        return False, [{"message": str(exc)}]
    if not found:
        return True, []
    return False, [
        {
            "path": list(err.absolute_path),
            "message": err.message,
            "validator": err.validator
        }
        for err in found
    ]
# ==================== 诊断函数 ====================
def diagnose_json_error(text: str) -> dict:
"""诊断 JSON 错误"""
issues = []
# 检查是否为空
if not text or not text.strip():
issues.append({
"type": "empty_input",
"severity": "critical",
"suggestion": "输入为空"
})
return {"issues": issues, "fixable": False}
# 检查中文标点
cn_punctuation = ['', '', '', '', '"', '"', ''', ''']
for p in cn_punctuation:
if p in text:
issues.append({
"type": "chinese_punctuation",
"char": p,
"severity": "low",
"suggestion": f"{p} 替换为对应英文标点"
})
# 检查代码块包裹
if '```' in text:
issues.append({
"type": "markdown_wrapped",
"severity": "low",
"suggestion": "需要提取代码块内容"
})
# 检查注释
if '//' in text or '/*' in text:
issues.append({
"type": "has_comments",
"severity": "low",
"suggestion": "需要移除注释"
})
# 检查 Python 风格
if re.search(r'\b(True|False|None)\b', text):
issues.append({
"type": "python_style",
"severity": "low",
"suggestion": "将 True/False/None 转为 true/false/null"
})
# 检查尾部逗号
if re.search(r',\s*[}\]]', text):
issues.append({
"type": "trailing_comma",
"severity": "low",
"suggestion": "移除 } 或 ] 前的逗号"
})
# 检查括号平衡
open_braces = text.count('{') - text.count('}')
open_brackets = text.count('[') - text.count(']')
if open_braces > 0:
issues.append({
"type": "unclosed_brace",
"count": open_braces,
"severity": "medium",
"suggestion": f"缺少 {open_braces}}}"
})
elif open_braces < 0:
issues.append({
"type": "extra_brace",
"count": -open_braces,
"severity": "medium",
"suggestion": f"多余 {-open_braces}}}"
})
if open_brackets > 0:
issues.append({
"type": "unclosed_bracket",
"count": open_brackets,
"severity": "medium",
"suggestion": f"缺少 {open_brackets} 个 ]"
})
elif open_brackets < 0:
issues.append({
"type": "extra_bracket",
"count": -open_brackets,
"severity": "medium",
"suggestion": f"多余 {-open_brackets} 个 ]"
})
# 检查引号平衡
quote_count = text.count('"')
if quote_count % 2 != 0:
issues.append({
"type": "unbalanced_quotes",
"severity": "high",
"suggestion": "引号数量不平衡,可能有未闭合的字符串"
})
# 判断是否可修复
fixable_types = {
"chinese_punctuation", "markdown_wrapped", "has_comments",
"python_style", "trailing_comma", "unclosed_brace", "unclosed_bracket"
}
fixable = all(i["type"] in fixable_types for i in issues)
return {
"issues": issues,
"issue_count": len(issues),
"fixable": fixable,
"severity": max(
(i.get("severity", "low") for i in issues),
key=lambda x: {"low": 1, "medium": 2, "high": 3, "critical": 4}.get(x, 0),
default="low"
)
}
# ==================== Convenience wrappers ====================
def safe_json_loads(text: str, default: Any = None) -> Any:
    """json.loads replacement that never raises: returns *default* on any failure."""
    try:
        result = parse_llm_json(text)
    except Exception:
        return default
    return result
def extract_json_from_text(text: str) -> Optional[str]:
    """Extract a parseable JSON string from arbitrary text.

    Preprocesses and repairs the text, then — if it still does not parse —
    tries completing a truncated fragment. Returns None when no variant
    parses.
    """
    candidate, _ = _fix_json_format(_preprocess_text(text))
    for attempt in (candidate, _try_complete_json(candidate)):
        if not attempt:
            continue
        try:
            json.loads(attempt)
        except Exception:
            continue
        return attempt
    return None
def clean_llm_output(text: str) -> Tuple[str, List[str]]:
    """Normalize raw LLM output before JSON parsing.

    Strips BOM / ANSI / zero-width characters, removes a leading pleasantry
    ("好的," etc.) and unwraps Markdown ```json fences.

    Args:
        text: raw model output.

    Returns:
        (cleaned_text, applied_rules) -- *applied_rules* names every rule
        that actually fired, for logging/diagnostics.
    """
    if not text:
        return "", ["empty_input"]
    applied_rules = []
    # 1. Byte-order mark.
    if text.startswith('\ufeff'):
        text = text.lstrip('\ufeff')
        applied_rules.append("removed_bom")
    # 2. ANSI colour/escape sequences.
    ansi_pattern = re.compile(r'\x1b\[[0-9;]*m')
    if ansi_pattern.search(text):
        text = ansi_pattern.sub('', text)
        applied_rules.append("removed_ansi")
    # 3. Surrounding whitespace.
    text = text.strip()
    # 4. Leading pleasantries ("OK, here is ...").
    #    NOTE(review): the character classes had lost their full-width CJK
    #    punctuation (,/:) to an encoding mishap, so Chinese pleasantries
    #    were no longer matched; restored alongside the ASCII variants.
    polite_patterns = [
        r'^好的[,,。.]?\s*',
        r'^当然[,,。.]?\s*',
        r'^没问题[,,。.]?\s*',
        r'^根据您的要求[,,。.]?\s*',
        r'^以下是.*?[::]\s*',
        r'^分析结果如下[::]\s*',
        r'^我来为您.*?[::]\s*',
        r'^这是.*?结果[::]\s*',
    ]
    for pattern in polite_patterns:
        if re.match(pattern, text, re.IGNORECASE):
            text = re.sub(pattern, '', text, flags=re.IGNORECASE)
            applied_rules.append("removed_polite_prefix")
            break  # only the first matching pleasantry is stripped
    # 5. Markdown code fences (only when the payload looks like JSON).
    json_block_patterns = [
        r'```json\s*([\s\S]*?)\s*```',
        r'```\s*([\s\S]*?)\s*```',
    ]
    for pattern in json_block_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            extracted = match.group(1).strip()
            if extracted.startswith(('{', '[')):
                text = extracted
                applied_rules.append("extracted_code_block")
                break
    # 6. Zero-width characters.
    zero_width = re.compile(r'[\u200b\u200c\u200d\ufeff]')
    if zero_width.search(text):
        text = zero_width.sub('', text)
        applied_rules.append("removed_zero_width")
    return text.strip(), applied_rules

View File

@@ -0,0 +1,377 @@
"""
陪练分析报告服务 - Python 原生实现
功能:
- 分析陪练对话历史
- 生成综合评分、能力维度评估
- 提供对话标注和改进建议
提供稳定可靠的陪练分析报告生成能力。
"""
import json
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from .ai_service import AIService, AIResponse
from .llm_json_parser import parse_with_fallback, clean_llm_output
from .prompts.practice_analysis_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
PRACTICE_ANALYSIS_SCHEMA,
SCORE_BREAKDOWN_ITEMS,
ABILITY_DIMENSIONS,
)
logger = logging.getLogger(__name__)
# ==================== Data structures ====================
@dataclass
class ScoreBreakdownItem:
    """One scored sub-category of the overall practice score."""
    name: str  # category name (rubric item)
    score: float  # score for this category; prompts use a 0-100 scale
    description: str  # reviewer-style explanation of the score
@dataclass
class AbilityDimensionItem:
    """Score and feedback for a single ability dimension."""
    name: str  # dimension name (e.g. communication skill)
    score: float  # score for this dimension
    feedback: str  # qualitative feedback text
@dataclass
class DialogueAnnotation:
    """Reviewer annotation attached to one dialogue turn."""
    sequence: int  # index of the annotated turn
    tags: List[str]  # short labels applied to the turn
    comment: str  # free-form reviewer comment
@dataclass
class Suggestion:
    """A concrete improvement suggestion."""
    title: str  # short headline of the suggestion
    content: str  # the advice itself
    example: str  # sample phrasing / practice plan illustrating the advice
@dataclass
class PracticeAnalysisResult:
    """Full outcome of a practice-session analysis, plus AI call metadata."""
    success: bool
    total_score: float = 0.0
    score_breakdown: List[ScoreBreakdownItem] = field(default_factory=list)
    ability_dimensions: List[AbilityDimensionItem] = field(default_factory=list)
    dialogue_annotations: List[DialogueAnnotation] = field(default_factory=list)
    suggestions: List[Suggestion] = field(default_factory=list)
    ai_provider: str = ""
    ai_model: str = ""
    ai_tokens: int = 0
    ai_latency_ms: int = 0
    error: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to the legacy API payload (analysis block + AI metadata)."""
        analysis: Dict[str, Any] = {"total_score": self.total_score}
        analysis["score_breakdown"] = [
            {"name": item.name, "score": item.score, "description": item.description}
            for item in self.score_breakdown
        ]
        analysis["ability_dimensions"] = [
            {"name": item.name, "score": item.score, "feedback": item.feedback}
            for item in self.ability_dimensions
        ]
        analysis["dialogue_annotations"] = [
            {"sequence": item.sequence, "tags": item.tags, "comment": item.comment}
            for item in self.dialogue_annotations
        ]
        analysis["suggestions"] = [
            {"title": item.title, "content": item.content, "example": item.example}
            for item in self.suggestions
        ]
        return {
            "analysis": analysis,
            "ai_provider": self.ai_provider,
            "ai_model": self.ai_model,
            "ai_tokens": self.ai_tokens,
            "ai_latency_ms": self.ai_latency_ms,
        }

    def to_db_format(self) -> Dict[str, Any]:
        """Serialize for PracticeReport persistence.

        Note: annotations are stored under "dialogue_review" (not
        "dialogue_annotations") and the total score is truncated to int.
        """
        return {
            "total_score": int(self.total_score),
            "score_breakdown": [
                {"name": item.name, "score": item.score, "description": item.description}
                for item in self.score_breakdown
            ],
            "ability_dimensions": [
                {"name": item.name, "score": item.score, "feedback": item.feedback}
                for item in self.ability_dimensions
            ],
            "dialogue_review": [
                {"sequence": item.sequence, "tags": item.tags, "comment": item.comment}
                for item in self.dialogue_annotations
            ],
            "suggestions": [
                {"title": item.title, "content": item.content, "example": item.example}
                for item in self.suggestions
            ],
        }
# ==================== Service ====================
class PracticeAnalysisService:
    """
    Practice-session analysis service (native Python implementation).

    Turns a coaching-dialogue transcript into a structured report: overall
    score, score breakdown, ability dimensions, per-turn annotations and
    improvement suggestions.

    Example:
        ```python
        service = PracticeAnalysisService()
        result = await service.analyze(
            db=db_session,  # enables AI call logging
            dialogue_history=[
                {"speaker": "user", "content": "..."},
                {"speaker": "ai", "content": "..."}
            ]
        )
        print(result.total_score)
        print(result.suggestions)
        ```
    """

    MODULE_CODE = "practice_analysis"

    async def analyze(
        self,
        dialogue_history: List[Dict[str, Any]],
        db: Any = None  # DB session, used only to log the AI call
    ) -> PracticeAnalysisResult:
        """
        Analyse a practice dialogue.

        Args:
            dialogue_history: dialogue turns; each item carries at least
                `speaker` and `content` (other keys are ignored here).
            db: optional DB session so the AI call is logged, per the AI
                integration guidelines.

        Returns:
            PracticeAnalysisResult; `success=False` with `error` set on failure.
        """
        try:
            logger.info(f"开始分析陪练对话 - 对话轮次: {len(dialogue_history)}")
            # 1. Validate input: a report needs at least two turns.
            if not dialogue_history or len(dialogue_history) < 2:
                return PracticeAnalysisResult(
                    success=False,
                    error="对话记录太少无法生成分析报告至少需要2轮对话"
                )
            # 2. Flatten the transcript into numbered prompt text.
            dialogue_text = self._format_dialogue_history(dialogue_history)
            # 3. Build a per-call AIService bound to this request's db
            #    session. Kept as a local (previously stored on `self`) so
            #    concurrent analyze() calls on the shared module singleton
            #    cannot race on instance state.
            ai_service = AIService(module_code=self.MODULE_CODE, db_session=db)
            # 4. Run the AI analysis.
            ai_response = await self._call_ai_analysis(ai_service, dialogue_text)
            logger.info(
                f"AI 分析完成 - provider: {ai_response.provider}, "
                f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
            )
            # 5. Parse the JSON payload with the multi-layer fallback parser.
            analysis_data = self._parse_analysis_result(ai_response.content)
            # 6. Assemble the result object.
            result = PracticeAnalysisResult(
                success=True,
                total_score=analysis_data.get("total_score", 0),
                score_breakdown=[
                    ScoreBreakdownItem(
                        name=s.get("name", ""),
                        score=s.get("score", 0),
                        description=s.get("description", "")
                    )
                    for s in analysis_data.get("score_breakdown", [])
                ],
                ability_dimensions=[
                    AbilityDimensionItem(
                        name=d.get("name", ""),
                        score=d.get("score", 0),
                        feedback=d.get("feedback", "")
                    )
                    for d in analysis_data.get("ability_dimensions", [])
                ],
                dialogue_annotations=[
                    DialogueAnnotation(
                        sequence=a.get("sequence", 0),
                        tags=a.get("tags", []),
                        comment=a.get("comment", "")
                    )
                    for a in analysis_data.get("dialogue_annotations", [])
                ],
                suggestions=[
                    Suggestion(
                        title=s.get("title", ""),
                        content=s.get("content", ""),
                        example=s.get("example", "")
                    )
                    for s in analysis_data.get("suggestions", [])
                ],
                ai_provider=ai_response.provider,
                ai_model=ai_response.model,
                ai_tokens=ai_response.total_tokens,
                ai_latency_ms=ai_response.latency_ms,
            )
            logger.info(
                f"陪练分析完成 - total_score: {result.total_score}, "
                f"annotations: {len(result.dialogue_annotations)}, "
                f"suggestions: {len(result.suggestions)}"
            )
            return result
        except Exception as e:
            logger.error(f"陪练分析失败: {e}", exc_info=True)
            return PracticeAnalysisResult(
                success=False,
                error=str(e)
            )

    def _format_dialogue_history(self, dialogue_history: List[Dict[str, Any]]) -> str:
        """
        Render the dialogue turns as numbered "[n] role: content" lines.

        Speaker identifiers are normalized to the two roles the prompt
        expects: 员工 (employee) and 顾客 (customer); unknown aliases pass
        through unchanged.
        """
        lines = []
        for i, d in enumerate(dialogue_history, 1):
            speaker = d.get('speaker', 'unknown')
            content = d.get('content', '')
            # Map the many possible speaker aliases onto the two roles.
            if speaker in ['user', 'employee', 'consultant', '员工', '用户']:
                speaker_label = '员工'
            elif speaker in ['ai', 'customer', 'client', '顾客', '客户', 'AI']:
                speaker_label = '顾客'
            else:
                speaker_label = speaker
            lines.append(f"[{i}] {speaker_label}: {content}")
        return '\n'.join(lines)

    async def _call_ai_analysis(self, ai_service: AIService, dialogue_text: str) -> AIResponse:
        """Send the transcript to the AI and return the raw response."""
        user_message = USER_PROMPT.format(dialogue_history=dialogue_text)
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_message}
        ]
        response = await ai_service.chat(
            messages=messages,
            temperature=0.7,
            prompt_name="practice_analysis"
        )
        return response

    def _parse_analysis_result(self, ai_output: str) -> Dict[str, Any]:
        """
        Parse the AI's JSON report with the multi-layer fallback parser,
        then backfill missing score/ability dimensions and suggestions with
        neutral defaults so downstream consumers always get a complete set.
        """
        # Clean the raw output first (fences, BOM, pleasantries, ...).
        cleaned_output, rules = clean_llm_output(ai_output)
        if rules:
            logger.debug(f"AI 输出已清洗: {rules}")
        # Schema-validated parse; falls back to an empty report on failure.
        parsed = parse_with_fallback(
            cleaned_output,
            schema=PRACTICE_ANALYSIS_SCHEMA,
            default={"analysis": {}},
            validate_schema=True,
            on_error="default"
        )
        analysis = parsed.get("analysis", {})
        # Backfill missing score-breakdown items with a neutral default.
        existing_breakdown = {s.get("name") for s in analysis.get("score_breakdown", [])}
        for item_name in SCORE_BREAKDOWN_ITEMS:
            if item_name not in existing_breakdown:
                logger.warning(f"缺少分数维度: {item_name},使用默认值")
                analysis.setdefault("score_breakdown", []).append({
                    "name": item_name,
                    "score": 75,
                    "description": "暂无详细评价"
                })
        # Backfill missing ability dimensions likewise.
        existing_dims = {d.get("name") for d in analysis.get("ability_dimensions", [])}
        for dim_name in ABILITY_DIMENSIONS:
            if dim_name not in existing_dims:
                logger.warning(f"缺少能力维度: {dim_name},使用默认值")
                analysis.setdefault("ability_dimensions", []).append({
                    "name": dim_name,
                    "score": 75,
                    "feedback": "暂无详细评价"
                })
        # Always return at least one suggestion.
        if not analysis.get("suggestions"):
            analysis["suggestions"] = [
                {
                    "title": "持续练习",
                    "content": "建议继续进行陪练练习,提升整体表现",
                    "example": "每周进行2-3次陪练针对薄弱环节重点练习"
                }
            ]
        return analysis
# ==================== Module-level singleton ====================
practice_analysis_service = PracticeAnalysisService()  # shared instance used by analyze_practice_session()
# ==================== Convenience function ====================
async def analyze_practice_session(
    dialogue_history: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Analyse a practice session via the shared service singleton.

    Args:
        dialogue_history: dialogue turns (speaker/content dicts).

    Returns:
        The legacy-format result dict (see PracticeAnalysisResult.to_dict).
    """
    outcome = await practice_analysis_service.analyze(dialogue_history)
    return outcome.to_dict()

View File

@@ -0,0 +1,379 @@
"""
陪练场景准备服务 - Python 原生实现
功能:
- 根据课程ID获取知识点
- 调用 AI 生成陪练场景配置
- 解析并返回结构化场景数据
提供稳定可靠的陪练场景提取能力。
"""
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.exceptions import ExternalServiceError
from .ai_service import AIService, AIResponse
from .llm_json_parser import parse_with_fallback, clean_llm_output
from .prompts.practice_scene_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
PRACTICE_SCENE_SCHEMA,
DEFAULT_SCENE_TYPE,
DEFAULT_DIFFICULTY,
)
logger = logging.getLogger(__name__)
# ==================== Data structures ====================
@dataclass
class PracticeScene:
    """A generated practice (role-play) scene configuration."""
    name: str  # scene title
    description: str  # short scene summary
    background: str  # situational backstory for the scene
    ai_role: str  # the role the AI plays (e.g. the customer)
    objectives: List[str]  # training objectives for the trainee
    keywords: List[str]  # key phrases/topics the scene focuses on
    type: str = DEFAULT_SCENE_TYPE  # scene category; default from the prompts module
    difficulty: str = DEFAULT_DIFFICULTY  # difficulty label; default from the prompts module
@dataclass
class PracticeSceneResult:
    """Outcome of practice-scene generation, including AI call metadata."""
    success: bool  # False when fetching, generation or parsing failed
    scene: Optional[PracticeScene] = None  # parsed scene (None on failure)
    raw_response: Dict[str, Any] = field(default_factory=dict)  # raw parsed AI payload
    ai_provider: str = ""  # provider that served the request
    ai_model: str = ""  # model used
    ai_tokens: int = 0  # total tokens consumed
    ai_latency_ms: int = 0  # AI call latency in milliseconds
    knowledge_points_count: int = 0  # number of knowledge points fed to the prompt
    error: str = ""  # error description when success is False
# ==================== Service ====================
class PracticeSceneService:
    """
    Practice-scene preparation service (native Python implementation).

    Fetches a course's knowledge points, asks the AI to generate a
    role-play scene configuration, and returns it as structured data.

    Example:
        ```python
        service = PracticeSceneService()
        result = await service.prepare_practice_knowledge(
            db=db_session,
            course_id=1
        )
        if result.success:
            print(result.scene.name)
            print(result.scene.objectives)
        ```
    """

    def __init__(self):
        """Initialize the service.

        The instance-level AIService is kept for backward compatibility;
        prepare_practice_knowledge() now builds a per-call AIService bound
        to the request's db session so AI calls get logged (consistent with
        PracticeAnalysisService).
        """
        self.ai_service = AIService(module_code="practice_scene")

    async def prepare_practice_knowledge(
        self,
        db: AsyncSession,
        course_id: int
    ) -> PracticeSceneResult:
        """
        Prepare the knowledge content for a practice session and generate a scene.

        Args:
            db: database session (multi-tenant: the caller passes the
                tenant-specific connection).
            course_id: course ID.

        Returns:
            PracticeSceneResult with the scene config and AI call metadata.
        """
        try:
            logger.info(f"开始陪练知识准备 - course_id: {course_id}")
            # 1. Load the course's knowledge points.
            knowledge_points = await self._fetch_knowledge_points(db, course_id)
            if not knowledge_points:
                logger.warning(f"课程没有知识点 - course_id: {course_id}")
                return PracticeSceneResult(
                    success=False,
                    error=f"课程 {course_id} 没有可用的知识点"
                )
            logger.info(f"获取到 {len(knowledge_points)} 个知识点 - course_id: {course_id}")
            # 2. Format the knowledge points as prompt text.
            knowledge_text = self._format_knowledge_points(knowledge_points)
            # 3. Generate the scene via a per-call AIService bound to this
            #    request's db session so the call gets logged.
            ai_service = AIService(module_code="practice_scene", db_session=db)
            ai_response = await self._call_ai_generation(knowledge_text, ai_service)
            logger.info(
                f"AI 生成完成 - provider: {ai_response.provider}, "
                f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
            )
            # 4. Parse the JSON result.
            scene_data = self._parse_scene_response(ai_response.content)
            if not scene_data:
                logger.error(f"场景解析失败 - course_id: {course_id}")
                return PracticeSceneResult(
                    success=False,
                    raw_response={"ai_output": ai_response.content},
                    ai_provider=ai_response.provider,
                    ai_model=ai_response.model,
                    ai_tokens=ai_response.total_tokens,
                    ai_latency_ms=ai_response.latency_ms,
                    knowledge_points_count=len(knowledge_points),
                    error="AI 输出解析失败"
                )
            # 5. Build the scene object.
            scene = self._build_scene_object(scene_data)
            logger.info(
                f"陪练场景生成成功 - course_id: {course_id}, "
                f"scene_name: {scene.name}, type: {scene.type}"
            )
            return PracticeSceneResult(
                success=True,
                scene=scene,
                raw_response=scene_data,
                ai_provider=ai_response.provider,
                ai_model=ai_response.model,
                ai_tokens=ai_response.total_tokens,
                ai_latency_ms=ai_response.latency_ms,
                knowledge_points_count=len(knowledge_points)
            )
        except Exception as e:
            logger.error(
                f"陪练知识准备失败 - course_id: {course_id}, error: {e}",
                exc_info=True
            )
            return PracticeSceneResult(
                success=False,
                error=str(e)
            )

    async def _fetch_knowledge_points(
        self,
        db: AsyncSession,
        course_id: int
    ) -> List[Dict[str, Any]]:
        """
        Load the course's (non-deleted) knowledge points, joined to their
        non-deleted source materials.

        Raises:
            ExternalServiceError: when the database query fails.
        """
        sql = text("""
            SELECT kp.name, kp.description
            FROM knowledge_points kp
            INNER JOIN course_materials cm ON kp.material_id = cm.id
            WHERE kp.course_id = :course_id
            AND kp.is_deleted = 0
            AND cm.is_deleted = 0
            ORDER BY kp.id
        """)
        try:
            result = await db.execute(sql, {"course_id": course_id})
            rows = result.fetchall()
            knowledge_points = []
            for row in rows:
                knowledge_points.append({
                    "name": row[0],
                    "description": row[1] or ""
                })
            return knowledge_points
        except Exception as e:
            logger.error(f"查询知识点失败: {e}")
            raise ExternalServiceError(f"数据库查询失败: {e}")

    def _format_knowledge_points(self, knowledge_points: List[Dict[str, Any]]) -> str:
        """
        Render the knowledge points as a numbered, blank-line-separated list.

        Args:
            knowledge_points: list of {"name", "description"} dicts.

        Returns:
            The formatted text fed to the prompt.
        """
        lines = []
        for i, kp in enumerate(knowledge_points, 1):
            name = kp.get("name", "")
            description = kp.get("description", "")
            if description:
                lines.append(f"{i}. {name}\n   {description}")
            else:
                lines.append(f"{i}. {name}")
        return "\n\n".join(lines)

    async def _call_ai_generation(
        self,
        knowledge_text: str,
        ai_service: Optional[AIService] = None
    ) -> AIResponse:
        """
        Call the AI to generate the practice scene.

        Args:
            knowledge_text: formatted knowledge-point text.
            ai_service: optional per-request AIService (with call logging);
                falls back to the instance-level one when omitted.

        Returns:
            The AI response object.
        """
        client = ai_service or self.ai_service
        user_message = USER_PROMPT.format(knowledge_points=knowledge_text)
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_message}
        ]
        # AIService handles the provider fallback chain (4sapi -> OpenRouter).
        response = await client.chat(
            messages=messages,
            temperature=0.7,  # moderate creativity
            prompt_name="practice_scene_generation"
        )
        return response

    def _parse_scene_response(self, ai_output: str) -> Optional[Dict[str, Any]]:
        """
        Parse the AI's scene JSON with the multi-layer fallback parser.

        Args:
            ai_output: raw AI output.

        Returns:
            The parsed dict, or None on failure.
        """
        cleaned_output, rules = clean_llm_output(ai_output)
        if rules:
            logger.debug(f"AI 输出已清洗: {rules}")
        result = parse_with_fallback(
            cleaned_output,
            schema=PRACTICE_SCENE_SCHEMA,
            default=None,
            validate_schema=True,
            on_error="none"
        )
        return result

    def _build_scene_object(self, scene_data: Dict[str, Any]) -> PracticeScene:
        """
        Build a PracticeScene from the parsed dict.

        Accepts both {"scene": {...}} and a bare scene dict; missing fields
        fall back to neutral defaults.
        """
        scene = scene_data.get("scene", scene_data)
        return PracticeScene(
            name=scene.get("name", "陪练场景"),
            description=scene.get("description", ""),
            background=scene.get("background", ""),
            ai_role=scene.get("ai_role", "AI扮演客户"),
            objectives=scene.get("objectives", []),
            keywords=scene.get("keywords", []),
            type=scene.get("type", DEFAULT_SCENE_TYPE),
            difficulty=scene.get("difficulty", DEFAULT_DIFFICULTY)
        )

    def scene_to_dict(self, scene: PracticeScene) -> Dict[str, Any]:
        """
        Serialize a PracticeScene for API responses ({"scene": {...}} shape).
        """
        return {
            "scene": {
                "name": scene.name,
                "description": scene.description,
                "background": scene.background,
                "ai_role": scene.ai_role,
                "objectives": scene.objectives,
                "keywords": scene.keywords,
                "type": scene.type,
                "difficulty": scene.difficulty
            }
        }
# ==================== Module-level singleton ====================
practice_scene_service = PracticeSceneService()  # shared instance used by prepare_practice_knowledge()
# ==================== Convenience function ====================
async def prepare_practice_knowledge(db: AsyncSession, course_id: int) -> PracticeSceneResult:
    """Generate a practice scene for *course_id* via the shared singleton.

    Args:
        db: database session.
        course_id: course ID.

    Returns:
        PracticeSceneResult (see PracticeSceneService.prepare_practice_knowledge).
    """
    return await practice_scene_service.prepare_practice_knowledge(db, course_id)

View File

@@ -0,0 +1,57 @@
"""
提示词模板模块
遵循瑞小美提示词规范
"""
from .knowledge_analysis_prompts import (
PROMPT_META as KNOWLEDGE_ANALYSIS_PROMPT_META,
SYSTEM_PROMPT as KNOWLEDGE_ANALYSIS_SYSTEM_PROMPT,
USER_PROMPT as KNOWLEDGE_ANALYSIS_USER_PROMPT,
KNOWLEDGE_POINT_SCHEMA,
)
from .exam_generator_prompts import (
PROMPT_META as EXAM_GENERATOR_PROMPT_META,
SYSTEM_PROMPT as EXAM_GENERATOR_SYSTEM_PROMPT,
USER_PROMPT as EXAM_GENERATOR_USER_PROMPT,
MISTAKE_REGEN_SYSTEM_PROMPT,
MISTAKE_REGEN_USER_PROMPT,
QUESTION_SCHEMA,
QUESTION_TYPES,
DEFAULT_QUESTION_COUNTS,
DEFAULT_DIFFICULTY_LEVEL,
)
from .ability_analysis_prompts import (
PROMPT_META as ABILITY_ANALYSIS_PROMPT_META,
SYSTEM_PROMPT as ABILITY_ANALYSIS_SYSTEM_PROMPT,
USER_PROMPT as ABILITY_ANALYSIS_USER_PROMPT,
ABILITY_ANALYSIS_SCHEMA,
ABILITY_DIMENSIONS,
)
__all__ = [
# Knowledge Analysis Prompts
"KNOWLEDGE_ANALYSIS_PROMPT_META",
"KNOWLEDGE_ANALYSIS_SYSTEM_PROMPT",
"KNOWLEDGE_ANALYSIS_USER_PROMPT",
"KNOWLEDGE_POINT_SCHEMA",
# Exam Generator Prompts
"EXAM_GENERATOR_PROMPT_META",
"EXAM_GENERATOR_SYSTEM_PROMPT",
"EXAM_GENERATOR_USER_PROMPT",
"MISTAKE_REGEN_SYSTEM_PROMPT",
"MISTAKE_REGEN_USER_PROMPT",
"QUESTION_SCHEMA",
"QUESTION_TYPES",
"DEFAULT_QUESTION_COUNTS",
"DEFAULT_DIFFICULTY_LEVEL",
# Ability Analysis Prompts
"ABILITY_ANALYSIS_PROMPT_META",
"ABILITY_ANALYSIS_SYSTEM_PROMPT",
"ABILITY_ANALYSIS_USER_PROMPT",
"ABILITY_ANALYSIS_SCHEMA",
"ABILITY_DIMENSIONS",
]

View File

@@ -0,0 +1,215 @@
"""
智能工牌能力分析与课程推荐提示词模板
功能:分析员工与顾客的对话记录,评估能力维度得分,并推荐适合的课程
"""
# ==================== Metadata ====================
# Prompt-registry metadata; `variables` lists the placeholders the
# SYSTEM/USER templates are formatted with.
PROMPT_META = {
    "name": "ability_analysis",
    "display_name": "智能工牌能力分析",
    "description": "分析员工与顾客对话,评估多维度能力得分,推荐个性化课程",
    "module": "kaopeilian",
    "variables": ["dialogue_history", "user_info", "courses"],
    "version": "1.0.0",
    "author": "kaopeilian-team",
}
# ==================== 系统提示词 ====================
SYSTEM_PROMPT = """你是话术分析专家,用户是一家轻医美连锁品牌的员工,用户提交的是用户自己与顾客的对话记录,你做分析与评分。并严格按照以下格式输出。并根据课程列表,为该用户提供选课建议。
输出标准:
{
"analysis": {
"total_score": 82,
"ability_dimensions": [
{
"name": "专业知识",
"score": 88,
"feedback": "产品知识扎实,能准确回答客户问题。建议:继续深化对新产品的了解。"
},
{
"name": "沟通技巧",
"score": 92,
"feedback": "语言表达清晰流畅,善于倾听客户需求。建议:可以多使用开放式问题引导。"
},
{
"name": "操作技能",
"score": 85,
"feedback": "基本操作熟练,流程规范。建议:提升复杂场景的应对速度。"
},
{
"name": "客户服务",
"score": 90,
"feedback": "服务态度优秀,客户体验良好。建议:进一步提升个性化服务能力。"
},
{
"name": "安全意识",
"score": 79,
"feedback": "基本安全规范掌握,但在细节提醒上还可加强。"
},
{
"name": "应变能力",
"score": 76,
"feedback": "面对突发情况反应较快,但处理方式可以更灵活多样。"
}
],
"course_recommendations": [
{
"course_id": 5,
"course_name": "应变能力提升训练营",
"recommendation_reason": "该课程专注于提升应变能力包含大量实战案例分析和模拟演练针对您当前的薄弱环节应变能力76分设计。通过学习可提升15分左右。",
"priority": "high",
"match_score": 95
},
{
"course_id": 3,
"course_name": "安全规范与操作标准",
"recommendation_reason": "系统讲解安全规范和操作标准通过案例教学帮助建立安全意识。当前您的安全意识得分为79分通过本课程学习预计可提升12分。",
"priority": "high",
"match_score": 88
},
{
"course_id": 7,
"course_name": "高级销售技巧",
"recommendation_reason": "进阶课程帮助您将已有的沟通优势92分转化为更高级的销售技能进一步巩固客户服务能力90分",
"priority": "medium",
"match_score": 82
}
]
}
}
## 输出要求(严格执行)
1. 直接输出纯净的 JSON不要包含 Markdown 标记(如 ```json
2. 不要包含任何解释性文字
3. 能力维度必须包含:专业知识、沟通技巧、操作技能、客户服务、安全意识、应变能力
4. 课程推荐必须来自提供的课程列表,使用真实的 course_id
5. 推荐课程数量1-5个优先推荐能补齐短板的课程
6. priority 取值high得分<80的薄弱项、medium得分80-85、low锦上添花
## 评分标准
- 90-100优秀
- 80-89良好
- 70-79一般
- 60-69需改进
- <60亟需提升"""
# ==================== 用户提示词模板 ====================
USER_PROMPT = """对话记录:{dialogue_history}
---
用户的信息和岗位:{user_info}
---
所有可选课程:{courses}"""
# ==================== JSON Schema ====================
ABILITY_ANALYSIS_SCHEMA = {
"type": "object",
"required": ["analysis"],
"properties": {
"analysis": {
"type": "object",
"required": ["total_score", "ability_dimensions", "course_recommendations"],
"properties": {
"total_score": {
"type": "number",
"description": "总体评分0-100",
"minimum": 0,
"maximum": 100
},
"ability_dimensions": {
"type": "array",
"description": "能力维度评分列表",
"items": {
"type": "object",
"required": ["name", "score", "feedback"],
"properties": {
"name": {
"type": "string",
"description": "能力维度名称"
},
"score": {
"type": "number",
"description": "该维度得分0-100",
"minimum": 0,
"maximum": 100
},
"feedback": {
"type": "string",
"description": "该维度的反馈和建议"
}
}
},
"minItems": 1
},
"course_recommendations": {
"type": "array",
"description": "课程推荐列表",
"items": {
"type": "object",
"required": ["course_id", "course_name", "recommendation_reason", "priority", "match_score"],
"properties": {
"course_id": {
"type": "integer",
"description": "课程ID"
},
"course_name": {
"type": "string",
"description": "课程名称"
},
"recommendation_reason": {
"type": "string",
"description": "推荐理由"
},
"priority": {
"type": "string",
"description": "推荐优先级",
"enum": ["high", "medium", "low"]
},
"match_score": {
"type": "number",
"description": "匹配度得分0-100",
"minimum": 0,
"maximum": 100
}
}
}
}
}
}
}
}
# ==================== Ability dimension constants ====================
# The six ability dimensions the system prompt requires in the output.
ABILITY_DIMENSIONS = [
    "专业知识",
    "沟通技巧",
    "操作技能",
    "客户服务",
    "安全意识",
    "应变能力",
]
# Allowed values of a course recommendation's `priority` field.
PRIORITY_LEVELS = ["high", "medium", "low"]

View File

@@ -0,0 +1,48 @@
"""
答案判断器提示词模板
功能:判断填空题与问答题是否回答正确
"""
# ==================== Metadata ====================
# Prompt-registry metadata; `variables` lists the placeholders the
# SYSTEM/USER templates are formatted with.
PROMPT_META = {
    "name": "answer_judge",
    "display_name": "答案判断器",
    "description": "判断填空题与问答题的答案是否正确",
    "module": "kaopeilian",
    "variables": ["question", "correct_answer", "user_answer", "analysis"],
    "version": "1.0.0",
    "author": "kaopeilian-team",
}
# ==================== 系统提示词 ====================
SYSTEM_PROMPT = """你是一个答案判断器,根据用户提交的答案,比对题目、答案、解析。给出正确或错误的判断。
注意:仅输出"正确""错误",无需更多字符和说明。"""
# ==================== 用户提示词模板 ====================
USER_PROMPT = """题目:{question}
正确答案:{correct_answer}
解析:{analysis}
考生的回答:{user_answer}"""
# ==================== Verdict keyword constants ====================
# Keyword lists used to classify the judge model's verdict text.
# NOTE(review): both lists originally ended with empty-string entries --
# almost certainly CJK keywords lost to an encoding mishap. They are
# removed here because any substring/membership check treats "" as
# matching everything; restore the original words from VCS history if a
# plain "对"/"是"/"错" verdict must still be recognized.
CORRECT_KEYWORDS = ["正确", "correct", "true", "yes"]
INCORRECT_KEYWORDS = ["错误", "incorrect", "false", "no", "wrong", "不正确"]

View File

@@ -0,0 +1,74 @@
"""
课程对话提示词模板
功能:基于课程知识点进行智能问答
"""
# ==================== Metadata ====================
# Prompt-registry metadata; `variables` lists the placeholders the
# SYSTEM/USER templates are formatted with.
PROMPT_META = {
    "name": "course_chat",
    "display_name": "与课程对话",
    "description": "基于课程知识点内容,为用户提供智能问答服务",
    "module": "kaopeilian",
    "variables": ["knowledge_base", "query"],
    "version": "2.0.0",
    "author": "kaopeilian-team",
}
# ==================== 系统提示词 ====================
SYSTEM_PROMPT = """你是知识拆解专家,精通以下知识库(课程)内容。请根据用户的问题,从知识库中找到最相关的信息,进行深入分析后,用简洁清晰的语言回答用户。为用户提供与课程对话的服务。
回答要求:
1. 直接针对问题核心,避免冗长铺垫
2. 使用通俗易懂的语言,必要时举例说明
3. 突出关键要点,帮助用户快速理解
4. 如果知识库中没有相关内容,请如实告知
知识库:
{knowledge_base}"""
# ==================== User prompt template ====================
USER_PROMPT = """{query}"""
# ==================== Knowledge-base item template ====================
# Rendered once per knowledge point when assembling the prompt's knowledge base.
KNOWLEDGE_ITEM_TEMPLATE = """{name}
{description}
"""
# ==================== Configuration constants ====================
# Conversation history window (keep the most recent N exchanges).
CONVERSATION_WINDOW_SIZE = 10
# Conversation TTL in seconds (30 minutes).
CONVERSATION_TTL = 1800
# Maximum number of knowledge points loaded into the prompt.
MAX_KNOWLEDGE_POINTS = 50
# Maximum knowledge-base length in characters.
MAX_KNOWLEDGE_BASE_LENGTH = 50000
# Default chat model.
# NOTE(review): confirm "gemini-3-flash-preview" is a valid model id on the
# configured providers.
DEFAULT_CHAT_MODEL = "gemini-3-flash-preview"
# Sampling temperature (dialogue uses a relatively high value).
DEFAULT_TEMPERATURE = 0.7

View File

@@ -0,0 +1,300 @@
"""
试题生成器提示词模板
功能:根据岗位和知识点动态生成考试题目
"""
# ==================== Metadata ====================
# Prompt-registry metadata; `variables` lists the placeholders the
# SYSTEM/USER templates are formatted with.
PROMPT_META = {
    "name": "exam_generator",
    "display_name": "试题生成器",
    "description": "根据课程知识点和岗位特征,动态生成考试题目(单选、多选、判断、填空、问答)",
    "module": "kaopeilian",
    "variables": [
        "total_count",
        "single_choice_count",
        "multiple_choice_count",
        "true_false_count",
        "fill_blank_count",
        "essay_count",
        "difficulty_level",
        "position_info",
        "knowledge_points",
    ],
    "version": "2.0.0",
    "author": "kaopeilian-team",
}
# ==================== 系统提示词(第一轮出题) ====================
SYSTEM_PROMPT = """## 角色
你是一位经验丰富的考试出题专家,能够依据用户提供的知识内容,结合用户的岗位特征,随机地生成{total_count}题考题。你会以专业、严谨且清晰的方式出题。
## 输出{single_choice_count}道单选题
1、每道题目只能有 1 个正确答案。
2、干扰项要具有合理性和迷惑性且所有选项必须与主题相关。
3、答案解析要简明扼要说明选择理由。
4、为每道题记录出题来源的知识点 id。
5、请以 JSON 格式输出。
6、为每道题输出一个序号。
### 输出结构:
{{
"num": "题号",
"type": "single_choice",
"topic": {{
"title": "清晰完整的题目描述",
"options": {{
"opt1": "A符合语境的选项",
"opt2": "B符合语境的选项",
"opt3": "C符合语境的选项",
"opt4": "D符合语境的选项"
}}
}},
"knowledge_point_id": "出题来源知识点的id",
"correct": "其中一个选项的全部原文",
"analysis": "准确的答案解析,包含选择原因和知识点说明"
}}
- 严格按照以上格式输出
## 输出{multiple_choice_count}道多选题
1、每道题目有多个正确答案。
2、"type": "multiple_choice"
3、其它事项同单选题。
## 输出{true_false_count}道判断题
1、每道题目只有 "正确""错误" 两种答案。
2、题目表述应明确清晰避免歧义。
3、题目应直接陈述事实或观点便于做出是非判断。
4、其它事项同单选题。
### 输出结构:
{{
"num": "题号",
"type": "true_false",
"topic": {{
"title": "清晰完整的题目描述"
}},
"knowledge_point_id": " 出题来源知识点的id",
"correct": "正确",
"analysis": "准确的答案解析,包含判断原因和知识点说明"
}}
- 严格按照以上格式输出
## 输出{fill_blank_count}道填空题
1. 题干应明确完整,空缺处需用横线"___"标示,且只能有一处空缺
2. 答案应唯一且明确,避免开放性表述
3. 空缺长度应与答案长度大致匹配
4. 解析需说明答案依据及相关知识点
5. 其余要求与单选题一致
### 输出结构:
{{
"num": "题号",
"type": "fill_blank",
"topic": {{
"title": "包含___空缺的题目描述"
}},
"knowledge_point_id": "出题来源知识点的id",
"correct": "准确的填空答案",
"analysis": "解析答案的依据和相关知识点说明"
}}
- 严格按照以上格式输出
### 输出{essay_count}道问答题
1. 问题应具体明确,限定回答范围
2. 答案需条理清晰,突出核心要点
3. 解析可补充扩展说明或评分要点
4. 避免过于宽泛或需要主观发挥的问题
5. 其余要求同单选题
### 输出结构:
{{
"num": "题号",
"type": "essay",
"topic": {{
"title": "需要详细回答的问题描述"
}},
"knowledge_point_id": "出题来源知识点的id",
"correct": "完整准确的参考答案(分点或连贯表述)",
"analysis": "对答案的补充说明、评分要点或相关知识点扩展"
}}
## 特殊要求
1. 题目难度:{difficulty_level}5 级为最难)
2. 避免使用模棱两可的表述
3. 选项内容要互斥,不能有重叠
4. 每个选项长度尽量均衡
5. 正确答案A、B、C、D分布要合理避免规律性
6. 正确答案必须使用其中一个选项中的全部原文,严禁修改
7. knowledge_point_id 必须是唯一的,即每道题的知识点来源只允许填一个 id。
## 输出格式要求
请直接输出一个纯净的 JSON 数组Array不要包含 Markdown 标记(如 ```json也不要包含任何解释性文字。
请按以上要求生成题目,确保每道题目质量。"""
# ==================== 用户提示词模板(第一轮出题) ====================
USER_PROMPT = """# 请针对岗位特征、待出题的知识点内容进行出题。
## 岗位信息:
{position_info}
---
## 知识点:
{knowledge_points}"""
# ==================== 错题重出系统提示词 ====================
MISTAKE_REGEN_SYSTEM_PROMPT = """## 角色
你是一位经验丰富的考试出题专家,能够依据用户提供的错题记录,重新为用户出题。你会为每道错题重新出一题,你会以专业、严谨且清晰的方式出题。
## 输出单选题
1、每道题目只能有 1 个正确答案。
2、干扰项要具有合理性和迷惑性且所有选项必须与主题相关。
3、答案解析要简明扼要说明选择理由。
4、为每道题记录出题来源的知识点 id。
5、请以 JSON 格式输出。
6、为每道题输出一个序号。
### 输出结构:
{{
"num": "题号",
"type": "single_choice",
"topic": {{
"title": "清晰完整的题目描述",
"options": {{
"opt1": "A符合语境的选项",
"opt2": "B符合语境的选项",
"opt3": "C符合语境的选项",
"opt4": "D符合语境的选项"
}}
}},
"knowledge_point_id": "出题来源知识点的id",
"correct": "其中一个选项的全部原文",
"analysis": "准确的答案解析,包含选择原因和知识点说明"
}}
- 严格按照以上格式输出
## 特殊要求
1. 题目难度:{difficulty_level}5 级为最难)
2. 避免使用模棱两可的表述
3. 选项内容要互斥,不能有重叠
4. 每个选项长度尽量均衡
5. 正确答案A、B、C、D分布要合理避免规律性
6. 正确答案必须使用其中一个选项中的全部原文,严禁修改
7. knowledge_point_id 必须是唯一的,即每道题的知识点来源只允许填一个 id。
## 输出格式要求
请直接输出一个纯净的 JSON 数组Array不要包含 Markdown 标记(如 ```json也不要包含任何解释性文字。
请按以上要求生成题目,确保每道题目质量。"""
# ==================== 错题重出用户提示词 ====================
MISTAKE_REGEN_USER_PROMPT = """## 错题记录:
{mistake_records}"""
# ==================== JSON Schema ====================
QUESTION_SCHEMA = {
"type": "array",
"items": {
"type": "object",
"required": ["num", "type", "topic", "correct"],
"properties": {
"num": {
"oneOf": [
{"type": "integer"},
{"type": "string"}
],
"description": "题号"
},
"type": {
"type": "string",
"enum": ["single_choice", "multiple_choice", "true_false", "fill_blank", "essay"],
"description": "题目类型"
},
"topic": {
"type": "object",
"required": ["title"],
"properties": {
"title": {
"type": "string",
"description": "题目标题"
},
"options": {
"type": "object",
"description": "选项(选择题必填)"
}
}
},
"knowledge_point_id": {
"oneOf": [
{"type": "integer"},
{"type": "string"},
{"type": "null"}
],
"description": "知识点ID"
},
"correct": {
"type": "string",
"description": "正确答案"
},
"analysis": {
"type": "string",
"description": "答案解析"
}
}
},
"minItems": 1,
"maxItems": 50
}
# ==================== Question type constants ====================
# Maps the `type` field emitted by the model to its display label.
QUESTION_TYPES = {
    "single_choice": "单选题",
    "multiple_choice": "多选题",
    "true_false": "判断题",
    "fill_blank": "填空题",
    "essay": "问答题",
}
# Default per-type question counts used when the caller specifies none.
DEFAULT_QUESTION_COUNTS = {
    "single_choice_count": 4,
    "multiple_choice_count": 2,
    "true_false_count": 1,
    "fill_blank_count": 2,
    "essay_count": 1,
}
# Difficulty scale; 5 is the hardest per the system prompt.
DEFAULT_DIFFICULTY_LEVEL = 3
MAX_DIFFICULTY_LEVEL = 5

View File

@@ -0,0 +1,148 @@
"""
知识点分析提示词模板
功能:从课程资料中提取知识点
"""
# ==================== 元数据 ====================
PROMPT_META = {
"name": "knowledge_analysis",
"display_name": "知识点分析",
"description": "从课程资料中提取和分析知识点支持PDF/Word/文本等格式",
"module": "kaopeilian",
"variables": ["course_name", "content"],
"version": "2.0.0",
"author": "kaopeilian-team",
}
# ==================== System prompt ====================
# Instructs the model to split submitted course material into at most 20
# segments, keeping the original wording, and to emit a pure JSON array of
# {title, content, topic_relation, type} objects (no Markdown fences).
SYSTEM_PROMPT = """# 角色
你是一个文件拆解高手,擅长将用户提交的内容进行精准拆分,拆分后的内容做个简单的优化处理使其更具可读性,但要尽量使用原文的原词原句。
## 技能
### 技能 1: 内容拆分
1. 当用户提交内容后,拆分为多段。
2. 对拆分后的内容做简单优化,使其更具可读性,比如去掉奇怪符号(如换行符、乱码),若语句不通顺,或格式原因导致错位,则重新表达。用户可能会提交录音转文字的内容,因此可能是有错字的,注意修复这些小瑕疵。
3. 优化过程中,尽量使用原文的原词原句,特别是话术类,必须保持原有的句式、保持原词原句,而不是重构。
4. 注意是拆分而不是重写,不需要润色,尽量不做任何处理。
5. 输出到 content。
### 技能 2: 为每一个选段概括一个标题
1. 为每个拆分出来的选段概括一个标题,并输出到 title。
### 技能 3: 为每一个选段说明与主题的关联
1. 详细说明这一段与全文核心主题的关联,并输出到 topic_relation。
### 技能 4: 为每一个选段打上一个类型标签
1. 用户提交的内容很有可能是一个课程、一篇讲义、一个产品的说明书,通常是用户希望他公司的员工或高管学习的知识。
2. 用户通常是医疗美容机构或轻医美、生活美容连锁品牌。
3. 你要为每个选段打上一个知识类型的标签,最好是这几个类型中的一个:"理论知识", "诊断设计", "操作步骤", "沟通话术", "案例分析", "注意事项", "技巧方法", "客诉处理"。当然你也可以为这个选段匹配一个更适合的。
## 输出要求(严格按要求输出)
请直接输出一个纯净的 JSON 数组Array不要包含 Markdown 标记(如 ```json也不要包含任何解释性文字。格式如下
[
{
"title": "知识点标题",
"content": "知识点内容",
"topic_relation": "知识点与主题的关系",
"type": "知识点类型"
},
{
"title": "第二个知识点标题",
"content": "第二个知识点内容...",
"topic_relation": "...",
"type": "..."
}
]
## 限制
- 仅围绕用户提交的内容进行拆分和关联标注,不涉及其他无关内容。
- 拆分后的内容必须最大程度保持与原文一致。
- 关联说明需清晰合理。
- 不论如何,不要拆分超过 20 段!"""
# ==================== User prompt template ====================
# Placeholders: {course_name} = course topic, {content} = raw material text.
USER_PROMPT = """课程主题:{course_name}
## 用户提交的内容:
{content}
## 注意
- 以json的格式输出
- 不论如何不要拆分超过20 段!"""
# ==================== JSON Schema ====================
# Validates the knowledge-point array returned by the LLM.
# NOTE(review): this enum is closed (with "其他" as catch-all) while the
# system prompt allows the model to invent a better-fitting type — a
# free-form type would fail validation here; confirm this is intended.
KNOWLEDGE_POINT_SCHEMA = {
    "type": "array",
    "items": {
        "type": "object",
        "required": ["title", "content", "type"],
        "properties": {
            "title": {
                "type": "string",
                "description": "知识点标题",
                "maxLength": 200
            },
            "content": {
                "type": "string",
                "description": "知识点内容"
            },
            # Optional: relation of the segment to the overall topic.
            "topic_relation": {
                "type": "string",
                "description": "与主题的关系描述"
            },
            "type": {
                "type": "string",
                "description": "知识点类型",
                "enum": [
                    "理论知识",
                    "诊断设计",
                    "操作步骤",
                    "沟通话术",
                    "案例分析",
                    "注意事项",
                    "技巧方法",
                    "客诉处理",
                    "其他"
                ]
            }
        }
    },
    # The prompt caps splitting at 20 segments; the schema mirrors that.
    "minItems": 1,
    "maxItems": 20
}
# ==================== Knowledge-point type constants ====================
# Suggested type labels offered to the model (schema enum adds "其他").
KNOWLEDGE_POINT_TYPES = [
    "理论知识",
    "诊断设计",
    "操作步骤",
    "沟通话术",
    "案例分析",
    "注意事项",
    "技巧方法",
    "客诉处理",
]
# Fallback type applied when none is provided.
DEFAULT_KNOWLEDGE_TYPE = "理论知识"

View File

@@ -0,0 +1,193 @@
"""
陪练分析报告提示词模板
功能:分析陪练对话,生成综合评分和改进建议
"""
# ==================== 元数据 ====================
PROMPT_META = {
"name": "practice_analysis",
"display_name": "陪练分析报告",
"description": "分析陪练对话,生成综合评分、能力维度评估、对话标注和改进建议",
"module": "kaopeilian",
"variables": ["dialogue_history"],
"version": "1.0.0",
"author": "kaopeilian-team",
}
# ==================== System prompt ====================
# Instructs the model to score an employee/customer dialogue and emit a
# fixed-shape JSON report: total_score, 5 score_breakdown items, 6
# ability_dimensions, dialogue_annotations, and 2-4 suggestions.
# NOTE(review): the sample suggestion examples mention 保险 (insurance) —
# likely copied from an insurance-sales template; consider aligning the
# examples with the medical-aesthetics domain.
SYSTEM_PROMPT = """你是话术分析专家,用户是一家轻医美连锁品牌的员工,用户提交的是用户自己与顾客的对话记录,你做分析与评分。并严格按照以下格式输出。
输出标准:
{
"analysis": {
"total_score": 88,
"score_breakdown": [
{"name": "开场技巧", "score": 92, "description": "开场自然,快速建立信任"},
{"name": "需求挖掘", "score": 90, "description": "能够有效识别客户需求"},
{"name": "产品介绍", "score": 88, "description": "产品介绍清晰,重点突出"},
{"name": "异议处理", "score": 85, "description": "处理客户异议还需加强"},
{"name": "成交技巧", "score": 86, "description": "成交话术运用良好"}
],
"ability_dimensions": [
{"name": "沟通表达", "score": 90, "feedback": "语言流畅,表达清晰,语调富有亲和力"},
{"name": "倾听理解", "score": 92, "feedback": "能够准确理解客户意图,给予恰当回应"},
{"name": "情绪控制", "score": 88, "feedback": "整体情绪稳定,面对异议时保持专业"},
{"name": "专业知识", "score": 93, "feedback": "对医美项目知识掌握扎实"},
{"name": "销售技巧", "score": 87, "feedback": "销售流程把控良好"},
{"name": "应变能力", "score": 85, "feedback": "面对突发问题能够快速反应"}
],
"dialogue_annotations": [
{"sequence": 1, "tags": ["亮点话术"], "comment": "开场专业,身份介绍清晰"},
{"sequence": 3, "tags": ["金牌话术"], "comment": "巧妙引导,从客户角度出发"},
{"sequence": 5, "tags": ["亮点话术"], "comment": "类比生动,让客户容易理解"},
{"sequence": 7, "tags": ["金牌话术"], "comment": "专业解答,打消客户疑虑"}
],
"suggestions": [
{"title": "控制语速", "content": "您的语速偏快,建议适当放慢,给客户更多思考时间", "example": "说完产品优势后停顿2-3秒观察客户反应"},
{"title": "多用开放式问题", "content": "增加开放式问题的使用,更深入了解客户需求", "example": "您对未来的保障有什么期望?而不是您需要保险吗?"},
{"title": "强化成交信号识别", "content": "客户已经表现出兴趣时,要及时推进成交", "example": "当客户问费用多少时,这是购买信号,应该立即报价并促成"}
]
}
}
## 输出要求(严格执行)
1. 直接输出纯净的 JSON不要包含 Markdown 标记(如 ```json
2. 不要包含任何解释性文字
3. score_breakdown 必须包含 5 项:开场技巧、需求挖掘、产品介绍、异议处理、成交技巧
4. ability_dimensions 必须包含 6 项:沟通表达、倾听理解、情绪控制、专业知识、销售技巧、应变能力
5. dialogue_annotations 标注有亮点或问题的对话轮次tags 可选:亮点话术、金牌话术、待改进、问题话术
6. suggestions 提供 2-4 条具体可操作的改进建议,每条包含 title、content、example
## 评分标准
- 90-100优秀
- 80-89良好
- 70-79一般
- 60-69需改进
- <60亟需提升"""
# ==================== User prompt template ====================
# The user message is simply the raw dialogue transcript.
USER_PROMPT = """{dialogue_history}"""
# ==================== JSON Schema ====================
# Validates the analysis report returned by the LLM. Shapes mirror the
# system prompt's requirements (5 breakdown items, 6 ability dimensions).
# NOTE(review): suggestions allows up to 5 items while the prompt asks for
# 2-4 — presumably deliberate slack for model drift; confirm.
PRACTICE_ANALYSIS_SCHEMA = {
    "type": "object",
    "required": ["analysis"],
    "properties": {
        "analysis": {
            "type": "object",
            "required": ["total_score", "score_breakdown", "ability_dimensions", "dialogue_annotations", "suggestions"],
            "properties": {
                "total_score": {
                    "type": "number",
                    "description": "总体评分0-100",
                    "minimum": 0,
                    "maximum": 100
                },
                # Per-stage sales-skill scores (opening, needs, pitch, objections, closing).
                "score_breakdown": {
                    "type": "array",
                    "description": "分数细分5项",
                    "items": {
                        "type": "object",
                        "required": ["name", "score", "description"],
                        "properties": {
                            "name": {"type": "string", "description": "维度名称"},
                            "score": {"type": "number", "description": "得分0-100"},
                            "description": {"type": "string", "description": "评价描述"}
                        }
                    },
                    "minItems": 5
                },
                # Six soft-skill dimension scores with free-text feedback.
                "ability_dimensions": {
                    "type": "array",
                    "description": "能力维度评分6项",
                    "items": {
                        "type": "object",
                        "required": ["name", "score", "feedback"],
                        "properties": {
                            "name": {"type": "string", "description": "能力维度名称"},
                            "score": {"type": "number", "description": "得分0-100"},
                            "feedback": {"type": "string", "description": "反馈评语"}
                        }
                    },
                    "minItems": 6
                },
                # Per-turn annotations; may be empty (no minItems constraint).
                "dialogue_annotations": {
                    "type": "array",
                    "description": "对话标注",
                    "items": {
                        "type": "object",
                        "required": ["sequence", "tags", "comment"],
                        "properties": {
                            "sequence": {"type": "integer", "description": "对话轮次序号"},
                            "tags": {
                                "type": "array",
                                "description": "标签列表",
                                "items": {"type": "string"}
                            },
                            "comment": {"type": "string", "description": "点评内容"}
                        }
                    }
                },
                "suggestions": {
                    "type": "array",
                    "description": "改进建议",
                    "items": {
                        "type": "object",
                        "required": ["title", "content", "example"],
                        "properties": {
                            "title": {"type": "string", "description": "建议标题"},
                            "content": {"type": "string", "description": "建议内容"},
                            "example": {"type": "string", "description": "示例"}
                        }
                    },
                    "minItems": 2,
                    "maxItems": 5
                }
            }
        }
    }
}
# ==================== Constant definitions ====================
# Canonical names for the five score_breakdown entries required by the
# system prompt (opening, needs discovery, pitch, objections, closing).
SCORE_BREAKDOWN_ITEMS = [
    "开场技巧",
    "需求挖掘",
    "产品介绍",
    "异议处理",
    "成交技巧",
]
# Canonical names for the six ability_dimensions entries required by the
# system prompt.
ABILITY_DIMENSIONS = [
    "沟通表达",
    "倾听理解",
    "情绪控制",
    "专业知识",
    "销售技巧",
    "应变能力",
]
# Allowed tags for dialogue_annotations (highlight / gold / needs-work / problem).
ANNOTATION_TAGS = [
    "亮点话术",
    "金牌话术",
    "待改进",
    "问题话术",
]

View File

@@ -0,0 +1,192 @@
"""
陪练场景生成提示词模板
功能:根据课程知识点生成陪练场景配置
"""
# ==================== 元数据 ====================
PROMPT_META = {
"name": "practice_scene_generation",
"display_name": "陪练场景生成",
"description": "根据课程知识点生成 AI 陪练场景配置包含场景名称、背景、AI 角色、练习目标等",
"module": "kaopeilian",
"variables": ["knowledge_points"],
"version": "1.0.0",
"author": "kaopeilian-team",
}
# ==================== System prompt ====================
# Instructs the model to turn submitted knowledge points into a single
# role-play scene object {"scene": {...}} with fixed fields (name,
# description, background, ai_role, objectives, keywords, type,
# difficulty), emitted as pure JSON without Markdown fences.
SYSTEM_PROMPT = """你是一个训练场景研究专家,能将用户提交的知识点,转变为一个模拟陪练的场景,并严格按照以下格式输出。
输出标准:
{
"scene": {
"name": "轻医美产品咨询陪练",
"description": "模拟客户咨询轻医美产品的场景",
"background": "客户对脸部抗衰项目感兴趣。",
"ai_role": "AI扮演一位30岁女性客户",
"objectives": ["了解客户需求", "介绍产品优势", "处理价格异议"],
"keywords": ["抗衰", "玻尿酸", "价格"],
"type": "product-intro",
"difficulty": "intermediate"
}
}
## 字段说明
- **name**: 场景名称,简洁明了,体现陪练主题
- **description**: 场景描述,说明这是什么样的模拟场景
- **background**: 场景背景设定,描述客户的情况和需求
- **ai_role**: AI 角色描述,说明 AI 扮演什么角色(通常是客户)
- **objectives**: 练习目标数组,列出学员需要达成的目标
- **keywords**: 关键词数组,从知识点中提取的核心关键词
- **type**: 场景类型,可选值:
- phone: 电话销售
- face: 面对面销售
- complaint: 客户投诉
- after-sales: 售后服务
- product-intro: 产品介绍
- **difficulty**: 难度等级,可选值:
- beginner: 入门
- junior: 初级
- intermediate: 中级
- senior: 高级
- expert: 专家
## 输出要求
1. 直接输出纯净的 JSON 对象,不要包含 Markdown 标记(如 ```json
2. 不要包含任何解释性文字
3. 根据知识点内容合理设计场景,确保场景与知识点紧密相关
4. objectives 至少包含 2-3 个具体可操作的目标
5. keywords 提取 3-5 个核心关键词
6. 根据知识点的复杂程度选择合适的 difficulty
7. 根据知识点的应用场景选择合适的 type"""
# ==================== User prompt template ====================
# Placeholder: {knowledge_points} = formatted knowledge-point content.
USER_PROMPT = """请根据以下知识点内容,生成一个模拟陪练场景:
## 知识点内容
{knowledge_points}
## 要求
- 以 JSON 格式输出
- 场景要贴合知识点的实际应用场景
- AI 角色要符合轻医美行业的客户特征
- 练习目标要具体、可评估"""
# ==================== JSON Schema ====================
# Validates the {"scene": {...}} object returned by the LLM; all eight
# scene fields are required, and type/difficulty are constrained to the
# closed enums mirrored in SCENE_TYPES / DIFFICULTY_LEVELS below.
PRACTICE_SCENE_SCHEMA = {
    "type": "object",
    "required": ["scene"],
    "properties": {
        "scene": {
            "type": "object",
            "required": ["name", "description", "background", "ai_role", "objectives", "keywords", "type", "difficulty"],
            "properties": {
                "name": {
                    "type": "string",
                    "description": "场景名称",
                    "maxLength": 100
                },
                "description": {
                    "type": "string",
                    "description": "场景描述",
                    "maxLength": 500
                },
                "background": {
                    "type": "string",
                    "description": "场景背景设定",
                    "maxLength": 500
                },
                "ai_role": {
                    "type": "string",
                    "description": "AI 角色描述",
                    "maxLength": 200
                },
                # Learner objectives; prompt asks for 2-3, schema allows up to 5.
                "objectives": {
                    "type": "array",
                    "description": "练习目标",
                    "items": {
                        "type": "string"
                    },
                    "minItems": 2,
                    "maxItems": 5
                },
                # Core keywords; prompt asks for 3-5, schema allows 2-8.
                "keywords": {
                    "type": "array",
                    "description": "关键词",
                    "items": {
                        "type": "string"
                    },
                    "minItems": 2,
                    "maxItems": 8
                },
                "type": {
                    "type": "string",
                    "description": "场景类型",
                    "enum": [
                        "phone",
                        "face",
                        "complaint",
                        "after-sales",
                        "product-intro"
                    ]
                },
                "difficulty": {
                    "type": "string",
                    "description": "难度等级",
                    "enum": [
                        "beginner",
                        "junior",
                        "intermediate",
                        "senior",
                        "expert"
                    ]
                }
            }
        }
    }
}
# ==================== Scene type constants ====================
# Maps scene-type codes to their Chinese display labels; keys mirror the
# "type" enum in PRACTICE_SCENE_SCHEMA above.
SCENE_TYPES = {
    "phone": "电话销售",
    "face": "面对面销售",
    "complaint": "客户投诉",
    "after-sales": "售后服务",
    "product-intro": "产品介绍",
}
# Maps difficulty codes to their Chinese display labels; keys mirror the
# "difficulty" enum in PRACTICE_SCENE_SCHEMA above.
DIFFICULTY_LEVELS = {
    "beginner": "入门",
    "junior": "初级",
    "intermediate": "中级",
    "senior": "高级",
    "expert": "专家",
}
# Defaults used when the model or caller does not specify a value.
DEFAULT_SCENE_TYPE = "product-intro"
DEFAULT_DIFFICULTY = "intermediate"