Files
012-kaopeilian/backend/app/services/ai/exam_generator_service.py
111 442ac78b56
Some checks failed
continuous-integration/drone/push Build is failing
sync: 同步服务器最新代码 (2026-01-27)
更新内容:
- 后端 AI 服务优化(能力分析、知识点解析等)
- 前端考试和陪练界面更新
- 修复多个 prompt 和 JSON 解析问题
- 更新 Coze 语音客户端
2026-01-27 10:03:28 +08:00

518 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
试题生成服务 V2 - Python 原生实现
功能:
- 根据岗位和知识点动态生成考试题目
- 支持错题重出模式
- 调用 AI 生成并解析 JSON 结果
提供稳定可靠的试题生成能力。
"""
import json
import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.exceptions import ExternalServiceError
from .ai_service import AIService, AIResponse
from .llm_json_parser import parse_with_fallback, clean_llm_output
from .prompts.exam_generator_prompts import (
SYSTEM_PROMPT,
USER_PROMPT,
MISTAKE_REGEN_SYSTEM_PROMPT,
MISTAKE_REGEN_USER_PROMPT,
QUESTION_SCHEMA,
DEFAULT_QUESTION_COUNTS,
DEFAULT_DIFFICULTY_LEVEL,
)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
@dataclass
class ExamGeneratorConfig:
    """Settings for a single exam-generation request.

    ``course_id`` selects the course whose knowledge points are sampled and
    ``position_id`` selects the position that is looked up for the prompt.
    The ``*_count`` fields give the number of questions per question type,
    ``difficulty_level`` is forwarded to the prompt templates, and
    ``mistake_records`` (a string, empty by default) switches the service to
    mistake-regeneration mode when it contains non-whitespace text.
    """

    course_id: int
    position_id: int
    # Per-type question counts; defaults are read from DEFAULT_QUESTION_COUNTS.
    single_choice_count: int = DEFAULT_QUESTION_COUNTS["single_choice_count"]
    multiple_choice_count: int = DEFAULT_QUESTION_COUNTS["multiple_choice_count"]
    true_false_count: int = DEFAULT_QUESTION_COUNTS["true_false_count"]
    fill_blank_count: int = DEFAULT_QUESTION_COUNTS["fill_blank_count"]
    essay_count: int = DEFAULT_QUESTION_COUNTS["essay_count"]
    difficulty_level: int = DEFAULT_DIFFICULTY_LEVEL
    mistake_records: str = ""

    @property
    def total_count(self) -> int:
        """Return the number of questions requested across all types."""
        per_type_counts = (
            self.single_choice_count,
            self.multiple_choice_count,
            self.true_false_count,
            self.fill_blank_count,
            self.essay_count,
        )
        return sum(per_type_counts)

    @property
    def has_mistakes(self) -> bool:
        """Return True when ``mistake_records`` holds non-whitespace text."""
        records = self.mistake_records
        if not records:
            return False
        return bool(records.strip())
class ExamGeneratorService:
    """
    Exam question generation service (V2), implemented in plain Python.

    Depending on the configuration, questions are either generated from
    randomly sampled knowledge points of a course, or regenerated from a
    learner's mistake records. In both modes the AI output is parsed as JSON
    and normalized into a list of question dicts.

    Example:
        ```python
        service = ExamGeneratorService()
        result = await service.generate_exam(
            db=db_session,
            config=ExamGeneratorConfig(
                course_id=1,
                position_id=1,
                single_choice_count=5,
                multiple_choice_count=3,
                difficulty_level=3,
            ),
        )
        ```
    """

    def __init__(self):
        """Create the AI client used for all generation requests."""
        self.ai_service = AIService(module_code="exam_generator")

    async def generate_exam(
        self,
        db: AsyncSession,
        config: ExamGeneratorConfig,
    ) -> Dict[str, Any]:
        """
        Generate exam questions (main entry point).

        Args:
            db: Database session used to load position and knowledge points.
            config: Exam generation settings.

        Returns:
            A dict with ``success``, ``questions``, ``total_count``, ``mode``
            and the AI call metadata (provider, model, tokens, latency).

        Raises:
            ExternalServiceError: On any failure. Errors that are already
                ``ExternalServiceError`` propagate unchanged; everything else
                is logged and wrapped, with the original exception chained.
        """
        try:
            logger.info(
                f"开始生成试题 - course_id: {config.course_id}, position_id: {config.position_id}, "
                f"total_count: {config.total_count}, has_mistakes: {config.has_mistakes}"
            )
            # Mistake records present -> regenerate from them; otherwise
            # generate from the course's knowledge points.
            if config.has_mistakes:
                return await self._regenerate_from_mistakes(db, config)
            return await self._generate_from_knowledge(db, config)
        except ExternalServiceError:
            raise
        except Exception as e:
            logger.error(
                f"试题生成失败 - course_id: {config.course_id}, error: {e}",
                exc_info=True
            )
            # Chain the cause so the original traceback is preserved.
            raise ExternalServiceError(f"试题生成失败: {e}") from e

    async def _generate_from_knowledge(
        self,
        db: AsyncSession,
        config: ExamGeneratorConfig,
    ) -> Dict[str, Any]:
        """
        Generate questions from knowledge points (no mistake records).

        Steps:
            1. Load the position.
            2. Randomly sample knowledge points of the course.
            3. Build the prompts and call the AI.
            4. Parse the AI output and return the result dict.

        Raises:
            ExternalServiceError: If the position does not exist or the
                course has no usable knowledge points.
        """
        # 1. Load the position.
        position_info = await self._query_position(db, config.position_id)
        if not position_info:
            raise ExternalServiceError(f"岗位不存在: position_id={config.position_id}")
        logger.info(f"岗位信息: {position_info.get('name', 'unknown')}")

        # 2. Sample at most one knowledge point per requested question.
        knowledge_points = await self._query_knowledge_points(
            db,
            config.course_id,
            config.total_count,
        )
        if not knowledge_points:
            raise ExternalServiceError(
                f"课程没有可用的知识点: course_id={config.course_id}"
            )
        logger.info(f"查询到 {len(knowledge_points)} 个知识点")

        # 3. Build the prompts.
        system_prompt = SYSTEM_PROMPT.format(
            total_count=config.total_count,
            single_choice_count=config.single_choice_count,
            multiple_choice_count=config.multiple_choice_count,
            true_false_count=config.true_false_count,
            fill_blank_count=config.fill_blank_count,
            essay_count=config.essay_count,
            difficulty_level=config.difficulty_level,
        )
        user_prompt = USER_PROMPT.format(
            position_info=self._format_position_info(position_info),
            knowledge_points=self._format_knowledge_points(knowledge_points),
        )

        # 4. Call the AI.
        ai_response = await self._call_ai_generate(system_prompt, user_prompt)
        logger.info(
            f"AI 生成完成 - provider: {ai_response.provider}, "
            f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
        )

        # 5. Parse the questions.
        questions = self._parse_questions(ai_response.content)
        logger.info(f"试题解析成功,数量: {len(questions)}")
        return self._build_result(questions, ai_response, "knowledge_based")

    async def _regenerate_from_mistakes(
        self,
        db: AsyncSession,
        config: ExamGeneratorConfig,
    ) -> Dict[str, Any]:
        """
        Generate new questions from the learner's mistake records.

        Steps:
            1. Build the mistake-regeneration prompts.
            2. Call the AI.
            3. Parse the AI output and return the result dict.

        ``db`` is accepted for signature parity with
        ``_generate_from_knowledge`` but is not used in this mode.
        """
        logger.info("进入错题重出模式")

        # 1. Build the prompts.
        system_prompt = MISTAKE_REGEN_SYSTEM_PROMPT.format(
            difficulty_level=config.difficulty_level,
        )
        user_prompt = MISTAKE_REGEN_USER_PROMPT.format(
            mistake_records=config.mistake_records,
        )

        # 2. Call the AI.
        ai_response = await self._call_ai_generate(system_prompt, user_prompt)
        logger.info(
            f"错题重出完成 - provider: {ai_response.provider}, "
            f"tokens: {ai_response.total_tokens}, latency: {ai_response.latency_ms}ms"
        )

        # 3. Parse the questions.
        questions = self._parse_questions(ai_response.content)
        logger.info(f"错题重出解析成功,数量: {len(questions)}")
        return self._build_result(questions, ai_response, "mistake_regen")

    @staticmethod
    def _build_result(
        questions: List[Dict[str, Any]],
        ai_response: AIResponse,
        mode: str,
    ) -> Dict[str, Any]:
        """Assemble the result dict shared by both generation modes."""
        return {
            "success": True,
            "questions": questions,
            "total_count": len(questions),
            "mode": mode,
            "ai_provider": ai_response.provider,
            "ai_model": ai_response.model,
            "ai_tokens": ai_response.total_tokens,
            "ai_latency_ms": ai_response.latency_ms,
        }

    async def _query_position(
        self,
        db: AsyncSession,
        position_id: int,
    ) -> Optional[Dict[str, Any]]:
        """
        Load one non-deleted position by id.

        Returns:
            A dict with ``id``, ``name``, ``description``, ``skills`` and
            ``level``, or ``None`` when no matching row exists.

        Raises:
            ExternalServiceError: If the query fails.
        """
        try:
            result = await db.execute(
                text("""
                    SELECT id, name, description, skills, level
                    FROM positions
                    WHERE id = :position_id AND is_deleted = FALSE
                """),
                {"position_id": position_id}
            )
            row = result.fetchone()
            if not row:
                return None
            # Convert the positional row into a dict (column order as selected).
            return {
                "id": row[0],
                "name": row[1],
                "description": row[2],
                "skills": row[3],  # JSON column
                "level": row[4],
            }
        except Exception as e:
            logger.error(f"查询岗位信息失败: {e}")
            raise ExternalServiceError(f"查询岗位信息失败: {e}") from e

    async def _query_knowledge_points(
        self,
        db: AsyncSession,
        course_id: int,
        limit: int,
    ) -> List[Dict[str, Any]]:
        """
        Randomly sample up to ``limit`` knowledge points of a course.

        Only knowledge points that are not deleted, and whose course material
        is not deleted either, are considered. Ordering uses ``RAND()``.

        Returns:
            A list of dicts with ``id``, ``name``, ``description`` and
            ``topic_relation`` (empty when nothing matches).

        Raises:
            ExternalServiceError: If the query fails.
        """
        try:
            result = await db.execute(
                text("""
                    SELECT kp.id, kp.name, kp.description, kp.topic_relation
                    FROM knowledge_points kp
                    INNER JOIN course_materials cm ON kp.material_id = cm.id
                    WHERE kp.course_id = :course_id
                    AND kp.is_deleted = FALSE
                    AND cm.is_deleted = FALSE
                    ORDER BY RAND()
                    LIMIT :limit
                """),
                {"course_id": course_id, "limit": limit}
            )
            rows = result.fetchall()
            return [
                {
                    "id": row[0],
                    "name": row[1],
                    "description": row[2],
                    "topic_relation": row[3],
                }
                for row in rows
            ]
        except Exception as e:
            logger.error(f"查询知识点失败: {e}")
            raise ExternalServiceError(f"查询知识点失败: {e}") from e

    async def _call_ai_generate(
        self,
        system_prompt: str,
        user_prompt: str,
    ) -> AIResponse:
        """Send the system and user prompts to the AI service and return its response."""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        response = await self.ai_service.chat(
            messages=messages,
            temperature=0.7,  # moderate creativity
            prompt_name="exam_generator"
        )
        return response

    def _parse_questions(self, ai_output: str) -> List[Dict[str, Any]]:
        """
        Parse the AI output into a list of normalized question dicts.

        The output is cleaned first, then parsed by the LLM JSON parser with
        schema validation; the parser is asked to fall back to an empty list
        on error. The parsed value is then normalized by
        ``_normalize_questions``.
        """
        # Clean the raw output first.
        cleaned_output, rules = clean_llm_output(ai_output)
        if rules:
            logger.debug(f"AI 输出已清洗: {rules}")

        # Parse with schema validation.
        questions = parse_with_fallback(
            cleaned_output,
            schema=QUESTION_SCHEMA,
            default=[],
            validate_schema=True,
            on_error="default"
        )

        processed_questions = self._normalize_questions(questions)
        if not processed_questions:
            logger.warning("未能解析出有效的题目")
        return processed_questions

    def _normalize_questions(self, questions: Any) -> List[Dict[str, Any]]:
        """
        Normalize parsed questions and drop the ones missing required fields.

        For every dict entry (mutated in place):
            - ``num`` is coerced to int, falling back to the 1-based position;
            - a missing ``type`` is inferred: ``single_choice`` when ``topic``
              is a dict with non-empty ``options``, otherwise ``essay``;
            - ``knowledge_point_id`` is coerced to int, or set to ``None``
              when it cannot be converted.

        An entry is kept only when it has a truthy ``topic`` and an answer in
        ``correct``. Non-dict entries are skipped.
        """
        if isinstance(questions, dict):
            # A single question object instead of an array: treat it as a
            # one-element list rather than iterating over its keys.
            questions = [questions]
        elif not isinstance(questions, (list, tuple)):
            # None or a scalar cannot contain questions.
            return []

        processed_questions: List[Dict[str, Any]] = []
        for position, question in enumerate(questions, start=1):
            if not isinstance(question, dict):
                logger.warning(f"题目格式不是对象,已跳过: {question}")
                continue

            # Ensure ``num`` exists and is an integer.
            try:
                question["num"] = int(question.get("num", position))
            except (ValueError, TypeError):
                question["num"] = position

            # Infer ``type`` when missing. ``topic`` may be a plain string or
            # None, so only look for options when it really is a dict.
            if "type" not in question:
                topic = question.get("topic")
                has_options = isinstance(topic, dict) and bool(topic.get("options"))
                question["type"] = "single_choice" if has_options else "essay"

            # Ensure ``knowledge_point_id`` is an integer or None.
            kp_id = question.get("knowledge_point_id")
            if kp_id is not None:
                try:
                    question["knowledge_point_id"] = int(kp_id)
                except (ValueError, TypeError):
                    question["knowledge_point_id"] = None

            # Validate required fields.
            if question.get("topic") and self._has_answer(question.get("correct")):
                processed_questions.append(question)
            else:
                logger.warning(f"题目缺少必要字段,已跳过: {question}")
        return processed_questions

    @staticmethod
    def _has_answer(value: Any) -> bool:
        """
        Return True when ``value`` is a usable answer.

        Booleans and numbers always count, so an answer of ``False`` or ``0``
        (e.g. for a true/false question) is not mistaken for "missing".
        ``None`` and empty strings/containers are treated as missing.
        """
        if isinstance(value, (bool, int, float)):
            return True
        return bool(value)

    def _format_position_info(self, position: Dict[str, Any]) -> str:
        """
        Render the position dict as prompt text, one attribute per line.

        Name and level are always emitted; description and skills only when
        present. ``skills`` may be a list or a JSON string; a string that is
        not valid JSON is treated as a single skill.
        """
        lines = [
            f"岗位名称: {position.get('name', '未知')}",
            f"岗位等级: {position.get('level', '未设置')}",
        ]
        if position.get('description'):
            lines.append(f"岗位描述: {position['description']}")

        skills = position.get('skills')
        if skills:
            if isinstance(skills, str):
                try:
                    skills = json.loads(skills)
                except json.JSONDecodeError:
                    skills = [skills]
            if isinstance(skills, list) and skills:
                lines.append(f"核心技能: {', '.join(str(s) for s in skills)}")
        return '\n'.join(lines)

    def _format_knowledge_points(self, knowledge_points: List[Dict[str, Any]]) -> str:
        """
        Render knowledge points as prompt text, separated by blank lines.

        Each entry starts with a bracketed ID header followed by the name,
        then the optional description and topic relation on their own lines.
        """
        lines = []
        for kp in knowledge_points:
            # Close the bracket after the ID so it cannot run into the name
            # (e.g. ID 7 + "3D建模" must not read as "73D建模").
            kp_text = f"【知识点 ID: {kp['id']}】{kp['name']}"
            if kp.get('description'):
                kp_text += f"\n{kp['description']}"
            if kp.get('topic_relation'):
                kp_text += f"\n关系描述: {kp['topic_relation']}"
            lines.append(kp_text)
        return '\n\n'.join(lines)
# Module-level shared service instance, created at import time.
exam_generator_service = ExamGeneratorService()
# ==================== 便捷函数 ====================
async def generate_exam(
    db: AsyncSession,
    course_id: int,
    position_id: int,
    single_choice_count: int = 4,
    multiple_choice_count: int = 2,
    true_false_count: int = 1,
    fill_blank_count: int = 2,
    essay_count: int = 1,
    difficulty_level: int = 3,
    mistake_records: str = ""
) -> Dict[str, Any]:
    """
    Convenience wrapper: generate exam questions with the shared service.

    Packs the keyword arguments into an ``ExamGeneratorConfig`` and delegates
    to ``exam_generator_service.generate_exam``.

    Args:
        db: Database session.
        course_id: Course ID.
        position_id: Position ID.
        single_choice_count: Number of single-choice questions.
        multiple_choice_count: Number of multiple-choice questions.
        true_false_count: Number of true/false questions.
        fill_blank_count: Number of fill-in-the-blank questions.
        essay_count: Number of essay questions.
        difficulty_level: Difficulty level (1-5).
        mistake_records: Mistake records as a JSON string.

    Returns:
        The generation result dict returned by the service.
    """
    settings = {
        "course_id": course_id,
        "position_id": position_id,
        "single_choice_count": single_choice_count,
        "multiple_choice_count": multiple_choice_count,
        "true_false_count": true_false_count,
        "fill_blank_count": fill_blank_count,
        "essay_count": essay_count,
        "difficulty_level": difficulty_level,
        "mistake_records": mistake_records,
    }
    exam_config = ExamGeneratorConfig(**settings)
    outcome = await exam_generator_service.generate_exam(db, exam_config)
    return outcome