""" 答案判断服务 - Python 原生实现 功能: - 判断填空题与问答题的答案是否正确 - 通过 AI 语义理解比对用户答案与标准答案 提供稳定可靠的答案判断能力。 """ import logging from dataclasses import dataclass from typing import Any, Optional from .ai_service import AIService, AIResponse from .prompts.answer_judge_prompts import ( SYSTEM_PROMPT, USER_PROMPT, CORRECT_KEYWORDS, INCORRECT_KEYWORDS, ) logger = logging.getLogger(__name__) @dataclass class JudgeResult: """判断结果""" is_correct: bool raw_response: str ai_provider: str = "" ai_model: str = "" ai_tokens: int = 0 ai_latency_ms: int = 0 class AnswerJudgeService: """ 答案判断服务 使用 Python 原生实现。 使用示例: ```python service = AnswerJudgeService() result = await service.judge( db=db_session, # 传入 db_session 用于记录调用日志 question="玻尿酸的主要作用是什么?", correct_answer="补水保湿、填充塑形", user_answer="保湿和塑形", analysis="玻尿酸具有补水保湿和填充塑形两大功能" ) print(result.is_correct) # True ``` """ MODULE_CODE = "answer_judge" async def judge( self, question: str, correct_answer: str, user_answer: str, analysis: str = "", db: Any = None # 数据库会话,用于记录 AI 调用日志 ) -> JudgeResult: """ 判断答案是否正确 Args: question: 题目内容 correct_answer: 标准答案 user_answer: 用户答案 analysis: 答案解析(可选) db: 数据库会话,用于记录调用日志(符合 AI 接入规范) Returns: JudgeResult 判断结果 """ try: logger.info( f"开始判断答案 - question: {question[:50]}..., " f"user_answer: {user_answer[:50]}..." ) # 创建 AIService 实例(传入 db_session 用于记录调用日志) ai_service = AIService(module_code=self.MODULE_CODE, db_session=db) # 构建提示词 user_prompt = USER_PROMPT.format( question=question, correct_answer=correct_answer, user_answer=user_answer, analysis=analysis or "无" ) # 调用 AI messages = [ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": user_prompt} ] ai_response = await ai_service.chat( messages=messages, model="gemini-3-flash-preview", # 使用快速模型,提升响应速度 temperature=0.1, # 低温度,确保输出稳定 prompt_name="answer_judge" ) logger.info( f"AI 判断完成 - provider: {ai_response.provider}, " f"response: {ai_response.content}, " f"latency: {ai_response.latency_ms}ms" ) # 解析 AI 输出 is_correct = self._parse_judge_result(ai_response.content) logger.info(f"答案判断结果: {is_correct}") return JudgeResult( is_correct=is_correct, raw_response=ai_response.content, ai_provider=ai_response.provider, ai_model=ai_response.model, ai_tokens=ai_response.total_tokens, ai_latency_ms=ai_response.latency_ms, ) except Exception as e: logger.error(f"答案判断失败: {e}", exc_info=True) # 出错时默认返回错误,保守处理 return JudgeResult( is_correct=False, raw_response=f"判断失败: {e}", ) def _parse_judge_result(self, ai_output: str) -> bool: """ 解析 AI 输出的判断结果 Args: ai_output: AI 返回的文本 Returns: bool: True 表示正确,False 表示错误 """ # 清洗输出 output = ai_output.strip().lower() # 检查是否包含正确关键词 for keyword in CORRECT_KEYWORDS: if keyword.lower() in output: return True # 检查是否包含错误关键词 for keyword in INCORRECT_KEYWORDS: if keyword.lower() in output: return False # 无法识别时,默认返回错误(保守处理) logger.warning(f"无法解析判断结果,默认返回错误: {ai_output}") return False # ==================== 全局实例 ==================== answer_judge_service = AnswerJudgeService() # ==================== 便捷函数 ==================== async def judge_answer( question: str, correct_answer: str, user_answer: str, analysis: str = "" ) -> bool: """ 便捷函数:判断答案是否正确 Args: question: 题目内容 correct_answer: 标准答案 user_answer: 用户答案 analysis: 答案解析 Returns: bool: True 表示正确,False 表示错误 """ result = await answer_judge_service.judge( question=question, correct_answer=correct_answer, user_answer=user_answer, analysis=analysis ) return result.is_correct