"""AI统计上报客户端""" import os import json import asyncio import logging import threading from datetime import datetime from decimal import Decimal from typing import Optional, List from dataclasses import dataclass, asdict from pathlib import Path try: import httpx HTTPX_AVAILABLE = True except ImportError: HTTPX_AVAILABLE = False from .trace import get_trace_id, get_tenant_id, get_user_id logger = logging.getLogger(__name__) @dataclass class AICallEvent: """AI调用事件""" tenant_id: int app_code: str module_code: str prompt_name: str model: str input_tokens: int = 0 output_tokens: int = 0 cost: Decimal = Decimal("0") latency_ms: int = 0 status: str = "success" user_id: Optional[int] = None trace_id: Optional[str] = None event_time: datetime = None def __post_init__(self): if self.event_time is None: self.event_time = datetime.now() if self.trace_id is None: self.trace_id = get_trace_id() if self.user_id is None: self.user_id = get_user_id() def to_dict(self) -> dict: """转换为可序列化的字典""" return { "tenant_id": self.tenant_id, "app_code": self.app_code, "module_code": self.module_code, "prompt_name": self.prompt_name, "model": self.model, "input_tokens": self.input_tokens, "output_tokens": self.output_tokens, "cost": str(self.cost), "latency_ms": self.latency_ms, "status": self.status, "user_id": self.user_id, "trace_id": self.trace_id, "event_time": self.event_time.isoformat() if self.event_time else None } class StatsClient: """统计上报客户端 使用示例: stats = StatsClient(tenant_id=1, app_code="011-ai-interview") # 上报AI调用 stats.report_ai_call( module_code="interview", prompt_name="generate_question", model="gpt-4", input_tokens=100, output_tokens=200, latency_ms=1500 ) """ # 失败事件持久化文件 FAILED_EVENTS_FILE = ".platform_failed_events.json" def __init__( self, tenant_id: int, app_code: str, platform_url: Optional[str] = None, api_key: Optional[str] = None, local_only: bool = False, max_retries: int = 3, retry_delay: float = 1.0, timeout: float = 10.0 ): self.tenant_id = tenant_id self.app_code = app_code self.platform_url = platform_url or os.getenv("PLATFORM_URL", "") self.api_key = api_key or os.getenv("PLATFORM_API_KEY", "") self.local_only = local_only or not self.platform_url or not HTTPX_AVAILABLE self.max_retries = max_retries self.retry_delay = retry_delay self.timeout = timeout # 批量上报缓冲区 self._buffer: List[AICallEvent] = [] self._buffer_size = 10 # 达到此数量时自动上报 self._lock = threading.Lock() # 在启动时尝试发送之前失败的事件 if not self.local_only: self._retry_failed_events() def report_ai_call( self, module_code: str, prompt_name: str, model: str, input_tokens: int = 0, output_tokens: int = 0, cost: Decimal = Decimal("0"), latency_ms: int = 0, status: str = "success", user_id: Optional[int] = None, flush: bool = False ) -> AICallEvent: """上报AI调用事件 Args: module_code: 模块编码 prompt_name: Prompt名称 model: 模型名称 input_tokens: 输入token数 output_tokens: 输出token数 cost: 成本 latency_ms: 延迟毫秒 status: 状态 (success/error) user_id: 用户ID(可选,默认从上下文获取) flush: 是否立即发送 Returns: 创建的事件对象 """ event = AICallEvent( tenant_id=self.tenant_id, app_code=self.app_code, module_code=module_code, prompt_name=prompt_name, model=model, input_tokens=input_tokens, output_tokens=output_tokens, cost=cost, latency_ms=latency_ms, status=status, user_id=user_id ) with self._lock: self._buffer.append(event) should_flush = flush or len(self._buffer) >= self._buffer_size if should_flush: self.flush() return event def flush(self): """发送缓冲区中的所有事件""" with self._lock: if not self._buffer: return events = self._buffer.copy() self._buffer.clear() if self.local_only: # 本地模式:仅打印 for 
event in events: logger.info(f"[STATS] {event.app_code}/{event.module_code}: " f"{event.prompt_name} - {event.input_tokens}+{event.output_tokens} tokens") else: # 远程上报 self._send_to_platform(events) def _send_to_platform(self, events: List[AICallEvent]): """发送事件到平台""" if not HTTPX_AVAILABLE: logger.warning("httpx not installed, falling back to local mode") return # 转换事件为可序列化格式 payload = {"events": [e.to_dict() for e in events]} # 尝试在事件循环中运行 try: loop = asyncio.get_running_loop() # 已在异步上下文中,创建任务 asyncio.create_task(self._send_async(payload, events)) except RuntimeError: # 没有运行中的事件循环,使用同步方式 self._send_sync(payload, events) def _send_sync(self, payload: dict, events: List[AICallEvent]): """同步发送事件""" url = f"{self.platform_url.rstrip('/')}/api/stats/report/batch" headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"} for attempt in range(self.max_retries): try: with httpx.Client(timeout=self.timeout) as client: response = client.post(url, json=payload, headers=headers) if response.status_code == 200: result = response.json() logger.debug(f"Stats reported successfully: {result.get('count', len(events))} events") return else: logger.warning(f"Stats report failed with status {response.status_code}: {response.text}") except httpx.TimeoutException: logger.warning(f"Stats report timeout (attempt {attempt + 1}/{self.max_retries})") except httpx.RequestError as e: logger.warning(f"Stats report request error (attempt {attempt + 1}/{self.max_retries}): {e}") except Exception as e: logger.error(f"Stats report unexpected error: {e}") break # 重试延迟 if attempt < self.max_retries - 1: import time time.sleep(self.retry_delay * (attempt + 1)) # 所有重试都失败,持久化到文件 self._persist_failed_events(events) async def _send_async(self, payload: dict, events: List[AICallEvent]): """异步发送事件""" url = f"{self.platform_url.rstrip('/')}/api/stats/report/batch" headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"} for attempt in range(self.max_retries): try: async with httpx.AsyncClient(timeout=self.timeout) as client: response = await client.post(url, json=payload, headers=headers) if response.status_code == 200: result = response.json() logger.debug(f"Stats reported successfully: {result.get('count', len(events))} events") return else: logger.warning(f"Stats report failed with status {response.status_code}: {response.text}") except httpx.TimeoutException: logger.warning(f"Stats report timeout (attempt {attempt + 1}/{self.max_retries})") except httpx.RequestError as e: logger.warning(f"Stats report request error (attempt {attempt + 1}/{self.max_retries}): {e}") except Exception as e: logger.error(f"Stats report unexpected error: {e}") break # 重试延迟 if attempt < self.max_retries - 1: await asyncio.sleep(self.retry_delay * (attempt + 1)) # 所有重试都失败,持久化到文件 self._persist_failed_events(events) def _persist_failed_events(self, events: List[AICallEvent]): """持久化失败的事件到文件""" try: failed_file = Path(self.FAILED_EVENTS_FILE) existing = [] if failed_file.exists(): try: existing = json.loads(failed_file.read_text()) except (json.JSONDecodeError, IOError): existing = [] # 添加新的失败事件 for event in events: existing.append(event.to_dict()) # 限制最多保存1000条 if len(existing) > 1000: existing = existing[-1000:] failed_file.write_text(json.dumps(existing, ensure_ascii=False, indent=2)) logger.info(f"Persisted {len(events)} failed events to {self.FAILED_EVENTS_FILE}") except Exception as e: logger.error(f"Failed to persist events: {e}") def _retry_failed_events(self): """重试之前失败的事件""" try: failed_file = 
Path(self.FAILED_EVENTS_FILE) if not failed_file.exists(): return events_data = json.loads(failed_file.read_text()) if not events_data: return logger.info(f"Retrying {len(events_data)} previously failed events") # 尝试发送 payload = {"events": events_data} url = f"{self.platform_url.rstrip('/')}/api/stats/report/batch" headers = {"X-API-Key": self.api_key, "Content-Type": "application/json"} try: with httpx.Client(timeout=self.timeout) as client: response = client.post(url, json=payload, headers=headers) if response.status_code == 200: # 成功后删除文件 failed_file.unlink() logger.info(f"Successfully sent {len(events_data)} previously failed events") except Exception as e: logger.warning(f"Failed to retry events: {e}") except Exception as e: logger.error(f"Error loading failed events: {e}") def __del__(self): """析构时发送剩余事件""" try: if self._buffer: self.flush() except Exception: pass # 忽略析构时的错误
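

# A minimal usage sketch, guarded so importing this module has no side effects.
# The values below mirror the example in the StatsClient docstring and are
# illustrative only; local_only=True is assumed so no platform URL or API key
# is required. Because of the relative import of .trace, run it as a module
# (python -m <your_package>.<this_module>), not as a standalone script.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # In local_only mode, flush() logs each event instead of POSTing
    # to /api/stats/report/batch.
    stats = StatsClient(tenant_id=1, app_code="011-ai-interview", local_only=True)

    stats.report_ai_call(
        module_code="interview",
        prompt_name="generate_question",
        model="gpt-4",
        input_tokens=100,
        output_tokens=200,
        latency_ms=1500,
        flush=True,  # send (here: log) immediately instead of waiting for the buffer to fill
    )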