#!/bin/bash
#
# Smart Project Pricing Model - monitoring check script.
# Follows the Ruixiaomei (瑞小美) deployment conventions.
#
# Usage: monitor.sh {report|containers|health|database|disk|memory|logs|quick}
#
# Environment variables:
#   ALERT_EMAIL   recipient for alert mails (default: admin@example.com)
#   WEBHOOK_URL   WeCom / DingTalk webhook URL (default: empty = disabled)
#   CURL_TIMEOUT  per-request curl timeout in seconds (default: 10)

set -e

# Configuration
ALERT_EMAIL="${ALERT_EMAIL:-admin@example.com}"
WEBHOOK_URL="${WEBHOOK_URL:-}"          # WeCom / DingTalk webhook
CURL_TIMEOUT="${CURL_TIMEOUT:-10}"      # keeps a hung endpoint from hanging the monitor

# Colored output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'

# Print an informational ("OK") line to stdout.
# Arguments: $1 - message
log_info() {
    printf '%b[OK]%b %s\n' "$GREEN" "$NC" "$1"
}

# Print a warning line to stdout.
# Arguments: $1 - message
log_warn() {
    printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
}

# Print an error line to stdout (stdout, not stderr, so it stays in the report).
# Arguments: $1 - message
log_error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1"
}

# Escape a string so it can be embedded inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
json_escape() {
    local s=$1
    s=${s//\\/\\\\}       # backslash first, so later escapes are not doubled
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# Send an alert to the console, the webhook (if configured) and sendmail
# (if installed). Delivery is best-effort: failures never abort the script.
# Globals:   WEBHOOK_URL, ALERT_EMAIL, CURL_TIMEOUT (read)
# Arguments: $1 - title
#            $2 - message
#            $3 - level: "warning" (default) or "error"
send_alert() {
    local title="$1"
    local message="$2"
    local level="${3:-warning}"
    local timestamp full_message escaped payload

    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    full_message="[$timestamp] [智能项目定价模型] $title: $message"

    # Console output
    if [[ "$level" == "error" ]]; then
        log_error "$full_message"
    else
        log_warn "$full_message"
    fi

    # WeCom / DingTalk notification
    if [[ -n "$WEBHOOK_URL" ]]; then
        # The message may contain quotes or newlines (e.g. command output),
        # so it must be escaped before being placed in the JSON body.
        escaped=$(json_escape "$full_message")
        payload=$(printf '{"msgtype":"text","text":{"content":"%s"}}' "$escaped")
        curl -s --max-time "$CURL_TIMEOUT" -X POST "$WEBHOOK_URL" \
            -H "Content-Type: application/json" \
            -d "$payload" \
            > /dev/null 2>&1 || true
    fi

    # E-mail (only when sendmail is available)
    if [[ -n "$ALERT_EMAIL" ]] && command -v sendmail &> /dev/null; then
        printf 'Subject: [告警] 智能项目定价模型 - %s\n\n%s\n' "$title" "$full_message" \
            | sendmail "$ALERT_EMAIL" 2>/dev/null || true
    fi
}

# Check the state of the Docker containers.
# A container counts as healthy when it is running and its health status is
# "healthy" or cannot be determined ("unknown").
# Returns: 0 if all containers are healthy, 1 otherwise
check_containers() {
    echo "检查容器状态..."
    echo ""

    local containers=("pricing-frontend" "pricing-backend" "pricing-mysql")
    local rc=0
    local container status health

    for container in "${containers[@]}"; do
        # Assign the fallback instead of appending it to partial output.
        status=$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null) \
            || status="not_found"
        health=$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null) \
            || health="unknown"
        [[ -n "$status" ]] || status="not_found"
        [[ -n "$health" ]] || health="unknown"

        if [[ "$status" == "running" ]]; then
            if [[ "$health" == "healthy" || "$health" == "unknown" ]]; then
                log_info "$container: $status (health: $health)"
            else
                log_warn "$container: $status (health: $health)"
                rc=1
            fi
        else
            log_error "$container: $status"
            send_alert "容器异常" "$container 状态异常: $status" "error"
            rc=1
        fi
    done

    echo ""
    return "$rc"
}

# Check the HTTP health endpoints of the backend and the frontend.
# Always returns 0; problems are reported through the log and send_alert.
check_health() {
    echo "检查服务健康..."
    echo ""

    local backend_health frontend_health

    # Backend health check. The assignment is tested directly: with
    # `local var=$(cmd)` the status seen by `$?` is that of `local`, not curl.
    if backend_health=$(curl -sf --max-time "$CURL_TIMEOUT" \
            http://localhost:8000/health 2>/dev/null); then
        log_info "后端服务: 健康"
        echo " 响应: $backend_health"
    else
        log_error "后端服务: 不可访问"
        send_alert "服务异常" "后端服务健康检查失败" "error"
    fi

    # Frontend health check (through Nginx); the body is not used.
    if frontend_health=$(curl -sf --max-time "$CURL_TIMEOUT" \
            http://localhost/health 2>/dev/null); then
        log_info "前端服务: 健康"
    else
        log_warn "前端服务: 不可访问(可能需要通过 Nginx 代理)"
    fi

    echo ""
}

# Check the MySQL connection inside the pricing-mysql container.
# Returns: 0 if `mysqladmin ping` reports the server alive, 1 otherwise
check_database() {
    echo "检查数据库..."
    echo ""

    local rc=0
    local db_status table_count

    db_status=$(docker exec pricing-mysql mysqladmin ping -h localhost 2>&1) || true

    if [[ "$db_status" == *"alive"* ]]; then
        log_info "MySQL: 连接正常"

        # Number of tables in the pricing_model schema
        table_count=$(docker exec pricing-mysql mysql -N -e \
            "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='pricing_model'" \
            2>/dev/null) || table_count="0"
        echo " 数据库表数量: $table_count"
    else
        log_error "MySQL: 连接失败"
        send_alert "数据库异常" "MySQL 连接失败" "error"
        rc=1
    fi

    echo ""
    return "$rc"
}

# Check disk usage of the root filesystem (<80% ok, <90% warning, else critical).
# Returns: 0 when usage is below the critical level, 1 when it is critical or
#          could not be determined
check_disk() {
    echo "检查磁盘空间..."
    echo ""

    local rc=0
    local disk_usage docker_disk

    # -P forces the POSIX single-line-per-filesystem format, so the
    # capacity column is always field 5 of line 2.
    disk_usage=$(df -P / 2>/dev/null | awk 'NR==2 {print $5}' | tr -d '%')

    if [[ ! "$disk_usage" =~ ^[0-9]+$ ]]; then
        log_error "磁盘使用率: 无法获取"
        rc=1
    elif (( disk_usage < 80 )); then
        log_info "磁盘使用率: ${disk_usage}%"
    elif (( disk_usage < 90 )); then
        log_warn "磁盘使用率: ${disk_usage}% (警告)"
        send_alert "磁盘空间不足" "磁盘使用率达到 ${disk_usage}%" "warning"
    else
        log_error "磁盘使用率: ${disk_usage}% (危险)"
        send_alert "磁盘空间严重不足" "磁盘使用率达到 ${disk_usage}%" "error"
        rc=1
    fi

    # Docker disk usage (first row of `docker system df`)
    docker_disk=$(docker system df --format '{{.Size}}' 2>/dev/null | head -1)
    echo " Docker 磁盘占用: $docker_disk"

    echo ""
    return "$rc"
}

# Check host memory usage (<80% ok, <90% warning, else critical) and list
# the memory usage of the pricing containers.
# Always returns 0; problems are reported through the log and send_alert.
check_memory() {
    echo "检查内存使用..."
    echo ""

    local mem_usage

    # used*100/total from the second line of `free`; skip a zero total.
    mem_usage=$(free 2>/dev/null | awk 'NR==2 && $2 > 0 {printf "%.0f", $3*100/$2}')

    if [[ ! "$mem_usage" =~ ^[0-9]+$ ]]; then
        log_warn "内存使用率: 无法获取"
    elif (( mem_usage < 80 )); then
        log_info "内存使用率: ${mem_usage}%"
    elif (( mem_usage < 90 )); then
        log_warn "内存使用率: ${mem_usage}% (警告)"
        send_alert "内存不足" "内存使用率达到 ${mem_usage}%" "warning"
    else
        log_error "内存使用率: ${mem_usage}% (危险)"
        send_alert "内存严重不足" "内存使用率达到 ${mem_usage}%" "error"
    fi

    # Per-container memory usage
    echo " 容器内存使用:"
    docker stats --no-stream --format " {{.Name}}: {{.MemUsage}}" 2>/dev/null \
        | grep pricing || true

    echo ""
}

# Count lines containing "error" (case-insensitive) in the last 100 lines of
# the backend container log, including docker's own stderr.
# Always returns 0; problems are reported through the log and send_alert.
check_logs() {
    echo "检查最近错误日志..."
    echo ""

    local error_count

    # grep -c already prints "0" (and exits 1) when nothing matches, so only
    # the exit status is neutralised; no second "0" is appended.
    error_count=$(docker logs pricing-backend --tail 100 2>&1 | grep -c -i "error") || true
    [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0

    if (( error_count == 0 )); then
        log_info "后端日志: 无错误"
    elif (( error_count < 10 )); then
        log_warn "后端日志: 发现 $error_count 个错误"
    else
        log_error "后端日志: 发现 $error_count 个错误"
        send_alert "日志错误过多" "后端日志发现 $error_count 个错误" "warning"
    fi

    echo ""
}

# Measure the wall-clock latency of the backend health endpoint
# (<500ms ok, <2000ms slow, else too slow).
# Always returns 0; problems are reported through the log and send_alert.
check_api_performance() {
    echo "检查 API 性能..."
    echo ""

    local start_time end_time response latency

    start_time=$(date +%s%N)
    # No -f here: curl's -w already prints the real status code (or 000 when
    # no response was received), so a fallback is only needed if curl printed
    # nothing at all.
    response=$(curl -s -o /dev/null --max-time "$CURL_TIMEOUT" \
        -w '%{http_code}' http://localhost:8000/health 2>/dev/null) || true
    end_time=$(date +%s%N)
    [[ -n "$response" ]] || response="000"

    latency=$(( (end_time - start_time) / 1000000 ))   # ns -> ms

    if [[ "$response" == "200" ]]; then
        if (( latency < 500 )); then
            log_info "健康检查 API: ${latency}ms"
        elif (( latency < 2000 )); then
            log_warn "健康检查 API: ${latency}ms (较慢)"
        else
            log_error "健康检查 API: ${latency}ms (过慢)"
            send_alert "API 响应过慢" "健康检查 API 响应时间 ${latency}ms" "warning"
        fi
    else
        log_error "健康检查 API: 请求失败 (HTTP $response)"
    fi

    echo ""
}

# Run every check and print a full report.
# A failing check no longer aborts the report: all checks run, and the
# combined status is returned at the end.
# Returns: 0 if every check returned 0, 1 otherwise
generate_report() {
    local rc=0
    local check

    echo "=========================================="
    echo " 智能项目定价模型 - 监控报告"
    echo " $(date '+%Y-%m-%d %H:%M:%S')"
    echo "=========================================="
    echo ""

    for check in \
        check_containers \
        check_health \
        check_database \
        check_disk \
        check_memory \
        check_logs \
        check_api_performance; do
        # `|| rc=1` keeps `set -e` from terminating the script mid-report.
        "$check" || rc=1
    done

    echo "=========================================="
    echo " 检查完成"
    echo "=========================================="

    return "$rc"
}

# Entry point: dispatch on the requested action.
# Arguments: $1 - action (default: report); anything unknown prints usage
main() {
    local action="${1:-report}"

    cd -- "$(dirname -- "$0")/.." || {
        echo "cannot change to project directory" >&2
        exit 1
    }

    case "$action" in
        report)
            generate_report
            ;;
        containers)
            check_containers
            ;;
        health)
            check_health
            ;;
        database)
            check_database
            ;;
        disk)
            check_disk
            ;;
        memory)
            check_memory
            ;;
        logs)
            check_logs
            ;;
        quick)
            # Quick check (suitable for cron)
            check_containers || exit 1
            check_database || exit 1
            check_disk || exit 1
            ;;
        *)
            echo "智能项目定价模型 - 监控检查脚本"
            echo ""
            echo "用法: $0 {report|containers|health|database|disk|memory|logs|quick}"
            echo ""
            echo "命令:"
            echo " report 完整监控报告"
            echo " containers 检查容器状态"
            echo " health 检查服务健康"
            echo " database 检查数据库"
            echo " disk 检查磁盘空间"
            echo " memory 检查内存使用"
            echo " logs 检查错误日志"
            echo " quick 快速检查(适合 cron)"
            echo ""
            echo "环境变量:"
            echo " ALERT_EMAIL 告警邮箱"
            echo " WEBHOOK_URL 企业微信/钉钉 webhook"
            echo " CURL_TIMEOUT curl 请求超时秒数(默认 10)"
            ;;
    esac
}

main "$@"