Files
smart-project-pricing/scripts/monitor.sh
2026-01-31 21:33:06 +08:00

319 lines
9.0 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Smart Project Pricing Model - monitoring check script.
# Follows the 瑞小美 (Ruixiaomei) deployment conventions.
set -e

# Configuration: both values may be supplied through the environment;
# the defaults below apply when they are unset or empty.
: "${ALERT_EMAIL:=admin@example.com}"
: "${WEBHOOK_URL:=}" # WeCom / DingTalk webhook

# Colour codes for console output. They are kept as literal backslash
# sequences and only turned into real escapes when a log helper prints them.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
# Print a success line: the green "[OK]" tag followed by the message.
# Arguments: $1 - message text (backslash escapes in it are expanded)
# Outputs:   one line on stdout
log_info() {
  printf '%b\n' "${GREEN}[OK]${NC} $1"
}
# Print a warning line: the yellow "[WARN]" tag followed by the message.
# Arguments: $1 - message text (backslash escapes in it are expanded)
# Outputs:   one line on stdout
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
# Print an error line: the red "[ERROR]" tag followed by the message.
# Arguments: $1 - message text (backslash escapes in it are expanded)
# Outputs:   one line on stdout
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
#######################################
# Send an alert to every configured channel.
# Globals:   WEBHOOK_URL (read), ALERT_EMAIL (read)
# Arguments: $1 - alert title
#            $2 - alert message
#            $3 - level: "warning" (default) or "error"
# Outputs:   the formatted alert on the console via log_error / log_warn
# Returns:   0 - webhook and mail delivery are best-effort and never fail
#            the caller
#######################################
send_alert() {
  local title="$1"
  local message="$2"
  local level="${3:-warning}"
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  local full_message="[$timestamp] [智能项目定价模型] $title: $message"

  # Console output.
  if [ "$level" = "error" ]; then
    log_error "$full_message"
  else
    log_warn "$full_message"
  fi

  # WeCom / DingTalk webhook notification.
  if [ -n "$WEBHOOK_URL" ]; then
    # Escape the text for use inside a JSON string; without this a quote,
    # backslash or newline in the message yields an invalid payload.
    local payload_text=$full_message
    payload_text=${payload_text//\\/\\\\}
    payload_text=${payload_text//\"/\\\"}
    payload_text=${payload_text//$'\n'/\\n}
    payload_text=${payload_text//$'\r'/\\r}
    payload_text=${payload_text//$'\t'/\\t}

    # --max-time keeps an unreachable webhook from stalling the monitor.
    curl -s --max-time 10 -X POST "$WEBHOOK_URL" \
      -H "Content-Type: application/json" \
      -d "{\"msgtype\":\"text\",\"text\":{\"content\":\"$payload_text\"}}" \
      > /dev/null 2>&1 || true
  fi

  # Mail notification, only when a sendmail binary is available.
  if [ -n "$ALERT_EMAIL" ] && command -v sendmail &> /dev/null; then
    # printf with %s keeps backslashes in the title/message literal.
    printf 'Subject: [告警] 智能项目定价模型 - %s\n\n%s\n' "$title" "$full_message" \
      | sendmail "$ALERT_EMAIL" 2> /dev/null || true
  fi
}
#######################################
# Check the run state and health of the project's Docker containers.
# A container that is not running is logged as an error and alerted on;
# a running container whose health is neither "healthy" nor "unknown" is
# logged as a warning.
# Outputs:   one status line per container on stdout
# Returns:   0 when every container passed, 1 otherwise
#######################################
check_containers() {
  echo "检查容器状态..."
  echo ""

  local rc=0
  local name state health
  for name in "pricing-frontend" "pricing-backend" "pricing-mysql"; do
    # A failing inspect is mapped to a placeholder value instead of an error.
    state=$(docker inspect --format='{{.State.Status}}' "$name" 2>/dev/null || echo "not_found")
    health=$(docker inspect --format='{{.State.Health.Status}}' "$name" 2>/dev/null || echo "unknown")

    if [ "$state" != "running" ]; then
      log_error "$name: $state"
      send_alert "容器异常" "$name 状态异常: $state" "error"
      rc=1
      continue
    fi

    case "$health" in
      healthy | unknown)
        log_info "$name: $state (health: $health)"
        ;;
      *)
        log_warn "$name: $state (health: $health)"
        rc=1
        ;;
    esac
  done

  echo ""
  return "$rc"
}
# 检查服务健康
check_health() {
echo "检查服务健康..."
echo ""
# 后端健康检查
local backend_health=$(curl -sf http://localhost:8000/health 2>/dev/null)
if [ $? -eq 0 ]; then
log_info "后端服务: 健康"
echo " 响应: $backend_health"
else
log_error "后端服务: 不可访问"
send_alert "服务异常" "后端服务健康检查失败" "error"
fi
# 前端健康检查(通过 Nginx
local frontend_health=$(curl -sf http://localhost/health 2>/dev/null)
if [ $? -eq 0 ]; then
log_info "前端服务: 健康"
else
log_warn "前端服务: 不可访问(可能需要通过 Nginx 代理)"
fi
echo ""
}
#######################################
# Check that MySQL inside the pricing-mysql container answers a ping and,
# if so, print how many tables the pricing_model schema holds.
# A failed ping is logged as an error and alerted on.
# Outputs:   status lines on stdout
# Returns:   0
#######################################
check_database() {
  echo "检查数据库..."
  echo ""

  local ping_output
  ping_output=$(docker exec pricing-mysql mysqladmin ping -h localhost 2>&1 || echo "failed")

  case "$ping_output" in
    *alive*)
      log_info "MySQL: 连接正常"
      # Table count; falls back to "0" when the query cannot be run.
      local tables
      tables=$(docker exec pricing-mysql mysql -N -e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='pricing_model'" 2>/dev/null || echo "0")
      echo " 数据库表数量: $tables"
      ;;
    *)
      log_error "MySQL: 连接失败"
      send_alert "数据库异常" "MySQL 连接失败" "error"
      ;;
  esac

  echo ""
}
#######################################
# Check usage of the root filesystem and print Docker's disk footprint.
# Below 80% is OK, 80-89% raises a warning alert, 90% and above raises an
# error alert. An unreadable usage value is reported as a warning only.
# Outputs:   status lines on stdout
# Returns:   0
#######################################
check_disk() {
  echo "检查磁盘空间..."
  echo ""

  # Root filesystem. `df -P` keeps each filesystem on a single line, so the
  # percentage is always field 5 of line 2 even for long device names.
  local disk_usage
  disk_usage=$(df -P / 2>/dev/null | awk 'NR==2 {print $5}' | tr -d '%') || true

  if ! [[ "$disk_usage" =~ ^[0-9]+$ ]]; then
    # Without this guard a non-numeric value fails both -lt tests and ends
    # up in the "critical" branch, sending a false alert.
    log_warn "磁盘使用率: 无法获取"
  elif [ "$disk_usage" -lt 80 ]; then
    log_info "磁盘使用率: ${disk_usage}%"
  elif [ "$disk_usage" -lt 90 ]; then
    log_warn "磁盘使用率: ${disk_usage}% (警告)"
    send_alert "磁盘空间不足" "磁盘使用率达到 ${disk_usage}%" "warning"
  else
    log_error "磁盘使用率: ${disk_usage}% (危险)"
    send_alert "磁盘空间严重不足" "磁盘使用率达到 ${disk_usage}%" "error"
  fi

  # Docker disk usage (first size line reported by `docker system df`).
  local docker_disk
  docker_disk=$(docker system df --format '{{.Size}}' 2>/dev/null | head -1) || true
  echo " Docker 磁盘占用: $docker_disk"
  echo ""
}
#######################################
# Check host memory usage and list memory use of the pricing containers.
# Below 80% is OK, 80-89% raises a warning alert, 90% and above raises an
# error alert. An unreadable usage value is reported as a warning only.
# Outputs:   status lines on stdout
# Returns:   0
#######################################
check_memory() {
  echo "检查内存使用..."
  echo ""

  # used * 100 / total from line 2 of `free`; the `$2 > 0` guard avoids a
  # division by zero when the total cannot be read.
  local mem_usage
  mem_usage=$(free 2>/dev/null | awk 'NR==2 && $2 > 0 {printf "%.0f", $3*100/$2}') || true

  if ! [[ "$mem_usage" =~ ^[0-9]+$ ]]; then
    # Without this guard a non-numeric value fails both -lt tests and ends
    # up in the "critical" branch, sending a false alert.
    log_warn "内存使用率: 无法获取"
  elif [ "$mem_usage" -lt 80 ]; then
    log_info "内存使用率: ${mem_usage}%"
  elif [ "$mem_usage" -lt 90 ]; then
    log_warn "内存使用率: ${mem_usage}% (警告)"
    send_alert "内存不足" "内存使用率达到 ${mem_usage}%" "warning"
  else
    log_error "内存使用率: ${mem_usage}% (危险)"
    send_alert "内存严重不足" "内存使用率达到 ${mem_usage}%" "error"
  fi

  # Per-container memory use; no matching container is not an error.
  echo " 容器内存使用:"
  docker stats --no-stream --format " {{.Name}}: {{.MemUsage}}" 2>/dev/null | grep pricing || true
  echo ""
}
#######################################
# Count lines containing "error" (case-insensitive) in the last 100 lines
# of the pricing-backend container log.
# 0 matches is OK, 1-9 is a warning, 10 or more is logged as an error and
# raises a warning-level alert.
# Outputs:   one status line on stdout
# Returns:   0
#######################################
check_logs() {
  echo "检查最近错误日志..."
  echo ""

  # `grep -c` prints the count even when it is 0 but then exits 1; using
  # `|| true` (not `|| echo 0`) avoids appending a second "0" to the value.
  local error_count
  error_count=$(docker logs pricing-backend --tail 100 2>&1 | grep -c -i "error" || true)
  # Fall back to 0 if grep produced nothing usable.
  [[ "$error_count" =~ ^[0-9]+$ ]] || error_count=0

  if [ "$error_count" -eq 0 ]; then
    log_info "后端日志: 无错误"
  elif [ "$error_count" -lt 10 ]; then
    log_warn "后端日志: 发现 $error_count 个错误"
  else
    log_error "后端日志: 发现 $error_count 个错误"
    send_alert "日志错误过多" "后端日志发现 $error_count 个错误" "warning"
  fi

  echo ""
}
#######################################
# Measure how long the backend health endpoint takes to answer.
# On HTTP 200: under 500 ms is OK, under 2000 ms is a warning, anything
# slower is logged as an error and raises a warning-level alert.
# Any other outcome is logged as a failed request with the HTTP code.
# Outputs:   one status line on stdout
# Returns:   0
# Note:      timing uses `date +%s%N`, i.e. it needs a date(1) that
#            supports nanoseconds (GNU date does).
#######################################
check_api_performance() {
  echo "检查 API 性能..."
  echo ""

  local start_time end_time response latency
  start_time=$(date +%s%N)
  # curl already prints the status code through -w even when it fails
  # ("000" if no response was received), so a failure must not append
  # another value to the captured output.
  response=$(curl -sf -o /dev/null -w '%{http_code}' --max-time 10 http://localhost:8000/health 2>/dev/null) || true
  end_time=$(date +%s%N)
  [ -n "$response" ] || response="000"
  latency=$(((end_time - start_time) / 1000000))

  if [ "$response" = "200" ]; then
    if [ "$latency" -lt 500 ]; then
      log_info "健康检查 API: ${latency}ms"
    elif [ "$latency" -lt 2000 ]; then
      log_warn "健康检查 API: ${latency}ms (较慢)"
    else
      log_error "健康检查 API: ${latency}ms (过慢)"
      send_alert "API 响应过慢" "健康检查 API 响应时间 ${latency}ms" "warning"
    fi
  else
    log_error "健康检查 API: 请求失败 (HTTP $response)"
  fi

  echo ""
}
#######################################
# Run every check and print a full monitoring report.
# Each check is invoked with `|| rc=1` so that a failing check (for example
# check_containers returning 1) no longer aborts the report under `set -e`;
# the remaining checks still run and the failure is reflected in the
# return status instead.
# Outputs:   the report on stdout
# Returns:   0 when every check returned 0, 1 otherwise
#######################################
generate_report() {
  echo "=========================================="
  echo " 智能项目定价模型 - 监控报告"
  echo " $(date '+%Y-%m-%d %H:%M:%S')"
  echo "=========================================="
  echo ""

  local rc=0
  local check
  for check in \
    check_containers \
    check_health \
    check_database \
    check_disk \
    check_memory \
    check_logs \
    check_api_performance; do
    "$check" || rc=1
  done

  echo "=========================================="
  echo " 检查完成"
  echo "=========================================="
  return "$rc"
}
#######################################
# Entry point: dispatch the requested action.
# Arguments: $1 - action (default "report"): report, containers, health,
#                 database, disk, memory, logs, performance or quick.
#                 Any other value prints the usage text.
# Returns:   the status of the selected check; "quick" exits 1 as soon as
#            one of its checks fails.
#######################################
main() {
  local action="${1:-report}"

  # Work from the directory above the one containing this script.
  cd "$(dirname "$0")/.." || exit 1

  case "$action" in
    report)
      generate_report
      ;;
    containers)
      check_containers
      ;;
    health)
      check_health
      ;;
    database)
      check_database
      ;;
    disk)
      check_disk
      ;;
    memory)
      check_memory
      ;;
    logs)
      check_logs
      ;;
    performance)
      # Previously only reachable through the full report.
      check_api_performance
      ;;
    quick)
      # Quick check (suitable for cron).
      check_containers || exit 1
      check_database || exit 1
      check_disk || exit 1
      ;;
    *)
      echo "智能项目定价模型 - 监控检查脚本"
      echo ""
      echo "用法: $0 {report|containers|health|database|disk|memory|logs|performance|quick}"
      echo ""
      echo "命令:"
      echo " report 完整监控报告"
      echo " containers 检查容器状态"
      echo " health 检查服务健康"
      echo " database 检查数据库"
      echo " disk 检查磁盘空间"
      echo " memory 检查内存使用"
      echo " logs 检查错误日志"
      echo " performance 检查 API 性能"
      echo " quick 快速检查(适合 cron)"
      echo ""
      echo "环境变量:"
      echo " ALERT_EMAIL 告警邮箱"
      echo " WEBHOOK_URL 企业微信/钉钉 webhook"
      ;;
  esac
}
# Script entry point: forward all command-line arguments to main.
main "$@"