Initial commit: 智能项目定价模型
This commit is contained in:
318
scripts/monitor.sh
Executable file
318
scripts/monitor.sh
Executable file
@@ -0,0 +1,318 @@
|
||||
#!/bin/bash
# Smart Project Pricing Model (智能项目定价模型) - monitoring check script.
# Follows the Ruixiaomei (瑞小美) deployment conventions.

# Stop at the first unguarded command failure.
set -e

# Alerting configuration. Both values may be supplied through the environment;
# an unset or empty ALERT_EMAIL falls back to the default address.
: "${ALERT_EMAIL:=admin@example.com}"
: "${WEBHOOK_URL:=}"   # WeCom / DingTalk webhook; empty by default

# ANSI colour escape sequences used by the log_* helpers.
NC='\033[0m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
|
||||
|
||||
# Print a success line: a green "[OK]" tag followed by the message.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
# Outputs:   one line on stdout
log_info() {
    local text="$1"
    printf '%b\n' "${GREEN}[OK]${NC} ${text}"
}
|
||||
|
||||
# Print a warning line: a yellow "[WARN]" tag followed by the message.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
# Outputs:   one line on stdout
log_warn() {
    local text="$1"
    printf '%b\n' "${YELLOW}[WARN]${NC} ${text}"
}
|
||||
|
||||
# Print an error line: a red "[ERROR]" tag followed by the message.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
# Outputs:   one line on stdout (not stderr)
log_error() {
    local text="$1"
    printf '%b\n' "${RED}[ERROR]${NC} ${text}"
}
|
||||
|
||||
# Emit an alert on the console and, when configured, via webhook and e-mail.
# Globals:
#   WEBHOOK_URL (read) - WeCom/DingTalk webhook; the POST is skipped when empty
#   ALERT_EMAIL (read) - recipient; mail is sent only if `sendmail` is on PATH
# Arguments:
#   $1 - alert title
#   $2 - alert message
#   $3 - severity: "warning" (default) or "error"
# Outputs:
#   The formatted alert through log_error (severity "error") or log_warn.
# Returns:
#   0 - webhook and mail delivery are best-effort; their failures are ignored.
send_alert() {
    local title="$1"
    local message="$2"
    local level="${3:-warning}"   # warning, error

    local timestamp
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    local full_message="[$timestamp] [智能项目定价模型] $title: $message"

    # Console output
    if [ "$level" = "error" ]; then
        log_error "$full_message"
    else
        log_warn "$full_message"
    fi

    # WeCom / DingTalk notification
    if [ -n "$WEBHOOK_URL" ]; then
        # Escape the characters that would otherwise break the JSON string
        # literal. Backslashes go first so later escapes are not doubled.
        local json_text="$full_message"
        json_text=${json_text//\\/\\\\}
        json_text=${json_text//\"/\\\"}
        json_text=${json_text//$'\n'/\\n}
        json_text=${json_text//$'\r'/\\r}
        json_text=${json_text//$'\t'/\\t}

        curl -s -X POST "$WEBHOOK_URL" \
            -H "Content-Type: application/json" \
            -d "{\"msgtype\":\"text\",\"text\":{\"content\":\"$json_text\"}}" \
            > /dev/null 2>&1 || true
    fi

    # E-mail notification (only when sendmail is installed).
    # printf keeps the message verbatim instead of re-interpreting backslashes.
    if [ -n "$ALERT_EMAIL" ] && command -v sendmail &> /dev/null; then
        printf 'Subject: [告警] 智能项目定价模型 - %s\n\n%s\n' "$title" "$full_message" | \
            sendmail "$ALERT_EMAIL" 2>/dev/null || true
    fi
}
|
||||
|
||||
# Check the state of the three application containers.
# A container that is not "running" is logged as an error and raises an
# "error" alert. A running container whose health is neither "healthy" nor
# "unknown" is only logged as a warning. "unknown" is the fallback used when
# `docker inspect` cannot report a health status.
# Outputs: one log line per container on stdout
# Returns: 0 when every container passed, 1 otherwise
check_containers() {
    echo "检查容器状态..."
    echo ""

    local failures=0
    local name state health_state

    for name in "pricing-frontend" "pricing-backend" "pricing-mysql"; do
        state=$(docker inspect --format='{{.State.Status}}' "$name" 2>/dev/null || echo "not_found")
        health_state=$(docker inspect --format='{{.State.Health.Status}}' "$name" 2>/dev/null || echo "unknown")

        # Not running at all: report, alert, and move on to the next container.
        if [ "$state" != "running" ]; then
            log_error "$name: $state"
            send_alert "容器异常" "$name 状态异常: $state" "error"
            failures=$((failures + 1))
            continue
        fi

        case "$health_state" in
            healthy|unknown)
                log_info "$name: $state (health: $health_state)"
                ;;
            *)
                log_warn "$name: $state (health: $health_state)"
                failures=$((failures + 1))
                ;;
        esac
    done

    echo ""

    if [ "$failures" -eq 0 ]; then
        return 0
    fi
    return 1
}
|
||||
|
||||
# Probe the backend and frontend HTTP health endpoints.
# The backend (http://localhost:8000/health) raises an "error" alert when it
# cannot be reached; the frontend (http://localhost/health, served via Nginx)
# only logs a warning.
# Outputs: log lines on stdout, plus the backend response body on success
# Returns: 0 (failures are reported through logs/alerts, not the exit status)
check_health() {
    echo "检查服务健康..."
    echo ""

    # Backend health check.
    # curl is tested directly in the `if`: with `local v=$(curl ...)` the
    # status seen by `$?` is that of `local`, which is always 0.
    local backend_health
    if backend_health=$(curl -sf http://localhost:8000/health 2>/dev/null); then
        log_info "后端服务: 健康"
        echo " 响应: $backend_health"
    else
        log_error "后端服务: 不可访问"
        send_alert "服务异常" "后端服务健康检查失败" "error"
    fi

    # Frontend health check (through Nginx); the response body is not needed.
    if curl -sf http://localhost/health > /dev/null 2>&1; then
        log_info "前端服务: 健康"
    else
        log_warn "前端服务: 不可访问(可能需要通过 Nginx 代理)"
    fi

    echo ""
}
|
||||
|
||||
# Check that MySQL inside the pricing-mysql container answers a ping and,
# if so, print how many tables the pricing_model schema contains.
# A failed ping is logged as an error and raises an "error" alert.
# Outputs: log lines on stdout
# Returns: always 0 (the status of the final echo), so a caller written as
#          `check_database || exit 1` never sees a failure.
check_database() {
    echo "检查数据库..."
    echo ""

    local ping_output
    ping_output=$(docker exec pricing-mysql mysqladmin ping -h localhost 2>&1 || echo "failed")

    case "$ping_output" in
        *alive*)
            log_info "MySQL: 连接正常"

            # Table count; falls back to "0" when the query cannot be run.
            local tables
            tables=$(docker exec pricing-mysql mysql -N -e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='pricing_model'" 2>/dev/null || echo "0")
            echo " 数据库表数量: $tables"
            ;;
        *)
            log_error "MySQL: 连接失败"
            send_alert "数据库异常" "MySQL 连接失败" "error"
            ;;
    esac

    echo ""
}
|
||||
|
||||
# Check usage of the root filesystem and print Docker's disk footprint.
# Thresholds: below 80% is OK, 80-89% logs a warning and raises a "warning"
# alert, 90% and above logs an error and raises an "error" alert.
# Outputs: log lines on stdout
# Returns: 0 (problems are reported through logs/alerts, not the exit status)
check_disk() {
    echo "检查磁盘空间..."
    echo ""

    # Root filesystem usage. `df -P` keeps each filesystem on a single line so
    # the percentage is always field 5 of line 2.
    local disk_usage
    disk_usage=$(df -P / 2>/dev/null | awk 'NR==2 {print $5}' | tr -d '%') || true

    case "$disk_usage" in
        ''|*[!0-9]*)
            # No usable number: say so instead of letting the integer tests
            # fail and fall through to a false "critical" alert.
            log_warn "磁盘使用率: 无法获取"
            ;;
        *)
            if [ "$disk_usage" -lt 80 ]; then
                log_info "磁盘使用率: ${disk_usage}%"
            elif [ "$disk_usage" -lt 90 ]; then
                log_warn "磁盘使用率: ${disk_usage}% (警告)"
                send_alert "磁盘空间不足" "磁盘使用率达到 ${disk_usage}%" "warning"
            else
                log_error "磁盘使用率: ${disk_usage}% (危险)"
                send_alert "磁盘空间严重不足" "磁盘使用率达到 ${disk_usage}%" "error"
            fi
            ;;
    esac

    # Docker disk usage: first size reported by `docker system df`;
    # left empty when docker is unavailable.
    local docker_disk
    docker_disk=$(docker system df --format '{{.Size}}' 2>/dev/null | head -1) || true
    echo " Docker 磁盘占用: $docker_disk"

    echo ""
}
|
||||
|
||||
# Check host memory usage and list the memory used by the pricing containers.
# Usage is column 3 as a percentage of column 2 on the second line of `free`.
# Thresholds: below 80% is OK, 80-89% logs a warning and raises a "warning"
# alert, 90% and above logs an error and raises an "error" alert.
# Outputs: log lines on stdout
# Returns: 0 (problems are reported through logs/alerts, not the exit status)
check_memory() {
    echo "检查内存使用..."
    echo ""

    # `$2 > 0` guards the division; nothing is printed when it does not hold.
    local mem_usage
    mem_usage=$(free 2>/dev/null | awk 'NR==2 && $2 > 0 {printf "%.0f", $3*100/$2}') || true

    case "$mem_usage" in
        ''|*[!0-9]*)
            # No usable number (e.g. `free` is missing): say so instead of
            # falling through to a false "critical" alert.
            log_warn "内存使用率: 无法获取"
            ;;
        *)
            if [ "$mem_usage" -lt 80 ]; then
                log_info "内存使用率: ${mem_usage}%"
            elif [ "$mem_usage" -lt 90 ]; then
                log_warn "内存使用率: ${mem_usage}% (警告)"
                send_alert "内存不足" "内存使用率达到 ${mem_usage}%" "warning"
            else
                log_error "内存使用率: ${mem_usage}% (危险)"
                send_alert "内存严重不足" "内存使用率达到 ${mem_usage}%" "error"
            fi
            ;;
    esac

    # Per-container memory; `|| true` because grep exits 1 when no line
    # matches, which must not abort the script under `set -e`.
    echo " 容器内存使用:"
    docker stats --no-stream --format " {{.Name}}: {{.MemUsage}}" 2>/dev/null | grep pricing || true

    echo ""
}
|
||||
|
||||
# Count lines containing "error" (case-insensitive) in the last 100 lines of
# the pricing-backend container log.
# 0 matches is OK, 1-9 logs a warning, 10 or more logs an error and raises a
# "warning" alert.
# Outputs: log lines on stdout
# Returns: 0 (problems are reported through logs/alerts, not the exit status)
check_logs() {
    echo "检查最近错误日志..."
    echo ""

    # `grep -c` already prints "0" when nothing matches, but exits 1 in that
    # case. `|| true` absorbs the status; echoing another "0" here would turn
    # the count into "0<newline>0" and break the integer tests below.
    local error_count
    error_count=$(docker logs pricing-backend --tail 100 2>&1 | grep -c -i "error" || true)
    error_count="${error_count:-0}"

    if [ "$error_count" -eq 0 ]; then
        log_info "后端日志: 无错误"
    elif [ "$error_count" -lt 10 ]; then
        log_warn "后端日志: 发现 $error_count 个错误"
    else
        log_error "后端日志: 发现 $error_count 个错误"
        send_alert "日志错误过多" "后端日志发现 $error_count 个错误" "warning"
    fi

    echo ""
}
|
||||
|
||||
# Measure the response time of the backend health endpoint.
# With HTTP 200: below 500 ms is OK, 500-1999 ms logs a warning, 2000 ms and
# above logs an error and raises a "warning" alert. Any other status is logged
# as a failed request.
# NOTE(review): `date +%s%N` (nanoseconds) is a GNU date extension - confirm
# the target hosts provide it.
# Outputs: log lines on stdout
# Returns: 0 (problems are reported through logs/alerts, not the exit status)
check_api_performance() {
    echo "检查 API 性能..."
    echo ""

    local start_time end_time response latency

    start_time=$(date +%s%N)
    # curl prints the status code through -w even when it exits non-zero, so
    # the failure status is only absorbed here; appending a fallback "000"
    # would corrupt the code (e.g. "500000").
    response=$(curl -sf -o /dev/null -w '%{http_code}' http://localhost:8000/health 2>/dev/null) || true
    end_time=$(date +%s%N)

    # curl produced no output at all (e.g. it is not installed).
    response="${response:-000}"

    # Nanoseconds to milliseconds.
    latency=$(( (end_time - start_time) / 1000000 ))

    if [ "$response" = "200" ]; then
        if [ "$latency" -lt 500 ]; then
            log_info "健康检查 API: ${latency}ms"
        elif [ "$latency" -lt 2000 ]; then
            log_warn "健康检查 API: ${latency}ms (较慢)"
        else
            log_error "健康检查 API: ${latency}ms (过慢)"
            send_alert "API 响应过慢" "健康检查 API 响应时间 ${latency}ms" "warning"
        fi
    else
        log_error "健康检查 API: 请求失败 (HTTP $response)"
    fi

    echo ""
}
|
||||
|
||||
# Run every check in sequence and print a framed, timestamped report.
# Outputs: banner, the output of each check, and a closing banner on stdout
# Returns: 0 when every check returned 0, 1 otherwise
generate_report() {
    local rc=0
    local check

    echo "=========================================="
    echo " 智能项目定价模型 - 监控报告"
    echo " $(date '+%Y-%m-%d %H:%M:%S')"
    echo "=========================================="
    echo ""

    # Record a failing check instead of letting `set -e` abort the script
    # halfway through: a non-zero return from an early check (for example
    # check_containers) must not hide the results of the remaining ones.
    for check in \
        check_containers \
        check_health \
        check_database \
        check_disk \
        check_memory \
        check_logs \
        check_api_performance; do
        "$check" || rc=1
    done

    echo "=========================================="
    echo " 检查完成"
    echo "=========================================="

    # A failed check still yields a non-zero status, but only after the full
    # report has been printed.
    return "$rc"
}
|
||||
|
||||
# Entry point: dispatch on the requested action.
# Arguments:
#   $1 - report (default) | containers | health | database | disk | memory |
#        logs | quick; anything else prints the usage text
# Side effects: changes the working directory to the parent of the script's
#               directory before running any check.
# Returns: the status of the selected action. `quick` exits the script with
#          status 1 as soon as one of its checks returns non-zero.
main() {
    local action="${1:-report}"

    cd "$(dirname "$0")/.."

    case "$action" in
        report)
            generate_report
            ;;

        containers|health|database|disk|memory|logs)
            # Each of these actions maps onto the check_<action> function.
            "check_${action}"
            ;;

        quick)
            # Quick check (suitable for cron)
            if ! check_containers; then
                exit 1
            fi
            if ! check_database; then
                exit 1
            fi
            if ! check_disk; then
                exit 1
            fi
            ;;

        *)
            printf '%s\n' \
                "智能项目定价模型 - 监控检查脚本" \
                "" \
                "用法: $0 {report|containers|health|database|disk|memory|logs|quick}" \
                "" \
                "命令:" \
                " report 完整监控报告" \
                " containers 检查容器状态" \
                " health 检查服务健康" \
                " database 检查数据库" \
                " disk 检查磁盘空间" \
                " memory 检查内存使用" \
                " logs 检查错误日志" \
                " quick 快速检查(适合 cron)" \
                "" \
                "环境变量:" \
                " ALERT_EMAIL 告警邮箱" \
                " WEBHOOK_URL 企业微信/钉钉 webhook"
            ;;
    esac
}
|
||||
|
||||
# Script entry point: pass every command-line argument through to main.
main "$@"
|
||||
Reference in New Issue
Block a user