eis/TestProject/scripts/pid_monitor.sh

298 lines
8.9 KiB
Bash
Raw Normal View History

#!/bin/bash
# Enhanced per-process resource monitor, keyed by PID.
# Usage: ./pid_monitor.sh <PID> <duration in seconds>

# Exactly two arguments are required.
if [[ $# -ne 2 ]]; then
  echo "错误: 参数不正确!" >&2
  echo "用法: $0 <进程PID> <监控时长(秒)>" >&2
  exit 1
fi

# Both values are later used in integer arithmetic and in /proc paths, so
# anything other than plain decimal digits is rejected up front.
if [[ ! $1 =~ ^[0-9]+$ ]]; then
  echo "错误: 进程PID必须是非负整数: $1" >&2
  echo "用法: $0 <进程PID> <监控时长(秒)>" >&2
  exit 1
fi
if [[ ! $2 =~ ^[0-9]+$ ]]; then
  echo "错误: 监控时长必须是非负整数(秒): $2" >&2
  echo "用法: $0 <进程PID> <监控时长(秒)>" >&2
  exit 1
fi

# Force base 10 so a leading zero (e.g. "010" or "09") is not read as octal
# by later $(( )) expressions.
TARGET_PID=$((10#$1))
DURATION=$((10#$2))

# Log file names embed the PID and the start time of this run.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_PREFIX="pid_monitor_${TARGET_PID}"
DETAIL_LOG="${LOG_PREFIX}_detail_${TIMESTAMP}.log"
SUMMARY_LOG="${LOG_PREFIX}_summary_${TIMESTAMP}.log"
#######################################
# Report whether a process with the given PID currently exists.
# Arguments: $1 - process ID (decimal digits only)
# Returns:   0 if /proc/<pid> is a directory, 1 otherwise - including when
#            the argument is empty or not purely numeric.
#######################################
check_process_exists() {
  local pid=$1

  # An empty value would test "/proc/" itself, and names such as "self" or
  # "sys" are real /proc entries, so non-numeric input is rejected first.
  [[ $pid =~ ^[0-9]+$ ]] || return 1

  [[ -d /proc/$pid ]]
}
#######################################
# Print basic facts about a process as one line of three fields:
#   "<name> <user> <command line>"
# The caller splits this line on whitespace, so blanks inside the process
# name are replaced with "_" to keep the first two fields aligned; the
# command line is the remainder of the line and may contain blanks.
# Arguments: $1 - process ID
# Outputs:   the three fields on stdout; any field that cannot be
#            determined is printed as "N/A"
#######################################
get_process_info() {
  local pid=$1
  local process_name="" user="" cmdline=""

  if [[ -f /proc/$pid/status ]]; then
    # comm is a single line; a failed read simply leaves the name empty.
    { IFS= read -r process_name < "/proc/$pid/comm"; } 2>/dev/null
    process_name=${process_name//[[:space:]]/_}

    user=$(ps -o user= -p "$pid" 2>/dev/null)
    user=${user//[[:space:]]/}

    # cmdline is NUL-separated; turn NULs (and embedded newlines) into blanks
    # and cap the length at 100 bytes. The braces make 2>/dev/null cover the
    # input redirection as well as both commands.
    cmdline=$({ tr '\0\n' '  ' < "/proc/$pid/cmdline" | head -c 100; } 2>/dev/null)
    # Drop the trailing blank left by the final NUL.
    cmdline=${cmdline%"${cmdline##*[![:space:]]}"}
  fi

  # Every empty field falls back to "N/A" here, which also covers the case
  # where /proc/<pid>/status does not exist.
  printf '%s %s %s\n' "${process_name:-N/A}" "${user:-N/A}" "${cmdline:-N/A}"
}
# Abort early when the target process is not running.
if ! check_process_exists "$TARGET_PID"; then
  echo "错误: 进程 PID $TARGET_PID 不存在!" >&2
  exit 1
fi

# Fetch name, owner and command line. get_process_info prints them as one
# whitespace-separated line; the third variable receives the rest of the
# line. -r keeps backslashes in the command line intact.
read -r process_name process_user process_cmdline \
  <<< "$(get_process_info "$TARGET_PID")"

# Tell the operator what is about to be monitored and where the logs go.
echo "开始监控进程..."
echo "进程PID: $TARGET_PID"
echo "进程名称: $process_name"
echo "运行用户: $process_user"
echo "命令行: $process_cmdline"
echo "监控时长: $DURATION"
echo "开始时间: $(date)"
echo "详细日志: $DETAIL_LOG"
echo "汇总报告: $SUMMARY_LOG"
# Create (or truncate) the detail log and write its header followed by the
# column titles. The whole group shares one quoted redirection, so the file
# is opened once and a log name containing unusual characters cannot be
# word-split.
{
  echo "进程监控报告 - PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "命令行: $process_cmdline"
  echo "开始时间: $(date)"
  echo "监控间隔: 1秒"
  echo "==============================================================="
  printf "%-20s %-8s %-8s %-12s %-12s %-12s\n" \
    "时间戳" "CPU(%)" "内存(%)" "物理内存(MB)" "虚拟内存(MB)" "线程数"
} > "$DETAIL_LOG"
# Reset the accumulators used by the sampling loop.
# COUNT is the number of samples taken so far.
COUNT=0
# Running totals, later divided by COUNT to obtain averages.
CPU_SUM=0 MEM_SUM=0 RSS_SUM=0 VSZ_SUM=0 THREADS_SUM=0
# Highest value observed for each metric.
CPU_MAX=0 MEM_MAX=0 RSS_MAX=0 VSZ_MAX=0 THREADS_MAX=0
#######################################
# Helper: print "<cpu_ticks> <uptime_seconds>" for one process, where
# cpu_ticks is utime + stime from /proc/<pid>/stat and uptime_seconds is
# the first value of /proc/uptime.
# Arguments: $1 - process ID
# Returns:   0 on success, 1 if either file cannot be read or parsed
#######################################
_read_cpu_sample() {
  local pid=$1
  local stat_line uptime_now
  local -a fields

  { IFS= read -r stat_line < "/proc/$pid/stat"; } 2>/dev/null || return 1
  { read -r uptime_now _ < /proc/uptime; } 2>/dev/null || return 1

  # Field 2 "(comm)" may itself contain blanks or parentheses, so everything
  # through the last ") " is cut off before splitting. The remainder starts
  # at field 3, which puts utime (field 14) at index 11 and stime (field 15)
  # at index 12.
  read -r -a fields <<< "${stat_line##*) }"
  [[ ${fields[11]:-} =~ ^[0-9]+$ && ${fields[12]:-} =~ ^[0-9]+$ ]] || return 1

  echo "$((fields[11] + fields[12])) $uptime_now"
}

#######################################
# Print the CPU usage (percent of one CPU) of a process.
# The value reported by top is used when available. Otherwise the process'
# CPU ticks are sampled twice from /proc, <interval> seconds apart, and the
# usage over that window is computed; this fallback blocks for <interval>.
# The function keeps no state between calls, so it works when invoked
# through command substitution.
# Arguments: $1 - process ID
#            $2 - optional sampling window in seconds for the /proc
#                 fallback (default 0.5)
# Outputs:   a non-negative decimal number on stdout; "0" when no value
#            can be determined (e.g. the process does not exist)
#######################################
get_cpu_usage() {
  local pid=$1
  local interval=${2:-0.5}
  local number_re='^[0-9]+([.][0-9]+)?$'
  local cpu_usage first second hz

  # Method 1: one batch iteration of top. LC_ALL=C keeps the decimal
  # separator a dot regardless of the user's locale.
  cpu_usage=$(LC_ALL=C top -bn1 -p "$pid" 2>/dev/null \
    | awk -v pid="$pid" '$1 == pid { print $9; exit }')

  if [[ ! $cpu_usage =~ $number_re ]]; then
    cpu_usage=""

    # Method 2: delta of CPU ticks over a short wall-clock window.
    hz=$(getconf CLK_TCK 2>/dev/null)
    [[ $hz =~ ^[1-9][0-9]*$ ]] || hz=100

    if first=$(_read_cpu_sample "$pid") \
      && sleep "$interval" \
      && second=$(_read_cpu_sample "$pid"); then
      cpu_usage=$(LC_ALL=C awk -v hz="$hz" -v a="$first" -v b="$second" '
        BEGIN {
          split(a, s1, " ")
          split(b, s2, " ")
          wall = s2[2] - s1[2]
          if (wall > 0) {
            printf "%.2f", (s2[1] - s1[1]) / hz / wall * 100
          }
        }')
    fi
  fi

  echo "${cpu_usage:-0}"
}
#######################################
# Print memory and thread figures for a process as one line:
#   "<mem %> <RSS in MB> <VSZ in MB> <thread count>"
# RSS, VSZ and the thread count come from the VmRSS, VmSize and Threads
# lines of /proc/<pid>/status; the memory percentage comes from ps.
# Arguments: $1 - process ID
# Outputs:   four whitespace-separated numbers on stdout. Any figure that
#            cannot be determined is printed as 0 (for example VmRSS and
#            VmSize are absent for kernel threads and zombies).
#######################################
get_memory_info() {
  local pid=$1
  local mem_usage=0 rss_kb=0 vsz_kb=0 threads=0
  local status_file=/proc/$pid/status

  if [[ -f $status_file ]]; then
    # LC_ALL=C keeps the decimal separator a dot.
    mem_usage=$(LC_ALL=C ps -o %mem= -p "$pid" 2>/dev/null)
    mem_usage=${mem_usage//[[:space:]]/}

    # One pass over the status file. Values are printed as the original
    # strings so very large sizes are never reformatted by awk.
    read -r rss_kb vsz_kb threads < <(
      awk '
        $1 == "VmRSS:"   { rss = $2 }
        $1 == "VmSize:"  { vsz = $2 }
        $1 == "Threads:" { thr = $2 }
        END {
          print (rss == "" ? 0 : rss), (vsz == "" ? 0 : vsz), (thr == "" ? 0 : thr)
        }
      ' "$status_file" 2>/dev/null
    )
  fi

  # The process may vanish between the checks above, leaving fields empty;
  # fall back to 0 so callers always receive four numeric fields.
  [[ $mem_usage =~ ^[0-9]+([.][0-9]+)?$ ]] || mem_usage=0
  [[ $rss_kb =~ ^[0-9]+$ ]] || rss_kb=0
  [[ $vsz_kb =~ ^[0-9]+$ ]] || vsz_kb=0
  [[ $threads =~ ^[0-9]+$ ]] || threads=0

  # kB -> MB using integer division.
  echo "$mem_usage" "$((rss_kb / 1024))" "$((vsz_kb / 1024))" "$threads"
}
# Main monitoring loop: take one sample roughly every second until DURATION
# seconds have passed or the target process exits.
START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
# Elapsed-seconds mark at which the next progress line is due.
NEXT_PROGRESS=0
# Patterns used to validate samples before they reach bc / $(( )).
DECIMAL_RE='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
INTEGER_RE='^[0-9]+$'

echo "开始监控进程 PID: $TARGET_PID ..."
while (( $(date +%s) < END_TIME )); do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  REMAINING=$((DURATION - ELAPSED))
  if (( REMAINING <= 0 )); then
    break
  fi

  # Stop as soon as the target process has gone away.
  if ! check_process_exists "$TARGET_PID"; then
    echo "警告: 进程 $TARGET_PID 在监控期间退出!"
    break
  fi

  TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")

  # Collect one sample.
  CPU_USAGE=$(get_cpu_usage "$TARGET_PID")
  read -r MEM_USAGE RSS_MB VSZ_MB THREADS \
    <<< "$(get_memory_info "$TARGET_PID")"

  # Normalise the sample: accept a comma as decimal separator and replace
  # anything empty or non-numeric with 0, so a process that exits mid-sample
  # cannot break the arithmetic below.
  CPU_USAGE=${CPU_USAGE/,/.}
  MEM_USAGE=${MEM_USAGE/,/.}
  [[ $CPU_USAGE =~ $DECIMAL_RE ]] || CPU_USAGE=0
  [[ $MEM_USAGE =~ $DECIMAL_RE ]] || MEM_USAGE=0
  [[ $RSS_MB =~ $INTEGER_RE ]] || RSS_MB=0
  [[ $VSZ_MB =~ $INTEGER_RE ]] || VSZ_MB=0
  [[ $THREADS =~ $INTEGER_RE ]] || THREADS=0

  # Append the sample to the detail log.
  printf "%-20s %-8.1f %-8.1f %-12s %-12s %-12s\n" \
    "$TIMESTAMP" "$CPU_USAGE" "$MEM_USAGE" \
    "${RSS_MB}MB" "${VSZ_MB}MB" "$THREADS" >> "$DETAIL_LOG"

  # Running totals; the percentages are fractional and therefore go through bc.
  CPU_SUM=$(echo "$CPU_SUM + $CPU_USAGE" | bc)
  MEM_SUM=$(echo "$MEM_SUM + $MEM_USAGE" | bc)
  RSS_SUM=$((RSS_SUM + RSS_MB))
  VSZ_SUM=$((VSZ_SUM + VSZ_MB))
  THREADS_SUM=$((THREADS_SUM + THREADS))

  # Running maxima. bc prints 1 when the comparison holds; a string compare
  # keeps an empty bc result from raising a test error.
  if [[ $(echo "$CPU_USAGE > $CPU_MAX" | bc) == 1 ]]; then
    CPU_MAX=$CPU_USAGE
  fi
  if [[ $(echo "$MEM_USAGE > $MEM_MAX" | bc) == 1 ]]; then
    MEM_MAX=$MEM_USAGE
  fi
  if (( RSS_MB > RSS_MAX )); then
    RSS_MAX=$RSS_MB
  fi
  if (( VSZ_MB > VSZ_MAX )); then
    VSZ_MAX=$VSZ_MB
  fi
  if (( THREADS > THREADS_MAX )); then
    THREADS_MAX=$THREADS
  fi

  COUNT=$((COUNT + 1))

  # Progress line about every 10 seconds. A threshold is tracked instead of
  # testing ELAPSED % 10, because an iteration can take longer than a second
  # and step over an exact multiple of 10.
  if (( ELAPSED >= NEXT_PROGRESS )); then
    echo "进度: ${ELAPSED}/${DURATION}秒, CPU: ${CPU_USAGE}%, 内存: ${MEM_USAGE}%, 物理内存: ${RSS_MB}MB"
    NEXT_PROGRESS=$(( (ELAPSED / 10 + 1) * 10 ))
  fi

  sleep 1
done
# Compute the per-metric averages from the running totals. With no samples
# every average is reported as 0.
if (( ${COUNT:-0} > 0 )); then
  # The percentages are fractional. awk's printf always emits a leading
  # zero ("0.50" rather than ".50"), and LC_ALL=C keeps the decimal
  # separator a dot.
  CPU_AVG=$(LC_ALL=C awk -v sum="$CPU_SUM" -v n="$COUNT" \
    'BEGIN { printf "%.2f", sum / n }')
  MEM_AVG=$(LC_ALL=C awk -v sum="$MEM_SUM" -v n="$COUNT" \
    'BEGIN { printf "%.2f", sum / n }')
  # Sizes and thread counts are whole numbers; integer division is enough.
  RSS_AVG=$((RSS_SUM / COUNT))
  VSZ_AVG=$((VSZ_SUM / COUNT))
  THREADS_AVG=$((THREADS_SUM / COUNT))
else
  CPU_AVG=0
  MEM_AVG=0
  RSS_AVG=0
  VSZ_AVG=0
  THREADS_AVG=0
fi
# Write the summary report. The whole group shares one quoted redirection,
# so the file is created (or truncated) and opened exactly once.
{
  echo "==============================================================="
  echo "进程监控统计报告"
  echo "==============================================================="
  echo "进程PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "监控时长: $DURATION"
  echo "数据采样数: $COUNT"
  # START_TIME is an epoch value; "date -d @<epoch>" formats it.
  echo "开始时间: $(date -d "@$START_TIME")"
  echo "结束时间: $(date)"
  echo ""
  echo "CPU使用率统计:"
  echo " 平均值: $CPU_AVG%"
  echo " 最大值: $CPU_MAX%"
  echo ""
  echo "内存使用率统计:"
  echo " 平均值: $MEM_AVG%"
  echo " 最大值: $MEM_MAX%"
  echo ""
  echo "物理内存(RSS)统计:"
  echo " 平均值: ${RSS_AVG}MB"
  echo " 最大值: ${RSS_MAX}MB"
  echo ""
  echo "虚拟内存(VSZ)统计:"
  echo " 平均值: ${VSZ_AVG}MB"
  echo " 最大值: ${VSZ_MAX}MB"
  echo ""
  echo "线程数统计:"
  echo " 平均值: $THREADS_AVG"
  echo " 最大值: $THREADS_MAX"
  echo ""
} > "$SUMMARY_LOG"
# Show the closing summary on the terminal: where the two log files are,
# followed by the headline figures. Each argument becomes one output line.
printf '%s\n' \
  "" \
  "===== 监控完成 =====" \
  "详细监控数据: $DETAIL_LOG" \
  "统计报告: $SUMMARY_LOG" \
  "" \
  "监控统计摘要:" \
  " - 数据采样数: $COUNT" \
  " - CPU平均使用率: $CPU_AVG%" \
  " - CPU最大使用率: $CPU_MAX%" \
  " - 内存平均使用率: $MEM_AVG%" \
  " - 内存最大使用率: $MEM_MAX%" \
  " - 物理内存平均使用: ${RSS_AVG}MB" \
  " - 物理内存峰值使用: ${RSS_MAX}MB"