#!/bin/bash
#
# Enhanced process resource monitor (PID-based).
# Usage: ./pid_monitor.sh <PID> <duration in seconds>

# Validate the command line: exactly two arguments are required.
if [[ $# -ne 2 ]]; then
  echo "错误: 参数不正确!" >&2
  echo "用法: $0 <进程PID> <监控时长(秒)>" >&2
  exit 1
fi

# Both values are later spliced into /proc paths and shell arithmetic, so
# anything that is not a plain positive decimal integer (empty, negative,
# leading zero that would be read as octal, text) is rejected up front.
if ! [[ $1 =~ ^[1-9][0-9]*$ ]]; then
  echo "错误: 进程PID必须为正整数: $1" >&2
  exit 1
fi
if ! [[ $2 =~ ^[1-9][0-9]*$ ]]; then
  echo "错误: 监控时长必须为正整数(秒): $2" >&2
  exit 1
fi

TARGET_PID=$1
DURATION=$2
# Taken once so that both log file names carry the same suffix.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_PREFIX="pid_monitor_${TARGET_PID}"
DETAIL_LOG="${LOG_PREFIX}_detail_${TIMESTAMP}.log"
SUMMARY_LOG="${LOG_PREFIX}_summary_${TIMESTAMP}.log"

#######################################
# Check whether a process with the given PID currently exists.
# Arguments: $1 - process ID (decimal digits only)
# Returns:   0 if /proc/<pid> is a directory; 1 otherwise, including when
#            the argument is empty or not a plain number
#######################################
check_process_exists() {
  local pid=$1
  # Without this guard an empty argument would test "/proc/" (always a
  # directory), and names such as "self" or "sys" would match /proc entries
  # that are not the requested process.
  [[ $pid =~ ^[0-9]+$ ]] || return 1
  [[ -d "/proc/$pid" ]]
}

#######################################
# Print basic identifying information for a process on one line.
# Arguments: $1 - process ID
# Outputs:   "<name> <user> <cmdline>" separated by single spaces.
#            <name> is read from /proc/<pid>/comm (whitespace replaced by
#            "_"), <user> comes from ps, and <cmdline> is at most the first
#            100 bytes of /proc/<pid>/cmdline with NUL separators turned
#            into spaces. A field that cannot be determined is "N/A"; all
#            three are "N/A" when /proc/<pid>/status does not exist.
# Returns:   0
#######################################
get_process_info() {
  local pid=$1
  local process_name="" user="" cmdline=""

  if [[ ! -f "/proc/$pid/status" ]]; then
    printf '%s %s %s\n' "N/A" "N/A" "N/A"
    return 0
  fi

  process_name=$(cat "/proc/$pid/comm" 2>/dev/null)
  # The caller splits this line on whitespace, so a name containing spaces
  # would shift the user and command-line fields; keep it a single token.
  process_name=${process_name//[[:space:]]/_}
  [[ -n $process_name ]] || process_name="N/A"

  user=$(ps -o user= -p "$pid" 2>/dev/null)
  user=${user//[[:space:]]/}
  [[ -n $user ]] || user="N/A"

  # The exit status of a pipeline is that of its last command, so emptiness
  # is checked explicitly instead of relying on "|| fallback". The braces
  # make the stderr redirect cover a failed open of the cmdline file.
  cmdline=$({ tr '\0' ' ' < "/proc/$pid/cmdline"; } 2>/dev/null | head -c 100)
  # Drop the trailing blank produced by the final NUL terminator.
  cmdline=${cmdline%"${cmdline##*[![:space:]]}"}
  [[ -n $cmdline ]] || cmdline="N/A"

  printf '%s %s %s\n' "$process_name" "$user" "$cmdline"
}

# Abort before any log file is created if the target process is not running.
if ! check_process_exists "$TARGET_PID"; then
  echo "错误: 进程 PID $TARGET_PID 不存在!" >&2
  exit 1
fi

# Fetch name / user / command line of the target. get_process_info prints
# them space-separated on one line; the last variable receives the rest of
# the line, so a command line containing spaces stays in one piece. -r keeps
# backslashes in the command line literal.
read -r process_name process_user process_cmdline \
  <<< "$(get_process_info "$TARGET_PID")"

# Start-up banner on the terminal.
echo "开始监控进程..."
echo "进程PID: $TARGET_PID"
echo "进程名称: $process_name"
echo "运行用户: $process_user"
echo "命令行: $process_cmdline"
echo "监控时长: $DURATION 秒"
echo "开始时间: $(date)"
echo "详细日志: $DETAIL_LOG"
echo "汇总报告: $SUMMARY_LOG"

# Create (or truncate) the detail log: a report header followed by the
# column titles for the per-sample rows appended by the sampling loop.
# The group is redirected once instead of reopening the file per line.
{
  echo "进程监控报告 - PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "命令行: $process_cmdline"
  echo "开始时间: $(date)"
  echo "监控间隔: 1秒"
  echo "==============================================================="
  printf "%-20s %-8s %-8s %-12s %-12s %-12s\n" "时间戳" "CPU(%)" "内存(%)" "物理内存(MB)" "虚拟内存(MB)" "线程数"
} > "$DETAIL_LOG"

# Number of samples taken and the running total of each metric; the totals
# are divided by COUNT after the loop to obtain the averages.
COUNT=0
CPU_SUM=0
MEM_SUM=0
RSS_SUM=0
VSZ_SUM=0
THREADS_SUM=0

# Highest value observed so far for each metric.
CPU_MAX=0
MEM_MAX=0
RSS_MAX=0
VSZ_MAX=0
THREADS_MAX=0

#######################################
# Print the CPU usage (percent) of a process.
#
# The primary source is one batch-mode run of top. When that yields nothing
# usable, the value is derived from <proc_root>/<pid>/stat as the average
# CPU usage over the lifetime of the process:
#   (utime + stime) / CLK_TCK / (system uptime - process start) * 100
# A delta between two calls is deliberately not used: callers capture the
# output with $(...), which runs this function in a subshell, so no state
# can be carried from one call to the next.
#
# Arguments: $1 - process ID
#            $2 - proc filesystem root (optional, default /proc); top is
#                 consulted only when this is /proc
# Outputs:   a non-negative decimal number on stdout; "0" when no value
#            could be determined
#######################################
get_cpu_usage() {
  local pid=$1
  local proc_root=${2:-/proc}
  local number_re='^[0-9]+([.][0-9]+)?$'
  local int_re='^[0-9]+$'
  local cpu_usage=""

  if [[ $proc_root == /proc ]]; then
    # LC_ALL=C keeps the decimal separator a dot. The value is taken from
    # column 9 of the row whose first column is the PID.
    cpu_usage=$(LC_ALL=C top -bn1 -p "$pid" 2>/dev/null \
      | awk -v pid="$pid" '$1 == pid { print $9; exit }')
  fi

  if [[ ! $cpu_usage =~ $number_re ]]; then
    cpu_usage=0

    local stat_line uptime hz
    local -a fields=()
    stat_line=$(cat "$proc_root/$pid/stat" 2>/dev/null)
    uptime=$(awk '{ print $1; exit }' "$proc_root/uptime" 2>/dev/null)
    hz=$(getconf CLK_TCK 2>/dev/null)

    # Field 2 of the stat line is the command name in parentheses and may
    # itself contain spaces or ")". Cutting everything up to the last ") "
    # puts the remaining fields at fixed positions (field 3 -> index 0).
    if [[ $stat_line == *") "* ]]; then
      read -r -a fields <<< "${stat_line##*) }"
    fi
    # Fields 14 (utime), 15 (stime) and 22 (starttime) of the full line.
    local utime=${fields[11]:-}
    local stime=${fields[12]:-}
    local starttime=${fields[19]:-}

    if [[ $utime =~ $int_re && $stime =~ $int_re && $starttime =~ $int_re \
      && $hz =~ $int_re && $hz -gt 0 && $uptime =~ $number_re ]]; then
      cpu_usage=$(LC_ALL=C awk \
        -v ticks="$((utime + stime))" -v hz="$hz" \
        -v up="$uptime" -v start="$starttime" '
        BEGIN {
          elapsed = up - start / hz
          if (elapsed <= 0) { print 0; exit }
          printf "%.2f\n", ticks / hz / elapsed * 100
        }')
    fi
  fi

  echo "${cpu_usage:-0}"
}

#######################################
# Print memory and thread statistics for a process.
# Arguments: $1 - process ID
#            $2 - proc filesystem root (optional, default /proc)
# Outputs:   "<mem%> <rss_mb> <vsz_mb> <threads>" on one line. mem% is
#            taken from ps; the other three from the VmRSS, VmSize and
#            Threads lines of <proc_root>/<pid>/status, with the kB values
#            truncated to whole MB. Any value that cannot be determined
#            is 0.
#######################################
get_memory_info() {
  local pid=$1
  local proc_root=${2:-/proc}
  local status_file="$proc_root/$pid/status"
  local number_re='^[0-9]+([.][0-9]+)?$'
  local int_re='^[0-9]+$'
  local mem_usage=0 rss_kb=0 vsz_kb=0 threads=0

  if [[ -f $status_file ]]; then
    mem_usage=$(LC_ALL=C ps -o %mem= -p "$pid" 2>/dev/null)
    mem_usage=${mem_usage//[[:space:]]/}

    # One pass over the status file. The Vm* lines can be absent (e.g. for
    # kernel threads), so a missing line is reported as 0 rather than as an
    # empty string. Values are printed as the original text so that large
    # numbers are never reformatted by awk.
    read -r rss_kb vsz_kb threads <<< "$(awk '
      $1 == "VmRSS:"   { rss = $2 }
      $1 == "VmSize:"  { vsz = $2 }
      $1 == "Threads:" { thr = $2 }
      END {
        print (rss == "" ? 0 : rss), (vsz == "" ? 0 : vsz), (thr == "" ? 0 : thr)
      }' "$status_file" 2>/dev/null)"
  fi

  # Anything empty or malformed (ps unavailable, file vanished mid-read)
  # becomes 0 so that callers can safely do arithmetic on every field.
  [[ $mem_usage =~ $number_re ]] || mem_usage=0
  [[ $rss_kb =~ $int_re ]] || rss_kb=0
  [[ $vsz_kb =~ $int_re ]] || vsz_kb=0
  [[ $threads =~ $int_re ]] || threads=0

  # Convert kB to whole MB (integer division).
  local rss_mb=$((rss_kb / 1024))
  local vsz_mb=$((vsz_kb / 1024))

  echo "$mem_usage" "$rss_mb" "$vsz_mb" "$threads"
}

# Main sampling loop: one sample per pass until DURATION seconds have
# elapsed or the target process disappears. Each pass appends a row to the
# detail log and updates the running totals and maxima.
START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
# Elapsed-seconds threshold at which the next progress line is printed.
NEXT_PROGRESS=0
SAMPLE_NUMBER_RE='^[0-9]+([.][0-9]+)?$'
SAMPLE_INT_RE='^[0-9]+$'

echo "开始监控进程 PID: $TARGET_PID ..."

while :; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  REMAINING=$((DURATION - ELAPSED))
  if ((REMAINING <= 0)); then
    break
  fi

  # Stop as soon as the target is gone.
  if ! check_process_exists "$TARGET_PID"; then
    echo "警告: 进程 $TARGET_PID 在监控期间退出!" >&2
    break
  fi

  # Human-readable time of this sample. This reuses TIMESTAMP; the log file
  # names were already derived from its earlier value.
  TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")

  CPU_USAGE=$(get_cpu_usage "$TARGET_PID")
  read -r MEM_USAGE RSS_MB VSZ_MB THREADS \
    <<< "$(get_memory_info "$TARGET_PID")"

  # The process can vanish between the existence check and the reads above.
  # Normalise anything empty or non-numeric to 0 so that printf, bc and the
  # integer arithmetic below cannot fail on it.
  [[ $CPU_USAGE =~ $SAMPLE_NUMBER_RE ]] || CPU_USAGE=0
  [[ $MEM_USAGE =~ $SAMPLE_NUMBER_RE ]] || MEM_USAGE=0
  [[ $RSS_MB =~ $SAMPLE_INT_RE ]] || RSS_MB=0
  [[ $VSZ_MB =~ $SAMPLE_INT_RE ]] || VSZ_MB=0
  [[ $THREADS =~ $SAMPLE_INT_RE ]] || THREADS=0

  # Append the sample to the detail log.
  printf "%-20s %-8.1f %-8.1f %-12s %-12s %-12s\n" \
    "$TIMESTAMP" "$CPU_USAGE" "$MEM_USAGE" \
    "${RSS_MB}MB" "${VSZ_MB}MB" "$THREADS" >> "$DETAIL_LOG"

  # Running totals; CPU and memory percentages are fractional, hence bc.
  CPU_SUM=$(bc <<< "$CPU_SUM + $CPU_USAGE")
  MEM_SUM=$(bc <<< "$MEM_SUM + $MEM_USAGE")
  RSS_SUM=$((RSS_SUM + RSS_MB))
  VSZ_SUM=$((VSZ_SUM + VSZ_MB))
  THREADS_SUM=$((THREADS_SUM + THREADS))

  # Maxima. bc prints 1 when the comparison holds.
  if [[ "$(bc <<< "$CPU_USAGE > $CPU_MAX")" == 1 ]]; then
    CPU_MAX=$CPU_USAGE
  fi
  if [[ "$(bc <<< "$MEM_USAGE > $MEM_MAX")" == 1 ]]; then
    MEM_MAX=$MEM_USAGE
  fi
  if ((RSS_MB > RSS_MAX)); then
    RSS_MAX=$RSS_MB
  fi
  if ((VSZ_MB > VSZ_MAX)); then
    VSZ_MAX=$VSZ_MB
  fi
  if ((THREADS > THREADS_MAX)); then
    THREADS_MAX=$THREADS
  fi

  COUNT=$((COUNT + 1))

  # Progress roughly every 10 seconds. A pass takes longer than the
  # 1-second sleep, so ELAPSED can jump over exact multiples of 10; compare
  # against a moving threshold instead of testing ELAPSED % 10.
  if ((ELAPSED >= NEXT_PROGRESS)); then
    echo "进度: ${ELAPSED}/${DURATION}秒, CPU: ${CPU_USAGE}%, 内存: ${MEM_USAGE}%, 物理内存: ${RSS_MB}MB"
    NEXT_PROGRESS=$((ELAPSED - ELAPSED % 10 + 10))
  fi

  sleep 1
done

# Derive the per-metric averages from the running totals. With no samples
# (COUNT unset or 0) every average is reported as 0.
if ((${COUNT:-0} > 0)); then
  # awk with an explicit format is used for the fractional metrics so the
  # result always has a leading zero and two decimals (bc prints ".50").
  CPU_AVG=$(LC_ALL=C awk -v sum="${CPU_SUM:-0}" -v n="$COUNT" \
    'BEGIN { printf "%.2f", sum / n }')
  MEM_AVG=$(LC_ALL=C awk -v sum="${MEM_SUM:-0}" -v n="$COUNT" \
    'BEGIN { printf "%.2f", sum / n }')
  # Integer metrics: truncating division.
  RSS_AVG=$((RSS_SUM / COUNT))
  VSZ_AVG=$((VSZ_SUM / COUNT))
  THREADS_AVG=$((THREADS_SUM / COUNT))
else
  CPU_AVG=0
  MEM_AVG=0
  RSS_AVG=0
  VSZ_AVG=0
  THREADS_AVG=0
fi

# Write the summary report: identification of the run followed by average
# and maximum of every metric. The whole group is redirected once, which
# creates or truncates the file a single time.
{
  echo "==============================================================="
  echo "进程监控统计报告"
  echo "==============================================================="
  echo "进程PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "监控时长: $DURATION 秒"
  echo "数据采样数: $COUNT 次"
  # START_TIME is in epoch seconds; "date -d @N" renders it as a date.
  echo "开始时间: $(date -d "@$START_TIME")"
  echo "结束时间: $(date)"
  echo ""

  echo "CPU使用率统计:"
  echo " 平均值: ${CPU_AVG}%"
  echo " 最大值: ${CPU_MAX}%"
  echo ""

  echo "内存使用率统计:"
  echo " 平均值: ${MEM_AVG}%"
  echo " 最大值: ${MEM_MAX}%"
  echo ""

  echo "物理内存(RSS)统计:"
  echo " 平均值: ${RSS_AVG}MB"
  echo " 最大值: ${RSS_MAX}MB"
  echo ""

  echo "虚拟内存(VSZ)统计:"
  echo " 平均值: ${VSZ_AVG}MB"
  echo " 最大值: ${VSZ_MAX}MB"
  echo ""

  echo "线程数统计:"
  echo " 平均值: $THREADS_AVG"
  echo " 最大值: $THREADS_MAX"
  echo ""
} > "$SUMMARY_LOG"

# Final on-screen summary: where the logs were written and the headline
# figures of the run.
echo ""
echo "===== 监控完成 ====="
echo "详细监控数据: $DETAIL_LOG"
echo "统计报告: $SUMMARY_LOG"
echo ""
echo "监控统计摘要:"
echo " - 数据采样数: $COUNT 次"
echo " - CPU平均使用率: ${CPU_AVG}%"
echo " - CPU最大使用率: ${CPU_MAX}%"
echo " - 内存平均使用率: ${MEM_AVG}%"
echo " - 内存最大使用率: ${MEM_MAX}%"
echo " - 物理内存平均使用: ${RSS_AVG}MB"
echo " - 物理内存峰值使用: ${RSS_MAX}MB"