298 lines
8.9 KiB
Bash
298 lines
8.9 KiB
Bash
|
|
#!/bin/bash
# Enhanced per-process resource monitor (PID based).
# Usage: ./pid_monitor.sh <PID> <duration-in-seconds>

# Validate the arguments: exactly two, both non-negative decimal integers.
# Both values are used further down in shell arithmetic and in /proc paths,
# so anything else is rejected up front. Diagnostics go to stderr.
if [[ $# -ne 2 || ! $1 =~ ^[0-9]+$ || ! $2 =~ ^[0-9]+$ ]]; then
    echo "错误: 参数不正确!" >&2
    echo "用法: $0 <进程PID> <监控时长(秒)>" >&2
    exit 1
fi

# Force base 10 so that a leading zero (e.g. "08") is not parsed as octal.
TARGET_PID=$((10#$1))
DURATION=$((10#$2))

# Log file names carry the PID and the script start time.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_PREFIX="pid_monitor_${TARGET_PID}"
DETAIL_LOG="${LOG_PREFIX}_detail_${TIMESTAMP}.log"
SUMMARY_LOG="${LOG_PREFIX}_summary_${TIMESTAMP}.log"
|
|||
|
|
|
|||
|
|
#######################################
# Check whether a process exists.
# Arguments:   $1 - PID to look up
# Environment: PROC_ROOT - optional procfs mount point (default: /proc)
# Returns:     0 if <PROC_ROOT>/<pid> is a directory, 1 otherwise
#######################################
check_process_exists() {
    local pid=$1
    local proc_root=${PROC_ROOT:-/proc}

    # An empty or non-numeric argument would otherwise resolve to a path such
    # as "/proc/" or "/proc/self", which always exists.
    [[ $pid =~ ^[0-9]+$ ]] || return 1

    [[ -d "$proc_root/$pid" ]]
}
|
|||
|
|
|
|||
|
|
#######################################
# Print basic information about a process as one line:
#   "<name> <user> <command line>"
# The caller splits this line on whitespace, so the first two fields must not
# contain whitespace themselves; the command line is the free-form remainder.
# Arguments:   $1 - PID
# Environment: PROC_ROOT - optional procfs mount point (default: /proc)
# Outputs:     the three fields on stdout; any field that cannot be
#              determined (or is empty) is printed as "N/A"
#######################################
get_process_info() {
    local pid=$1
    local proc_root=${PROC_ROOT:-/proc}
    local name="" user="" cmdline=""

    if [[ -f "$proc_root/$pid/status" ]]; then
        # The process may exit at any moment, so every read is best-effort.
        { IFS= read -r name < "$proc_root/$pid/comm"; } 2>/dev/null
        user=$(ps -o user= -p "$pid" 2>/dev/null)
        # cmdline is NUL-separated; show at most the first 100 bytes.
        cmdline=$({ tr '\0' ' ' < "$proc_root/$pid/cmdline" | head -c 100; } 2>/dev/null)
    fi

    # comm may contain spaces (e.g. "Web Content"); keep it a single field.
    name=${name//[[:space:]]/_}
    user=${user//[[:space:]]/}
    # Strip trailing whitespace (the final NUL of cmdline becomes a space).
    cmdline=${cmdline%"${cmdline##*[![:space:]]}"}

    printf '%s %s %s\n' "${name:-N/A}" "${user:-N/A}" "${cmdline:-N/A}"
}
|
|||
|
|
|
|||
|
|
# Abort early when the target process is not running.
if ! check_process_exists "$TARGET_PID"; then
    echo "错误: 进程 PID $TARGET_PID 不存在!" >&2
    exit 1
fi

# get_process_info prints "<name> <user> <command line>"; the first two words
# go to process_name / process_user and the remainder of the line becomes
# process_cmdline. -r keeps backslashes in the command line intact.
read -r process_name process_user process_cmdline \
    <<< "$(get_process_info "$TARGET_PID")"
|
|||
|
|
|
|||
|
|
# Announce the monitoring session on the terminal: target process details,
# requested duration, start time and the two log file names.
printf '%s\n' \
    "开始监控进程..." \
    "进程PID: $TARGET_PID" \
    "进程名称: $process_name" \
    "运行用户: $process_user" \
    "命令行: $process_cmdline" \
    "监控时长: $DURATION 秒" \
    "开始时间: $(date)" \
    "详细日志: $DETAIL_LOG" \
    "汇总报告: $SUMMARY_LOG"
|
|||
|
|
|
|||
|
|
#######################################
# Create (truncate) the detail log and write its header: process details,
# start time, sampling interval, a separator line and the column titles.
# Globals: DETAIL_LOG, TARGET_PID, process_name, process_user,
#          process_cmdline (all read)
# Outputs: writes the header to the file named by DETAIL_LOG
#######################################
init_detail_log() {
    # One redirection for the whole header; the target is quoted so a log
    # path containing whitespace does not become an ambiguous redirect.
    {
        echo "进程监控报告 - PID: $TARGET_PID"
        echo "进程名称: $process_name"
        echo "运行用户: $process_user"
        echo "命令行: $process_cmdline"
        echo "开始时间: $(date)"
        echo "监控间隔: 1秒"
        echo "==============================================================="
        printf "%-20s %-8s %-8s %-12s %-12s %-12s\n" \
            "时间戳" "CPU(%)" "内存(%)" "物理内存(MB)" "虚拟内存(MB)" "线程数"
    } > "$DETAIL_LOG"
}

# Initialise the log file.
init_detail_log
|
|||
|
|
|
|||
|
|
# Initialise the statistics accumulators.
# COUNT is the number of samples taken; the *_SUM variables accumulate every
# sample so averages can be computed once monitoring ends.
COUNT=0
CPU_SUM=0
MEM_SUM=0
RSS_SUM=0
VSZ_SUM=0
THREADS_SUM=0

# Running maxima of each metric.
CPU_MAX=0
MEM_MAX=0
RSS_MAX=0
VSZ_MAX=0
THREADS_MAX=0

# State carried between CPU samples by get_cpu_usage. Empty means "no previous
# sample yet"; declared here so the variables never start out unset.
PREV_TOTAL=""
PREV_SECONDS=""
|
|||
|
|
|
|||
|
|
#######################################
# Print the CPU usage (percent) of a process.
#
# Method 1: take the 9th column of the process row printed by `top -bn1`
#           (the %CPU column in top's default layout).
# Method 2: used only when top yields nothing; derived from
#           <PROC_ROOT>/<pid>/stat and <PROC_ROOT>/uptime:
#             - if a previous sample is recorded in PREV_TOTAL/PREV_SECONDS,
#               the usage over the interval since that sample;
#             - otherwise the average over the process lifetime.
#
# Globals:
#   PREV_TOTAL   (read/written) CPU ticks (utime+stime) at the last sample
#   PREV_SECONDS (read/written) system uptime in seconds at the last sample
#   NOTE: these only persist when the function runs in the current shell.
#   Inside a command substitution ($(get_cpu_usage ...)) the assignments are
#   lost, and method 2 then always reports the lifetime average.
# Arguments:   $1 - PID
# Environment: PROC_ROOT - optional procfs mount point (default: /proc)
# Outputs:     the usage on stdout; "0" when it cannot be determined
#######################################
get_cpu_usage() {
    local pid=$1
    local proc_root=${PROC_ROOT:-/proc}
    local cpu_usage="" stat_line="" uptime="" hz total_ticks start_ticks
    local -a fields=()

    # Method 1: top.
    cpu_usage=$(top -bn1 -p "$pid" 2>/dev/null \
        | awk -v pid="$pid" '$1 == pid { print $9 }')

    # Method 2: /proc. Reads are best-effort because the process may exit.
    if [[ -z $cpu_usage ]]; then
        { IFS= read -r stat_line < "$proc_root/$pid/stat"; } 2>/dev/null
        { read -r uptime _ < "$proc_root/uptime"; } 2>/dev/null
    fi

    if [[ -n $stat_line && -n $uptime ]]; then
        hz=$(getconf CLK_TCK 2>/dev/null)
        # Fall back to the common value of 100 ticks/s if getconf fails.
        [[ $hz =~ ^[1-9][0-9]*$ ]] || hz=100

        # Field 2 (comm) is parenthesised and may itself contain spaces, so
        # drop everything up to the last ") " before splitting. After that,
        # stat field N is at index N-3: utime(14)->11, stime(15)->12,
        # starttime(22)->19.
        read -r -a fields <<< "${stat_line##*) }"
        total_ticks=$(( ${fields[11]:-0} + ${fields[12]:-0} ))
        start_ticks=${fields[19]:-0}

        # Interval usage: CPU seconds consumed / wall-clock seconds elapsed.
        if [[ -n ${PREV_TOTAL:-} && -n ${PREV_SECONDS:-} ]]; then
            cpu_usage=$(LC_ALL=C awk \
                -v t="$total_ticks" -v pt="$PREV_TOTAL" \
                -v u="$uptime" -v pu="$PREV_SECONDS" -v hz="$hz" \
                'BEGIN { dt = u - pu; if (dt > 0) printf "%.2f", (t - pt) / hz / dt * 100 }')
        fi

        # No usable previous sample: average over the process lifetime.
        if [[ -z $cpu_usage ]]; then
            cpu_usage=$(LC_ALL=C awk \
                -v t="$total_ticks" -v s="$start_ticks" \
                -v u="$uptime" -v hz="$hz" \
                'BEGIN { life = u - s / hz; if (life > 0) printf "%.2f", t / hz / life * 100 }')
        fi

        PREV_TOTAL=$total_ticks
        PREV_SECONDS=$uptime
    fi

    echo "${cpu_usage:-0}"
}
|
|||
|
|
|
|||
|
|
#######################################
# Print memory and thread figures for a process as one line:
#   "<mem %> <RSS MB> <VSZ MB> <threads>"
# The percentage comes from `ps -o %mem=`; RSS, virtual size and the thread
# count come from the VmRSS, VmSize and Threads lines of the status file.
# Sizes are converted from kB to whole MB (integer division).
# Arguments:   $1 - PID
# Environment: PROC_ROOT - optional procfs mount point (default: /proc)
# Outputs:     the four fields on stdout; every field that is unavailable
#              (process gone, or no Vm* lines in the status file) is 0
#######################################
get_memory_info() {
    local pid=$1
    local proc_root=${PROC_ROOT:-/proc}
    local status_file="$proc_root/$pid/status"
    local mem_usage="" rss_kb="" vsz_kb="" threads=""

    if [[ -f $status_file ]]; then
        mem_usage=$(ps -o %mem= -p "$pid" 2>/dev/null)
        mem_usage=${mem_usage//[[:space:]]/}

        # One pass over the status file; "+ 0" turns a missing line into 0.
        read -r rss_kb vsz_kb threads <<< "$(awk '
            $1 == "VmRSS:"   { rss = $2 }
            $1 == "VmSize:"  { vsz = $2 }
            $1 == "Threads:" { thr = $2 }
            END { print rss + 0, vsz + 0, thr + 0 }
        ' "$status_file" 2>/dev/null)"
    fi

    # The defaults also cover the case where the file vanished mid-read.
    echo "${mem_usage:-0}" \
        "$(( ${rss_kb:-0} / 1024 ))" \
        "$(( ${vsz_kb:-0} / 1024 ))" \
        "${threads:-0}"
}
|
|||
|
|
|
|||
|
|
# Main monitoring loop: take one sample, then sleep one second, until DURATION
# seconds have passed since START_TIME or the target process disappears.
START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))

echo "开始监控进程 PID: $TARGET_PID ..."

# The clock is read once per iteration; the same reading drives both the loop
# condition and the elapsed/remaining bookkeeping.
while CURRENT_TIME=$(date +%s) && (( CURRENT_TIME < END_TIME )); do
    ELAPSED=$((CURRENT_TIME - START_TIME))
    REMAINING=$((DURATION - ELAPSED))

    # Stop as soon as the target process is gone.
    if ! check_process_exists "$TARGET_PID"; then
        echo "警告: 进程 $TARGET_PID 在监控期间退出!"
        break
    fi

    TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")

    # Collect one sample.
    CPU_USAGE=$(get_cpu_usage "$TARGET_PID")
    read -r MEM_USAGE RSS_MB VSZ_MB THREADS <<< "$(get_memory_info "$TARGET_PID")"

    # The process can exit between the existence check and the sampling; an
    # empty field would shift the printf arguments and break the arithmetic
    # below, so treat it as 0.
    CPU_USAGE=${CPU_USAGE:-0}
    MEM_USAGE=${MEM_USAGE:-0}
    RSS_MB=${RSS_MB:-0}
    VSZ_MB=${VSZ_MB:-0}
    THREADS=${THREADS:-0}

    # Append the sample to the detail log.
    printf "%-20s %-8.1f %-8.1f %-12s %-12s %-12s\n" \
        "$TIMESTAMP" "$CPU_USAGE" "$MEM_USAGE" \
        "${RSS_MB}MB" "${VSZ_MB}MB" "$THREADS" >> "$DETAIL_LOG"

    # Accumulate sums (bc for the fractional values, shell arithmetic for the
    # integer ones).
    CPU_SUM=$(echo "$CPU_SUM + $CPU_USAGE" | bc)
    MEM_SUM=$(echo "$MEM_SUM + $MEM_USAGE" | bc)
    RSS_SUM=$((RSS_SUM + RSS_MB))
    VSZ_SUM=$((VSZ_SUM + VSZ_MB))
    THREADS_SUM=$((THREADS_SUM + THREADS))

    # Track maxima. bc prints 1 when the comparison holds; comparing as a
    # string keeps the test well-formed even if bc prints nothing.
    if [[ "$(echo "$CPU_USAGE > $CPU_MAX" | bc)" == 1 ]]; then
        CPU_MAX=$CPU_USAGE
    fi
    if [[ "$(echo "$MEM_USAGE > $MEM_MAX" | bc)" == 1 ]]; then
        MEM_MAX=$MEM_USAGE
    fi
    if (( RSS_MB > RSS_MAX )); then
        RSS_MAX=$RSS_MB
    fi
    if (( VSZ_MB > VSZ_MAX )); then
        VSZ_MAX=$VSZ_MB
    fi
    if (( THREADS > THREADS_MAX )); then
        THREADS_MAX=$THREADS
    fi

    COUNT=$((COUNT + 1))

    # Progress line whenever the elapsed time is a multiple of 10 seconds.
    if (( ELAPSED % 10 == 0 )); then
        echo "进度: ${ELAPSED}/${DURATION}秒, CPU: ${CPU_USAGE}%, 内存: ${MEM_USAGE}%, 物理内存: ${RSS_MB}MB"
    fi

    sleep 1
done
|
|||
|
|
|
|||
|
|
#######################################
# Compute per-sample averages from the accumulated sums.
# Globals:
#   read:    COUNT, CPU_SUM, MEM_SUM, RSS_SUM, VSZ_SUM, THREADS_SUM
#   written: CPU_AVG, MEM_AVG (two decimals, always with a leading digit,
#            e.g. "0.50"), RSS_AVG, VSZ_AVG, THREADS_AVG (integer division)
# All averages are 0 when no sample was taken.
#######################################
compute_averages() {
    local samples=${COUNT:-0}

    if (( samples > 0 )); then
        CPU_AVG=$(LC_ALL=C awk -v s="${CPU_SUM:-0}" -v n="$samples" \
            'BEGIN { printf "%.2f", s / n }')
        MEM_AVG=$(LC_ALL=C awk -v s="${MEM_SUM:-0}" -v n="$samples" \
            'BEGIN { printf "%.2f", s / n }')
        RSS_AVG=$(( ${RSS_SUM:-0} / samples ))
        VSZ_AVG=$(( ${VSZ_SUM:-0} / samples ))
        THREADS_AVG=$(( ${THREADS_SUM:-0} / samples ))
    else
        CPU_AVG=0
        MEM_AVG=0
        RSS_AVG=0
        VSZ_AVG=0
        THREADS_AVG=0
    fi
}

# Compute the averages.
compute_averages
|
|||
|
|
|
|||
|
|
#######################################
# Write the summary report: a header with the process details and the
# monitoring window, followed by average/maximum sections for CPU, memory
# percentage, RSS, virtual size and thread count.
# Globals (all read): SUMMARY_LOG, TARGET_PID, process_name, process_user,
#   DURATION, COUNT, START_TIME, CPU_AVG, CPU_MAX, MEM_AVG, MEM_MAX,
#   RSS_AVG, RSS_MAX, VSZ_AVG, VSZ_MAX, THREADS_AVG, THREADS_MAX
# Outputs: creates/truncates the file named by SUMMARY_LOG
#######################################
write_summary_report() {
    # One redirection for the whole report; the target is quoted so a path
    # containing whitespace does not become an ambiguous redirect.
    {
        echo "==============================================================="
        echo "进程监控统计报告"
        echo "==============================================================="
        echo "进程PID: $TARGET_PID"
        echo "进程名称: $process_name"
        echo "运行用户: $process_user"
        echo "监控时长: $DURATION 秒"
        echo "数据采样数: $COUNT 次"
        # START_TIME is an epoch timestamp; "date -d @N" renders it.
        echo "开始时间: $(date -d "@$START_TIME")"
        echo "结束时间: $(date)"
        echo ""

        echo "CPU使用率统计:"
        echo " 平均值: $CPU_AVG%"
        echo " 最大值: $CPU_MAX%"
        echo ""

        echo "内存使用率统计:"
        echo " 平均值: $MEM_AVG%"
        echo " 最大值: $MEM_MAX%"
        echo ""

        echo "物理内存(RSS)统计:"
        echo " 平均值: ${RSS_AVG}MB"
        echo " 最大值: ${RSS_MAX}MB"
        echo ""

        echo "虚拟内存(VSZ)统计:"
        echo " 平均值: ${VSZ_AVG}MB"
        echo " 最大值: ${VSZ_MAX}MB"
        echo ""

        echo "线程数统计:"
        echo " 平均值: $THREADS_AVG"
        echo " 最大值: $THREADS_MAX"
        echo ""
    } > "$SUMMARY_LOG"
}

# Generate the summary report.
write_summary_report
|
|||
|
|
|
|||
|
|
# Closing summary on the terminal: where the two log files are, how many
# samples were taken, and the CPU / memory / RSS averages and peaks.
printf '%s\n' \
    "" \
    "===== 监控完成 =====" \
    "详细监控数据: $DETAIL_LOG" \
    "统计报告: $SUMMARY_LOG" \
    "" \
    "监控统计摘要:" \
    " - 数据采样数: $COUNT 次" \
    " - CPU平均使用率: $CPU_AVG%" \
    " - CPU最大使用率: $CPU_MAX%" \
    " - 内存平均使用率: $MEM_AVG%" \
    " - 内存最大使用率: $MEM_MAX%" \
    " - 物理内存平均使用: ${RSS_AVG}MB" \
    " - 物理内存峰值使用: ${RSS_MAX}MB"
|