eis/TestProject/scripts/pid_monitor.sh

298 lines
8.9 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Enhanced process resource monitor (PID-based).
# Usage: ./pid_monitor.sh <PID> <monitoring duration in seconds>
# Validate the command line: exactly two arguments (PID, duration) are required.
# Usage errors are diagnostics, so they are written to stderr.
if [ $# -ne 2 ]; then
  echo "错误: 参数不正确!" >&2
  echo "用法: $0 <进程PID> <监控时长(秒)>" >&2
  exit 1
fi
TARGET_PID=$1
DURATION=$2
# The PID is interpolated into /proc paths and log file names, and the
# duration is used in shell arithmetic, so accept plain decimal digits only.
if [[ ! $TARGET_PID =~ ^[0-9]+$ ]]; then
  echo "错误: 进程PID必须是非负整数: $TARGET_PID" >&2
  exit 1
fi
if [[ ! $DURATION =~ ^[0-9]+$ ]]; then
  echo "错误: 监控时长必须是非负整数(秒): $DURATION" >&2
  exit 1
fi
# Force base 10 so that leading zeros (e.g. "08") are not treated as octal
# by later arithmetic expansions.
TARGET_PID=$((10#$TARGET_PID))
DURATION=$((10#$DURATION))
# Log file names: pid_monitor_<pid>_{detail,summary}_<YYYYmmdd_HHMMSS>.log
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_PREFIX="pid_monitor_${TARGET_PID}"
DETAIL_LOG="${LOG_PREFIX}_detail_${TIMESTAMP}.log"
SUMMARY_LOG="${LOG_PREFIX}_summary_${TIMESTAMP}.log"
#######################################
# Report whether a process with the given PID currently exists.
# Arguments: $1 - PID to probe
# Returns:   0 if /proc/<pid> is a directory; 1 otherwise, including when
#            the argument is empty or not a plain decimal number
#######################################
check_process_exists() {
  local pid=${1:-}
  # Reject non-numeric input first: an empty value would probe "/proc/"
  # itself (which always exists) and names such as "self" would match /proc
  # entries that are not the requested process.
  [[ $pid =~ ^[0-9]+$ ]] || return 1
  [[ -d /proc/$pid ]]
}
#######################################
# Print basic information about a process.
# Arguments: $1 - PID to inspect
# Outputs:   one line "<name> <user> <cmdline>" on stdout; any field that
#            cannot be determined (or is empty) is printed as "N/A".
#            The command line is truncated to its first 100 bytes.
# NOTE: the fields are joined by single spaces, so a consumer that splits
#       on whitespace will mis-assign fields if the process name itself
#       contains a space.
#######################################
get_process_info() {
  local pid=$1
  local process_name="" user="" cmdline=""
  if [[ -f /proc/$pid/status ]]; then
    # Declarations are kept separate from the command substitutions, and
    # the "N/A" fallback is applied to the captured value below: a trailing
    # "|| echo" on a pipeline only sees the exit status of its last stage.
    process_name=$(cat "/proc/$pid/comm" 2>/dev/null)
    user=$(ps -o user= -p "$pid" 2>/dev/null)
    # /proc/<pid>/cmdline is NUL-separated; turn the separators into spaces.
    cmdline=$({ tr '\0' ' ' < "/proc/$pid/cmdline"; } 2>/dev/null | head -c 100)
  fi
  echo "${process_name:-N/A}" "${user:-N/A}" "${cmdline:-N/A}"
}
# Abort early when the target process is not running.
if ! check_process_exists "$TARGET_PID"; then
  echo "错误: 进程 PID $TARGET_PID 不存在!" >&2
  exit 1
fi
# Fetch name, owner and command line. get_process_info prints them as one
# space-separated line; the last variable receives the rest of the line.
# -r keeps backslashes in the command line intact.
read -r process_name process_user process_cmdline <<< "$(get_process_info "$TARGET_PID")"
# Show the monitoring parameters on the terminal.
echo "开始监控进程..."
echo "进程PID: $TARGET_PID"
echo "进程名称: $process_name"
echo "运行用户: $process_user"
echo "命令行: $process_cmdline"
echo "监控时长: $DURATION"
echo "开始时间: $(date)"
echo "详细日志: $DETAIL_LOG"
echo "汇总报告: $SUMMARY_LOG"
# Create (or truncate) the detail log and write its header plus the column
# titles. The group is redirected once, with the target quoted, instead of
# re-opening the file for every line.
{
  echo "进程监控报告 - PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "命令行: $process_cmdline"
  echo "开始时间: $(date)"
  echo "监控间隔: 1秒"
  echo "==============================================================="
  printf "%-20s %-8s %-8s %-12s %-12s %-12s\n" "时间戳" "CPU(%)" "内存(%)" "物理内存(MB)" "虚拟内存(MB)" "线程数"
} > "$DETAIL_LOG"
# Reset the running statistics: sample counter, per-metric running totals
# and per-metric maxima all start at zero.
COUNT=0
CPU_SUM=0 MEM_SUM=0 RSS_SUM=0 VSZ_SUM=0 THREADS_SUM=0
CPU_MAX=0 MEM_MAX=0 RSS_MAX=0 VSZ_MAX=0 THREADS_MAX=0
#######################################
# Print the CPU usage (percent) of a process.
# Arguments: $1 - PID to sample
# Outputs:   a non-negative decimal number on stdout; "0" when no value
#            can be obtained (e.g. the process is gone)
# Method 1:  the 9th column of the matching row of `top -bn1 -p <pid>`.
# Method 2:  (fallback) average CPU usage over the lifetime of the process,
#            computed from /proc/<pid>/stat and /proc/uptime:
#              100 * ((utime + stime) / CLK_TCK) / (uptime - starttime / CLK_TCK)
#            The fallback is deliberately stateless. A delta against a
#            previous sample cannot work when the function is invoked
#            through command substitution, because variables assigned in
#            that subshell are discarded.
#######################################
get_cpu_usage() {
  local pid=$1
  local cpu_usage=""

  # LC_ALL=C keeps the decimal separator a dot regardless of user locale.
  cpu_usage=$(LC_ALL=C top -bn1 -p "$pid" 2>/dev/null \
    | awk -v pid="$pid" '$1 == pid { print $9; exit }')

  if [[ ! $cpu_usage =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    cpu_usage=""
    local stat_line=""
    if [[ -r /proc/$pid/stat ]]; then
      stat_line=$(cat "/proc/$pid/stat" 2>/dev/null)
    fi
    if [[ -n $stat_line ]]; then
      # The second field (the command name in parentheses) may contain
      # spaces, so drop everything up to the last ") " before splitting.
      # fields[0] is then field 3 of the stat line, i.e. field N lives at
      # index N-3.
      local -a fields
      read -r -a fields <<< "${stat_line##*) }"
      local utime=${fields[11]:-0}      # field 14
      local stime=${fields[12]:-0}      # field 15
      local starttime=${fields[19]:-0}  # field 22, in clock ticks
      local hz uptime
      hz=$(getconf CLK_TCK 2>/dev/null)
      [[ $hz =~ ^[1-9][0-9]*$ ]] || hz=100
      uptime=$(awk '{ print $1; exit }' /proc/uptime 2>/dev/null)
      cpu_usage=$(LC_ALL=C awk -v ticks="$((utime + stime))" -v hz="$hz" \
        -v up="${uptime:-0}" -v start="$starttime" 'BEGIN {
          alive = up - start / hz
          if (alive > 0) printf "%.2f", 100 * (ticks / hz) / alive
        }')
    fi
  fi
  echo "${cpu_usage:-0}"
}
#######################################
# Print memory and thread figures for a process.
# Arguments: $1 - PID to sample
# Outputs:   one line "<mem%> <rss_mb> <vsz_mb> <threads>" on stdout.
#            mem% comes from `ps -o %mem=`; RSS, VSZ and the thread count
#            come from the VmRSS, VmSize and Threads lines of
#            /proc/<pid>/status (kB values are integer-divided by 1024).
#            Every value that cannot be obtained is reported as 0.
#######################################
get_memory_info() {
  local pid=$1
  local mem_usage=0 rss_kb=0 vsz_kb=0 threads=0
  if [[ -f /proc/$pid/status ]]; then
    # ps pads the column with blanks; strip them so the output line has
    # exactly four fields.
    mem_usage=$(LC_ALL=C ps -o %mem= -p "$pid" 2>/dev/null | tr -d '[:space:]')
    # One pass over the status file. Values are printed as the original
    # strings so that very large kB counts are not reformatted by awk.
    local sample=""
    sample=$(awk '
      $1 == "VmRSS:"   { rss = $2 }
      $1 == "VmSize:"  { vsz = $2 }
      $1 == "Threads:" { thr = $2 }
      END { printf "%s %s %s\n", (rss == "" ? 0 : rss), (vsz == "" ? 0 : vsz), (thr == "" ? 0 : thr) }
    ' "/proc/$pid/status" 2>/dev/null)
    read -r rss_kb vsz_kb threads <<< "$sample"
  fi
  # Fall back to 0 for anything missing or malformed. Some processes have
  # no VmRSS/VmSize lines, and the process may exit between the reads.
  [[ $mem_usage =~ ^[0-9]+([.][0-9]+)?$ ]] || mem_usage=0
  [[ $rss_kb =~ ^[0-9]+$ ]] || rss_kb=0
  [[ $vsz_kb =~ ^[0-9]+$ ]] || vsz_kb=0
  [[ $threads =~ ^[0-9]+$ ]] || threads=0
  # Convert kB to MB (integer division).
  echo "$mem_usage" "$((rss_kb / 1024))" "$((vsz_kb / 1024))" "$threads"
}
# Main sampling loop: take one sample per iteration until DURATION seconds
# have elapsed or the target process disappears. Each sample is appended to
# the detail log and folded into the running sums and maxima.
START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
echo "开始监控进程 PID: $TARGET_PID ..."
while (( $(date +%s) < END_TIME )); do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  REMAINING=$((DURATION - ELAPSED))
  # The clock may have crossed END_TIME between the loop test and here.
  if (( REMAINING <= 0 )); then
    break
  fi
  # Stop as soon as the target process is gone.
  if ! check_process_exists "$TARGET_PID"; then
    echo "警告: 进程 $TARGET_PID 在监控期间退出!"
    break
  fi
  # NOTE: this reuses the TIMESTAMP variable that was used to build the log
  # file names; those names have already been computed at this point.
  TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")
  # CPU usage (percent).
  CPU_USAGE=$(get_cpu_usage "$TARGET_PID")
  # Memory percent, RSS (MB), VSZ (MB) and thread count.
  read -r MEM_USAGE RSS_MB VSZ_MB THREADS <<< "$(get_memory_info "$TARGET_PID")"
  # A sample can come back empty or malformed if the process exits mid-read.
  # Normalise to 0 so printf, bc and the integer tests below stay valid.
  [[ $CPU_USAGE =~ ^[0-9]*[.]?[0-9]+$ ]] || CPU_USAGE=0
  [[ $MEM_USAGE =~ ^[0-9]*[.]?[0-9]+$ ]] || MEM_USAGE=0
  [[ $RSS_MB =~ ^[0-9]+$ ]] || RSS_MB=0
  [[ $VSZ_MB =~ ^[0-9]+$ ]] || VSZ_MB=0
  [[ $THREADS =~ ^[0-9]+$ ]] || THREADS=0
  # Append the sample to the detail log.
  printf "%-20s %-8.1f %-8.1f %-12s %-12s %-12s\n" "$TIMESTAMP" "$CPU_USAGE" "$MEM_USAGE" "${RSS_MB}MB" "${VSZ_MB}MB" "$THREADS" >> "$DETAIL_LOG"
  # Running totals (bc for the fractional percentages).
  CPU_SUM=$(echo "$CPU_SUM + $CPU_USAGE" | bc)
  MEM_SUM=$(echo "$MEM_SUM + $MEM_USAGE" | bc)
  RSS_SUM=$((RSS_SUM + RSS_MB))
  VSZ_SUM=$((VSZ_SUM + VSZ_MB))
  THREADS_SUM=$((THREADS_SUM + THREADS))
  # Running maxima.
  if [[ $(echo "$CPU_USAGE > $CPU_MAX" | bc) == 1 ]]; then
    CPU_MAX=$CPU_USAGE
  fi
  if [[ $(echo "$MEM_USAGE > $MEM_MAX" | bc) == 1 ]]; then
    MEM_MAX=$MEM_USAGE
  fi
  if (( RSS_MB > RSS_MAX )); then
    RSS_MAX=$RSS_MB
  fi
  if (( VSZ_MB > VSZ_MAX )); then
    VSZ_MAX=$VSZ_MB
  fi
  if (( THREADS > THREADS_MAX )); then
    THREADS_MAX=$THREADS
  fi
  COUNT=$((COUNT + 1))
  # Progress line whenever the elapsed time is a multiple of 10 seconds.
  if (( ELAPSED % 10 == 0 )); then
    echo "进度: ${ELAPSED}/${DURATION}秒, CPU: ${CPU_USAGE}%, 内存: ${MEM_USAGE}%, 物理内存: ${RSS_MB}MB"
  fi
  sleep 1
done
#######################################
# Derive per-sample averages from the running totals.
# Globals:   COUNT, CPU_SUM, MEM_SUM, RSS_SUM, VSZ_SUM, THREADS_SUM (read)
#            CPU_AVG, MEM_AVG, RSS_AVG, VSZ_AVG, THREADS_AVG (written)
# CPU and memory percentages are formatted with two decimals and a leading
# zero (e.g. "0.50"; plain bc would print ".50"). The MB and thread
# averages use integer division. With no samples every average is 0.
#######################################
compute_averages() {
  if (( ${COUNT:-0} > 0 )); then
    CPU_AVG=$(LC_ALL=C awk -v sum="${CPU_SUM:-0}" -v n="$COUNT" 'BEGIN { printf "%.2f", sum / n }')
    MEM_AVG=$(LC_ALL=C awk -v sum="${MEM_SUM:-0}" -v n="$COUNT" 'BEGIN { printf "%.2f", sum / n }')
    RSS_AVG=$(( ${RSS_SUM:-0} / COUNT ))
    VSZ_AVG=$(( ${VSZ_SUM:-0} / COUNT ))
    THREADS_AVG=$(( ${THREADS_SUM:-0} / COUNT ))
  else
    CPU_AVG=0
    MEM_AVG=0
    RSS_AVG=0
    VSZ_AVG=0
    THREADS_AVG=0
  fi
}
compute_averages
# Write the summary report (creating or truncating the file). The group is
# redirected once, with the target quoted, instead of re-opening the file
# for every line.
{
  echo "==============================================================="
  echo "进程监控统计报告"
  echo "==============================================================="
  echo "进程PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "监控时长: $DURATION"
  echo "数据采样数: $COUNT"
  # START_TIME is in epoch seconds; "date -d @<epoch>" renders it.
  echo "开始时间: $(date -d "@$START_TIME")"
  echo "结束时间: $(date)"
  echo ""
  echo "CPU使用率统计:"
  echo " 平均值: $CPU_AVG%"
  echo " 最大值: $CPU_MAX%"
  echo ""
  echo "内存使用率统计:"
  echo " 平均值: $MEM_AVG%"
  echo " 最大值: $MEM_MAX%"
  echo ""
  echo "物理内存(RSS)统计:"
  echo " 平均值: ${RSS_AVG}MB"
  echo " 最大值: ${RSS_MAX}MB"
  echo ""
  echo "虚拟内存(VSZ)统计:"
  echo " 平均值: ${VSZ_AVG}MB"
  echo " 最大值: ${VSZ_MAX}MB"
  echo ""
  echo "线程数统计:"
  echo " 平均值: $THREADS_AVG"
  echo " 最大值: $THREADS_MAX"
  echo ""
} > "$SUMMARY_LOG"
# Print the closing summary on the terminal: where the two log files are,
# followed by the headline statistics. One printf call emits each argument
# on its own line.
printf '%s\n' \
  "" \
  "===== 监控完成 =====" \
  "详细监控数据: $DETAIL_LOG" \
  "统计报告: $SUMMARY_LOG" \
  "" \
  "监控统计摘要:" \
  " - 数据采样数: $COUNT" \
  " - CPU平均使用率: $CPU_AVG%" \
  " - CPU最大使用率: $CPU_MAX%" \
  " - 内存平均使用率: $MEM_AVG%" \
  " - 内存最大使用率: $MEM_MAX%" \
  " - 物理内存平均使用: ${RSS_AVG}MB" \
  " - 物理内存峰值使用: ${RSS_MAX}MB"