eis/TestProject/scripts/proc_monitor.sh

272 lines
8.5 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
# Enhanced process resource monitor (handles several processes that match the
# same name).
# Usage: ./proc_monitor.sh <process-name> <duration-in-seconds>

# Exactly two arguments are required: the name/pattern and the duration.
if [ $# -ne 2 ]; then
  echo "错误: 参数不正确!" >&2
  echo "用法: $0 <进程名> <监控时长(秒)>" >&2
  exit 1
fi

PROCESS_NAME=$1
DURATION=$2

# An empty pattern would match every process when handed to `pgrep -f`.
if [ -z "$PROCESS_NAME" ]; then
  echo "错误: 进程名不能为空!" >&2
  echo "用法: $0 <进程名> <监控时长(秒)>" >&2
  exit 1
fi

# DURATION is used in shell arithmetic further down, so only a plain
# non-negative integer is accepted (anything else would be evaluated as an
# arithmetic expression or abort the script mid-run).
if ! [[ $DURATION =~ ^[0-9]+$ ]]; then
  echo "错误: 监控时长必须是非负整数(秒): '$DURATION'" >&2
  echo "用法: $0 <进程名> <监控时长(秒)>" >&2
  exit 1
fi
# Force base 10 so that a value such as "010" is not read as octal.
DURATION=$((10#$DURATION))

# The name is embedded in log file names; replace characters that are unsafe
# in a file name (slashes, whitespace, glob characters, ...) with "_".
# Ordinary names made of letters, digits, ".", "_" and "-" are unchanged.
BASE_LOG_PREFIX="process_monitor_${PROCESS_NAME//[^[:alnum:]._-]/_}"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
SUMMARY_LOG="${BASE_LOG_PREFIX}_summary_${TIMESTAMP}.log"
#######################################
# Print the PIDs of all processes whose full command line matches a pattern.
#
# The monitor's own command line contains the pattern as an argument, so
# `pgrep -f` also reports this script and the subshells it forks. Those are
# filtered out: the main shell ($$), the shell executing this function
# ($BASHPID), and any match that no longer exists once pgrep has returned
# (e.g. a short-lived fork that only existed to run pgrep).
# Wrapper processes such as `sudo` or `timeout` that carry the pattern in
# their own arguments are NOT filtered.
#
# Arguments: $1 - pattern handed to `pgrep -f`
# Outputs:   matching PIDs on stdout, one per line
# Returns:   0 if at least one PID was printed, 1 otherwise
#######################################
get_all_pids() {
  local name=$1
  local matches candidate
  local status=1

  matches=$(pgrep -f -- "$name" 2>/dev/null)

  # Word splitting is intentional: pgrep prints one numeric PID per line.
  for candidate in $matches; do
    if [[ $candidate == "$$" || $candidate == "$BASHPID" ]]; then
      continue
    fi
    if [[ ! -d /proc/$candidate ]]; then
      continue
    fi
    printf '%s\n' "$candidate"
    status=0
  done

  return "$status"
}
# Discover the processes to monitor; abort when nothing matches.
echo "正在查找进程: $PROCESS_NAME"
PIDS=$(get_all_pids "$PROCESS_NAME")
if [ -z "$PIDS" ]; then
  echo "错误: 未找到进程 '$PROCESS_NAME'!" >&2
  exit 1
fi

# Turn the PID list into an array.
# shellcheck disable=SC2206 -- splitting on whitespace is intended (numeric PIDs)
PID_ARRAY=($PIDS)
PID_COUNT=${#PID_ARRAY[@]}
echo "找到 $PID_COUNT 个同名进程PID: ${PID_ARRAY[*]}"
echo "监控时长: $DURATION"
echo "汇总日志: $SUMMARY_LOG"
echo "开始时间: $(date)"

# Per-PID state, keyed by PID: log file path and running statistics.
declare -A LOG_FILES
declare -A CPU_SUM
declare -A MEM_SUM
declare -A CPU_MAX
declare -A MEM_MAX
declare -A COUNT

for pid in "${PID_ARRAY[@]}"; do
  LOG_FILES[$pid]="${BASE_LOG_PREFIX}_pid${pid}_${TIMESTAMP}.log"

  # Create the per-process log and write its header in a single redirect.
  # Fail fast if the log cannot be written instead of monitoring blindly.
  if ! {
    echo "进程监控日志 - $PROCESS_NAME (PID: $pid)"
    echo "开始时间: $(date)"
    echo "监控间隔: 2秒"
    echo "=============================================="
    printf "%-20s %-8s %-8s %-12s %-12s\n" "时间戳" "CPU(%)" "内存(%)" "虚拟内存" "物理内存"
  } > "${LOG_FILES[$pid]}"; then
    echo "错误: 无法写入日志文件 '${LOG_FILES[$pid]}'!" >&2
    exit 1
  fi

  # Zero the statistics for this PID.
  CPU_SUM[$pid]=0
  MEM_SUM[$pid]=0
  CPU_MAX[$pid]=0
  MEM_MAX[$pid]=0
  COUNT[$pid]=0
done

# Statistics aggregated over every sample of every process.
GLOBAL_CPU_SUM=0
GLOBAL_MEM_SUM=0
GLOBAL_CPU_MAX=0
GLOBAL_MEM_MAX=0
GLOBAL_COUNT=0
# Print the sum of two decimal numbers ($1 + $2) with two fractional digits.
_pm_add() {
  LC_ALL=C awk -v a="$1" -v b="$2" 'BEGIN { printf "%.2f", a + b }'
}

# Succeed (status 0) when decimal $1 is strictly greater than decimal $2.
_pm_gt() {
  LC_ALL=C awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

#######################################
# Take one resource sample of a single process.
#
# Appends one line to the process log, prints a one-line status to stdout and
# updates the per-PID and global running statistics.
#
# Globals:   CPU_SUM, MEM_SUM, CPU_MAX, MEM_MAX, COUNT (associative arrays,
#            entry for $1 is updated); GLOBAL_CPU_SUM, GLOBAL_MEM_SUM,
#            GLOBAL_CPU_MAX, GLOBAL_MEM_MAX, GLOBAL_COUNT (updated)
# Arguments: $1 - PID to sample
#            $2 - log file to append the sample to
# Returns:   0 when a sample was recorded; 1 when the process is gone or its
#            `top` line could not be obtained/parsed
#######################################
monitor_single_process() {
  local pid=$1
  local log_file=$2
  local sample cpu_usage mem_usage now
  local vm_size_kb vm_rss_kb
  local vm_size_mb=0
  local vm_rss_mb=0
  local number_re='^[0-9]+([.][0-9]+)?$'

  if [ ! -d "/proc/$pid" ]; then
    echo "进程 $pid 已退出"
    return 1
  fi

  # One batch iteration of top restricted to this PID. Only the row whose
  # first column IS the PID is used: a word match on the whole output can
  # also hit summary lines that merely contain the same number.
  # LC_ALL=C keeps "." as the decimal separator. Columns 9 and 10 are %CPU
  # and %MEM in top's default field layout.
  sample=$(LC_ALL=C top -bn1 -p "$pid" 2>/dev/null \
    | awk -v pid="$pid" '!seen && $1 == pid { print $9, $10; seen = 1 }')
  if [ -z "$sample" ]; then
    return 1
  fi
  read -r cpu_usage mem_usage <<< "$sample"

  # Refuse to feed non-numeric fields into the statistics.
  if ! [[ $cpu_usage =~ $number_re && $mem_usage =~ $number_re ]]; then
    echo "警告: 无法解析进程 $pid 的 top 输出" >&2
    return 1
  fi

  # Detailed memory figures from /proc (reported in kB, converted to MB).
  if [ -f "/proc/$pid/status" ]; then
    read -r vm_size_kb vm_rss_kb < <(
      awk '/^VmSize:/ { size = $2 } /^VmRSS:/ { rss = $2 }
           END { print size + 0, rss + 0 }' "/proc/$pid/status" 2>/dev/null
    )
    vm_size_mb=$(( ${vm_size_kb:-0} / 1024 ))
    vm_rss_mb=$(( ${vm_rss_kb:-0} / 1024 ))
  fi

  now=$(date +"%Y-%m-%d %H:%M:%S")

  # Append the sample to this process's own log.
  printf "%-20s %-8.1f %-8.1f %-12s %-12s\n" \
    "$now" "$cpu_usage" "$mem_usage" "${vm_size_mb}MB" "${vm_rss_mb}MB" >> "$log_file"

  # Per-process statistics.
  CPU_SUM[$pid]=$(_pm_add "${CPU_SUM[$pid]:-0}" "$cpu_usage")
  MEM_SUM[$pid]=$(_pm_add "${MEM_SUM[$pid]:-0}" "$mem_usage")
  if _pm_gt "$cpu_usage" "${CPU_MAX[$pid]:-0}"; then
    CPU_MAX[$pid]=$cpu_usage
  fi
  if _pm_gt "$mem_usage" "${MEM_MAX[$pid]:-0}"; then
    MEM_MAX[$pid]=$mem_usage
  fi
  COUNT[$pid]=$(( ${COUNT[$pid]:-0} + 1 ))

  # Global statistics.
  GLOBAL_CPU_SUM=$(_pm_add "${GLOBAL_CPU_SUM:-0}" "$cpu_usage")
  GLOBAL_MEM_SUM=$(_pm_add "${GLOBAL_MEM_SUM:-0}" "$mem_usage")
  if _pm_gt "$cpu_usage" "${GLOBAL_CPU_MAX:-0}"; then
    GLOBAL_CPU_MAX=$cpu_usage
  fi
  if _pm_gt "$mem_usage" "${GLOBAL_MEM_MAX:-0}"; then
    GLOBAL_MEM_MAX=$mem_usage
  fi
  GLOBAL_COUNT=$(( ${GLOBAL_COUNT:-0} + 1 ))

  echo "$now - PID $pid: CPU=${cpu_usage}%, 内存=${mem_usage}%, 物理内存=${vm_rss_mb}MB"
  return 0
}
# Main monitoring loop: sample every tracked process about every 2 seconds
# until DURATION seconds have elapsed or no tracked process is left.
START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
echo "开始监控进程..."
echo "进程列表: ${PID_ARRAY[*]}"

# Set of PIDs currently in PID_ARRAY, rebuilt on every iteration.
declare -A TRACKED_PIDS=()

while [ "$(date +%s)" -lt "$END_TIME" ]; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  REMAINING=$((DURATION - ELAPSED))
  if [ "$REMAINING" -le 0 ]; then
    break
  fi
  echo "======= 监控进度: $ELAPSED/$DURATION 秒, 剩余: $REMAINING 秒 ======="

  TRACKED_PIDS=()
  for pid in "${PID_ARRAY[@]}"; do
    TRACKED_PIDS[$pid]=1
  done

  # Pick up matching processes that are not being sampled right now.
  CURRENT_PIDS=$(get_all_pids "$PROCESS_NAME")
  NEW_PIDS=""
  # Word splitting is intentional: one numeric PID per line.
  for candidate_pid in $CURRENT_PIDS; do
    if [ -n "${TRACKED_PIDS[$candidate_pid]+x}" ]; then
      continue
    fi
    TRACKED_PIDS[$candidate_pid]=1
    PID_ARRAY+=("$candidate_pid")

    # A PID that already owns a log was merely dropped after a failed sample.
    # Resume it as-is: re-initialising would truncate its log and discard the
    # statistics gathered so far.
    if [ -n "${LOG_FILES[$candidate_pid]+x}" ]; then
      continue
    fi

    NEW_PIDS="${NEW_PIDS:+$NEW_PIDS }$candidate_pid"
    LOG_FILES[$candidate_pid]="${BASE_LOG_PREFIX}_pid${candidate_pid}_${TIMESTAMP}.log"
    # Same header as the logs created at start-up.
    {
      echo "进程监控日志 - $PROCESS_NAME (PID: $candidate_pid)"
      echo "开始时间: $(date)"
      echo "监控间隔: 2秒"
      echo "=============================================="
      printf "%-20s %-8s %-8s %-12s %-12s\n" "时间戳" "CPU(%)" "内存(%)" "虚拟内存" "物理内存"
    } > "${LOG_FILES[$candidate_pid]}"
    CPU_SUM[$candidate_pid]=0
    MEM_SUM[$candidate_pid]=0
    CPU_MAX[$candidate_pid]=0
    MEM_MAX[$candidate_pid]=0
    COUNT[$candidate_pid]=0
  done
  if [ -n "$NEW_PIDS" ]; then
    echo "发现新进程: $NEW_PIDS,已加入监控"
  fi

  # Sample every process; keep only those that could be sampled.
  ACTIVE_PIDS=()
  for pid in "${PID_ARRAY[@]}"; do
    if monitor_single_process "$pid" "${LOG_FILES[$pid]}"; then
      ACTIVE_PIDS+=("$pid")
    fi
  done

  PID_ARRAY=("${ACTIVE_PIDS[@]}")
  PID_COUNT=${#PID_ARRAY[@]}
  if [ "$PID_COUNT" -eq 0 ]; then
    echo "所有监控的进程都已退出!"
    break
  fi

  sleep 2
done
# Print sum/count ($1 / $2) with two fractional digits. printf keeps the
# leading zero for values below 1 (bc would print ".33"). Only call with a
# non-zero count.
_pm_average() {
  LC_ALL=C awk -v sum="$1" -v count="$2" 'BEGIN { printf "%.2f", sum / count }'
}

# Write the summary report header (overwrites any previous summary file).
{
  echo "=============================================="
  echo "多进程监控统计报告"
  echo "=============================================="
  echo "进程名称: $PROCESS_NAME"
  echo "监控时长: $DURATION"
  echo "开始时间: $(date -d "@$START_TIME")"
  echo "结束时间: $(date)"
  echo ""
} > "$SUMMARY_LOG"

# Per-process statistics: appended to each process log and summarised in one
# line of the summary report. Processes without any sample are skipped.
for pid in "${!LOG_FILES[@]}"; do
  if [ "${COUNT[$pid]:-0}" -gt 0 ]; then
    CPU_AVG=$(_pm_average "${CPU_SUM[$pid]}" "${COUNT[$pid]}")
    MEM_AVG=$(_pm_average "${MEM_SUM[$pid]}" "${COUNT[$pid]}")
    {
      echo "进程 PID: $pid"
      echo "数据点数: ${COUNT[$pid]}"
      echo "CPU平均使用率: $CPU_AVG%"
      echo "CPU最大使用率: ${CPU_MAX[$pid]}%"
      echo "内存平均使用率: $MEM_AVG%"
      echo "内存最大使用率: ${MEM_MAX[$pid]}%"
    } >> "${LOG_FILES[$pid]}"
    echo "PID $pid - 平均CPU: $CPU_AVG%, 最大CPU: ${CPU_MAX[$pid]}%, 平均内存: $MEM_AVG%, 最大内存: ${MEM_MAX[$pid]}%" >> "$SUMMARY_LOG"
  fi
done

# Statistics over all samples of all processes.
if [ "${GLOBAL_COUNT:-0}" -gt 0 ]; then
  GLOBAL_CPU_AVG=$(_pm_average "$GLOBAL_CPU_SUM" "$GLOBAL_COUNT")
  GLOBAL_MEM_AVG=$(_pm_average "$GLOBAL_MEM_SUM" "$GLOBAL_COUNT")
else
  GLOBAL_CPU_AVG=0
  GLOBAL_MEM_AVG=0
fi

{
  echo ""
  echo "===== 全局统计(所有进程聚合) ====="
  echo "总监控进程数: ${#LOG_FILES[@]}"
  echo "总数据点数: $GLOBAL_COUNT"
  echo "全局CPU平均使用率: $GLOBAL_CPU_AVG%"
  echo "全局CPU最大使用率: $GLOBAL_CPU_MAX%"
  echo "全局内存平均使用率: $GLOBAL_MEM_AVG%"
  echo "全局内存最大使用率: $GLOBAL_MEM_MAX%"
} >> "$SUMMARY_LOG"

# Final summary on the terminal.
echo ""
echo "===== 监控完成 ====="
echo "详细日志文件:"
for pid in "${!LOG_FILES[@]}"; do
  echo " - PID $pid: ${LOG_FILES[$pid]}"
done
echo "汇总报告: $SUMMARY_LOG"
echo ""
echo "全局统计结果:"
echo " - 总监控进程数: ${#LOG_FILES[@]}"
echo " - 全局CPU平均使用率: $GLOBAL_CPU_AVG%"
echo " - 全局CPU最大使用率: $GLOBAL_CPU_MAX%"
echo " - 全局内存平均使用率: $GLOBAL_MEM_AVG%"
echo " - 全局内存最大使用率: $GLOBAL_MEM_MAX%"