eis/TestProject/scripts/proc_monitor.sh

272 lines
8.5 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Enhanced process resource monitor (handles several processes that match
# the same name).
# Usage: <script> <process-name> <duration-seconds>

# Exactly two arguments are required: the process name and the duration.
if [[ $# -ne 2 ]]; then
  echo "错误: 参数不正确!" >&2
  echo "用法: $0 <进程名> <监控时长(秒)>" >&2
  exit 1
fi

PROCESS_NAME=$1
DURATION=$2

# The duration is used in shell arithmetic by the monitoring loop, so it
# must be a plain non-negative decimal integer.
if [[ ! $DURATION =~ ^[0-9]+$ ]]; then
  echo "错误: 监控时长必须是非负整数(秒): '$DURATION'" >&2
  echo "用法: $0 <进程名> <监控时长(秒)>" >&2
  exit 1
fi
# Force base 10 so a value such as "08" is not rejected as invalid octal.
DURATION=$((10#$DURATION))

# The process name becomes part of every log file name. Slashes and
# whitespace (both common in pgrep -f patterns) would yield unusable paths,
# so they are mapped to "_"; all other characters are kept as given.
UNSAFE_NAME_CHARS='[/[:space:]]'
BASE_LOG_PREFIX="process_monitor_${PROCESS_NAME//$UNSAFE_NAME_CHARS/_}"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
SUMMARY_LOG="${BASE_LOG_PREFIX}_summary_${TIMESTAMP}.log"
#######################################
# Print the PIDs of all processes whose full command line matches a pattern.
#
# pgrep -f matches against the whole command line, so it also reports this
# script (the pattern is one of its own arguments) and any subshell forked
# from it. Those PIDs are filtered out so the monitor never tracks itself.
#
# Arguments:
#   $1 - pattern passed to pgrep -f
# Outputs:
#   One PID per line on stdout.
# Returns:
#   0 if at least one PID was printed, 1 otherwise.
#######################################
get_all_pids() {
  local name=$1
  local candidates pid
  local status=1

  # Method 1: pgrep (recommended, more concise).
  candidates=$(pgrep -f -- "$name" 2>/dev/null)
  # Method 2: ps and awk (alternative, more precise).
  # ps -ef | awk -v proc="$name" '$0 ~ proc && $0 !~ /grep|proc_monitor/ {print $2}'

  for pid in $candidates; do
    # Skip the script itself and the subshell this function runs in when it
    # is called through $(...).
    if [[ "$pid" == "$$" || "$pid" == "$BASHPID" ]]; then
      continue
    fi
    # Skip short-lived helper subshells that were alive while pgrep scanned
    # but are gone by now.
    [[ -d "/proc/$pid" ]] || continue
    printf '%s\n' "$pid"
    status=0
  done
  return "$status"
}
# Initial discovery: find every process that matches the requested name.
echo "正在查找进程: $PROCESS_NAME"
PIDS=$(get_all_pids "$PROCESS_NAME")
if [[ -z "$PIDS" ]]; then
  echo "错误: 未找到进程 '$PROCESS_NAME'!" >&2
  exit 1
fi

# Split the whitespace/newline separated PID list into an array without
# glob expansion. read returns non-zero at end of input, which is expected.
read -r -d '' -a PID_ARRAY <<<"$PIDS" || true
PID_COUNT=${#PID_ARRAY[@]}

echo "找到 $PID_COUNT 个同名进程PID: $PIDS"
echo "监控时长: $DURATION"
echo "汇总日志: $SUMMARY_LOG"
echo "开始时间: $(date)"

# Per-PID bookkeeping, all keyed by PID:
#   LOG_FILES        - path of the PID's own log file
#   CPU_SUM/MEM_SUM  - running sums of the sampled CPU% / MEM%
#   CPU_MAX/MEM_MAX  - largest sampled CPU% / MEM%
#   COUNT            - number of samples recorded
declare -A LOG_FILES
declare -A CPU_SUM
declare -A MEM_SUM
declare -A CPU_MAX
declare -A MEM_MAX
declare -A COUNT

for pid in "${PID_ARRAY[@]}"; do
  LOG_FILES[$pid]="${BASE_LOG_PREFIX}_pid${pid}_${TIMESTAMP}.log"

  # Write the log header. The target is quoted so that a process name
  # containing whitespace cannot turn it into an ambiguous redirect.
  {
    echo "进程监控日志 - $PROCESS_NAME (PID: $pid)"
    echo "开始时间: $(date)"
    echo "监控间隔: 2秒"
    echo "=============================================="
    printf "%-20s %-8s %-8s %-12s %-12s\n" "时间戳" "CPU(%)" "内存(%)" "虚拟内存" "物理内存"
  } > "${LOG_FILES[$pid]}"

  # Start this PID's statistics from zero.
  CPU_SUM[$pid]=0
  MEM_SUM[$pid]=0
  CPU_MAX[$pid]=0
  MEM_MAX[$pid]=0
  COUNT[$pid]=0
done

# Aggregated statistics across all monitored processes.
GLOBAL_CPU_SUM=0
GLOBAL_MEM_SUM=0
GLOBAL_CPU_MAX=0
GLOBAL_MEM_MAX=0
GLOBAL_COUNT=0
#######################################
# Take one resource sample of a process and fold it into the statistics.
#
# CPU% and MEM% are read from columns 9 and 10 of the process row printed
# by `top -bn1` (the default top field layout is assumed); virtual and
# resident memory come from /proc/<pid>/status and are converted to MB.
#
# Globals (updated):
#   CPU_SUM, MEM_SUM, CPU_MAX, MEM_MAX, COUNT  - per-PID arrays
#   GLOBAL_CPU_SUM, GLOBAL_MEM_SUM, GLOBAL_CPU_MAX, GLOBAL_MEM_MAX,
#   GLOBAL_COUNT
# Arguments:
#   $1 - PID to sample
#   $2 - log file the sample line is appended to
# Outputs:
#   One formatted line appended to $2 and a one-line summary on stdout.
# Returns:
#   0 when a sample was recorded, 1 when the process is gone or no usable
#   sample could be obtained.
#######################################
monitor_single_process() {
  local pid=$1
  local log_file=$2

  if [[ ! -d "/proc/$pid" ]]; then
    echo "进程 $pid 已退出"
    return 1
  fi
  # Anything that is not a plain PID cannot be matched against top's output.
  [[ $pid =~ ^[0-9]+$ ]] || return 1

  # Select the row whose first column IS the PID. A word grep for the PID
  # would also hit summary lines that merely contain the same number.
  # LC_ALL=C keeps the decimal separator a dot, which bc requires.
  local cpu_usage="" mem_usage=""
  read -r cpu_usage mem_usage < <(
    LC_ALL=C top -bn1 -p "$pid" 2>/dev/null \
      | awk -v pid="$pid" '!seen && $1 == pid { print $9, $10; seen = 1 }'
  )

  # No row for the PID, or values that are not plain decimals: no sample.
  local number_re='^[0-9]+([.][0-9]+)?$'
  if [[ ! $cpu_usage =~ $number_re || ! $mem_usage =~ $number_re ]]; then
    return 1
  fi

  # Detailed memory figures from /proc, reported in kB and converted to MB.
  # Missing fields (e.g. the process exited meanwhile) count as 0.
  local vmrss=0
  local vmsize=0
  if [[ -f "/proc/$pid/status" ]]; then
    read -r vmrss vmsize < <(
      awk '$1 == "VmRSS:" { rss = $2 }
           $1 == "VmSize:" { size = $2 }
           END { print rss + 0, size + 0 }' "/proc/$pid/status" 2>/dev/null
    )
    vmrss=$(( ${vmrss:-0} / 1024 ))
    vmsize=$(( ${vmsize:-0} / 1024 ))
  fi

  local now
  now=$(date +"%Y-%m-%d %H:%M:%S")

  # Append the sample to this PID's log. The C locale makes printf accept
  # the dot-separated decimals regardless of the user's locale.
  LC_ALL=C printf "%-20s %-8.1f %-8.1f %-12s %-12s\n" \
    "$now" "$cpu_usage" "$mem_usage" "${vmsize}MB" "${vmrss}MB" >> "$log_file"

  # Per-PID statistics (bc is used because the values are fractional).
  CPU_SUM[$pid]=$(echo "scale=2; ${CPU_SUM[$pid]:-0} + $cpu_usage" | bc)
  MEM_SUM[$pid]=$(echo "scale=2; ${MEM_SUM[$pid]:-0} + $mem_usage" | bc)
  if [[ "$(echo "$cpu_usage > ${CPU_MAX[$pid]:-0}" | bc)" == 1 ]]; then
    CPU_MAX[$pid]=$cpu_usage
  fi
  if [[ "$(echo "$mem_usage > ${MEM_MAX[$pid]:-0}" | bc)" == 1 ]]; then
    MEM_MAX[$pid]=$mem_usage
  fi
  COUNT[$pid]=$(( ${COUNT[$pid]:-0} + 1 ))

  # Statistics aggregated over all processes.
  GLOBAL_CPU_SUM=$(echo "scale=2; ${GLOBAL_CPU_SUM:-0} + $cpu_usage" | bc)
  GLOBAL_MEM_SUM=$(echo "scale=2; ${GLOBAL_MEM_SUM:-0} + $mem_usage" | bc)
  if [[ "$(echo "$cpu_usage > ${GLOBAL_CPU_MAX:-0}" | bc)" == 1 ]]; then
    GLOBAL_CPU_MAX=$cpu_usage
  fi
  if [[ "$(echo "$mem_usage > ${GLOBAL_MEM_MAX:-0}" | bc)" == 1 ]]; then
    GLOBAL_MEM_MAX=$mem_usage
  fi
  GLOBAL_COUNT=$(( ${GLOBAL_COUNT:-0} + 1 ))

  echo "$now - PID $pid: CPU=${cpu_usage}%, 内存=${mem_usage}%, 物理内存=${vmrss}MB"
  return 0
}
# Main monitoring loop: sample every tracked process about every 2 seconds
# until the requested duration has elapsed or all processes have exited.
START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
echo "开始监控进程..."
echo "进程列表: ${PID_ARRAY[*]}"

while (( $(date +%s) < END_TIME )); do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  REMAINING=$((DURATION - ELAPSED))
  if (( REMAINING <= 0 )); then
    break
  fi
  echo "======= 监控进度: $ELAPSED/$DURATION 秒, 剩余: $REMAINING 秒 ======="

  # Look for matching processes that are not being sampled yet.
  CURRENT_PIDS=$(get_all_pids "$PROCESS_NAME")
  unset KNOWN_PIDS
  declare -A KNOWN_PIDS=()
  for pid in "${PID_ARRAY[@]}"; do
    KNOWN_PIDS[$pid]=1
  done

  NEW_PIDS=""
  for candidate in $CURRENT_PIDS; do
    [[ $candidate =~ ^[0-9]+$ ]] || continue
    # Already sampled in this round.
    [[ -n "${KNOWN_PIDS[$candidate]:-}" ]] && continue
    # Never adopt this script itself; its command line contains the name.
    [[ "$candidate" == "$$" ]] && continue
    # Ignore matches that have exited since the lookup (for example helper
    # subshells of this script that were alive during the scan).
    [[ -d "/proc/$candidate" ]] || continue
    KNOWN_PIDS[$candidate]=1
    NEW_PIDS+="${NEW_PIDS:+ }$candidate"
  done

  if [[ -n "$NEW_PIDS" ]]; then
    echo "发现新进程: $NEW_PIDS,已加入监控"
    for new_pid in $NEW_PIDS; do
      PID_ARRAY+=("$new_pid")
      # A PID that was tracked before (dropped after a failed sample and now
      # rediscovered) keeps its log file and statistics, so the per-PID
      # numbers stay consistent with the global ones.
      if [[ -n "${LOG_FILES[$new_pid]:-}" ]]; then
        continue
      fi
      LOG_FILES[$new_pid]="${BASE_LOG_PREFIX}_pid${new_pid}_${TIMESTAMP}.log"
      # Initialise the log and the statistics of the new process.
      {
        echo "进程监控日志 - $PROCESS_NAME (PID: $new_pid)"
        echo "开始时间: $(date)"
        printf "%-20s %-8s %-8s %-12s %-12s\n" "时间戳" "CPU(%)" "内存(%)" "虚拟内存" "物理内存"
      } > "${LOG_FILES[$new_pid]}"
      CPU_SUM[$new_pid]=0
      MEM_SUM[$new_pid]=0
      CPU_MAX[$new_pid]=0
      MEM_MAX[$new_pid]=0
      COUNT[$new_pid]=0
    done
  fi

  # Sample every tracked process; keep only those that produced a sample.
  ACTIVE_PIDS=()
  for pid in "${PID_ARRAY[@]}"; do
    if monitor_single_process "$pid" "${LOG_FILES[$pid]}"; then
      ACTIVE_PIDS+=("$pid")
    fi
  done

  # Replace the tracked list with the PIDs that are still active.
  PID_ARRAY=("${ACTIVE_PIDS[@]}")
  PID_COUNT=${#PID_ARRAY[@]}
  if (( PID_COUNT == 0 )); then
    echo "所有监控的进程都已退出!"
    break
  fi

  sleep 2
done
# Generate the statistics report: a summary file, a trailer appended to each
# per-PID log, and a final overview on the terminal.

#######################################
# Print sum / n with two decimals, or "0" when n is not positive.
# awk is used so that values below 1 keep their leading zero ("0.33").
# Arguments:
#   $1 - sum of the samples
#   $2 - number of samples
#######################################
calc_avg() {
  LC_ALL=C awk -v sum="$1" -v n="$2" \
    'BEGIN { if (n > 0) printf "%.2f", sum / n; else printf "0" }'
}

# Report PIDs in ascending numeric order; associative-array iteration order
# is otherwise arbitrary.
REPORT_PIDS=()
if (( ${#LOG_FILES[@]} > 0 )); then
  mapfile -t REPORT_PIDS < <(printf '%s\n' "${!LOG_FILES[@]}" | sort -n)
fi

# Summary header (creates/truncates the summary file).
{
  echo "=============================================="
  echo "多进程监控统计报告"
  echo "=============================================="
  echo "进程名称: $PROCESS_NAME"
  echo "监控时长: $DURATION"
  echo "开始时间: $(date -d "@$START_TIME")"
  echo "结束时间: $(date)"
  echo ""
} > "$SUMMARY_LOG"

# Per-process statistics; PIDs without any recorded sample are skipped.
for pid in "${REPORT_PIDS[@]}"; do
  if (( ${COUNT[$pid]:-0} > 0 )); then
    CPU_AVG=$(calc_avg "${CPU_SUM[$pid]}" "${COUNT[$pid]}")
    MEM_AVG=$(calc_avg "${MEM_SUM[$pid]}" "${COUNT[$pid]}")
    # Trailer for the PID's own log file.
    {
      echo "进程 PID: $pid"
      echo "数据点数: ${COUNT[$pid]}"
      echo "CPU平均使用率: $CPU_AVG%"
      echo "CPU最大使用率: ${CPU_MAX[$pid]}%"
      echo "内存平均使用率: $MEM_AVG%"
      echo "内存最大使用率: ${MEM_MAX[$pid]}%"
    } >> "${LOG_FILES[$pid]}"
    # One line per PID in the summary.
    echo "PID $pid - 平均CPU: $CPU_AVG%, 最大CPU: ${CPU_MAX[$pid]}%, 平均内存: $MEM_AVG%, 最大内存: ${MEM_MAX[$pid]}%" >> "$SUMMARY_LOG"
  fi
done

# Global statistics (all samples of all processes).
if (( ${GLOBAL_COUNT:-0} > 0 )); then
  GLOBAL_CPU_AVG=$(calc_avg "$GLOBAL_CPU_SUM" "$GLOBAL_COUNT")
  GLOBAL_MEM_AVG=$(calc_avg "$GLOBAL_MEM_SUM" "$GLOBAL_COUNT")
else
  GLOBAL_CPU_AVG=0
  GLOBAL_MEM_AVG=0
fi

{
  echo ""
  echo "===== 全局统计(所有进程聚合) ====="
  echo "总监控进程数: ${#LOG_FILES[@]}"
  echo "总数据点数: $GLOBAL_COUNT"
  echo "全局CPU平均使用率: $GLOBAL_CPU_AVG%"
  echo "全局CPU最大使用率: $GLOBAL_CPU_MAX%"
  echo "全局内存平均使用率: $GLOBAL_MEM_AVG%"
  echo "全局内存最大使用率: $GLOBAL_MEM_MAX%"
} >> "$SUMMARY_LOG"

# Final overview on the terminal.
echo ""
echo "===== 监控完成 ====="
echo "详细日志文件:"
for pid in "${REPORT_PIDS[@]}"; do
  echo " - PID $pid: ${LOG_FILES[$pid]}"
done
echo "汇总报告: $SUMMARY_LOG"
echo ""
echo "全局统计结果:"
echo " - 总监控进程数: ${#LOG_FILES[@]}"
echo " - 全局CPU平均使用率: $GLOBAL_CPU_AVG%"
echo " - 全局CPU最大使用率: $GLOBAL_CPU_MAX%"
echo " - 全局内存平均使用率: $GLOBAL_MEM_AVG%"
echo " - 全局内存最大使用率: $GLOBAL_MEM_MAX%"