#!/bin/bash
# Enhanced per-process resource monitor (PID based).
# Usage: ./pid_monitor.sh <PID> <duration in seconds>

# Validate the argument count.
if [ $# -ne 2 ]; then
  echo "错误: 参数不正确!"
  echo "用法: $0 <进程PID> <监控时长(秒)>"
  exit 1
fi

TARGET_PID=$1
DURATION=$2

# Both values are used in /proc paths and integer arithmetic further down the
# script, so anything that is not a plain non-negative integer is rejected
# with the same usage message.
if ! [[ "$TARGET_PID" =~ ^[0-9]+$ && "$DURATION" =~ ^[0-9]+$ ]]; then
  echo "错误: 参数不正确!"
  echo "用法: $0 <进程PID> <监控时长(秒)>"
  exit 1
fi

# Force base 10 so inputs such as "08" are not parsed as (invalid) octal.
TARGET_PID=$((10#$TARGET_PID))
DURATION=$((10#$DURATION))

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_PREFIX="pid_monitor_${TARGET_PID}"
DETAIL_LOG="${LOG_PREFIX}_detail_${TIMESTAMP}.log"
SUMMARY_LOG="${LOG_PREFIX}_summary_${TIMESTAMP}.log"

#######################################
# Check whether a process exists.
# Arguments: $1 - PID
# Returns:   0 if /proc/<PID> is a directory, 1 otherwise (including when
#            the PID argument is empty; a bare "/proc/" would always exist).
#######################################
check_process_exists() {
  local pid=$1
  [ -n "$pid" ] && [ -d "/proc/$pid" ]
}

#######################################
# Print basic information about a process.
# Arguments: $1 - PID
# Outputs:   one line with three TAB-separated fields:
#              <process name> <user> <command line, at most 100 bytes>
#            Any field that cannot be determined (or is empty) is "N/A".
#            TAB is used as the separator because both the process name and
#            the command line may contain spaces.
#######################################
get_process_info() {
  local pid=$1
  local process_name="" user="" cmdline=""

  if [ -f "/proc/$pid/status" ]; then
    process_name=$(cat "/proc/$pid/comm" 2>/dev/null)
    user=$(ps -o user= -p "$pid" 2>/dev/null)
    user=${user//[[:space:]]/}
    # Arguments in /proc/<pid>/cmdline are NUL-separated. TABs and newlines
    # are flattened as well so they cannot collide with the field separator.
    # 2>/dev/null comes first so a failed "<" redirection is silenced too.
    cmdline=$(tr '\0\t\n' '   ' 2>/dev/null <"/proc/$pid/cmdline" | head -c 100)
    # Drop the trailing blank produced by the final NUL terminator.
    cmdline=${cmdline%"${cmdline##*[![:space:]]}"}
  fi

  printf '%s\t%s\t%s\n' "${process_name:-N/A}" "${user:-N/A}" "${cmdline:-N/A}"
}

# Make sure the target process exists before doing anything else.
if ! check_process_exists "$TARGET_PID"; then
  echo "错误: 进程 PID $TARGET_PID 不存在!"
  exit 1
fi

# Fetch the basic process information (fields are TAB-separated and never
# empty, so splitting on TAB keeps names/command lines with spaces intact).
IFS=$'\t' read -r process_name process_user process_cmdline \
  <<< "$(get_process_info "$TARGET_PID")"

echo "开始监控进程..."
echo "进程PID: $TARGET_PID"
echo "进程名称: $process_name"
echo "运行用户: $process_user"
echo "命令行: $process_cmdline"
echo "监控时长: $DURATION 秒"
echo "开始时间: $(date)"
echo "详细日志: $DETAIL_LOG"
echo "汇总报告: $SUMMARY_LOG"

# Initialise the detail log: a header followed by the column titles.
{
  echo "进程监控报告 - PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "命令行: $process_cmdline"
  echo "开始时间: $(date)"
  echo "监控间隔: 1秒"
  echo "==============================================================="
  printf "%-20s %-8s %-8s %-12s %-12s %-12s\n" \
    "时间戳" "CPU(%)" "内存(%)" "物理内存(MB)" "虚拟内存(MB)" "线程数"
} > "$DETAIL_LOG"

# Running totals and maxima used for the final statistics.
COUNT=0
CPU_SUM=0
MEM_SUM=0
RSS_SUM=0
VSZ_SUM=0
THREADS_SUM=0
CPU_MAX=0
MEM_MAX=0
RSS_MAX=0
VSZ_MAX=0
THREADS_MAX=0

#######################################
# Helper: print the CPU time (utime + stime, in clock ticks) a process has
# consumed so far, read from /proc/<pid>/stat.
# Arguments: $1 - PID
# Returns:   0 and the tick count on stdout; 1 if the file cannot be read
#            or parsed.
#######################################
_read_cpu_ticks() {
  local pid=$1
  local stat_line
  local -a fields

  stat_line=$(cat "/proc/$pid/stat" 2>/dev/null) || return 1
  # Field 2 (the command name in parentheses) may itself contain spaces or
  # ')', so drop everything up to the LAST ") " before splitting on blanks.
  stat_line=${stat_line##*') '}
  read -r -a fields <<< "$stat_line"
  # After the strip, index 0 is original field 3 (state); utime and stime
  # (original fields 14 and 15) are therefore at indexes 11 and 12.
  [[ "${fields[11]}" =~ ^[0-9]+$ && "${fields[12]}" =~ ^[0-9]+$ ]] || return 1
  echo $((fields[11] + fields[12]))
}

#######################################
# Print the current CPU usage (percent) of a process.
# Method 1: the %CPU column (9th field) of `top -bn1 -p <pid>`.
# Method 2 (only if top yields no usable number): sample the process CPU
#   ticks from /proc/<pid>/stat twice, 0.2 s apart, and divide the delta by
#   the elapsed time taken from /proc/uptime.
# The function is meant to be called through command substitution, i.e. in
# a subshell, so it must not rely on global state surviving between calls;
# that is why the fallback takes both samples inside a single call.
# Arguments: $1 - PID
# Outputs:   a non-negative decimal number; "0" when nothing can be measured
#            (for example because the process no longer exists).
#######################################
get_cpu_usage() {
  local pid=$1
  local number_re='^[0-9]+([.][0-9]+)?$'
  local cpu_usage=""

  # LC_ALL=C keeps the decimal separator a '.' regardless of the user locale.
  cpu_usage=$(LC_ALL=C top -bn1 -p "$pid" 2>/dev/null \
    | awk -v pid="$pid" '$1 == pid {print $9; exit}')

  if ! [[ "$cpu_usage" =~ $number_re ]]; then
    cpu_usage=""
    local hz ticks_before ticks_after up_before up_after
    hz=$(getconf CLK_TCK 2>/dev/null)
    [[ "$hz" =~ ^[1-9][0-9]*$ ]] || hz=100

    if ticks_before=$(_read_cpu_ticks "$pid") \
      && { read -r up_before _ < /proc/uptime; } 2>/dev/null; then
      sleep 0.2
      if ticks_after=$(_read_cpu_ticks "$pid") \
        && { read -r up_after _ < /proc/uptime; } 2>/dev/null; then
        cpu_usage=$(LC_ALL=C awk \
          -v ticks="$((ticks_after - ticks_before))" \
          -v t0="$up_before" -v t1="$up_after" -v hz="$hz" \
          'BEGIN {
             elapsed = t1 - t0
             if (elapsed > 0 && ticks >= 0) {
               printf "%.2f", ticks / hz / elapsed * 100
             }
           }')
      fi
    fi
  fi

  echo "${cpu_usage:-0}"
}
#######################################
# Print memory figures for a process.
# Arguments: $1 - PID
# Outputs:   one line "<mem %> <RSS in MB> <virtual size in MB> <threads>".
#            Every field falls back to 0 when it cannot be read, e.g. the
#            process is gone, or /proc/<pid>/status has no VmRSS/VmSize line.
#######################################
get_memory_info() {
  local pid=$1
  local number_re='^[0-9]+([.][0-9]+)?$'
  local integer_re='^[0-9]+$'
  local mem_usage="" rss_kb="" vsz_kb="" threads=""

  if [ -f "/proc/$pid/status" ]; then
    # LC_ALL=C keeps the decimal separator a '.' regardless of the locale.
    mem_usage=$(LC_ALL=C ps -o %mem= -p "$pid" 2>/dev/null)
    mem_usage=${mem_usage//[[:space:]]/}
    # One pass over the status file; "+ 0" turns a missing field into 0.
    read -r rss_kb vsz_kb threads < <(
      awk '$1 == "VmRSS:"   { rss = $2 }
           $1 == "VmSize:"  { vsz = $2 }
           $1 == "Threads:" { thr = $2 }
           END { print rss + 0, vsz + 0, thr + 0 }' \
        "/proc/$pid/status" 2>/dev/null
    )
  fi

  # The process may vanish between the checks above; never emit an empty or
  # non-numeric field because the caller feeds these into arithmetic.
  [[ "$mem_usage" =~ $number_re ]] || mem_usage=0
  [[ "$rss_kb" =~ $integer_re ]] || rss_kb=0
  [[ "$vsz_kb" =~ $integer_re ]] || vsz_kb=0
  [[ "$threads" =~ $integer_re ]] || threads=0

  # kB -> MB (integer division).
  echo "$mem_usage $((rss_kb / 1024)) $((vsz_kb / 1024)) $threads"
}

# Floating-point helpers. Bash arithmetic is integer-only; awk is used so
# results always carry a leading zero ("0.50") and a '.' decimal separator.
_float_add() {
  LC_ALL=C awk -v a="$1" -v b="$2" 'BEGIN { printf "%.2f", a + b }'
}
_float_div() {
  LC_ALL=C awk -v a="$1" -v b="$2" \
    'BEGIN { if (b + 0 != 0) printf "%.2f", a / b; else printf "0" }'
}
# Succeeds (status 0) when $1 > $2.
_float_gt() {
  LC_ALL=C awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 > b + 0) }'
}

# Main monitoring loop: one sample per second until DURATION has elapsed or
# the target process exits.
START_TIME=$(date +%s)
END_TIME=$((START_TIME + DURATION))
NUMBER_RE='^[0-9]+([.][0-9]+)?$'
INTEGER_RE='^[0-9]+$'

echo "开始监控进程 PID: $TARGET_PID ..."

while [ "$(date +%s)" -lt "$END_TIME" ]; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  REMAINING=$((DURATION - ELAPSED))

  if [ "$REMAINING" -le 0 ]; then
    break
  fi

  # Stop as soon as the target process disappears.
  if ! check_process_exists "$TARGET_PID"; then
    echo "警告: 进程 $TARGET_PID 在监控期间退出!"
    break
  fi

  # A separate variable is used so the file-name TIMESTAMP is not clobbered.
  SAMPLE_TIME=$(date +"%Y-%m-%d %H:%M:%S")

  # Collect one sample.
  CPU_USAGE=$(get_cpu_usage "$TARGET_PID")
  read -r MEM_USAGE RSS_MB VSZ_MB THREADS <<< "$(get_memory_info "$TARGET_PID")"

  # Guard the arithmetic below against empty or malformed samples.
  [[ "$CPU_USAGE" =~ $NUMBER_RE ]] || CPU_USAGE=0
  [[ "$MEM_USAGE" =~ $NUMBER_RE ]] || MEM_USAGE=0
  [[ "$RSS_MB" =~ $INTEGER_RE ]] || RSS_MB=0
  [[ "$VSZ_MB" =~ $INTEGER_RE ]] || VSZ_MB=0
  [[ "$THREADS" =~ $INTEGER_RE ]] || THREADS=0

  # Append the sample to the detail log. LC_ALL=C makes printf accept the
  # '.' decimal separator even in locales that use a decimal comma.
  LC_ALL=C printf "%-20s %-8.1f %-8.1f %-12s %-12s %-12s\n" \
    "$SAMPLE_TIME" "$CPU_USAGE" "$MEM_USAGE" \
    "${RSS_MB}MB" "${VSZ_MB}MB" "$THREADS" >> "$DETAIL_LOG"

  # Update the running totals.
  CPU_SUM=$(_float_add "$CPU_SUM" "$CPU_USAGE")
  MEM_SUM=$(_float_add "$MEM_SUM" "$MEM_USAGE")
  RSS_SUM=$((RSS_SUM + RSS_MB))
  VSZ_SUM=$((VSZ_SUM + VSZ_MB))
  THREADS_SUM=$((THREADS_SUM + THREADS))

  # Update the maxima.
  if _float_gt "$CPU_USAGE" "$CPU_MAX"; then
    CPU_MAX=$CPU_USAGE
  fi
  if _float_gt "$MEM_USAGE" "$MEM_MAX"; then
    MEM_MAX=$MEM_USAGE
  fi
  if [ "$RSS_MB" -gt "${RSS_MAX:-0}" ]; then
    RSS_MAX=$RSS_MB
  fi
  if [ "$VSZ_MB" -gt "${VSZ_MAX:-0}" ]; then
    VSZ_MAX=$VSZ_MB
  fi
  if [ "$THREADS" -gt "${THREADS_MAX:-0}" ]; then
    THREADS_MAX=$THREADS
  fi

  COUNT=$((COUNT + 1))

  # Progress line whenever the elapsed time is a multiple of 10 seconds.
  if [ $((ELAPSED % 10)) -eq 0 ]; then
    echo "进度: ${ELAPSED}/${DURATION}秒, CPU: ${CPU_USAGE}%, 内存: ${MEM_USAGE}%, 物理内存: ${RSS_MB}MB"
  fi

  sleep 1
done

# Averages (all zero when no sample was taken).
if [ "${COUNT:-0}" -gt 0 ]; then
  CPU_AVG=$(_float_div "$CPU_SUM" "$COUNT")
  MEM_AVG=$(_float_div "$MEM_SUM" "$COUNT")
  RSS_AVG=$((RSS_SUM / COUNT))
  VSZ_AVG=$((VSZ_SUM / COUNT))
  THREADS_AVG=$((THREADS_SUM / COUNT))
else
  CPU_AVG=0
  MEM_AVG=0
  RSS_AVG=0
  VSZ_AVG=0
  THREADS_AVG=0
fi

# Write the summary report.
{
  echo "==============================================================="
  echo "进程监控统计报告"
  echo "==============================================================="
  echo "进程PID: $TARGET_PID"
  echo "进程名称: $process_name"
  echo "运行用户: $process_user"
  echo "监控时长: $DURATION 秒"
  echo "数据采样数: $COUNT 次"
  echo "开始时间: $(date -d "@$START_TIME")"
  echo "结束时间: $(date)"
  echo ""
  echo "CPU使用率统计:"
  echo " 平均值: ${CPU_AVG}%"
  echo " 最大值: ${CPU_MAX}%"
  echo ""
  echo "内存使用率统计:"
  echo " 平均值: ${MEM_AVG}%"
  echo " 最大值: ${MEM_MAX}%"
  echo ""
  echo "物理内存(RSS)统计:"
  echo " 平均值: ${RSS_AVG}MB"
  echo " 最大值: ${RSS_MAX}MB"
  echo ""
  echo "虚拟内存(VSZ)统计:"
  echo " 平均值: ${VSZ_AVG}MB"
  echo " 最大值: ${VSZ_MAX}MB"
  echo ""
  echo "线程数统计:"
  echo " 平均值: $THREADS_AVG"
  echo " 最大值: $THREADS_MAX"
  echo ""
} > "$SUMMARY_LOG"

# Print the closing summary on screen.
echo ""
echo "===== 监控完成 ====="
echo "详细监控数据: $DETAIL_LOG"
echo "统计报告: $SUMMARY_LOG"
echo ""
echo "监控统计摘要:"
echo " - 数据采样数: $COUNT 次"
echo " - CPU平均使用率: ${CPU_AVG}%"
echo " - CPU最大使用率: ${CPU_MAX}%"
echo " - 内存平均使用率: ${MEM_AVG}%"
echo " - 内存最大使用率: ${MEM_MAX}%"
echo " - 物理内存平均使用: ${RSS_AVG}MB"
echo " - 物理内存峰值使用: ${RSS_MAX}MB"