我们如何计算单个进程对“commit”(即 /proc/meminfo 中的“Committed_AS”)的贡献?

随着时间的推移,我们看到“提交”(commit)内存出现“泄漏”,需要找出有问题的进程。RSS(“已使用的内存”)没有泄漏,但提交量在泄漏,并且它导致内核表现得非常糟糕(当提交百分比达到约 200% 时无法分配内存)。我知道我们可以调整 /proc/sys/vm/overcommit_memory,但这不是重点——我们想找到发生泄漏的进程。

我已经尝试过根据 /proc/<pid>/smaps 和 pmap 的输出进行各种计算,但把所有进程的结果加起来,没有任何一项与 /proc/meminfo 中的提交量(Committed_AS)相符。https://www.kernel.org/doc/Documentation/vm/overcommit-accounting 写得非常含糊,我不清楚如何使用它。

有什么建议吗?

我不知道我是否正确理解你的问题,但首先我想找出实际的内存使用情况以及实际的共享内存。请使用下面提到的Python脚本

https://raw.githubusercontent.com/pixelb/ps_mem/master/ps_mem.py 

希望这会有所帮助,或者至少可以作为调试的第一步……

在此说明我们是如何解决这个问题的——方法既不优雅也不精致,也算不上直接的答案(我们并没有得到单个进程实际的过量提交分配量),但我们确实找出了罪魁祸首,并发现了一处隐蔽的内存泄漏。

脚本里没有注释等内容,对此不作道歉——这些只是临时使用的小工具。下面开始……

我们查看了 pmap 命令的结果,并扫描了各进程的 /proc/<pid>/smaps 数据,将同名进程的多个副本汇总在一起,再把结果导入电子表格进行分析。我们在一周内每天采集几次快照,并观察内存使用的趋势。这样我们就能看到每组同名进程的内存随时间变化的趋势,从而确定有问题的那组进程。

控制脚本(运行一组命令并将结果组织在一个文件中)


 # gpmemstat - collect one snapshot of host memory statistics into a single file.
#
# Usage: gpmemstat [SCP_DESTINATION]
#
# Writes gpmemstat.<timestamp> in the current directory containing, in order:
# a host/date banner, /proc/meminfo, `sar -r`, `sar -R`, and the aggregates
# printed by the companion gpsmapsstat and gppmapstat scripts (both must be on
# PATH). When SCP_DESTINATION is given, the finished file is copied there.

readonly SEPARATOR='-----------------------------------------------------'

# Process names whose instances are aggregated by the two companion scripts.
readonly -a PROCESS_NAMES=(httpd oninit fglrun java fastcgi gdcproxy)

# Print a section banner: separator line, title ($1), separator line.
section_header() {
  printf '%s\n' "$SEPARATOR" "$1" "$SEPARATOR"
}

# Print a complete section: the banner for title $1, then the output of the
# command given by the remaining arguments, then one blank line.
report_section() {
  local title=$1
  shift
  section_header "$title"
  "$@"
  printf '\n'
}

main() {
  local snapshot_file
  # %N is nanoseconds; cutting at 23 characters keeps millisecond precision.
  snapshot_file=gpmemstat.$(date '+%F.%H.%M.%S.%N' | cut -c 1-23)

  # A single grouped redirect replaces the per-line ">> file" appends.
  {
    printf '%s\n' "$SEPARATOR" "$(hostname)" "$(date)" "$SEPARATOR" ''
    report_section '/proc/meminfo' cat /proc/meminfo
    report_section 'sar -r' sar -r
    report_section 'sar -R' sar -R
    report_section 'gpsmapstat (/proc/(pid)/smaps aggregates)' \
      gpsmapsstat "${PROCESS_NAMES[@]}"
    # The last section has no trailing blank line.
    section_header 'gppmapstat (pmap -d -x aggregates)'
    gppmapstat "${PROCESS_NAMES[@]}"
  } >>"$snapshot_file"

  # Optionally ship the snapshot to a collection host.
  if [[ -n "${1:-}" ]]; then
    scp "$snapshot_file" "$1"
  fi
}

main "$@"

该脚本通过 pmap 收集指定名称的所有进程的数据(您也可以使用其他机制来确定要分析的进程)。它会汇总同一类型所有进程的总内存使用情况。

# gppmapstat - aggregate pmap memory totals for groups of same-named processes.
#
# Usage: gppmapstat NAME...
#
# For each NAME, every process whose command line matches NAME is measured
# with `pmap -x` (fields 3-5 of its last line: total, resident, dirty) and
# `pmap -d` (fields 4 and 6 of its last line: private and shared, each with a
# trailing unit letter). The per-process figures are summed and printed as
# three rows: raw totals, MiB (integer division) and GiB (two decimals via bc).
# A NAME with no measurable process prints nothing.

# Print one table row; every column is right-aligned in 8 characters.
print_row() {
  printf '%8s %8s %8s %8s %8s %8s %8s %8s\n' "$@"
}

# Print the PIDs whose command line matches $1, one per line. pgrep never
# reports itself; this script is filtered out because its own command line
# contains NAME as an argument.
matching_pids() {
  local candidate
  while IFS= read -r candidate; do
    [[ "$candidate" == "$$" ]] || printf '%s\n' "$candidate"
  done < <(pgrep -f -- "$1")
}

# Print $1 as a plain integer, dropping an optional trailing K unit.
# Returns 1 (printing nothing) when $1 is empty or not a whole number.
parse_kib() {
  local digits=${1%[Kk]}
  [[ "$digits" =~ ^[0-9]+$ ]] || return 1
  # Force base 10 so a leading zero is never read as octal.
  printf '%d' "$((10#$digits))"
}

# Print the KiB value $1 converted to GiB, two decimals, as bc formats it.
kib_to_gib() {
  echo "scale=2; $1/1024/1024" | bc
}

# Sum the pmap figures of all processes matching $1 and print the three rows.
summarize_name() {
  local name=$1
  local pid value x_totals d_totals
  local total resident dirty private shared
  local measured=0
  local sum_total=0 sum_resident=0 sum_dirty=0
  local sum_private=0 sum_shared=0 sum_unshared=0

  while IFS= read -r pid; do
    x_totals=$(pmap -x "$pid" | tail -n 1)
    read -r _ _ total resident dirty _ <<<"$x_totals"
    # A process that exited after the PID lookup yields no totals line.
    total=$(parse_kib "$total") || continue
    measured=$((measured + 1))
    sum_total=$((sum_total + total))
    if value=$(parse_kib "$resident"); then
      sum_resident=$((sum_resident + value))
    fi
    if value=$(parse_kib "$dirty"); then
      sum_dirty=$((sum_dirty + value))
    fi

    d_totals=$(pmap -d "$pid" | tail -n 1)
    read -r _ _ _ private _ shared _ <<<"$d_totals"
    if value=$(parse_kib "$private"); then
      sum_private=$((sum_private + value))
    fi
    if value=$(parse_kib "$shared"); then
      sum_shared=$((sum_shared + value))
      # Unshared is derived per process: everything mapped minus the shared part.
      sum_unshared=$((sum_unshared + total - value))
    fi
  done < <(matching_pids "$name")

  if ((measured == 0)); then
    return 0
  fi

  print_row "$name" Total "$sum_total" "$sum_resident" "$sum_dirty" \
    "$sum_private" "$sum_shared" "$sum_unshared"
  print_row "$name" MiB $((sum_total / 1024)) $((sum_resident / 1024)) \
    $((sum_dirty / 1024)) $((sum_private / 1024)) $((sum_shared / 1024)) \
    $((sum_unshared / 1024))
  print_row "$name" GiB "$(kib_to_gib "$sum_total")" \
    "$(kib_to_gib "$sum_resident")" "$(kib_to_gib "$sum_dirty")" \
    "$(kib_to_gib "$sum_private")" "$(kib_to_gib "$sum_shared")" \
    "$(kib_to_gib "$sum_unshared")"
}

main() {
  local name
  print_row name PID Total Resident Dirty Private Shared Unshared
  for name in "$@"; do
    summarize_name "$name"
  done
}

main "$@"

下面的脚本会查看所有 /proc/<pid>/smaps 文件,并为指定名称的所有进程(同样可以使用其他机制来确定要分析的进程)汇总内存数据。它同样会汇总同一类型所有进程的总内存使用情况。

# gpsmapsstat - aggregate /proc/<pid>/smaps counters for same-named processes.
#
# Usage: gpsmapsstat NAME...
#
# For each NAME, the smaps files of every process whose command line matches
# NAME are read in a single awk pass and the Size, Rss, Pss, Shared_Clean,
# Shared_Dirty, Private_Clean, Private_Dirty, Referenced and Swap values are
# summed into one output row. A NAME with no readable smaps file prints nothing.
#
# Environment: PROC_ROOT - directory holding the <pid>/smaps files
#                          (default: /proc).

# Print the column header row.
print_header() {
  printf '%8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n' \
    name size rss pss shrdCln shrdDrty prvtCln prvtDrty refrncd swap
}

# Print the PIDs whose command line matches $1, one per line. pgrep never
# reports itself; this script is filtered out because its own command line
# contains NAME as an argument.
matching_pids() {
  local candidate
  while IFS= read -r candidate; do
    [[ "$candidate" == "$$" ]] || printf '%s\n' "$candidate"
  done < <(pgrep -f -- "$1")
}

# Print one row of summed smaps counters for the processes matching $1.
summarize_name() {
  local name=$1
  local proc_root=${PROC_ROOT:-/proc}
  local pid
  local -a smaps_files=()

  while IFS= read -r pid; do
    # Skip processes that have exited or whose smaps we are not allowed to read.
    if [[ -r "$proc_root/$pid/smaps" ]]; then
      smaps_files+=("$proc_root/$pid/smaps")
    fi
  done < <(matching_pids "$name")

  if ((${#smaps_files[@]} == 0)); then
    return 0
  fi

  # %8.0f prints every sum as a whole number: 0 when a counter never appeared,
  # and no exponent notation for large totals.
  awk -v name="$name" '
    $1 == "Size:"          { size          += $2 }
    $1 == "Rss:"           { rss           += $2 }
    $1 == "Pss:"           { pss           += $2 }
    $1 == "Shared_Clean:"  { shared_clean  += $2 }
    $1 == "Shared_Dirty:"  { shared_dirty  += $2 }
    $1 == "Private_Clean:" { private_clean += $2 }
    $1 == "Private_Dirty:" { private_dirty += $2 }
    $1 == "Referenced:"    { referenced    += $2 }
    $1 == "Swap:"          { swap          += $2 }
    END {
      printf "%8s %8.0f %8.0f %8.0f %8.0f %8.0f %8.0f %8.0f %8.0f %8.0f\n",
        name, size, rss, pss, shared_clean, shared_dirty,
        private_clean, private_dirty, referenced, swap
    }
  ' "${smaps_files[@]}"
}

main() {
  local name
  print_header
  for name in "$@"; do
    summarize_name "$name"
  done
}

main "$@"

再说一次,这并不精致,也不是真正的答案(我仍然不知道如何得到一份按内存提交量排序的进程列表),但在这里它确实奏效了。