File: //usr/lib64/nagios/plugins/vz7_check_nfs_kill.sh
#!/bin/bash
#
# Monitors and fixes the weird NFS problems on VZ7 nodes, and gathers data.
# Keeps alerting until the lock file it created is cleared by a later
# monitoring run (the lock is deleted once it is more than 15 minutes old).
# SYSENG-2500
#
# Directory that holds the acknowledgement lock file and the timestamped
# diagnostic dumps written by this check.
checkdir=/var/cache/nfs_kill_check
# Lock file inside ${checkdir}; while it exists the check keeps alerting.
lockfile="${checkdir}/ack.lck"
#######################################
# Print a diagnostic snapshot to stdout in three sections, with two newlines
# written between consecutive sections.
# Arguments: none
# Outputs:   1. sysctl entries matching "rpc" or "nfs" (case-insensitive)
#            2. the last 20 lines of /var/log/messages that mention "nfs"
#            3. process-list lines that mention "rsync"
# Returns:   exit status of the final grep in the last pipeline
#######################################
log_data() {
  # sysctl's own error output is discarded; only matching entries are kept.
  sysctl -a 2>/dev/null | grep -E -i 'rpc|nfs'
  printf '\n\n'

  grep -i 'nfs' /var/log/messages | tail -n 20
  printf '\n\n'

  # The bracket expression stops grep from matching its own command line.
  ps auxwww | grep -i 'rsyn[c]'
}
# --- Main check --------------------------------------------------------------
# Nagios exit codes used below: 0 = OK, 2 = CRITICAL, 3 = UNKNOWN.
#
# Flow:
#   1. If the lock file exists, an earlier run already saw the flag set to 1.
#      Keep reporting CRITICAL; the lock is deleted once it is more than
#      15 minutes old, so a later run goes back to reading the flag.
#   2. Otherwise read the first field of the first line of the kill-tasks file.
#      If it cannot be read as a number, report UNKNOWN.
#   3. If the value is 1: create the lock, dump diagnostics into a timestamped
#      file under ${checkdir}, write 0 back to the flag and report CRITICAL.
#   4. Any other numeric value is OK.
kill_tasks_file="/proc/net/rpc/kill-tasks"

if [ -f "${lockfile}" ]; then
  echo "nfs_kill_check - ${kill_tasks_file} was recently set to 1"
  # Expire the lock via find so the age test and the removal are one step.
  find "${lockfile}" -mmin +15 -delete
  exit 2
fi

# awk's error output is discarded on purpose: a missing or unreadable file
# leaves kill_tasks empty, which is reported as UNKNOWN just below.
kill_tasks=$(awk 'NR == 1 { print $1; exit }' "${kill_tasks_file}" 2>/dev/null)

# Empty or non-numeric means the flag could not be read. Without this guard
# the numeric test below would error out and the check would report OK.
case "${kill_tasks}" in
  '' | *[!0-9]*)
    echo "nfs_kill_check - UNKNOWN: cannot read ${kill_tasks_file}"
    exit 3
    ;;
esac

if [ "${kill_tasks}" -eq 1 ]; then
  echo "nfs_kill_check - ${kill_tasks_file} is set to 1"
  mkdir -p "${checkdir}"
  touch "${lockfile}"
  # One dump file per incident, named by date and time; the redirect creates it.
  datafile="${checkdir}/$(date +%F-%T)"
  log_data >"${datafile}" 2>&1
  # Write 0 back to the flag only after the diagnostics have been captured.
  echo 0 >"${kill_tasks_file}"
  exit 2
fi

echo "nfs_kill_check - OK"
exit 0