File: //usr/lib64/nagios/plugins/check_a2_dbgov.shared
#!/bin/bash
# Restarts db_governor manually if it is not running; reports an error once the retries are exhausted (SYSENG-20225).
# File that stores the output of the last check run; named after this script.
# "$0" is quoted (and guarded with --) so a script path containing spaces or a
# leading dash is not word-split or parsed as an option by basename.
nagios_cache_file="/var/tmp/$(basename -- "$0")"
# Age in seconds after which the cached result is regenerated.
nagios_cache_age=900
#######################################
# Print the age of a file in whole seconds.
# Arguments: $1 - path of the file to inspect
# Outputs:   seconds elapsed since the file's last modification, on stdout
# Notes:     if stat cannot read the file, mtime stays empty, the arithmetic
#            expansion treats it as 0 and the current epoch time is printed.
#######################################
nagios_cacheage() {
  # All working variables are local so a direct (non-subshell) call cannot
  # clobber same-named variables of the caller.
  local file=$1
  local now mtime delta
  now=$(date +%s)
  mtime=$(stat -c %Y -- "${file}")
  delta=$(( now - mtime ))
  printf '%s\n' "${delta}"
}
#######################################
# Check whether db_governor answers on its socket, restarting it if it does
# not, and print the result in the format stored in the cache file.
# Arguments: none
# Outputs:   two lines on stdout: a human-readable status line and an
#            "exit_code = N" marker line (0 = running, 2 = could not be
#            started).
# Returns:   0 in both cases; the check result is carried by the output.
#######################################
nagios_generate_cache() {
  local restarts_left=2
  local dbctl_output

  while true; do
    # Capture stdout of dbctl once and match on the quoted text; this avoids
    # word-splitting/globbing of the output and executing an empty command
    # substitution as the `if` condition.
    dbctl_output=$(dbctl list)
    if ! grep -wq "can't connect to socket" <<<"${dbctl_output}"; then
      echo "dbgov_check - is running"
      echo "exit_code = 0"
      return 0
    fi

    # The socket is still unreachable. Give up only after the last restart
    # has itself been re-checked, so a restart that succeeded on the final
    # attempt is not reported as a failure.
    if (( restarts_left == 0 )); then
      break
    fi
    restarts_left=$(( restarts_left - 1 ))

    # Keep anything systemctl prints out of stdout, which the caller
    # redirects into the cache file.
    systemctl restart db_governor >&2
    # Give the service time to come up before checking again.
    sleep 15
  done

  echo "dbgov_check - db-governor is not running and can't be started"
  echo "exit_code = 2"
}
# Main flow: reuse the cached result while it is fresh, otherwise regenerate
# it, then print the cached status line and exit with the cached exit code.

# A missing or empty cache file carries no status, so both are regenerated;
# an existing one is regenerated once it is older than nagios_cache_age.
refresh_cache=0
if [[ ! -s "${nagios_cache_file}" ]]; then
  refresh_cache=1
else
  cache_age=$(nagios_cacheage "${nagios_cache_file}")
  if [[ "${cache_age}" -ge "${nagios_cache_age}" ]]; then
    refresh_cache=1
  fi
fi

if (( refresh_cache )); then
  # Collect the result first and write it afterwards. Redirecting the
  # function straight into the file would truncate the cache before the
  # (slow, sleeping) restart attempts run, leaving a fresh but empty cache
  # behind if the run is interrupted.
  cache_body=$(nagios_generate_cache)
  if [[ -n "${cache_body}" ]]; then
    printf '%s\n' "${cache_body}" > "${nagios_cache_file}"
  fi
fi

if [[ -s "${nagios_cache_file}" ]]; then
  # Show the status text only; the "exit_code = N" marker line is internal.
  grep -v "exit_code = " "${nagios_cache_file}"
  if grep -q "exit_code = 0" "${nagios_cache_file}"; then
    exit 0
  else
    exit 2
  fi
fi

# No usable status could be produced or stored. Report UNKNOWN (Nagios exit
# code 3) instead of silently falling through with exit status 0.
echo "dbgov_check - UNKNOWN: no cached status available"
exit 3