File: //usr/lib64/nagios/plugins/check_a2_xen_guest_vm.sh.flexdedi
#!/bin/bash
#
#######################################
# Validate the LVM / Xen state collected by check_guest and derive the
# guest's connection details from its xl config file.
# Globals:
#   guest_lv           (read)    guest name derived from the LV listing
#   xl_list            (read)    domain name(s) taken from `xl list`
#   xen_vm_running     (written) 1 if guest_lv is one of the listed domains,
#                                0 otherwise
#   xl_cfg             (written) /home/xen/<guest_lv>/<guest_lv>.cfg
#   guest_vm_ip        (written) first IPv4 address on the cfg's vif line
#   guest_vm_hostname  (written) hostname value from the cfg
# Outputs:
#   A one-line status message on stdout before each early exit.
# Exits:
#   1 when more than one "_img ... Xen" LV exists, or when guest_lv is empty.
#######################################
sanity_check() {
  local lv_count

  # More than one guest image LV means a stale image was left behind.
  lv_count=$(lvs | grep -c "_img.*Xen")
  if [[ "${lv_count}" -gt 1 ]]; then
    echo "guest_status - found more than 1 LV image - please remove the unused one"
    exit 1
  fi

  # Without an LV there is no guest to inspect.
  if [[ -z "${guest_lv}" ]]; then
    echo "guest_status - No VM is provisioned"
    exit 1
  fi

  # Always settle on a definite 0/1. The guest counts as running when its name
  # appears as a whole line in xl_list, so extra domains in the listing no
  # longer leave xen_vm_running unset (which made the plugin exit silently).
  xen_vm_running=0
  if [[ -n "${xl_list}" ]] && grep -Fxq -- "${guest_lv}" <<< "${xl_list}"; then
    xen_vm_running=1
  fi

  xl_cfg=/home/xen/${guest_lv}/${guest_lv}.cfg
  guest_vm_ip=$(grep ^vif "${xl_cfg}" | grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' | head -1)
  guest_vm_hostname=$(grep hostname "${xl_cfg}" | sed "s/hostname = '//g; s/'//g" | tr -d '\n')
}
#######################################
# Gather the raw LVM and Xen listings, then hand over to sanity_check.
# Globals:
#   guest_lv  (written) first column of every `lvs` line containing "img",
#                       with each "_img" occurrence removed
#   xl_list   (written) first column of every `xl list` line containing "vm"
#######################################
check_guest() {
  guest_lv=$(lvs | awk '/img/ { lv_name = $1; gsub(/_img/, "", lv_name); print lv_name }')
  xl_list=$(xl list | awk '/vm/ { print $1 }')
  sanity_check
}
#######################################
# Probe the guest VM and record whether any probe succeeded.
# Probes run in order and stop at the first success: ICMP ping, then a TCP
# connect to port 80 (HTTP), then to port 7822 (SSH). Each uses a 2 second
# timeout so a dead guest cannot stall the check.
# Globals:
#   guest_vm_ip  (read)    address to probe
#   status       (written) 1 if any probe answered, 0 otherwise
# Returns:
#   Always 0; the result is reported through the status variable.
#######################################
check_status() {
  local tcp_port

  status=0

  if ping -c 1 -w 2 "${guest_vm_ip}" &> /dev/null; then
    status=1
    return 0
  fi

  for tcp_port in 80 7822; do
    if nc -zv -w 2 "${guest_vm_ip}" "${tcp_port}" &> /dev/null; then
      status=1
      return 0
    fi
  done

  return 0
}
#######################################
# Decide the plugin result from the collected guest state, print the status
# line and exit with the matching code.
# Globals:
#   xen_vm_running     (read) 0/1 flag set by sanity_check
#   guest_vm_ip        (read) used in the messages
#   guest_vm_hostname  (read) used in the messages
#   status             (read) set by check_status
# Arguments:
#   $1 - optional path of the acknowledgement file; defaults to
#        /root/a2_status
# Outputs:
#   One status line on stdout.
# Exits:
#   0 guest answers the status checks (the ACK file, if any, is removed)
#   1 guest is down but acknowledged via the ACK file
#   2 guest is not running, or running but not answering
#   3 the running state could not be determined
#######################################
main() {
  local ack_file="${1:-/root/a2_status}"

  # An ACK file means the outage is already being handled: report it with a
  # lower severity so the alert channel is not flooded with CRIT alerts.
  if [[ -f "${ack_file}" ]]; then
    check_status
    if [[ "${status}" == 1 ]]; then
      echo "guest_status - ${guest_vm_ip} [${guest_vm_hostname}] is running and responding to status checks"
      # The guest recovered, so clear the acknowledgement *before* exiting
      # (this used to sit after the exit and was never executed).
      rm -f -- "${ack_file}"
      exit 0
    fi
    echo "guest_status - ${guest_vm_ip} [${guest_vm_hostname}] is offline and ACK'd - $(< "${ack_file}")"
    exit 1
  fi

  # No ACK file: a guest that is not running just needs to be powered on
  # (only if it's Active in WHMCS).
  if [[ "${xen_vm_running:-}" == 0 ]]; then
    echo "guest_status - ${guest_vm_ip} [${guest_vm_hostname}] is offline - please check the server"
    exit 2
  fi

  # The guest is running: it must also answer at least one status check.
  if [[ "${xen_vm_running:-}" == 1 ]]; then
    check_status
    if [[ "${status}" == 1 ]]; then
      echo "guest_status - ${guest_vm_ip} [${guest_vm_hostname}] is running and responding to status checks"
      exit 0
    fi
    # This is when monitoring creates the ACK file with the bug-ID in it and
    # starts investigating.
    # NOTE(review): the leading "2 " differs from every other message in this
    # function - confirm whether a consumer depends on it before removing.
    echo "2 guest_status - ${guest_vm_ip} [${guest_vm_hostname}] is not responding to status checks - please check the server"
    exit 2
  fi

  # Neither 0 nor 1: never finish silently with a success exit code.
  echo "guest_status - unable to determine whether the guest VM is running"
  exit 3
}
# Script entry point: first collect the LV / xl state (check_guest also runs
# sanity_check, which may print a message and exit early), then evaluate the
# guest and report its status.
check_guest
main