File: //usr/lib64/nagios/plugins/a2_temperature_check.shared.bmi
#!/bin/bash
# SYSENG-20654
#
# Preamble for the temperature check: verifies that ipmitool is installed,
# samples the "Inlet Temp" and "System Temp" sensors once, and defines the
# thresholds, message strings and warning flag used by the check functions
# further down in this file.

# No reading can be taken without ipmitool: report it and exit with status 2.
# `command -v` is a shell builtin, so the lookup does not depend on the
# external `which` binary being present.
if ! command -v ipmitool >/dev/null 2>&1; then
  echo "ipmitool not found"
  exit 2
fi

# Fourth whitespace-separated field of every line containing "Reading".
# stderr is merged into the pipe (2>&1), so the value may be non-numeric or
# empty; inlet_temp() validates it before use.
inlet_temp=$(ipmitool sensor get "Inlet Temp" 2>&1 | awk '/Reading/ { print $4 }')
sys_temp=$(ipmitool sensor get "System Temp" 2>&1 | awk '/Reading/ { print $4 }')

# notify levels (printed with a "C" suffix in the messages); a reading has to
# be strictly greater than a level to trigger it
inlet_warn=45
inlet_crit=55
cpu_warn=60
cpu_crit=70
disk_warn=55
disk_crit=60

# Warning line: each check appends its own fragments to this prefix.
warnings="Temperature "
# Printed when no check raised a warning.
ok="Temperature OK"
# Printed when neither inlet nor system temperature yields a number.
ipmi_crit="Unable to get systems temperature, please check the ipmitool"
# Set to 1 by any check that appended something to $warnings.
display_warn=0
#######################################
# Checks every sensor line of `ipmitool sdr list` matching "CPU.*Temp".
# Globals:
#   cpu_warn, cpu_crit (read)
#   warnings, display_warn (written when a CPU is above cpu_warn)
# Outputs:
#   Prints the critical message to stdout when a CPU is above cpu_crit.
# Exits:
#   Terminates the script with status 2 on the first CPU found above cpu_crit.
#######################################
cpu_temp () {
  declare -A cpu_temps
  local cpu temp reading
  local warn_found=0

  # Map the first field of each matching line (the sensor label) to its
  # fourth field (the reading).
  while IFS=' ' read -r cpu temp; do
    cpu_temps[$cpu]=$temp
  done < <(ipmitool sdr list | awk '/CPU.*Temp/ { print $1, $4 }')

  for cpu in "${!cpu_temps[@]}"; do
    reading=${cpu_temps[$cpu]}

    # The fourth field is not guaranteed to be a number (presumably it is
    # text when a sensor has no reading -- confirm against ipmitool output).
    # Skip such entries instead of letting `[ ... -gt ... ]` print
    # "integer expression expected" on stderr for each of them.
    [[ "$reading" =~ ^-?[0-9]+$ ]] || continue

    if [ "$reading" -gt "$cpu_crit" ]; then
      echo "$cpu temperature is over ${cpu_crit}C, please investigate"
      exit 2
    elif [ "$reading" -gt "$cpu_warn" ]; then
      warnings+="$cpu-${reading}C "
      warn_found=1
      display_warn=1
    fi
  done

  # Append the threshold once, after all per-CPU fragments.
  if [ "$warn_found" -eq 1 ]; then
    warnings+="CPU Warn threshold - ${cpu_warn}C "
  fi
}
# Succeeds when the non-negative decimal reading $1 is strictly above the
# whole-number limit $2; the fraction only matters when the whole parts tie.
_inlet_reading_over () {
  local whole=${1%%.*}
  local frac=
  [[ "$1" == *.* ]] && frac=${1#*.}
  if [ "$whole" -gt "$2" ]; then
    return 0
  fi
  [ "$whole" -eq "$2" ] && [[ "$frac" == *[1-9]* ]]
}

#######################################
# Checks the inlet temperature, falling back to the system temperature.
# Globals:
#   inlet_temp, sys_temp, inlet_warn, inlet_crit, ipmi_crit (read)
#   warnings, display_warn (written when the reading is above inlet_warn)
# Outputs:
#   Prints the critical message to stdout when the reading is above
#   inlet_crit, or $ipmi_crit when neither value is a number.
# Exits:
#   Terminates the script with status 2 in both of those cases.
#######################################
inlet_temp () {
  local crit_inlet_msg="inlet temperature is over ${inlet_crit}C, please investigate"
  local numeric_re='^[0-9]+([.][0-9]+)?$'
  local reading

  # if inlet temp doesn't display a number, then it's older and not supported
  # on this system; sys_temp is found on legacy x10 systems
  if [[ "$inlet_temp" =~ $numeric_re ]]; then
    reading=$inlet_temp
  elif [[ "$sys_temp" =~ $numeric_re ]]; then
    reading=$sys_temp
  else
    echo "$ipmi_crit"
    exit 2
  fi

  # The accepted format includes decimals (e.g. "56.5"), which
  # `[ ... -gt ... ]` cannot compare: it errors out and the threshold is never
  # reported. The helper compares whole and fractional parts instead.
  if _inlet_reading_over "$reading" "$inlet_crit"; then
    echo "$crit_inlet_msg"
    exit 2
  elif _inlet_reading_over "$reading" "$inlet_warn"; then
    warnings+="inlet-${reading}C "
    display_warn=1
  fi
}
#######################################
# Checks the temperature smartctl reports for every disk listed by lsblk.
# Globals:
#   disk_warn, disk_crit (read)
#   warnings, display_warn (written when a disk is above disk_warn)
# Outputs:
#   Prints the critical message to stdout when a disk is above disk_crit.
# Exits:
#   Terminates the script with status 2 on the first disk found above
#   disk_crit.
#######################################
disk_temp () {
  local -a disks
  local disk disk_temp
  local warn_found=0

  # First column of every `lsblk -d` line that mentions "disk" but not "ploop".
  readarray -t disks < <(lsblk -d | grep disk | grep -v ploop | cut -d ' ' -f1)

  for disk in "${disks[@]}"; do
    # Only the first matching line of the smartctl report is used.
    if [[ $disk == sd* ]]; then
      # sd* devices: tenth field of the first line containing "Temperature".
      disk_temp=$(smartctl -a /dev/"$disk" | awk '/Temperature/ { print $10; exit }')
    else
      # Other devices: second field of the first "Temperature:" line.
      disk_temp=$(smartctl -a /dev/"$disk" | awk '/Temperature:/ { print $2; exit }')
    fi

    # An empty or non-numeric field means no usable temperature for this disk.
    # Skip it instead of letting `[ ... -gt ... ]` print
    # "integer expression expected" on stderr.
    [[ "$disk_temp" =~ ^-?[0-9]+$ ]] || continue

    if [ "$disk_temp" -gt "$disk_crit" ]; then
      echo "$disk temperature is over ${disk_crit}C, please investigate"
      exit 2
    elif [ "$disk_temp" -gt "$disk_warn" ]; then
      warnings+="$disk-${disk_temp}C "
      warn_found=1
      display_warn=1
    fi
  done

  # Append the threshold once, after all per-disk fragments.
  if [ "$warn_found" -eq 1 ]; then
    warnings+="DISK Warn threshold - ${disk_warn}C"
  fi
}
# Run the three checks in order. Each one may end the script with status 2 on
# its own; otherwise it only appends to $warnings and raises $display_warn.
cpu_temp
inlet_temp
disk_temp

# At least one warning was collected: print the assembled line, exit status 1.
if [ "$display_warn" -eq 1 ]; then
  printf '%s\n' "$warnings"
  exit 1
fi

# Nothing to report.
printf '%s\n' "$ok"