File: //usr/lib64/nagios/plugins/check_a2_zpool.sh
#!/bin/bash
# Ref: BFENG-667 - Script that automatically clears zpool errors when the pool reports no data errors/corruption.
# Kill switch: while this marker file exists the script does nothing except
# report that monitoring is disabled and exit with status 1.
zfs_disable_clear="/etc/.jbk_zpool_clear_disabled"
if [[ -e "${zfs_disable_clear}" ]]; then
  printf '%s\n' "zfs pool monitoring is disabled currently"
  exit 1
fi
#######################################
# Check ZFS pool health and clear pool errors automatically, a bounded number
# of times, when the pool is DEGRADED but reports no data errors.
#
# Globals:
#   ZPOOL_CLEAR_COUNT_FILE (read, optional) - overrides the path of the file
#     holding the number of automatic clears; defaults to
#     /etc/.jbk_zpool_clear_count.
# Arguments:
#   None
# Outputs:
#   One status line on stdout (a second line if 'zpool clear' fails).
# Exit status (the function always terminates the script via exit):
#   0 - every pool is ONLINE
#   1 - pool was DEGRADED without data errors and has been cleared
#   2 - data errors present, clear limit exceeded, 'zpool clear' failed,
#       or the pool is in a state other than ONLINE/DEGRADED
#   3 - 'zpool status' failed or no pool state could be parsed
#######################################
check_zpool() {
  local -r pool="backups"
  local -r max_clears=5
  local -r count_file="${ZPOOL_CLEAR_COUNT_FILE:-/etc/.jbk_zpool_clear_count}"
  local status_output state data_errors
  local cleared_count=0

  # Load the persisted clear counter; create the file on first run.
  if [[ -r "${count_file}" ]]; then
    cleared_count=$(<"${count_file}")
  fi
  if [[ ! -e "${count_file}" ]]; then
    echo 0 > "${count_file}"
  fi
  # An empty or corrupted counter file must not break the arithmetic below.
  if [[ ! "${cleared_count}" =~ ^[0-9]+$ ]]; then
    cleared_count=0
  fi

  # Query once so state and error count come from the same snapshot.
  if ! status_output=$(zpool status); then
    echo "unable to query zpool status"
    exit 3
  fi

  # With several pools there are several "state:" lines; keep the most severe
  # one (anything else > DEGRADED > ONLINE) so a bad pool is never masked.
  state=$(awk '
    $1 == "state:" {
      if ($2 == "ONLINE") rank = 0
      else if ($2 == "DEGRADED") rank = 1
      else rank = 2
      if (!seen || rank > worst) { worst = rank; state = $2 }
      seen = 1
    }
    END { if (seen) print state }
  ' <<<"${status_output}")

  # "errors: No known data errors" carries no number and counts as 0;
  # "errors: N data errors, ..." contributes N.
  data_errors=$(awk '
    $1 == "errors:" && $2 ~ /^[0-9]+$/ { total += $2 }
    END { print total + 0 }
  ' <<<"${status_output}")

  if [[ -z "${state}" ]]; then
    echo "unable to determine zfs pool state"
    exit 3
  fi

  if [[ "${state}" == "ONLINE" ]]; then
    echo "zfs pool ${pool} is healthy"
    exit 0
  fi

  # FAULTED, UNAVAIL, SUSPENDED, ... are not something a clear should paper over.
  if [[ "${state}" != "DEGRADED" ]]; then
    echo "zfs pool is in ${state} state"
    exit 2
  fi

  if (( data_errors > 0 )); then
    echo "zfs pool is in ${state} state and has ${data_errors} data errors"
    exit 2
  fi

  # Count this attempt before clearing so repeated failures also hit the limit.
  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  # The counter is never reset here; it only grows until reset externally.
  cleared_count=$(( 10#${cleared_count} + 1 ))
  echo "${cleared_count}" > "${count_file}"

  if (( cleared_count > max_clears )); then
    echo "zfs pool is in ${state} state and we have cleared it ${cleared_count} times"
    exit 2
  fi

  echo "zfs pool is in ${state} state and we have cleared it ${cleared_count} times"
  if ! zpool clear "${pool}"; then
    echo "zpool clear ${pool} failed"
    exit 2
  fi
  exit 1
}
# Run the check; check_zpool terminates the script itself via exit on every path.
check_zpool