File: //lib64/nagios/plugins/check_a2_jetbackup.sh
#!/bin/bash
# This check_mk script verifies that JetBackup is functioning correctly. We should already receive daily emails when it isn't, but backups are important, so this check makes sure of it as well.
#Checks
# Checks to make sure the API is functional as JB likes to hang...
# Checks to make sure the Slack and Slack Debug notification profiles are there and not disabled.
# Checks to make sure there haven't been more than 3 failures in the past 14 log entries (should be 14 days, unless it's being run manually)
# Checks to make sure there is a JB Destination called 'Cloud Backup' and it's enabled.
# Checks to make sure there is a JB Backup Job called 'API Backup Job' and it's enabled.
# Update - BFENG-2704 - fix for the case where touch-file creation fails while the quota is full.
# Log file that receives the output of the enable/disable helper functions.
JBK_LOG='/var/log/check_jetbackup.log'

# JetBackup 5 is considered installed when its cPanel admin config file exists.
if [[ -f /usr/local/cpanel/bin/admin/JetApps/JetBackup5.conf ]]; then
  installed="yes"
else
  installed="no"
fi

# Without JetBackup there is nothing to check: say so and leave with status 0.
if [[ "$installed" == "no" ]]; then
  echo "check_jetbackup - Jetbackup is not installed"
  exit 0
fi

# Cache file holding the last check result, and the current epoch time used
# to work out how old that cache is.
CACHE='/var/spool/icinga2/tmp/jetbackup-check-cache'
CURTIME=$(date +%s)
#######################################
# Print the cached check result and exit with its status code.
# The cache file holds one line of the form "<status> <message>".
# Globals:   CACHE (read) - path of the cache file
# Outputs:   the cached message (without the status code) on stdout
# Exits:     with the cached status; with 3 when the cache is missing,
#            empty, or does not start with a numeric status
#######################################
_check_cache() {
  local status='' message=''
  # Only read when the file is readable so a missing cache cannot raise a
  # redirection error.
  if [[ -r "$CACHE" ]]; then
    # First word is the status code, the rest of the line is the message.
    read -r status message < "$CACHE"
  fi
  # `exit ""` or a non-numeric code is a bash error with blank output, so
  # report the problem explicitly instead.
  if [[ ! "$status" =~ ^[0-9]+$ ]]; then
    echo "check_jetbackup - cache file '${CACHE}' is missing, empty or malformed"
    exit 3
  fi
  echo "$message"
  exit "$status"
}
# Serve the cached result when it is fresh, or when another instance of this
# check is already running.
if [[ -e "$CACHE" ]]; then
  CACHETIME=$(stat -c %Z "$CACHE")
  DIFFTIME=$((CURTIME - CACHETIME))
  # Cache entry is less than an hour old, show that.
  if [[ $DIFFTIME -lt 3600 ]]; then
    _check_cache
  fi
  # PIDs of other running instances of this script. pidof prints them
  # space-separated; -o %PPID omits the shell that invoked pidof.
  CJPID=$(/usr/bin/pidof -o %PPID -x "$0")
  # The cache is older than 3700 seconds and another check process is still
  # running: it is probably hung, so kill it.
  if [[ $DIFFTIME -gt 3700 && -n "$CJPID" ]]; then
    # CJPID may hold several PIDs; split them so kill gets one argument per
    # PID instead of a single "123 456" string it would reject.
    read -r -a cjpids <<<"$CJPID"
    kill "${cjpids[@]}"
    echo "1 check_jetbackup - check process found running for too long, killed old process" > "$CACHE"
    _check_cache
  fi
  # Lastly, if the check is already running, don't re-run it - show cache instead.
  if [[ -n "$CJPID" ]]; then
    _check_cache
  fi
fi
## VERIFY API IS WORKING START
# Probe the JetBackup API with a 15 second limit and keep only the line that
# confirms success. The output is piped straight into grep: no temporary file
# is needed, so a failing mktemp cannot produce a false "not responding"
# alert and nothing is left behind if this script is killed mid-run.
RESULTS=$(timeout 15 jetbackup5api -F listBackupJobs | /bin/grep 'success: 1')
# We received nothing from the JetBackup API (or it timed out after 15 seconds), notify #jetbackup_events
if [[ -z "${RESULTS}" ]]; then
  echo "2 check_jetbackup - JetBackup API is not responding" > "$CACHE"
  _check_cache
fi
## VERIFY API IS WORKING END
## VERIFY LICENSE START
# Ask the jetbackup5 binary for its license state; anything that does not
# contain "License is Active" is reported with status 2.
license=$(/usr/bin/jetbackup5 --license)
if [[ "$license" != *"License is Active"* ]]; then
  echo "2 check_jetbackup - JetBackup license is not active." > "$CACHE"
  _check_cache
fi
## VERIFY LICENSE END
## DISABLE BACKUP JOBS AND EVERYTHING ELSE IF REMOTE BACKUP QUOTA STORAGE IS FULL
# Generate the variables needed to connect to the remote JBK host and look for
# the quota flag files.
jbk_client_ip=$(hostname -i)
# Fetch the job list once and reuse it for both lookups below.
_jbk_jobs_json=$(jetbackup5api -F listBackupJobs -O json)
# Host and private-key name of the first destination of the first listed job.
jbk_remote_host=$(jq -r '.data.jobs[].destination_details[0].options.host' <<<"${_jbk_jobs_json}" | head -n 1)
jbk_local_key=$(jq -r '.data.jobs[].destination_details[0].options.internalprivatekey' <<<"${_jbk_jobs_json}" | head -n 1)
jbk_local_key_path="/usr/local/jetapps/etc/jetbackup5/.ssh/keys/"
#######################################
# Disable every JetBackup backup job when at least one is still enabled.
# The job list is fetched once and each job is modified once; when no job
# reports disabled == 0 nothing is printed and no job is touched.
# Outputs:   progress lines on stdout (callers redirect them to the log)
# Returns:   0 when there was nothing to do, else the status of the last echo
#######################################
disable_backup_jobs() {
  local jobs_json job_states job_ids id
  jobs_json=$(jetbackup5api -F listBackupJobs -O json)
  job_states=$(jq -r '.data.jobs[].disabled' <<<"${jobs_json}")
  # A job is enabled when its "disabled" field is exactly 0.
  if ! grep -qx '0' <<<"${job_states}"; then
    return 0
  fi
  job_ids=$(jq -r '.data.jobs[]._id' <<<"${jobs_json}")
  echo "$(date) -------------- Disabling Backup jobs start -----------------"
  # Deliberately unquoted: one job id per word.
  for id in ${job_ids}; do
    echo "Found backup job: $id -- disabling"
    jetbackup5api -F manageBackupJob -D "action=modify&_id=$id&disabled=1"
  done
  echo "$(date) -------------- Disabling Backup jobs end -----------------"
}
#######################################
# Enable every JetBackup backup job when at least one is currently disabled.
# The job list is fetched once and each job is modified once; when no job
# reports disabled == 1 nothing is printed and no job is touched.
# Outputs:   progress lines on stdout (callers redirect them to the log)
# Returns:   0 when there was nothing to do, else the status of the last echo
#######################################
enable_backup_jobs() {
  local jobs_json job_states job_ids id
  jobs_json=$(jetbackup5api -F listBackupJobs -O json)
  job_states=$(jq -r '.data.jobs[].disabled' <<<"${jobs_json}")
  # A job is disabled when its "disabled" field is exactly 1.
  if ! grep -qx '1' <<<"${job_states}"; then
    return 0
  fi
  job_ids=$(jq -r '.data.jobs[]._id' <<<"${jobs_json}")
  echo "$(date) -------------- Enabling Backup jobs start -----------------"
  # Deliberately unquoted: one job id per word.
  for id in ${job_ids}; do
    echo "Found backup job: $id -- enabling"
    jetbackup5api -F manageBackupJob -D "action=modify&_id=$id&disabled=0"
  done
  echo "$(date) -------------- Enabling Backup jobs end -----------------"
}
# functions to call later to disable/enable notifications for jira, slack, debug etc
#######################################
# Set disabled=1 on every JetBackup notification integration.
# Outputs:   a start and an end banner (each prefixed with the date) on
#            stdout, plus whatever the API prints for each modify call
#######################################
disable_notifications() {
  local integration_ids integration_id
  printf '%s\n' "$(date) -------------- Disabling notifications start -----------------"
  integration_ids=$(/usr/bin/jetbackup5api -F listNotificationIntegrations --output json \
    | jq -r '.data.notifications[]._id')
  # Deliberately unquoted: one integration id per word.
  for integration_id in ${integration_ids}; do
    /usr/bin/jetbackup5api -F manageNotificationIntegration \
      -D "action=modify&_id=${integration_id}&disabled=1"
  done
  printf '%s\n' "$(date) -------------- Disabling notifications end -----------------"
}
#######################################
# Set disabled=0 on every JetBackup notification integration.
# Outputs:   a start and an end banner (each prefixed with the date) on
#            stdout, plus whatever the API prints for each modify call
#######################################
enable_notifications() {
  local integration_ids integration_id
  printf '%s\n' "$(date) -------------- Enabling notifications start -----------------"
  integration_ids=$(/usr/bin/jetbackup5api -F listNotificationIntegrations --output json \
    | jq -r '.data.notifications[]._id')
  # Deliberately unquoted: one integration id per word.
  for integration_id in ${integration_ids}; do
    /usr/bin/jetbackup5api -F manageNotificationIntegration \
      -D "action=modify&_id=${integration_id}&disabled=0"
  done
  printf '%s\n' "$(date) -------------- Enabling notifications end -----------------"
}
#######################################
# Dedicated servers only: bring JetBackup in line with the quota state of the
# remote backup storage.
# Over SSH the backup host answers "backup_quota_full" or "backup_quota_good"
# when the matching flag file exists, otherwise a value taken from `quota -s`.
#   - full (or exactly "100%"): add a banner line to /etc/motd, disable all
#     backup jobs, disable notifications when a Slack integration is still
#     enabled, cache a status-1 result and exit through _check_cache.
#   - good: remove the banner line, re-enable backup jobs, and re-enable
#     notifications when a Slack integration is disabled.
#   - anything else (including no answer at all): do nothing.
# Globals:   jbk_client_ip, jbk_remote_host, jbk_local_key_path, jbk_local_key,
#            JBK_LOG, CACHE (read); backup_storage_status (written)
#######################################
check_dedijbk_storage() {
  # Collect remote storage status using quota check instead of file operations
  backup_storage_status=$(ssh -o ConnectTimeout=1 -o ConnectionAttempts=1 -p 7922 \
    "${jbk_client_ip}@${jbk_remote_host}" -i "${jbk_local_key_path}/${jbk_local_key}" -t \
    "if [ -e .backup_quota_full ]; then echo backup_quota_full; elif [ -e .backup_quota_good ]; then echo backup_quota_good; else quota -s | awk '/used/' | awk '{print \$2}' ; fi")
  # With -t the answer may come back with CR/LF line endings; drop the CRs so
  # the exact "100%" comparison below can still match.
  backup_storage_status=${backup_storage_status//$'\r'/}
  # Dedicated Servers only - check if remote backup full flag exists, if so, disable all jobs and exit and do not check anything else
  jbk_full_regex="quota_full"
  jbk_good_regex="quota_good"
  if [[ "${backup_storage_status}" =~ "${jbk_full_regex}" ]] || [[ "${backup_storage_status}" == "100%" ]]; then
    # Append the banner only when it is not there yet, so the rest of
    # /etc/motd survives (the "good" branch removes just this one line).
    if ! grep -q '^backup_quota_full:' /etc/motd 2>/dev/null; then
      echo 'backup_quota_full: A2 Cloud Backups are disabled on this server due to the client hitting their backup quota' >> /etc/motd
    fi
    disable_backup_jobs >> "${JBK_LOG}"
    if /usr/bin/jetbackup5api -F listNotificationIntegrations --output json | jq -r '.data.notifications[] | select(.name == "Slack Notification", .name == "Slack Debug Notification") | .disabled' | grep -q false; then
      disable_notifications >> "${JBK_LOG}"
    fi
    echo "1 check_jetbackup - JetBackup jobs and slack notifications are disabled since ${jbk_remote_host}:/backups/${jbk_client_ip} storage quota is full" > "$CACHE"
    _check_cache
  elif [[ "${backup_storage_status}" =~ "${jbk_good_regex}" ]]; then
    sed -i '/^backup_quota_full:/d' /etc/motd
    enable_backup_jobs >> "${JBK_LOG}"
    if /usr/bin/jetbackup5api -F listNotificationIntegrations --output json | jq -r '.data.notifications[] | select(.name == "Slack Notification", .name == "Slack Debug Notification") | .disabled' | grep -q true; then
      enable_notifications >> "${JBK_LOG}"
    fi
  fi
}
# Run check_dedijbk_storage only if the client host is dedicated: hostnames
# containing a2hosting.com or supercp.com are skipped.
_server_hostname=$(hostname)
_match_regex_s="a2hosting.com"
_match_regex_r="supercp.com"
case "${_server_hostname}" in
  *"${_match_regex_s}"* | *"${_match_regex_r}"*)
    echo "Skipping storage check for SRT servers." >/dev/null
    ;;
  *)
    check_dedijbk_storage
    ;;
esac
## VERIFY BACKUP DURATION START
## Notify if any backups take longer than 48 hours!
# More than one queue group can match status == 2 (presumably the "running"
# state - confirm against the JetBackup API docs). Keep only the group with
# the largest actual_time so the values below are single scalars and the
# comparison cannot break on multi-line input.
LISTQUEUE=$(jetbackup5api -F listQueueGroups -D 'type=3' -O json \
  | jq -c '[.data.groups[] | select(.status == 2)] | max_by(.actual_time) // empty')
if [[ -n "$LISTQUEUE" ]]; then
  QUEUE_DURATION=$(jq -r '.actual_time' <<<"$LISTQUEUE")
  # 172800 seconds = 48 hours; bc prints 1 when the duration is larger.
  if (( $(echo "$QUEUE_DURATION > 172800" | bc -l) )); then
    QUEUENAME=$(jq -r '.data.name' <<<"$LISTQUEUE")
    QUEUE_DURATION_READABLE=$(echo "scale=2; $QUEUE_DURATION / 3600" | bc)
    echo "2 check_jetbackup - Backup Job $QUEUENAME has been running for over $QUEUE_DURATION_READABLE hours!" > "$CACHE"
    _check_cache
  fi
fi
## VERIFY BACKUP DURATION END
## VERIFY LAST BACKUP START
## Alert if the last run is over 3 days old
# last_run of every job whose name matches "API Backup Job".
LASTRUN=$(jetbackup5api -O json -F listBackupJobs \
  | jq -r '.data.jobs[] | select(.name | test("API Backup Job")) | .last_run')
if [[ -z "$LASTRUN" ]]; then
  # No matching job came back at all.
  echo "2 check_jetbackup - I was unable to find a backup job called API Backup Job! Please check my configuration!" >"$CACHE"
  _check_cache
else
  LASTRUN_TS=$(date -d "$LASTRUN" +%s)
  LASTRUN_TS_DIFF=$((CURTIME - LASTRUN_TS))
  # 259200 seconds = 3 days; bc prints 1 when the age is larger.
  if (( $(bc -l <<<"$LASTRUN_TS_DIFF > 259200") )); then
    LASTRUN_TS_DIFF_READABLE=$(bc <<<"scale=2; $LASTRUN_TS_DIFF / 3600")
    echo "2 check_jetbackup - The API Backup Job hasn't ran in over $LASTRUN_TS_DIFF_READABLE hours!" >"$CACHE"
    _check_cache
  fi
fi
## VERIFY LAST BACKUP END
## VERIFY NOTIFICATION PROFILES START
# Both Slack notification integrations must exist and report disabled == false;
# a missing integration yields an empty value and is reported the same way.
notifyJson=$(/usr/bin/jetbackup5api -F listNotificationIntegrations --output json)
for profile_name in "Slack Notification" "Slack Debug Notification"; do
  profile_disabled=$(jq -r --arg name "$profile_name" \
    '.data.notifications[] | select(.name == $name) | .disabled' <<<"$notifyJson")
  if [[ "$profile_disabled" != "false" ]]; then
    echo "2 check_jetbackup - The ${profile_name} is disabled or missing. Please investigate." >"$CACHE"
    _check_cache
  fi
done
## VERIFY NOTIFICATION PROFILES END
## VERIFY NO MORE THAN 3 FAILURES START
# Pull up to 14 log entries (find[type]=1&limit=14) from the API.
logQuery=$(jetbackup5api -F listLogs -D "find[type]=1&limit=14" -O json)
# Count the entries whose .info.Backup contains "API Backup", whose status is
# none of 1, 4 or 6, and whose end_time lies within the last 14 days.
failures_count=$(jq -r --argjson now "$(date +%s)" '
.data.logs
| map(select(
(.info.Backup | contains("API Backup")) and
(.status | (. != 1 and . != 4 and . != 6)) and
(.end_time | sub("\\+00:00$"; "Z") | fromdateiso8601) >= ($now - 14 * 24 * 60 * 60)
))
| length
' <<<"$logQuery")
# An empty result (jq produced nothing) counts as zero failures.
failures_count=${failures_count:-0}
# Alert only when there are more than 3 failures.
if [[ "$failures_count" -gt 3 ]]; then
  echo "2 check_jetbackup - There has been more than 3 backup failures in the past 14 backups. Please investigate." >"$CACHE"
  _check_cache
fi
## VERIFY NO MORE THAN 3 FAILURES END
## VERIFY CLOUD BACKUP IS ENABLED START
# List the destinations and ask jq whether any of them has a name containing
# "Cloud Backup" together with disabled == false.
destLog=$(/usr/bin/jetbackup5api -F listDestinations -O json)
dest_exists=$(jq -r '
.data.destinations
| any(.[]; (.name | contains("Cloud Backup")) and (.disabled == false))
' <<<"$destLog")
# Only an explicit "false" from jq raises the alert.
if [[ "$dest_exists" == "false" ]]; then
  echo "2 check_jetbackup - There isn't a JetBackup Destination with the name Cloud Backup that is enabled. Please investigate." >"$CACHE"
  _check_cache
fi
## VERIFY CLOUD BACKUP IS ENABLED END
## VERIFY API BACKUP JOB IS ENABLED START
# Verify that a backup job whose name matches "API Backup Job"
# (case-insensitive) exists and is enabled (disabled == 0). The job's
# destination is not inspected here.
backupJobLog=$(jetbackup5api -F listBackupJobs -O json)
entry_exists=$(echo "$backupJobLog" | jq -r '
.data.jobs
| any(.[]; select(.name | test("API Backup Job"; "i")) | select(.disabled == 0))
')
# Only an explicit "false" from jq raises the alert.
if [[ "$entry_exists" == "false" ]]; then
  # The message names a Backup Job, which is what this check looks at.
  echo "2 check_jetbackup - There isn't a JetBackup Backup Job with the name containing 'API Backup Job' that is enabled. Please investigate." >"$CACHE"
  _check_cache
fi
## VERIFY API BACKUP JOB IS ENABLED END
## Every failing check above leaves the script through _check_cache, so getting
## here means all of them passed: cache the OK result and report it.
printf '%s\n' "0 check_jetbackup - JetBackup appears to be working as expected." > "$CACHE"
_check_cache