File: //lib64/nagios/plugins/check_jetbackup_queue
#!/usr/bin/python
import json
import subprocess
import sys
from datetime import datetime, timedelta
import re
import argparse
def is_command_available(command):
    """Return True if *command* resolves to an executable file.

    A path that contains a directory component is checked as given; a bare
    name is looked up in every directory listed in ``PATH``.  The command is
    never launched, so probing has no side effects and cannot block waiting
    on the child's output pipes.

    Args:
        command: Path or bare name of the program to look for.

    Returns:
        True when an executable regular file was found, otherwise False.
    """
    import os

    def _is_executable(path):
        # Directories also carry the execute bit, hence the isfile() check.
        return os.path.isfile(path) and os.access(path, os.X_OK)

    if os.path.dirname(command):
        return _is_executable(command)

    search_dirs = os.environ.get("PATH", os.defpath).split(os.pathsep)
    return any(
        _is_executable(os.path.join(folder, command)) for folder in search_dirs
    )
def _run_and_decode(command):
    """Run *command* (an argv list) and return its standard output as text.

    Standard error is captured and discarded; the exit status is not
    inspected.
    """
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, _ = process.communicate()
    # Undecodable bytes are substituted instead of raising UnicodeDecodeError,
    # so a stray byte in the tool's output cannot abort the check here.
    return output.decode("utf-8", "replace")


def execute_jetapi_command():
    """Return the output of ``jetapi backup -F listLogs -O json`` as text."""
    return _run_and_decode(["/usr/bin/jetapi", "backup", "-F", "listLogs", "-O", "json"])


def execute_jetbackup5api_command():
    """Return the output of ``jetbackup5api -F listQueueGroups -D type=1 -O json`` as text."""
    return _run_and_decode(
        ["/usr/bin/jetbackup5api", "-F", "listQueueGroups", "-D", "type=1", "-O", "json"]
    )


def execute_jetapi_jobs_command():
    """Return the output of ``jetapi backup -F listBackupJobs -O json`` as text."""
    return _run_and_decode(["/usr/bin/jetapi", "backup", "-F", "listBackupJobs", "-O", "json"])


def execute_jetbackup5api_jobs_command():
    """Return the output of ``jetbackup5api -F listBackupJobs -O json`` as text."""
    return _run_and_decode(["/usr/bin/jetbackup5api", "-F", "listBackupJobs", "-O", "json"])
def parse_time(time_str):
    """Parse an ISO-8601 style timestamp into a naive local datetime.

    Accepted forms are ``YYYY-MM-DDTHH:MM:SS`` optionally followed by
    fractional seconds (ignored) and by a zone designator: ``Z``,
    ``+HH:MM``/``-HH:MM`` or ``+HHMM``/``-HHMM``.

    When a zone designator is present the moment is converted to the
    machine's local time, so the result can be subtracted from
    ``datetime.now()``.  Without a designator the value is returned as
    written.

    Args:
        time_str: Timestamp text, or a falsy value (``None``, ``""``).

    Returns:
        A naive ``datetime``, or ``None`` when *time_str* is falsy.

    Raises:
        ValueError: If *time_str* does not match the accepted forms.
    """
    import calendar

    if not time_str:
        return None

    match = re.match(
        r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.\d+)?(Z|[+-]\d{2}:?\d{2})?$",
        time_str.strip(),
    )
    if match is None:
        raise ValueError("Unrecognized timestamp format: {0!r}".format(time_str))

    wall_clock = datetime.strptime(match.group(1), "%Y-%m-%dT%H:%M:%S")
    zone = match.group(2)
    if zone is None:
        return wall_clock

    if zone == "Z":
        offset = timedelta(0)
    else:
        digits = zone[1:].replace(":", "")
        offset = timedelta(hours=int(digits[:2]), minutes=int(digits[2:]))
        if zone[0] == "-":
            offset = -offset

    # A wall-clock time at UTC+offset corresponds to (wall clock - offset) in UTC.
    utc_moment = wall_clock - offset
    # timegm() reads the tuple as UTC; fromtimestamp() renders it in local time.
    return datetime.fromtimestamp(calendar.timegm(utc_moment.timetuple()))
# Command-line interface: both alert thresholds are whole hours.
parser = argparse.ArgumentParser(description="Check JetBackup status and alerts.")
parser.add_argument(
    "-c",
    "--critical",
    type=int,
    default=24,
    help="Critical threshold in hours (default: 24)",
)
parser.add_argument(
    "-w",
    "--warning",
    type=int,
    default=12,
    help="Warning threshold in hours (default: 12)",
)
args = parser.parse_args()

warning_hours_threshold = args.warning
critical_hours_threshold = args.critical
# Select the JetBackup CLI once (JetBackup 5 preferred) and fetch both the
# queue/log listing and the backup-job listing from it.  Doing this in a single
# pass avoids probing the binaries and running the queue command twice.
if is_command_available("/usr/bin/jetbackup5api"):
    command_type = "jetbackup5api"
    output = execute_jetbackup5api_command()
    jobs_output = execute_jetbackup5api_jobs_command()
elif is_command_available("/usr/bin/jetapi"):
    command_type = "jetapi"
    output = execute_jetapi_command()
    jobs_output = execute_jetapi_jobs_command()
else:
    # NOTE(review): exits 0 (Nagios OK) when no JetBackup CLI is installed;
    # presumably intentional for hosts without JetBackup - confirm.
    print("No suitable command found on the server.")
    sys.exit(0)
# Decode both API responses; a malformed payload from either one ends the
# check with Nagios status UNKNOWN (exit code 3).
try:
    data = json.loads(output)
    jobs_data = json.loads(jobs_output)
except ValueError:
    print("UNKNOWN: JetBackup error")
    sys.exit(3)
# Collect the ids of running jobs whose last_run timestamp is older than the
# warning / critical thresholds.
#
# The payload is navigated defensively: a null or non-dict "data" value, or a
# missing / non-list "jobs" value, is treated as "no jobs" instead of raising.
jobs_payload = jobs_data.get("data") if isinstance(jobs_data, dict) else None
jobs = jobs_payload.get("jobs") if isinstance(jobs_payload, dict) else None
if not isinstance(jobs, list):
    jobs = []
total_jobs = len(jobs)

critical_alerts = []
warning_alerts = []

# Loop invariants: one reference instant and the two age limits.
now = datetime.now()
critical_age = timedelta(hours=critical_hours_threshold)
warning_age = timedelta(hours=warning_hours_threshold)

for job in jobs:
    if not isinstance(job, dict):
        continue
    job_id = job.get("_id")
    running = job.get("running", False)
    if not running:
        # Only running jobs are evaluated, so idle jobs need no timestamp parsing.
        continue
    last_run = parse_time(job.get("last_run"))
    if not last_run:
        continue
    time_since_last_run = now - last_run
    if time_since_last_run > critical_age:
        critical_alerts.append(job_id)
    elif time_since_last_run > warning_age:
        warning_alerts.append(job_id)
# Per-CLI layout of the queue/log response:
#   (key under "data", status value, id key, message when absent, exit code when absent)
# NOTE(review): alerts, status_key, status_value and id_key are not read again
# in the visible remainder of the script - confirm whether they are still needed.
response_layouts = {
    "jetapi": ("logs", 6, "_id", "No logs found.", 1),
    "jetbackup5api": ("groups", 2, "log_id", "No queue groups found.", 0),
}

layout = response_layouts.get(command_type)
if layout is None:
    print("Invalid command type.")
    sys.exit(1)

collection_key, status_value, id_key, missing_message, missing_exit_code = layout
status_key = "status"

if "data" in data and collection_key in data["data"]:
    alerts = data["data"][collection_key]
elif "alerts" in data:
    alerts = data["alerts"]
else:
    print(missing_message)
    sys.exit(missing_exit_code)
# Emit the Nagios status line and exit code, most severe condition first:
# 2 = CRITICAL, 1 = WARNING, 0 = OK.
if total_jobs == 0:
    print("CRITICAL: No backup jobs found.")
    sys.exit(2)
elif critical_alerts:
    # str() keeps join() from raising TypeError when an id is not a string
    # (for example None when a job entry has no "_id").
    print("CRITICAL: BackupJob is running for more than {0} hours: {1}".format(
        critical_hours_threshold,
        ", ".join(str(alert_id) for alert_id in critical_alerts)
    ))
    sys.exit(2)
elif warning_alerts:
    print("WARNING: BackupJob is running for more than {0} hours: {1}".format(
        warning_hours_threshold,
        ", ".join(str(alert_id) for alert_id in warning_alerts)
    ))
    sys.exit(1)
else:
    print("JetBackup Queue is ok.")
    sys.exit(0)