From b01feabffbea5b9a911821b19844839a916ff63c Mon Sep 17 00:00:00 2001
From: Zolfa
Date: Sat, 6 Jun 2020 00:13:47 +0200
Subject: [PATCH] backup status monitoring

Add an Icinga 2 check for the per-repository backup status files.

* host.conf.j2: export every entry of a host's `borg_repos` as
  `vars.backup_repos["<name>"]`.
* check_backup.sh: plugin that reads every file in <log_dir>/<repo>/
  (default log_dir: /var/log/backup-status).  Each file holds
  `END|START|BACKUP_RC|PRUNE_RC`.  A return code of 1 raises WARNING,
  any other non-zero code raises CRITICAL; a backup older than the
  -w / -c thresholds (defaults 26h / 52h, in seconds) raises WARNING /
  CRITICAL.  Age and duration are emitted as perfdata.
* command-backup.conf / services.conf: CheckCommand "backup" and one
  "backup: <repo>" service per entry of host.vars.backup_repos.
* tasks: install the plugin and the new command definition.
---
Review notes (this section is ignored by `git am`):

 - roles/icinga2/files/check_backup.sh used `or [ -d ... ]` in the
   repository sanity check; it now uses `|| [ ! -d ... ]` like the
   monitoring-agent copy.
 - The same file emitted `duration=...s;;0;3600` as perfdata, which puts
   0 in the "crit" field.  It now emits the same perfdata as the
   monitoring-agent copy, so both copies are identical.
 - command-backup.conf: added the missing closing quote on the "-c"
   description.
 - Indentation and blank context lines were reconstructed from a
   whitespace-collapsed copy of this patch; regenerate it with
   `git format-patch` if it does not apply cleanly.  The blob id on the
   command-backup.conf `index` line predates the fix above.

 .../icinga2-monitoring/templates/host.conf.j2 |  10 ++
 roles/icinga2/files/check_backup.sh           | 125 ++++++++++++++++++
 .../icinga2/files/icinga2/command-backup.conf |  27 ++++
 roles/icinga2/files/icinga2/services.conf     |   8 ++
 roles/icinga2/tasks/main.yaml                 |   3 +-
 roles/monitoring-agent/files/check_backup.sh  | 125 ++++++++++++++++++
 roles/monitoring-agent/tasks/main.yaml        |  11 +-
 7 files changed, 305 insertions(+), 4 deletions(-)
 create mode 100644 roles/icinga2/files/check_backup.sh
 create mode 100644 roles/icinga2/files/icinga2/command-backup.conf
 create mode 100644 roles/monitoring-agent/files/check_backup.sh

diff --git a/roles/icinga2-monitoring/templates/host.conf.j2 b/roles/icinga2-monitoring/templates/host.conf.j2
index 08c3c10..d2e6087 100644
--- a/roles/icinga2-monitoring/templates/host.conf.j2
+++ b/roles/icinga2-monitoring/templates/host.conf.j2
@@ -16,6 +16,16 @@ object Host "{{ item.key }}" {
   vars.{{ var.key }} = "{{ var.value }}"
 {% endfor %}
 
+
+{% for repo in item.value.borg_repos|d({})|dict2items %}
+  vars.backup_repos["{{ repo.key }}"] = {
+{% for v in repo.value|dict2items %}
+    {{ v.key }} = "{{ v.value }}"
+{% endfor %}
+  }
+{% endfor %}
+}
+
 {% for vhost in item.value.vhosts|d([]) %}
 object Host "{{ vhost }}" {
   import "generic-host"
diff --git a/roles/icinga2/files/check_backup.sh b/roles/icinga2/files/check_backup.sh
new file mode 100644
index 0000000..b3c4def
--- /dev/null
+++ b/roles/icinga2/files/check_backup.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+PROGNAME=$(basename $0)
+PROGPATH=$(echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,')
+
+. $PROGPATH/utils.sh
+
+print_usage() {
+    echo "Usage: $PROGNAME -R repo [-d log_dir] [-w warning_age] [-c critical_age]"
+    echo "Usage: $PROGNAME --help"
+}
+
+print_help() {
+    echo ""
+    exit $STATE_OK
+}
+
+logdir=/var/log/backup-status
+wage=93600 # 26h
+cage=187200 # 52h
+repo=""
+
+while test -n "$1"; do
+    case "$1" in
+        --help|-h)
+            print_help
+            exit $STATE_OK
+            ;;
+        --repo|-R)
+            repo=$2
+            shift
+            ;;
+        --dir|-d)
+            logdir=$2
+            shift
+            ;;
+        --wage|-w)
+            wage=$2
+            shift
+            ;;
+        --cage|-c)
+            cage=$2
+            shift
+            ;;
+        *)
+            echo "Unknown argument: $1"
+            print_usage
+            exit $STATE_UNKNOWN
+            ;;
+    esac
+    shift
+done
+
+if [ "$repo" == "" ] || [ ! -d "$logdir/$repo" ]; then
+    echo "Unknown repo $repo."
+    exit $STATE_UNKNOWN
+
+fi
+perf=""
+message=""
+warning=0
+critical=0
+unknown=0
+
+for i in $logdir/$repo/*; do
+    IFS='|' read -ra STATE < $i
+    ENTRY=$(basename $i)
+    END=${STATE[0]}
+    START=${STATE[1]}
+    BACKUP_RC=${STATE[2]}
+    PRUNE_RC=${STATE[3]}
+    AGE=$(( $(date +%s) - ${START} ))
+    DURATION=$(( ${END} - ${START} ))
+
+    case "$BACKUP_RC" in
+        0)
+            ;;
+        1)
+            warning=1
+            message="${message} - [ Backup of ${ENTRY} returned 1. ]"
+            ;;
+        *)
+            critical=1
+            message="${message} - [ Backup of ${ENTRY} returned ${BACKUP_RC}. ]"
+            ;;
+    esac
+
+    case "$PRUNE_RC" in
+        0)
+            ;;
+        1)
+            warning=1
+            message="${message} - [ Prune of ${ENTRY} returned 1. ]"
+            ;;
+        *)
+            critical=1
+            message="${message} - [ Prune of ${ENTRY} returned ${PRUNE_RC}. ]"
+            ;;
+    esac
+
+    if [ "${AGE}" -gt "${cage}" ]; then
+        critical=1
+        message="${message} - [ Age of ${ENTRY} is CRITICAL ]"
+    elif [ "${AGE}" -gt "${wage}" ]; then
+        warning=1
+        message="${message} - [ Age of ${ENTRY} is WARNING ]"
+    fi
+
+    perf="${perf}${ENTRY}/age=${AGE}s;${wage};${cage};0;${cage} "
+    perf="${perf}${ENTRY}/duration=${DURATION}s "
+
+done
+
+if [ "$critical" == "1" ]; then
+    echo "BACKUP CRITICAL${message}|${perf}"
+    exit $STATE_CRITICAL
+elif [ "$warning" == "1" ]; then
+    echo "BACKUP WARNING${message}|${perf}"
+    exit $STATE_WARNING
+else
+    echo "BACKUP OK|${perf}"
+    exit $STATE_OK
+fi
+
+exit $STATE_UNKNOWN
diff --git a/roles/icinga2/files/icinga2/command-backup.conf b/roles/icinga2/files/icinga2/command-backup.conf
new file mode 100644
index 0000000..d4ed14f
--- /dev/null
+++ b/roles/icinga2/files/icinga2/command-backup.conf
@@ -0,0 +1,27 @@
+object CheckCommand "backup" {
+  command = [ PluginDir + "/check_backup.sh" ]
+
+  arguments = {
+    "-R" = {
+      value = "$backup_repo$"
+      description = "Name of the repository"
+    }
+    "-d" = {
+      value = "$backup_logdir$"
+      description = "Directory where backup log are saved."
+    }
+    "-w" = {
+      value = "$backup_wage$"
+      description = "Maximum backup age to return a warning status."
+    }
+    "-c" = {
+      value = "$backup_cage$"
+      description = "Maximum backup age to return a critical status."
+    }
+
+  }
+
+  vars.backup_logdir = "/var/log/backup-status"
+  vars.backup_wage = 26h
+  vars.backup_cage = 52h
+}
diff --git a/roles/icinga2/files/icinga2/services.conf b/roles/icinga2/files/icinga2/services.conf
index 2ca46ae..4e2d10b 100644
--- a/roles/icinga2/files/icinga2/services.conf
+++ b/roles/icinga2/files/icinga2/services.conf
@@ -143,6 +143,14 @@ apply Service "ldap_sync" {
   assign where host.vars.ldap_master
 }
 
+apply Service "backup: " for ( backup_repo => config in host.vars.backup_repos ) {
+  check_command = "backup"
+
+  vars.backup_repo = backup_repo
+  vars += config
+  import "generic-agent-service"
+}
+
 apply Service "dns" {
   import "generic-service"
 
diff --git a/roles/icinga2/tasks/main.yaml b/roles/icinga2/tasks/main.yaml
index 281017c..cb8e681 100644
--- a/roles/icinga2/tasks/main.yaml
+++ b/roles/icinga2/tasks/main.yaml
@@ -43,6 +43,7 @@
     group: 'nagios'
   loop:
     - 'check_ldap_syncrepl_status.pl'
+    - 'check_backup.sh'
 
 - name: 'create directory for hosts configuration'
   file:
@@ -62,7 +63,7 @@
     - 'services.conf'
     - 'apt.conf'
     - 'command-ldapsync.conf'
-
+    - 'command-backup.conf'
 - name: 'disable local host conf.d'
   file:
     path: '/etc/icinga2/conf.d/hosts.conf'
diff --git a/roles/monitoring-agent/files/check_backup.sh b/roles/monitoring-agent/files/check_backup.sh
new file mode 100644
index 0000000..b3c4def
--- /dev/null
+++ b/roles/monitoring-agent/files/check_backup.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+PROGNAME=$(basename $0)
+PROGPATH=$(echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,')
+
+. $PROGPATH/utils.sh
+
+print_usage() {
+    echo "Usage: $PROGNAME -R repo [-d log_dir] [-w warning_age] [-c critical_age]"
+    echo "Usage: $PROGNAME --help"
+}
+
+print_help() {
+    echo ""
+    exit $STATE_OK
+}
+
+logdir=/var/log/backup-status
+wage=93600 # 26h
+cage=187200 # 52h
+repo=""
+
+while test -n "$1"; do
+    case "$1" in
+        --help|-h)
+            print_help
+            exit $STATE_OK
+            ;;
+        --repo|-R)
+            repo=$2
+            shift
+            ;;
+        --dir|-d)
+            logdir=$2
+            shift
+            ;;
+        --wage|-w)
+            wage=$2
+            shift
+            ;;
+        --cage|-c)
+            cage=$2
+            shift
+            ;;
+        *)
+            echo "Unknown argument: $1"
+            print_usage
+            exit $STATE_UNKNOWN
+            ;;
+    esac
+    shift
+done
+
+if [ "$repo" == "" ] || [ ! -d "$logdir/$repo" ]; then
+    echo "Unknown repo $repo."
+    exit $STATE_UNKNOWN
+
+fi
+perf=""
+message=""
+warning=0
+critical=0
+unknown=0
+
+for i in $logdir/$repo/*; do
+    IFS='|' read -ra STATE < $i
+    ENTRY=$(basename $i)
+    END=${STATE[0]}
+    START=${STATE[1]}
+    BACKUP_RC=${STATE[2]}
+    PRUNE_RC=${STATE[3]}
+    AGE=$(( $(date +%s) - ${START} ))
+    DURATION=$(( ${END} - ${START} ))
+
+    case "$BACKUP_RC" in
+        0)
+            ;;
+        1)
+            warning=1
+            message="${message} - [ Backup of ${ENTRY} returned 1. ]"
+            ;;
+        *)
+            critical=1
+            message="${message} - [ Backup of ${ENTRY} returned ${BACKUP_RC}. ]"
+            ;;
+    esac
+
+    case "$PRUNE_RC" in
+        0)
+            ;;
+        1)
+            warning=1
+            message="${message} - [ Prune of ${ENTRY} returned 1. ]"
+            ;;
+        *)
+            critical=1
+            message="${message} - [ Prune of ${ENTRY} returned ${PRUNE_RC}. ]"
+            ;;
+    esac
+
+    if [ "${AGE}" -gt "${cage}" ]; then
+        critical=1
+        message="${message} - [ Age of ${ENTRY} is CRITICAL ]"
+    elif [ "${AGE}" -gt "${wage}" ]; then
+        warning=1
+        message="${message} - [ Age of ${ENTRY} is WARNING ]"
+    fi
+
+    perf="${perf}${ENTRY}/age=${AGE}s;${wage};${cage};0;${cage} "
+    perf="${perf}${ENTRY}/duration=${DURATION}s "
+
+done
+
+if [ "$critical" == "1" ]; then
+    echo "BACKUP CRITICAL${message}|${perf}"
+    exit $STATE_CRITICAL
+elif [ "$warning" == "1" ]; then
+    echo "BACKUP WARNING${message}|${perf}"
+    exit $STATE_WARNING
+else
+    echo "BACKUP OK|${perf}"
+    exit $STATE_OK
+fi
+
+exit $STATE_UNKNOWN
diff --git a/roles/monitoring-agent/tasks/main.yaml b/roles/monitoring-agent/tasks/main.yaml
index 6d62563..6623dcc 100644
--- a/roles/monitoring-agent/tasks/main.yaml
+++ b/roles/monitoring-agent/tasks/main.yaml
@@ -8,11 +8,16 @@
   tags:
     - 'packages'
 
-- name: 'add monitoring plugin - check_mem.pl'
+- name: 'add monitoring plugins'
   copy:
-    src: 'check_mem.pl'
-    dest: '/usr/lib/nagios/plugins/check_mem.pl'
+    src: '{{ item }}'
+    dest: '/usr/lib/nagios/plugins/{{ item }}'
     mode: '0755'
+  loop:
+    - 'check_mem.pl'
+    - 'check_backup.sh'
+  tags:
+    - 'monitoring'
 
 - name: 'create monitoring user'
   user: