Browse Source

backup status monitoring

python3
Zolfa 4 years ago
parent
commit
b01feabffb
Signed by: zolfa GPG Key ID: E1A43B038C4D6616
7 changed files with 305 additions and 4 deletions
  1. +10
    -0
      roles/icinga2-monitoring/templates/host.conf.j2
  2. +125
    -0
      roles/icinga2/files/check_backup.sh
  3. +27
    -0
      roles/icinga2/files/icinga2/command-backup.conf
  4. +8
    -0
      roles/icinga2/files/icinga2/services.conf
  5. +2
    -1
      roles/icinga2/tasks/main.yaml
  6. +125
    -0
      roles/monitoring-agent/files/check_backup.sh
  7. +8
    -3
      roles/monitoring-agent/tasks/main.yaml

+ 10
- 0
roles/icinga2-monitoring/templates/host.conf.j2 View File

@ -16,6 +16,16 @@ object Host "{{ item.key }}" {
vars.{{ var.key }} = "{{ var.value }}"
{% endfor %}
{# Emit one vars.backup_repos["<name>"] dictionary per entry of the host's borg_repos mapping (empty mapping when undefined); every option of a repository is written as a quoted string attribute. #}
{% for repo in item.value.borg_repos|d({})|dict2items %}
vars.backup_repos["{{ repo.key }}"] = {
{% for v in repo.value|dict2items %}
{{ v.key }} = "{{ v.value }}"
{% endfor %}
}
{% endfor %}
}
{% for vhost in item.value.vhosts|d([]) %}
object Host "{{ vhost }}" {
import "generic-host"


+ 125
- 0
roles/icinga2/files/check_backup.sh View File

@ -0,0 +1,125 @@
#!/bin/bash
#
# Monitoring plugin that reports the state of the backup runs recorded for
# one repository (see print_usage for the command line).

# Name and directory of this script; quoted so paths with spaces survive.
PROGNAME=$(basename -- "$0")
# dirname yields "." when the script is invoked without any slash in $0.
PROGPATH=$(dirname -- "$0")

# utils.sh presumably provides the STATE_* exit codes used by this script;
# load it when it sits next to the plugin.
if [ -r "$PROGPATH/utils.sh" ]; then
  . "$PROGPATH/utils.sh"
fi

# Fall back to the conventional plugin exit codes so that a missing utils.sh
# can never turn "exit $STATE_CRITICAL" into a bare "exit".
: "${STATE_OK:=0}" "${STATE_WARNING:=1}" "${STATE_CRITICAL:=2}" "${STATE_UNKNOWN:=3}"
# Write the two synopsis lines (normal invocation and --help) to stdout.
# Globals: PROGNAME (read)
print_usage() {
  printf 'Usage: %s -R repo [-d log_dir] [-w warning_age] [-c critical_age]\n' "$PROGNAME"
  printf 'Usage: %s --help\n' "$PROGNAME"
}
# Print the complete help text (synopsis plus a description of every option)
# and terminate the script with the "OK" plugin status.
# Globals: PROGNAME (read), STATE_OK (read; 0 is used when it is unset)
print_help() {
  local prog=${PROGNAME:-check_backup.sh}
  cat <<EOF
Usage: ${prog} -R repo [-d log_dir] [-w warning_age] [-c critical_age]
Usage: ${prog} --help

Check the age and the exit status of the backup runs recorded for a repository.

Options:
  -R, --repo REPO     name of the repository to check (required)
  -d, --dir DIR       directory holding the status logs (default: /var/log/backup-status)
  -w, --wage SECONDS  backup age that raises a WARNING (default: 93600, i.e. 26h)
  -c, --cage SECONDS  backup age that raises a CRITICAL (default: 187200, i.e. 52h)
  -h, --help          show this help and exit
EOF
  exit ${STATE_OK:-0}
}
# Built-in defaults; each one can be replaced by a command-line option.
repo=""
logdir=/var/log/backup-status
wage=93600  # 26h
cage=187200 # 52h

# Walk the command line. Options that take a value read it from the next
# argument; the loop ends at the first empty or missing argument.
while [ -n "$1" ]; do
  case "$1" in
    -R | --repo)
      shift
      repo=$1
      ;;
    -d | --dir)
      shift
      logdir=$1
      ;;
    -w | --wage)
      shift
      wage=$1
      ;;
    -c | --cage)
      shift
      cage=$1
      ;;
    -h | --help)
      print_help
      exit $STATE_OK
      ;;
    *)
      echo "Unknown argument: $1"
      print_usage
      exit $STATE_UNKNOWN
      ;;
  esac
  shift
done
# Stop with UNKNOWN when no repository was named or when there is no status
# directory for it. ("or" is not a shell operator, and the directory test
# has to be negated: the error case is the directory being absent.)
if [ -z "$repo" ] || [ ! -d "$logdir/$repo" ]; then
  echo "Unknown repo $repo."
  exit $STATE_UNKNOWN
fi
# Aggregated result of the check, filled in while the status files are read.
perf=""
message=""
warning=0
critical=0
unknown=0
entries=0
now=$(date +%s)

# Every file below $logdir/$repo describes one backup entry on a single line:
#   END|START|BACKUP_RC|PRUNE_RC
# START and END are presumably epoch seconds (they are compared with
# `date +%s`).
for i in "$logdir/$repo"/*; do
  # An unmatched glob stays a literal pattern; ignore anything that is not a
  # regular file.
  [ -f "$i" ] || continue
  entries=$((entries + 1))

  # Start from an empty array so a file that cannot be opened does not
  # inherit the values of the previous entry.
  STATE=()
  IFS='|' read -ra STATE < "$i"
  ENTRY=$(basename -- "$i")
  END=${STATE[0]}
  START=${STATE[1]}
  BACKUP_RC=${STATE[2]}
  PRUNE_RC=${STATE[3]}

  # Empty or non-numeric timestamps would be a syntax error in the
  # arithmetic below, so flag the entry instead of evaluating it.
  if ! [[ "$START" =~ ^[0-9]+$ && "$END" =~ ^[0-9]+$ ]]; then
    critical=1
    message="${message} - [ Status of ${ENTRY} is unreadable. ]"
    continue
  fi

  # 10# forces base 10 so a value with a leading zero is not read as octal.
  AGE=$(( now - 10#$START ))
  DURATION=$(( 10#$END - 10#$START ))

  # Return code 1 is a warning; any other non-zero (or missing) code is
  # critical.
  case "$BACKUP_RC" in
    0)
      ;;
    1)
      warning=1
      message="${message} - [ Backup of ${ENTRY} returned 1. ]"
      ;;
    *)
      critical=1
      message="${message} - [ Backup of ${ENTRY} returned ${BACKUP_RC}. ]"
      ;;
  esac
  case "$PRUNE_RC" in
    0)
      ;;
    1)
      warning=1
      message="${message} - [ Prune of ${ENTRY} returned 1. ]"
      ;;
    *)
      critical=1
      message="${message} - [ Prune of ${ENTRY} returned ${PRUNE_RC}. ]"
      ;;
  esac

  # Age is measured from the start of the run.
  if [ "${AGE}" -gt "${cage}" ]; then
    critical=1
    message="${message} - [ Age of ${ENTRY} is CRITICAL ]"
  elif [ "${AGE}" -gt "${wage}" ]; then
    warning=1
    message="${message} - [ Age of ${ENTRY} is WARNING ]"
  fi

  # Performance data: label=value[UOM];warn;crit;min;max
  perf="${perf}${ENTRY}/age=${AGE}s;${wage};${cage};0;${cage} "
  perf="${perf}${ENTRY}/duration=${DURATION}s "
done

# Without a single status file nothing can be said about the repository.
if [ "$entries" -eq 0 ]; then
  unknown=1
  echo "BACKUP UNKNOWN - [ No backup status found for ${repo}. ]"
  exit ${STATE_UNKNOWN:-3}
fi
# Pick the worst state that was recorded, then print the status line
# (with the collected messages and performance data) and exit with it.
if [[ "$critical" == "1" ]]; then
  summary="CRITICAL${message}"
  rc=$STATE_CRITICAL
elif [[ "$warning" == "1" ]]; then
  summary="WARNING${message}"
  rc=$STATE_WARNING
else
  summary="OK"
  rc=$STATE_OK
fi
echo "BACKUP ${summary}|${perf}"
exit $rc

+ 27
- 0
roles/icinga2/files/icinga2/command-backup.conf View File

@ -0,0 +1,27 @@
/*
 * Check command that runs the check_backup.sh plugin from PluginDir.
 * Each plugin option takes its value from the backup_* custom variable
 * named in the corresponding "value" macro.
 */
object CheckCommand "backup" {
command = [ PluginDir + "/check_backup.sh" ]
arguments = {
"-R" = {
value = "$backup_repo$"
description = "Name of the repository"
}
"-d" = {
value = "$backup_logdir$"
description = "Directory where backup log are saved."
}
"-w" = {
value = "$backup_wage$"
description = "Maximum backup age to return a warning status."
}
"-c" = {
value = "$backup_cage$"
description = "Maximum backup age to return a critical status."
}
}
// Command-level defaults for the variables above (backup_repo has none).
vars.backup_logdir = "/var/log/backup-status"
// Duration literals: the Icinga 2 config parser converts them to seconds.
vars.backup_wage = 26h
vars.backup_cage = 52h
}

+ 8
- 0
roles/icinga2/files/icinga2/services.conf View File

@ -143,6 +143,14 @@ apply Service "ldap_sync" {
assign where host.vars.ldap_master
}
// Create one service per key of host.vars.backup_repos; the key is appended
// to the "backup: " name prefix and handed to the check as backup_repo.
apply Service "backup: " for ( backup_repo => config in host.vars.backup_repos ) {
check_command = "backup"
vars.backup_repo = backup_repo
// Merge the per-repository dictionary into the service's custom variables.
vars += config
import "generic-agent-service"
}
apply Service "dns" {
import "generic-service"


+ 2
- 1
roles/icinga2/tasks/main.yaml View File

@ -43,6 +43,7 @@
group: 'nagios'
loop:
- 'check_ldap_syncrepl_status.pl'
- 'check_backup.sh'
- name: 'create directory for hosts configuration'
file:
@ -62,7 +63,7 @@
- 'services.conf'
- 'apt.conf'
- 'command-ldapsync.conf'
- 'command-backup.conf'
- name: 'disable local host conf.d'
file:
path: '/etc/icinga2/conf.d/hosts.conf'


+ 125
- 0
roles/monitoring-agent/files/check_backup.sh View File

@ -0,0 +1,125 @@
#!/bin/bash
#
# Monitoring plugin that reports the state of the backup runs recorded for
# one repository (see print_usage for the command line).

# Name and directory of this script; quoted so paths with spaces survive.
PROGNAME=$(basename -- "$0")
# dirname yields "." when the script is invoked without any slash in $0.
PROGPATH=$(dirname -- "$0")

# utils.sh presumably provides the STATE_* exit codes used by this script;
# load it when it sits next to the plugin.
if [ -r "$PROGPATH/utils.sh" ]; then
  . "$PROGPATH/utils.sh"
fi

# Fall back to the conventional plugin exit codes so that a missing utils.sh
# can never turn "exit $STATE_CRITICAL" into a bare "exit".
: "${STATE_OK:=0}" "${STATE_WARNING:=1}" "${STATE_CRITICAL:=2}" "${STATE_UNKNOWN:=3}"
# Write the two synopsis lines (normal invocation and --help) to stdout.
# Globals: PROGNAME (read)
print_usage() {
  printf 'Usage: %s -R repo [-d log_dir] [-w warning_age] [-c critical_age]\n' "$PROGNAME"
  printf 'Usage: %s --help\n' "$PROGNAME"
}
# Print the complete help text (synopsis plus a description of every option)
# and terminate the script with the "OK" plugin status.
# Globals: PROGNAME (read), STATE_OK (read; 0 is used when it is unset)
print_help() {
  local prog=${PROGNAME:-check_backup.sh}
  cat <<EOF
Usage: ${prog} -R repo [-d log_dir] [-w warning_age] [-c critical_age]
Usage: ${prog} --help

Check the age and the exit status of the backup runs recorded for a repository.

Options:
  -R, --repo REPO     name of the repository to check (required)
  -d, --dir DIR       directory holding the status logs (default: /var/log/backup-status)
  -w, --wage SECONDS  backup age that raises a WARNING (default: 93600, i.e. 26h)
  -c, --cage SECONDS  backup age that raises a CRITICAL (default: 187200, i.e. 52h)
  -h, --help          show this help and exit
EOF
  exit ${STATE_OK:-0}
}
# Built-in defaults; each one can be replaced by a command-line option.
repo=""
logdir=/var/log/backup-status
wage=93600  # 26h
cage=187200 # 52h

# Walk the command line. Options that take a value read it from the next
# argument; the loop ends at the first empty or missing argument.
while [ -n "$1" ]; do
  case "$1" in
    -R | --repo)
      shift
      repo=$1
      ;;
    -d | --dir)
      shift
      logdir=$1
      ;;
    -w | --wage)
      shift
      wage=$1
      ;;
    -c | --cage)
      shift
      cage=$1
      ;;
    -h | --help)
      print_help
      exit $STATE_OK
      ;;
    *)
      echo "Unknown argument: $1"
      print_usage
      exit $STATE_UNKNOWN
      ;;
  esac
  shift
done
# Stop with UNKNOWN unless a repository was named and a status directory
# exists for it.
if ! [[ -n "$repo" && -d "$logdir/$repo" ]]; then
  echo "Unknown repo $repo."
  exit $STATE_UNKNOWN
fi
# Aggregated result of the check, filled in while the status files are read.
perf=""
message=""
warning=0
critical=0
unknown=0
entries=0
now=$(date +%s)

# Every file below $logdir/$repo describes one backup entry on a single line:
#   END|START|BACKUP_RC|PRUNE_RC
# START and END are presumably epoch seconds (they are compared with
# `date +%s`).
for i in "$logdir/$repo"/*; do
  # An unmatched glob stays a literal pattern; ignore anything that is not a
  # regular file.
  [ -f "$i" ] || continue
  entries=$((entries + 1))

  # Start from an empty array so a file that cannot be opened does not
  # inherit the values of the previous entry.
  STATE=()
  IFS='|' read -ra STATE < "$i"
  ENTRY=$(basename -- "$i")
  END=${STATE[0]}
  START=${STATE[1]}
  BACKUP_RC=${STATE[2]}
  PRUNE_RC=${STATE[3]}

  # Empty or non-numeric timestamps would be a syntax error in the
  # arithmetic below, so flag the entry instead of evaluating it.
  if ! [[ "$START" =~ ^[0-9]+$ && "$END" =~ ^[0-9]+$ ]]; then
    critical=1
    message="${message} - [ Status of ${ENTRY} is unreadable. ]"
    continue
  fi

  # 10# forces base 10 so a value with a leading zero is not read as octal.
  AGE=$(( now - 10#$START ))
  DURATION=$(( 10#$END - 10#$START ))

  # Return code 1 is a warning; any other non-zero (or missing) code is
  # critical.
  case "$BACKUP_RC" in
    0)
      ;;
    1)
      warning=1
      message="${message} - [ Backup of ${ENTRY} returned 1. ]"
      ;;
    *)
      critical=1
      message="${message} - [ Backup of ${ENTRY} returned ${BACKUP_RC}. ]"
      ;;
  esac
  case "$PRUNE_RC" in
    0)
      ;;
    1)
      warning=1
      message="${message} - [ Prune of ${ENTRY} returned 1. ]"
      ;;
    *)
      critical=1
      message="${message} - [ Prune of ${ENTRY} returned ${PRUNE_RC}. ]"
      ;;
  esac

  # Age is measured from the start of the run.
  if [ "${AGE}" -gt "${cage}" ]; then
    critical=1
    message="${message} - [ Age of ${ENTRY} is CRITICAL ]"
  elif [ "${AGE}" -gt "${wage}" ]; then
    warning=1
    message="${message} - [ Age of ${ENTRY} is WARNING ]"
  fi

  # Performance data: label=value[UOM];warn;crit;min;max
  perf="${perf}${ENTRY}/age=${AGE}s;${wage};${cage};0;${cage} "
  perf="${perf}${ENTRY}/duration=${DURATION}s "
done

# Without a single status file nothing can be said about the repository.
if [ "$entries" -eq 0 ]; then
  unknown=1
  echo "BACKUP UNKNOWN - [ No backup status found for ${repo}. ]"
  exit ${STATE_UNKNOWN:-3}
fi
# Pick the worst state that was recorded, then print the status line
# (with the collected messages and performance data) and exit with it.
if [[ "$critical" == "1" ]]; then
  summary="CRITICAL${message}"
  rc=$STATE_CRITICAL
elif [[ "$warning" == "1" ]]; then
  summary="WARNING${message}"
  rc=$STATE_WARNING
else
  summary="OK"
  rc=$STATE_OK
fi
echo "BACKUP ${summary}|${perf}"
exit $rc

+ 8
- 3
roles/monitoring-agent/tasks/main.yaml View File

@ -8,11 +8,16 @@
tags:
- 'packages'
- name: 'add monitoring plugin - check_mem.pl'
- name: 'add monitoring plugins'
copy:
src: 'check_mem.pl'
dest: '/usr/lib/nagios/plugins/check_mem.pl'
src: '{{ item }}'
dest: '/usr/lib/nagios/plugins/{{ item }}'
mode: '0755'
loop:
- 'check_mem.pl'
- 'check_backup.sh'
tags:
- 'monitoring'
- name: 'create monitoring user'
user:


Loading…
Cancel
Save