monitoring-check-systemd-se.../check-systemd-service
2016-06-24 17:12:26 +02:00

185 lines
7.2 KiB
Python
Executable File

#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Nagios plugin to check a systemd service on different properties"""
import argparse
import logging
import subprocess
import os
import datetime
import collections
try:
import nagiosplugin
except ImportError as e:
print("Please install python3-nagiosplugin")
raise e
try:
from gi.repository.Gio import DBusProxy, BusType
except ImportError as e:
print("Please install python-gi")
raise e
_log = logging.getLogger('nagiosplugin')
class E_MyException(Exception):
    """Exception that carries an arbitrary payload in ``value``.

    Args:
        value: Object describing what went wrong; it is stored unchanged on
            the instance and shown via ``repr()`` in the string form.
    """

    def __init__(self, value):
        # Hand the payload to Exception as well so ``args`` is populated
        # through the regular constructor path.
        super().__init__(value)
        self.value = value

    def __str__(self):
        # ``%r`` already applies repr(); the previous ``% repr(value)``
        # quoted the payload twice.  The 1-tuple keeps tuple payloads from
        # being unpacked as multiple format arguments.
        return "do some things with value %r" % (self.value,)
class Systemd_Service(nagiosplugin.Resource):
    """One Systemd Service

    Every keyword argument passed to the constructor becomes an instance
    attribute.  ``unit`` has to be among them, because the unit name is
    normalized as part of construction.
    """

    def __init__(self, **kwords):
        """Store all keyword arguments as attributes, then normalize ``unit``."""
        _log.debug('Initializing Systemd_Service with %r', (kwords))
        for key, value in kwords.items():
            setattr(self, key, value)
        self.normalize()

    def connect_systemd(self):
        """Create the D-Bus proxies for the systemd manager and ``self.unit``.

        Sets ``self.service`` to a proxy for the unit's
        ``org.freedesktop.systemd1.Unit`` interface and ``self.activeStateD``
        to a mapping from ActiveState name to a numeric score (``None`` for
        any state that is not listed).

        Raises:
            Exception: whatever ``LoadUnit`` raises; it is logged first.
        """
        # initializing systemd dbus connection
        systemd = DBusProxy.new_for_bus_sync(BusType.SYSTEM,
                                             0,
                                             None,
                                             'org.freedesktop.systemd1',
                                             '/org/freedesktop/systemd1',
                                             'org.freedesktop.systemd1.Manager',
                                             None)
        try:
            unit_path = systemd.LoadUnit('(s)', self.unit)
        except Exception as e:
            _log.error(e)
            # Bare raise re-raises the active exception unchanged.
            raise
        # LoadUnit's return value is used as the object path of the unit.
        self.service = DBusProxy.new_for_bus_sync(BusType.SYSTEM,
                                                  0,
                                                  None,
                                                  'org.freedesktop.systemd1',
                                                  unit_path,
                                                  'org.freedesktop.systemd1.Unit',
                                                  None)
        self.activeStateD = collections.defaultdict(lambda: None, {
            'active': 1.0,
            'reloading': 0.9,
            'activating': 0.8,
            'deactivating': 0.4,
            'inactive': 0.0,
            'failed': 0.0,
        })

    def normalize(self):
        """Append ``.service`` to ``self.unit`` when it contains no dot."""
        if '.' not in self.unit:
            self.unit = self.unit + '.service'
            _log.debug('Normalized unitname to check to %r', self.unit)
        else:
            _log.debug('Found \'.\' in ServiceName, so assuming you know what youre asking for')

    @property
    def name(self):
        """Name of the check, built from the unit name.

        A trailing ``.service`` is dropped; any other unit name is used as
        is.  (The original note says the name will be formatted as uppercase
        letters in the output.)
        """
        suffix = '.service'
        unit = self.unit
        # Only strip the suffix at the very end: splitting on '.service'
        # would also truncate names that merely contain that text, such as
        # 'foo.service-helper.socket'.
        if unit.endswith(suffix):
            unit = unit[:-len(suffix)]
        return "SYSTEMD SERVICE %s SERVICESTATE" % (unit,)

    def activestate(self):
        """
        Return the cached ``ActiveState`` property of the unit proxy.

        ActiveState contains a state value that reflects whether the unit is
        currently active or not. The following states are currently defined:
        active, reloading, inactive, failed, activating, deactivating. active
        indicates that unit is active (obviously...). reloading indicates
        that the unit is active and currently reloading its configuration.
        inactive indicates that it is inactive and the previous run was
        successful or no previous run has taken place yet. failed indicates
        that it is inactive and the previous run was not successful (more
        information about the reason for this is available on the unit type
        specific interfaces, for example for services in the Result property,
        see below). activating indicates that the unit has previously been
        inactive but is currently in the process of entering an active state.
        Conversely deactivating indicates that the unit is currently in the
        process of deactivation.
        """
        state = self.service.get_cached_property('ActiveState').unpack()
        _log.debug('ServiceState of %r is %r', self.service, state)
        return state

    def substate(self):
        """
        Return the cached ``SubState`` property of the unit proxy.

        SubState encodes states of the same state machine that ActiveState
        covers, but knows more fine-grained states that are unit-type-specific.
        Where ActiveState only covers six high-level states, SubState covers
        possibly many more low-level unit-type-specific states that are mapped
        to the six high-level states. Note that multiple low-level states might
        map to the same high-level state, but not vice versa. Not all
        high-level states have low-level counterparts on all unit types. At
        this point the low-level states are not documented here, and are more
        likely to be extended later on than the common high-level states
        explained above.
        """
        state = self.service.get_cached_property('SubState').unpack()
        _log.debug('Substate of %r is %r', self.service, state)
        return state

    def probe(self):
        """Create check metric for Systemd Service

        Connects to systemd, then yields a single ``service_state`` metric
        whose value is the tuple ``(ActiveState, SubState)``.  The tuple is
        also stored in ``self.service_state`` and the numeric score of the
        active state in ``self.running_service_found``.
        """
        self.connect_systemd()
        # Read ActiveState once so the score and the reported tuple are
        # guaranteed to describe the same state.
        active_state = self.activestate()
        self.running_service_found = self.activeStateD[active_state]
        self.service_state = (active_state, self.substate())
        # The numeric score is only kept on the instance; it is not emitted
        # as a metric.
        yield nagiosplugin.Metric('service_state', self.service_state)
class Service_Fmt_Metric(object):
    """Format a metric with a success or a failure message template.

    Both templates are ``str.format`` strings that receive the metric value
    as ``v`` and its unit of measure as ``u``.

    Args:
        msg_success: Template used when the metric counts as successful.
        msg_fail: Template used otherwise.
        ok_states: Values of the first element of a tuple/list metric that
            count as successful.  Only consulted for tuple/list values.
    """

    def __init__(self, msg_success, msg_fail,
                 ok_states=('active', 'reloading')):
        self.msg_success = msg_success
        self.msg_fail = msg_fail
        self.ok_states = tuple(ok_states)

    def _is_success(self, value):
        """Decide which of the two templates applies to ``value``."""
        # A non-empty tuple such as ('failed', 'failed') is always truthy, so
        # plain truthiness would pick the success message for every state.
        # Judge sequence values by their first element instead.
        if isinstance(value, (tuple, list)):
            return bool(value) and value[0] in self.ok_states
        return bool(value)

    def __call__(self, metric, context):
        """Return the formatted message for ``metric``; ``context`` is unused."""
        # Same logger the module-level ``_log`` refers to, fetched by name so
        # this class does not depend on a module global.
        logging.getLogger('nagiosplugin').debug(
            'Value: %r UOM: %r', metric.value, metric.uom)
        if self._is_success(metric.value):
            template = self.msg_success
        else:
            template = self.msg_fail
        return template.format(v=metric.value, u=metric.uom)
class Service_Result(nagiosplugin.Result):
    """Result subclass that adds nothing to ``nagiosplugin.Result``."""
class _Service_State_Context(nagiosplugin.Context):
    """Context that turns the ``service_state`` metric into a Nagios state.

    The base ``nagiosplugin.Context.evaluate`` reports OK for every metric,
    so a check that registers the plain base class can never alert.  This
    subclass looks at the ActiveState, which is the first element of the
    ``(ActiveState, SubState)`` metric value.
    """

    # ActiveState values in which the unit counts as running.
    # NOTE(review): transitional states (activating/deactivating) are
    # reported as CRITICAL here - confirm that this is the desired policy.
    OK_STATES = ('active', 'reloading')

    def evaluate(self, metric, resource):
        """Return an OK result for a running unit, CRITICAL otherwise."""
        if metric.value[0] in self.OK_STATES:
            state = nagiosplugin.Ok
        else:
            state = nagiosplugin.Critical
        return self.result_cls(state, metric=metric)


@nagiosplugin.guarded
def main():
    """Parse the command line and run the systemd service check.

    Command line:
        -v / --verbose  may be repeated to raise the output verbosity
        -t / --timeout  abort after this many seconds (default 10)
        unit            name of the unit to check
    """
    argp = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    argp.add_argument('-v', '--verbose', action='count', default=0,
                      help='increase output verbosity (use up to 3 times)')
    # type=int lets argparse reject non-numeric input and keeps the value
    # the same type as the default.
    argp.add_argument('-t', '--timeout', type=int, default=10,
                      help='abort execution after TIMEOUT seconds')
    argp.add_argument('unit', help='Check this Unit')
    args = argp.parse_args()
    _log.debug('Found arguments %r', args)

    # All parsed arguments (verbose, timeout, unit) are handed to the
    # resource, which stores them as attributes.
    check = nagiosplugin.Check(
        Systemd_Service(**vars(args)),
        _Service_State_Context(
            'service_state',
            fmt_metric=Service_Fmt_Metric(
                'Service is running. State is {v[0]}, {v[1]}',
                'Service is broken! State is {v[0]}, {v[1]}'),
        ),
    )
    check.main(args.verbose, args.timeout)


if __name__ == '__main__':
    main()