mirror of
https://github.com/pengutronix/monitoring-check-systemd-service.git
synced 2025-07-23 13:54:44 +02:00
initial version ... does some good, but ...
* it always gives debug output. WTF * We have no scalars, so using ScalarContext is bullshit
This commit is contained in:
parent
b2233707a3
commit
d0043702df
176
check-systemd-service
Executable file
176
check-systemd-service
Executable file
@ -0,0 +1,176 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""Nagios plugin to check a systemd service on different properties"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import datetime
|
||||||
|
import collections
|
||||||
|
|
||||||
|
try:
|
||||||
|
import nagiosplugin
|
||||||
|
except ImportError as e:
|
||||||
|
print("Please install python3-nagiosplugin")
|
||||||
|
raise e
|
||||||
|
|
||||||
|
try:
|
||||||
|
from gi.repository.Gio import DBusProxy, BusType
|
||||||
|
except ImportError as e:
|
||||||
|
print("Please install python-gi")
|
||||||
|
raise e
|
||||||
|
|
||||||
|
_log = logging.getLogger('nagiosplugin')
|
||||||
|
|
||||||
|
class E_MyException(Exception):
|
||||||
|
def __init__(self, value):
|
||||||
|
self.value = value
|
||||||
|
def __str__(self):
|
||||||
|
return "do some things with value %r" %repr(self.value)
|
||||||
|
|
||||||
|
class Systemd_Service(nagiosplugin.Resource):
|
||||||
|
"""One Systemd Service"""
|
||||||
|
|
||||||
|
def __init__(self, **kwords):
|
||||||
|
_log.debug('Initializing Systemd_Service with %r', (kwords))
|
||||||
|
for key, value in kwords.items():
|
||||||
|
self.__setattr__(key, value)
|
||||||
|
self.normalize()
|
||||||
|
|
||||||
|
def connect_systemd(self):
|
||||||
|
# initializing systemd dbus connection
|
||||||
|
systemd = DBusProxy.new_for_bus_sync(BusType.SYSTEM,
|
||||||
|
0,
|
||||||
|
None,
|
||||||
|
'org.freedesktop.systemd1',
|
||||||
|
'/org/freedesktop/systemd1',
|
||||||
|
'org.freedesktop.systemd1.Manager',
|
||||||
|
None)
|
||||||
|
try:
|
||||||
|
loadedUnit = systemd.LoadUnit('(s)', self.unit)
|
||||||
|
except Exception as e:
|
||||||
|
_log.error(e)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
service = DBusProxy.new_for_bus_sync(BusType.SYSTEM,
|
||||||
|
0,
|
||||||
|
None,
|
||||||
|
'org.freedesktop.systemd1',
|
||||||
|
loadedUnit,
|
||||||
|
'org.freedesktop.systemd1.Unit',
|
||||||
|
None)
|
||||||
|
self.service = service
|
||||||
|
self.activeStateD = collections.defaultdict(lambda: None, {
|
||||||
|
'active': 1.0,
|
||||||
|
'reloading': 0.9,
|
||||||
|
'activating': 0.8,
|
||||||
|
'deactivating': 0.4,
|
||||||
|
'inactive': 0.0,
|
||||||
|
'failed': 0.0,
|
||||||
|
})
|
||||||
|
|
||||||
|
def normalize(self):
|
||||||
|
if self.unit.find('.') < 0:
|
||||||
|
self.unit = self.unit + '.service'
|
||||||
|
_log.debug('Normalized unitname to check to %r', self.unit)
|
||||||
|
else:
|
||||||
|
_log.debug('Found \'.\' in ServiceName, so assuming you know what youre asking for')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self):
|
||||||
|
"""formatting the Testname (will be formatted as uppercase letters)"""
|
||||||
|
return "SYSTEMD SERVICE %s SERVICESTATE" % (self.unit.split('.service')[0])
|
||||||
|
|
||||||
|
|
||||||
|
def activestate(self):
|
||||||
|
"""
|
||||||
|
ActiveState contains a state value that reflects whether the unit is
|
||||||
|
currently active or not. The following states are currently defined:
|
||||||
|
active, reloading, inactive, failed, activating, deactivating. active
|
||||||
|
indicates that unit is active (obviously...). reloading indicates
|
||||||
|
that the unit is active and currently reloading its configuration.
|
||||||
|
inactive indicates that it is inactive and the previous run was
|
||||||
|
successful or no previous run has taken place yet. failed indicates
|
||||||
|
that it is inactive and the previous run was not successful (more
|
||||||
|
information about the reason for this is available on the unit type
|
||||||
|
specific interfaces, for example for services in the Result property,
|
||||||
|
see below). activating indicates that the unit has previously been
|
||||||
|
inactive but is currently in the process of entering an active state.
|
||||||
|
Conversely deactivating indicates that the unit is currently in the
|
||||||
|
process of deactivation.
|
||||||
|
"""
|
||||||
|
t = self.service.get_cached_property('ActiveState').unpack()
|
||||||
|
_log.debug('ServiceState of %r is %r', self.service, t)
|
||||||
|
return t
|
||||||
|
|
||||||
|
def substate(self):
|
||||||
|
"""
|
||||||
|
SubState encodes states of the same state machine that ActiveState
|
||||||
|
covers, but knows more fine-grained states that are unit-type-specific.
|
||||||
|
Where ActiveState only covers six high-level states, SubState covers
|
||||||
|
possibly many more low-level unit-type-specific states that are mapped
|
||||||
|
to the six high-level states. Note that multiple low-level states might
|
||||||
|
map to the same high-level state, but not vice versa. Not all
|
||||||
|
high-level states have low-level counterparts on all unit types. At
|
||||||
|
this point the low-level states are not documented here, and are more
|
||||||
|
likely to be extended later on than the common high-level states
|
||||||
|
explained above.
|
||||||
|
"""
|
||||||
|
t = service.get_cached_property('SubState').unpack()
|
||||||
|
_log.debug('Substate of %r is %r', self.service, t)
|
||||||
|
return t
|
||||||
|
|
||||||
|
def probe(self):
|
||||||
|
""" Create check metric for Systemd Service"""
|
||||||
|
self.connect_systemd()
|
||||||
|
self.running_service_found = self.activeStateD[self.activestate()]
|
||||||
|
yield nagiosplugin.Metric('running_service_found', self.running_service_found, min=0, max=1)
|
||||||
|
|
||||||
|
class Bool_Fmt_Metric(object):
|
||||||
|
"""print a message for a bool-metric """
|
||||||
|
|
||||||
|
def __init__(self, msg_success, msg_fail):
|
||||||
|
self.msg_success = msg_success
|
||||||
|
self.msg_fail = msg_fail
|
||||||
|
|
||||||
|
def __call__(self, metric, context):
|
||||||
|
_log.debug('UOM: %r', metric.uom)
|
||||||
|
if metric.value:
|
||||||
|
return self.msg_success
|
||||||
|
else:
|
||||||
|
return self.msg_fail
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@nagiosplugin.guarded
|
||||||
|
def main():
|
||||||
|
argp = argparse.ArgumentParser(description=__doc__,
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
|
)
|
||||||
|
argp.add_argument('-v', '--verbose', action='count', default=0,
|
||||||
|
help='increase output verbosity (use up to 3 times)')
|
||||||
|
argp.add_argument('-t', '--timeout', default=10,
|
||||||
|
help='abort execution after TIMEOUT seconds')
|
||||||
|
argp.add_argument('unit', help='Check this Unit')
|
||||||
|
args = argp.parse_args()
|
||||||
|
_log.debug('Found arguments %r', args)
|
||||||
|
check = nagiosplugin.Check(
|
||||||
|
Systemd_Service(**vars(args)),
|
||||||
|
nagiosplugin.ScalarContext('running_service_found',
|
||||||
|
warning='0.2:1',
|
||||||
|
critical='0.8:1',
|
||||||
|
fmt_metric=Bool_Fmt_Metric('Running service found!',
|
||||||
|
'Service not running!')
|
||||||
|
),
|
||||||
|
)
|
||||||
|
check.main(args.verbose, args.timeout)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user