pandorafms/pandora_agents/unix/plugins/autodiscover

350 lines
16 KiB
Python
Executable File

#!/usr/bin/env python3
###################################################
#
# Pandora FMS Autodiscovery plugin.
# Checks the status of the services in list and monitors CPU and Memory for each of them.
#
# (c) A. Kevin Rojas <kevin.rojas@pandorafms.com>
#
# Edited in 2023 by Alejandro Sánchez <alejandro.sanchez@pandorafms.com>
#
# TO DO LIST:
# - Enable child services detection (Windows)
# - Make CPU/Memory usage available for child services (Windows)
#
###################################################
try:
from sys import argv
from sys import stderr
from sys import exit
import psutil
from subprocess import Popen
from subprocess import PIPE
from subprocess import DEVNULL
from subprocess import getstatusoutput
import signal
except ModuleNotFoundError as err:
print("{} error: {}. Exiting...".format(argv[0], err), file=stderr)
exit(1)
module_list = []
VERSION = "1.3"
# Define a function to handle the SIGTERM signal
def sigterm_handler(signum, frame):
print("Received SIGTERM signal. Cleaning up...")
sys.exit(0)
signal.signal(signal.SIGTERM, sigterm_handler)
def win_service(servicelist, option=False, memcpu=False):
modules_default = []
modules_percentage=[]
## take all services
services=psutil.win_service_iter()
for service in services:
if service.name() in servicelist:
serv=service.as_dict()
if serv['status']=='running':
value=1
else:
value=0
## create module for each service
parent = build_module("Service " + str(serv['name']) + " - Status", value,"generic_proc")
modules_default +=parent
# memory and cpu percentage
if memcpu:
## process
srv_pid = service.pid()
process = psutil.Process(srv_pid)
proc_name = process.name()
##cpu
value_cpu=process.cpu_percent(interval=0.5)
parent = build_module("Service " + str(proc_name) + " - CPU usage", value_cpu,"generic_data")
parent[0].update([("unit","%"),("module_parent",str(serv['name']))])
modules_percentage +=parent
##mem
value_mem=process.memory_percent()
parent = build_module("Service " + str(proc_name) + " - Memory usage", value_mem,"generic_data")
parent[0].update([("unit","%"),("module_parent",str(serv['name']))])
modules_percentage +=parent
for module in modules_default:
print_module(module, 1)
if memcpu:
for module in modules_percentage:
print_module(module, 1)
def lnx_service(services_list, memcpu=False):
"""Creates modules for Linux servers"""
modules = []
sysctl = getstatusoutput("systemd-notify --booted")[0]
servic = getstatusoutput("command -v service")[0]
for srvc in services_list:
status = None
if sysctl == 0:
### Systemd available
syscall = Popen(["systemctl", "show", "-pLoadState", "-pActiveState", srvc], stdout=PIPE,
stdin=DEVNULL, universal_newlines=True)
result = syscall.communicate()
srvstatus = result[0].strip().lower().split("\n")
if srvstatus[0] == "loadstate=not-found":
next
else:
if srvstatus[1] == "activestate=active":
modules += build_module("Service " + srvc + " - Status", 1, "generic_proc")
status = 1
elif srvstatus[1] == "activestate=inactive":
modules += build_module("Service " +srvc+ " - Status", 0, "generic_proc")
status = 0
elif sysctl != 0 and servic == 0:
### Systemd not available, switch to service command
syscall = Popen(["service", srvc, "status"], stdout=PIPE,
stdin=DEVNULL, stderr=DEVNULL, universal_newlines=True)
result = syscall.communicate()[0].lower()
if "is running" in result:
modules += build_module("Service " + srvc + " - Status", 1, "generic_proc")
status = 1
elif "is stopped" in result:
modules += build_module("Service " +srvc+ " - Status", 0, "generic_proc")
status = 0
else:
next
else:
print("No systemd or service commands available. Exiting...", file=stderr)
exit()
if status:
module_list.append(srvc)
if memcpu:
modules += get_memcpu(srvc, None)
for m in modules:
print_module(m, 1)
def build_module(name, value, module_type, parent=None):
#print ("service_module BEGIN "+str(now(0,1)))
module = [{
"name" : name ,
"type" : module_type,
"value" : value,
"module_parent" : parent,
}]
#print ("service_module END "+str(now(0,1)))
return module
def get_memcpu (process, servicename=None):
"""Creates a module for Memory and CPU for a given process. Returns a list of dictionaries."""
modules = []
if process:
if servicename is not None:
parentname = servicename
else:
parentname = process
modules += [{
"name" : "Service "+ process + " - Memory usage",
"type" : "generic_data",
"value" : proc_percentbyname(process)[0],
"unit" : "%",
"module_parent" : "Service "+ parentname + " - Status",
},
{"name" : "Service "+ process + " - CPU usage",
"type" : "generic_data",
"value" : proc_percentbyname(process)[1],
"unit" : "%",
"module_parent" : "Service "+ parentname + " - Status",
}]
return modules
def proc_percentbyname(procname): ############# 03/03/2020
"""Gets Memory and CPU usage for a given process. Returns a list."""
#print ("proc_percentbyname BEGIN "+str(now(0,1)))
procs = [p for p in psutil.process_iter() if procname in p.name().lower()]
memory = []
cpu = []
try:
for proc in procs:
if proc.name() == procname:
cpu.append(proc.cpu_percent(interval=0.5))
memory.append(proc.memory_percent())
else:
next
except psutil.NoSuchProcess:
next
#print ("proc_percentbyname END "+str(now(0,1)))
return [sum(memory),sum(cpu)]
#########################################################################################
# print_module function
#########################################################################################
def print_module(module, str_flag=False):
"""Returns module in XML format. Accepts only {dict}.\n
+ Only works with one module at a time: otherwise iteration is needed.
+ Module "value" field accepts str type or [list] for datalists.
+ Use not_print_flag to avoid printing the XML (only populates variables).
"""
data = dict(module)
module_xml = ("<module>\n"
"\t<name><![CDATA[" + str(data["name"]) + "]]></name>\n"
"\t<type>" + str(data["type"]) + "</type>\n"
)
#### Strip spaces if module not generic_data_string
if type(data["type"]) is not str and "string" not in data["type"]:
data["value"] = data["value"].strip()
if isinstance(data["value"], list): # Checks if value is a list
module_xml += "\t<datalist>\n"
for value in data["value"]:
if type(value) is dict and "value" in value:
module_xml += "\t<data>\n"
module_xml += "\t\t<value><![CDATA[" + str(value["value"]) + "]]></value>\n"
if "timestamp" in value:
module_xml += "\t\t<timestamp><![CDATA[" + str(value["timestamp"]) + "]]></timestamp>\n"
module_xml += "\t</data>\n"
else:
module_xml += "\t<data><![CDATA[" + str(data["value"]) + "]]></data>\n"
if "desc" in data:
module_xml += "\t<description><![CDATA[" + str(data["desc"]) + "]]></description>\n"
if "unit" in data:
module_xml += "\t<unit><![CDATA[" + str(data["unit"]) + "]]></unit>\n"
if "interval" in data:
module_xml += "\t<module_interval><![CDATA[" + str(data["interval"]) + "]]></module_interval>\n"
if "tags" in data:
module_xml += "\t<tags>" + str(data["tags"]) + "</tags>\n"
if "module_group" in data:
module_xml += "\t<module_group>" + str(data["module_group"]) + "</module_group>\n"
if "module_parent" in data and data["module_parent"] != None:
module_xml += "\t<module_parent>" + str(data["module_parent"]) + "</module_parent>\n"
if "min_warning" in data:
module_xml += "\t<min_warning><![CDATA[" + str(data["min_warning"]) + "]]></min_warning>\n"
if "max_warning" in data:
module_xml += "\t<max_warning><![CDATA[" + str(data["max_warning"]) + "]]></max_warning>\n"
if "min_critical" in data:
module_xml += "\t<min_critical><![CDATA[" + str(data["min_critical"]) + "]]></min_critical>\n"
if "max_critical" in data:
module_xml += "\t<max_critical><![CDATA[" + str(data["max_critical"]) + "]]></max_critical>\n"
if "str_warning" in data:
module_xml += "\t<str_warning><![CDATA[" + str(data["str_warning"]) + "]]></str_warning>\n"
if "str_critical" in data:
module_xml += "\t<str_critical><![CDATA[" + str(data["str_critical"]) + "]]></str_critical>\n"
if "critical_inverse" in data:
module_xml += "\t<critical_inverse><![CDATA[" + str(data["critical_inverse"]) + "]]></critical_inverse>\n"
if "warning_inverse" in data:
module_xml += "\t<warning_inverse><![CDATA[" + str(data["warning_inverse"]) + "]]></warning_inverse>\n"
if "max" in data:
module_xml += "\t<max><![CDATA[" + str(data["max"]) + "]]></max>\n"
if "min" in data:
module_xml += "\t<min><![CDATA[" + str(data["min"]) + "]]></min>\n"
if "post_process" in data:
module_xml += "\t<post_process><![CDATA[" + str(data["post_process"]) + "]]></post_process>\n"
if "disabled" in data:
module_xml += "\t<disabled><![CDATA[" + str(data["disabled"]) + "]]></disabled>\n"
if "min_ff_event" in data:
module_xml += "\t<min_ff_event><![CDATA[" + str(data["min_ff_event"]) + "]]></min_ff_event>\n"
if "status" in data:
module_xml += "\t<status><![CDATA[" + str(data["status"]) + "]]></status>\n"
if "timestamp" in data:
module_xml += "\t<timestamp><![CDATA[" + str(data["timestamp"]) + "]]></timestamp>\n"
if "custom_id" in data:
module_xml += "\t<custom_id><![CDATA[" + str(data["custom_id"]) + "]]></custom_id>\n"
if "critical_instructions" in data:
module_xml += "\t<critical_instructions><![CDATA[" + str(data["critical_instructions"]) + "]]></critical_instructions>\n"
if "warning_instructions" in data:
module_xml += "\t<warning_instructions><![CDATA[" + str(data["warning_instructions"]) + "]]></warning_instructions>\n"
if "unknown_instructions" in data:
module_xml += "\t<unknown_instructions><![CDATA[" + str(data["unknown_instructions"]) + "]]></unknown_instructions>\n"
if "quiet" in data:
module_xml += "\t<quiet><![CDATA[" + str(data["quiet"]) + "]]></quiet>\n"
if "module_ff_interval" in data:
module_xml += "\t<module_ff_interval><![CDATA[" + str(data["module_ff_interval"]) + "]]></module_ff_interval>\n"
if "crontab" in data:
module_xml += "\t<crontab><![CDATA[" + str(data["crontab"]) + "]]></crontab>\n"
if "min_ff_event_normal" in data:
module_xml += "\t<min_ff_event_normal><![CDATA[" + str(data["min_ff_event_normal"]) + "]]></min_ff_event_normal>\n"
if "min_ff_event_warning" in data:
module_xml += "\t<min_ff_event_warning><![CDATA[" + str(data["min_ff_event_warning"]) + "]]></min_ff_event_warning>\n"
if "min_ff_event_critical" in data:
module_xml += "\t<min_ff_event_critical><![CDATA[" + str(data["min_ff_event_critical"]) + "]]></min_ff_event_critical>\n"
if "ff_type" in data:
module_xml += "\t<ff_type><![CDATA[" + str(data["ff_type"]) + "]]></ff_type>\n"
if "ff_timeout" in data:
module_xml += "\t<ff_timeout><![CDATA[" + str(data["ff_timeout"]) + "]]></ff_timeout>\n"
if "each_ff" in data:
module_xml += "\t<each_ff><![CDATA[" + str(data["each_ff"]) + "]]></each_ff>\n"
if "module_parent_unlink" in data:
module_xml += "\t<module_parent_unlink><![CDATA[" + str(data["parent_unlink"]) + "]]></module_parent_unlink>\n"
if "global_alerts" in data:
for alert in data["alert"]:
module_xml += "\t<alert_template><![CDATA[" + alert + "]]></alert_template>\n"
module_xml += "</module>\n"
#### Print flag
if str_flag is not False:
print(module_xml)
return (module_xml)
#########################################################################################
# MAIN
#########################################################################################
def main():
"""Checks OS and calls the discover function."""
if psutil.WINDOWS:
OS = "Windows"
service_list = ["MySQL", "postgresql", "pgsql", "oracle", "MSSQL", "IISADMIN",
"apache", "nginx", "W3svc", "NTDS", "Netlogon", "DNS", "MSExchangeADTopology",
"MSExchangeServiceHost", "MSExchangeSA", "MSExchangeTransport"]
discover(OS, service_list)
elif psutil.LINUX:
OS = "Linux"
service_list = ["httpd", "apache2", "nginx", "ldap", "docker",
"postfix", "mysqld", "postgres", "oracle", "mongod"]
discover(OS, service_list)
else:
print("OS not recognized. Exiting...", file=stderr)
exit()
def discover(osyst, servicelist):
"""Shows help and triggers the creation of service modules"""
if "--usage" in argv:
memcpu = True
else:
memcpu = False
if len(argv) > 2 and argv[1] == "--list":
servicelist = argv[2].split(",")
if osyst == "Windows":
win_service(servicelist, False, memcpu) ## False won't get children
elif osyst == "Linux":
lnx_service(servicelist, memcpu)
elif len(argv) > 1 and argv[1] == "--default":
if osyst == "Windows":
win_service(servicelist, False, memcpu) ## False won't get children
elif osyst == "Linux":
lnx_service(servicelist, memcpu)
else:
print("\nPandora FMS Autodiscovery plugin v{}".format(VERSION))
print("Checks the status of the services in list and monitors CPU and Memory for each of them.\n")
print("Usage:")
print("{} [options] [--usage]".format(argv[0]))
print("--help")
print("\tPrints this help screen")
print("--default")
print("\tRuns this tool with default monitoring.")
print("\tServices monitored by default for {}:".format(osyst))
print("\t", ", ".join(servicelist))
print("--list \"<srvc1,srvc2,srvc3>\"")
print("\tReplaces default services for a given list (comma-separated)")
if osyst == "Windows":
print("\tEach element of the list will be treated as a regexp, but they must be over 2 characters.")
print("\tElements under 2 characters will be discarded.")
print("--usage")
print("\tAdds modules for CPU and Memory usage per service/process (optional, can take some time).\n")
##### RUN ####
main()