pandorafms/pandora_server/bin/pandora_server

433 lines
14 KiB
Perl
Executable File

#!/usr/bin/perl
##########################################################################
# Pandora FMS Server
# Pandora FMS. the Flexible Monitoring System. http://www.pandorafms.org
##########################################################################
# Copyright (c) 2005-2011 Artica Soluciones Tecnologicas S.L
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
##########################################################################
use strict;
use warnings;
use POSIX qw(strftime);
# Default lib dir for RPM and DEB packages
use lib '/usr/lib/perl5';
# Pandora Modules
use PandoraFMS::DB;
use PandoraFMS::Config;
use PandoraFMS::Tools;
use PandoraFMS::Core;
use PandoraFMS::DataServer;
use PandoraFMS::NetworkServer;
use PandoraFMS::SNMPServer;
use PandoraFMS::ReconServer;
use PandoraFMS::WMIServer;
use PandoraFMS::PluginServer;
use PandoraFMS::PredictionServer;
# Global vars
my %Config;
my @Servers;
my $DBH;
########################################################################################
# Server shutdown. Handler to do a controlled shutdown.
########################################################################################
sub pandora_shutdown () {
logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' Shutdown by signal ', 1);
# Stop servers
foreach my $server (@Servers) {
$server->downEvent ();
$server->stop ();
}
# Stop the netflow daemon
pandora_stop_netflow_daemon ();
print_message (\%Config, ' [*] Shutting down ' . $Config{'servername'} . "(received signal)...\n", 1);
db_disconnect ($DBH);
if ($Config{'PID'} ne "") {
unlink($Config{'PID'}) or logger (\%Config, "[W] Could not remove PID file: $!",1);
}
exit (0);
}
########################################################################################
# Server startup.
########################################################################################
sub pandora_startup () {
# Start logging
pandora_start_log (\%Config);
# Connect to the DB
$DBH = db_connect ($Config{'dbengine'}, $Config{'dbname'}, $Config{'dbhost'}, $Config{'dbport'},
$Config{'dbuser'}, $Config{'dbpass'});
# Grab config tokens shared with the console and not in the .conf
pandora_get_sharedconfig (\%Config, $DBH);
pandora_audit (\%Config, 'Pandora FMS Server Daemon starting', 'SYSTEM', 'System', $DBH);
# Load servers
pandora_reset_server (\%Config, $DBH);
push (@Servers, new PandoraFMS::DataServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::NetworkServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::ReconServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::SNMPServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::WMIServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::PluginServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::PredictionServer (\%Config, $DBH));
enterprise_hook('load_enterprise_servers', [\@Servers, \%Config, $DBH]);
# Start the netflow daemon if necessary
pandora_start_netflow_daemon ();
# Remove disabled servers
@Servers = grep { defined ($_) } @Servers;
# Run
foreach my $server (@Servers) {
$server->run ();
}
}
########################################################################################
# Server restart.
########################################################################################
sub pandora_restart () {
# Stop the servers
foreach my $server (@Servers) {
$server->stop ();
}
# Remove the servers
while (pop (@Servers)) {};
# Close STDERR, redirected by pandora_start_log
close (STDERR);
# Wait before trying to start again
sleep ($Config{'restart_delay'});
# Start the servers
pandora_startup ();
}
########################################################################################
# Server crash. Handler to write in the log unhandled errors and write it to console
########################################################################################
sub pandora_crash () {
my $full_error = "";
# Avoid show messages about enterprise library loading failurem, VERY
# confussing, all of them are warnigs and not critical, and user should be
# worried about that. If perl has a more "clean" way to avoid this messages
# will be nice to replace this code, but at this time it's the only way I know
foreach my $error_line (@_) {
# Trap the XML error and exit without nasty messages
if ($error_line =~ m/XML\/Parser/) {
logger (\%Config, "Problem parsing XML file, XML file discarded: $error_line", 2);
return;
}
elsif ($error_line !~ m/Enterprise/i && $error_line !~ m/Format_XS/i && $error_line !~ m/ConfigLocal/i){
logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1);
}
else {
if ($error_line !~ m/Can\'t\slocate/) {
logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1);
}
else {
# Known errors of loading Enterprise, Format_XS and ConfigLocal
# modules, non fatal.
return;
}
}
$full_error .= $error_line;
}
logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' unhandled error.', 1);
# It's interesting show by console problems, not only in logs. This helps
# to solve stupid problems like Database credential problems for example
print_message (\%Config, ' [E] Unhandled error in "' . $Config{'servername'} . "\". See more information in logfiles at '/var/log/pandora' \n", 0);
print_message (\%Config, " Error description:\n", 0);
print_message (\%Config, $full_error, 0);
}
########################################################################################
# Start the netflow daemon if necessary.
########################################################################################
sub pandora_start_netflow_daemon () {
my $pid_file = '/var/run/pandora_nfcapd.pid';
# Check if netflow is enabled
if ($Config{'activate_netflow'} != 1) {
logger (\%Config, " [*] Netflow daemon disabled.", 1);
print_message (\%Config, " [*] Netflow daemon disabled.", 1);
return;
}
# Stop nfcapd if it's already running
my $pid = pandora_stop_netflow_daemon ();
if (pandora_stop_netflow_daemon () != 0) {
logger (\%Config, "nfcapd (pid $pid) is already running, attempting to kill it...", 1);
print_message (\%Config, "nfcapd (pid $pid) is already running, attempting to kill it...", 1);
}
# Start nfcapd
my $command = $Config{'netflow_daemon'} . ' -D -T all -w -t ' . $Config{'netflow_interval'} . ' -P ' . $pid_file . ' -l ' . $Config{'netflow_path'};
if (system ("$command >/dev/null 2>&1") != 0) {
logger (\%Config, " [E] Could not start nfcapd: $command", 1);
print_message (\%Config, " [E] Could not start nfcapd: $command", 1);
return;
}
logger (\%Config, "[*] Netflow daemon started.", 1);
print_message (\%Config, "[*] Netflow daemon started.", 1);
}
########################################################################################
# Stop the netflow daemon if it's running.
########################################################################################
sub pandora_stop_netflow_daemon () {
my $pid_file = '/var/run/pandora_nfcapd.pid';
# Open the pid file
if ( ! (-e $pid_file && open (PIDFILE, $pid_file))) {
return 0;
}
my $pid = <PIDFILE>;
close PIDFILE;
# Check if nfcapd is running
if (kill (0, $pid) > 0) {
kill (9, $pid);
return $pid;
}
return 0;
}
########################################################################################
# Additional tasks executed periodically by the Pandora FMS Server
########################################################################################
sub pandora_server_tasks ($) {
my ($pa_config) = @_;
# Get the console DB connection
my $dbh = db_connect ($pa_config->{'dbengine'}, $pa_config->{'dbname'}, $pa_config->{'dbhost'}, $pa_config->{'dbport'},
$pa_config->{'dbuser'}, $pa_config->{'dbpass'});
my $counter = 0;
while (1) {
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
# --------------------------------------------------
if (($counter % 5) == 0) {
# Update forced alerts
pandora_exec_forced_alerts ($pa_config, $dbh);
}
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
# ---------------------------------------------------
if (($counter % 30) == 0) {
# Update module status and fired alert counts
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_alert_count FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1)');
foreach my $agent (@agents) {
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
if ($agent->{'update_module_count'} == 1) {
pandora_update_agent_module_count ($dbh, $agent->{'id_agente'});
}
if ($agent->{'update_alert_count'} == 1) {
pandora_update_agent_alert_count ($dbh, $agent->{'id_agente'});
}
}
# Keepalive module control.(very DB intensive, not run frecuently
pandora_module_keep_alive_nd ($pa_config, $dbh);
# Set the status of unknown modules
pandora_module_unknown ($pa_config, $dbh);
# Set event storm protection
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
}
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
# ----------------------------------------------------
if (($counter % 60) == 0) {
# Downtimes are executed only 30 x Server Threshold secs
pandora_planned_downtime ($pa_config, $dbh);
# Realtime stats (Only master server!) - ( VERY HEAVY !)
# Realtimestats == 1, generated by WEB Console, not by server!
if ($pa_config->{"pandora_master"} == 1
&& defined($pa_config->{"realtimestats"})
&& $pa_config->{"realtimestats"} == 0){
# Check if I need to refresh stats
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
pandora_group_statistics ($pa_config, $dbh);
pandora_server_statistics ($pa_config, $dbh);
}
}
# Pandora self monitoring
if (defined($pa_config->{"self_monitoring"})
&& $pa_config->{"self_monitoring"} == 1){
pandora_self_monitoring ($pa_config, $dbh);
}
# Event auto-expiry
my $expiry_time = $pa_config->{"event_expiry_time"};
my $expiry_window = $pa_config->{"event_expiry_window"};
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
my $time_ref = time ();
my $expiry_limit = $time_ref - $expiry_time;
my $expiry_window = $time_ref - $expiry_window;
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
}
}
# Avoid counter overflow
if ($counter > 10000){
$counter = 0;
}
else {
$counter++;
}
sleep (1);
}
}
$SIG{'TERM'} = 'pandora_shutdown';
$SIG{'INT'} = 'pandora_shutdown';
# Error handler needs to be reviewed, Enterprise not found errors are too nasty :(
$SIG{__DIE__} = 'pandora_crash';
# Prevent alarm from bombing the main thread when called within a thread
$SIG{'ALRM'} = 'IGNORE';
# Initialize
pandora_init(\%Config, 'Pandora FMS Server');
pandora_load_config (\%Config);
# Daemonize and put in background
if ($Config{'daemon'} == 1) {
print_message (\%Config, " [*] Backgrounding Pandora FMS Server process.\n", 1);
pandora_daemonize (\%Config);
}
# Load enterprise module
if (enterprise_load (\%Config) == 0) {
print_message (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
logger (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
} else {
print_message (\%Config, " [*] Pandora FMS Enterprise module loaded.", 1);
logger (\%Config, " [*] Pandora FMS Enterprise module loaded.", 1);
if($Config{'policy_manager'} == 1) {
# Start thread to patrol policy queue
threads->create('pandora_process_policy_queue', (\%Config))->detach();
}
if($Config{'event_replication'} == 1) {
# Start thread to process event replication
threads->create('pandora_process_event_replication', (\%Config))->detach();
}
}
# Start the servers
pandora_startup ();
# Start thread to execute server tasks
threads->create('pandora_server_tasks', (\%Config))->detach();
# Generate 'going up' events
foreach my $server (@Servers) {
$server->upEvent ();
}
# Main loop
my $time_ref = time ();
while (1) {
eval {
# Update server status
foreach my $server (@Servers) {
die ($server->getErrStr ()) unless ($server->checkThreads () == 1);
$server->update();
}
# Not needed. The console assumes a server is down if it has not updated its status in the last 15 minutes.
## Update fallen servers
#db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'})));
};
# Restart on error or auto restart
if ($@) {
if ($Config{'restart'} eq '0') {
print_message (\%Config, $@, 1);
pandora_shutdown ();
}
# Generate 'restarting' events
foreach my $server (@Servers) {
$server->restartEvent ($@);
}
logger (\%Config, 'Pandora FMS Server restarting (' . $@ . ') in ' . $Config{'restart_delay'} . ' seconds.', 1);
pandora_restart ();
}
elsif (($Config{'auto_restart'} > 0) && (time () - $time_ref > $Config{'auto_restart'})) {
$time_ref = time ();
# Mute
open(OLDOUT, ">&STDOUT");
open (STDOUT, '>/dev/null');
# Restart
pandora_restart ();
# Unmute
open(STDOUT, ">&OLDOUT");
close (OLDOUT);
}
threads->yield;
sleep ($Config{'server_threshold'});
}