#!/usr/bin/perl
##########################################################################
# Pandora FMS Server
# Pandora FMS. the Flexible Monitoring System. http://www.pandorafms.org
##########################################################################
# Copyright (c) 2005-2011 Artica Soluciones Tecnologicas S.L
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
##########################################################################

use strict;
use warnings;

use POSIX qw(strftime);

# Default lib dir for RPM and DEB packages
use lib '/usr/lib/perl5';

# Pandora Modules
use PandoraFMS::DB;
use PandoraFMS::Config;
use PandoraFMS::Tools;
use PandoraFMS::Core;
use PandoraFMS::DataServer;
use PandoraFMS::NetworkServer;
use PandoraFMS::SNMPServer;
use PandoraFMS::ReconServer;
use PandoraFMS::WMIServer;
use PandoraFMS::PluginServer;
use PandoraFMS::PredictionServer;

# Global vars
my %Config;     # Server configuration, passed by reference to every helper
my @Servers;    # Server objects created by pandora_startup
my $DBH;        # Database handle returned by db_connect

########################################################################################
# Server shutdown. Handler to do a controlled shutdown.
#
# Sends the 'down' event for every running server and stops it, stops the
# netflow daemon, disconnects from the database, removes the PID file (if one
# is configured) and exits with status 0. Never returns.
########################################################################################
sub pandora_shutdown () {
    logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' Shutdown by signal ', 1);

    # Stop servers
    foreach my $server (@Servers) {
        $server->downEvent ();
        $server->stop ();
    }

    # Stop the netflow daemon
    pandora_stop_netflow_daemon ();

    print_message (\%Config, ' [*] Shutting down ' . $Config{'servername'} . "(received signal)...\n", 1);
    db_disconnect ($DBH);

    # The PID entry may be missing altogether; check for definedness first so
    # the string comparison does not emit an "uninitialized" warning.
    if (defined ($Config{'PID'}) && $Config{'PID'} ne "") {
        unlink ($Config{'PID'}) or logger (\%Config, "[W] Could not remove PID file: $!", 1);
    }

    exit (0);
}

########################################################################################
# Server startup.
#
# Starts logging, connects to the database (storing the handle in $DBH), loads
# the shared configuration, instantiates every server type into @Servers,
# starts the netflow daemon if necessary and runs all the servers that were
# actually created.
########################################################################################
sub pandora_startup () {

    # Start logging
    pandora_start_log (\%Config);

    # Connect to the DB
    $DBH = db_connect ($Config{'dbengine'}, $Config{'dbname'}, $Config{'dbhost'},
                       $Config{'dbport'}, $Config{'dbuser'}, $Config{'dbpass'});

    # Grab config tokens shared with the console and not in the .conf
    pandora_get_sharedconfig (\%Config, $DBH);

    pandora_audit (\%Config, 'Pandora FMS Server Daemon starting', 'SYSTEM', 'System', $DBH);

    # Load servers. Class->new is used instead of the indirect object form
    # ("new Class (...)"), which Perl has to guess how to parse.
    pandora_reset_server (\%Config, $DBH);
    my @server_classes = qw(
        PandoraFMS::DataServer
        PandoraFMS::NetworkServer
        PandoraFMS::ReconServer
        PandoraFMS::SNMPServer
        PandoraFMS::WMIServer
        PandoraFMS::PluginServer
        PandoraFMS::PredictionServer
    );
    foreach my $server_class (@server_classes) {
        push (@Servers, $server_class->new (\%Config, $DBH));
    }

    enterprise_hook ('load_enterprise_servers', [\@Servers, \%Config, $DBH]);

    # Start the netflow daemon if necessary
    pandora_start_netflow_daemon ();

    # Remove disabled servers (their slot in the list is undefined)
    @Servers = grep { defined ($_) } @Servers;

    # Run
    foreach my $server (@Servers) {
        $server->run ();
    }
}

########################################################################################
# Server restart.
########################################################################################
# Stops every server, empties the server list, closes STDERR, waits
# $Config{'restart_delay'} seconds and then runs pandora_startup again.
sub pandora_restart () {

    # Stop the servers
    foreach my $server (@Servers) {
        $server->stop ();
    }

    # Remove the servers
    @Servers = ();

    # Close STDERR, redirected by pandora_start_log
    close (STDERR);

    # Wait before trying to start again
    sleep ($Config{'restart_delay'});

    # Start the servers
    pandora_startup ();
}

########################################################################################
# Server crash. Handler to write in the log unhandled errors and write it to console
#
# Installed as the __DIE__ handler, so the error message(s) arrive in @_ even
# though the sub is declared with an empty prototype (prototypes are not
# enforced on handler invocations).
########################################################################################
sub pandora_crash () {
    my $full_error = "";

    # Avoid showing messages about enterprise library loading failures: they
    # are VERY confusing, all of them are warnings and not critical, and the
    # user should not be worried about them. If perl has a "cleaner" way to
    # avoid these messages it would be nice to replace this code, but at this
    # time it's the only way I know.
    foreach my $error_line (@_) {

        # Trap the XML error and exit without nasty messages
        if ($error_line =~ m/XML\/Parser/) {
            logger (\%Config, "Problem parsing XML file, XML file discarded: $error_line", 2);
            return;
        }

        my $mentions_optional_module = ($error_line =~ m/Enterprise/i
            || $error_line =~ m/Format_XS/i
            || $error_line =~ m/ConfigLocal/i);

        # Known errors of loading Enterprise, Format_XS and ConfigLocal
        # modules, non fatal.
        if ($mentions_optional_module && $error_line =~ m/Can\'t\slocate/) {
            return;
        }

        logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1);
        $full_error .= $error_line;
    }

    logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' unhandled error.', 1);

    # It's interesting to show problems on the console, not only in logs. This
    # helps to solve simple problems like wrong database credentials.
    print_message (\%Config, ' [E] Unhandled error in "' . $Config{'servername'} . "\". See more information in logfiles at '/var/log/pandora' \n", 0);
    print_message (\%Config, " Error description:\n", 0);
    print_message (\%Config, $full_error, 0);
}

########################################################################################
# Start the netflow daemon if necessary.
#
# Does nothing (besides logging) unless $Config{'activate_netflow'} is 1.
# Any nfcapd instance recorded in the PID file is killed first.
########################################################################################
sub pandora_start_netflow_daemon () {
    my $pid_file = '/var/run/pandora_nfcapd.pid';

    # Check if netflow is enabled
    if ($Config{'activate_netflow'} != 1) {
        logger (\%Config, " [*] Netflow daemon disabled.", 1);
        print_message (\%Config, " [*] Netflow daemon disabled.", 1);
        return;
    }

    # Stop nfcapd if it's already running. The stop routine must be called
    # only once: it returns the PID it killed, and a second call would report
    # on a process that has just been signalled instead.
    my $pid = pandora_stop_netflow_daemon ();
    if ($pid != 0) {
        logger (\%Config, "nfcapd (pid $pid) is already running, attempting to kill it...", 1);
        print_message (\%Config, "nfcapd (pid $pid) is already running, attempting to kill it...", 1);
    }

    # Start nfcapd
    my $command = $Config{'netflow_daemon'} . ' -D -T all -w -t ' . $Config{'netflow_interval'} . ' -P ' . $pid_file . ' -l ' . $Config{'netflow_path'};
    if (system ("$command >/dev/null 2>&1") != 0) {
        logger (\%Config, " [E] Could not start nfcapd: $command", 1);
        print_message (\%Config, " [E] Could not start nfcapd: $command", 1);
        return;
    }

    logger (\%Config, "[*] Netflow daemon started.", 1);
    print_message (\%Config, "[*] Netflow daemon started.", 1);
}

########################################################################################
# Stop the netflow daemon if it's running.
#
# Returns the PID of the nfcapd process that was sent SIGKILL, or 0 when there
# is no usable PID file or the recorded process is not running.
########################################################################################
sub pandora_stop_netflow_daemon () {
    my $pid_file = '/var/run/pandora_nfcapd.pid';

    # Open the pid file; a missing or unreadable file means nothing to stop
    open (my $pid_fh, '<', $pid_file) or return 0;
    my $pid = <$pid_fh>;
    close ($pid_fh);

    # Reject empty or malformed PID files. An empty value would be treated as
    # PID 0 by kill(), which targets our own process group.
    return 0 unless (defined ($pid) && $pid =~ m/^\s*(\d+)\s*$/ && $1 > 0);
    $pid = $1;

    # Check if nfcapd is running
    if (kill (0, $pid) > 0) {
        kill (9, $pid);
        return $pid;
    }

    return 0;
}

$SIG{'TERM'} = \&pandora_shutdown;
$SIG{'INT'} = \&pandora_shutdown;

# Error handler needs to be reviewed, Enterprise not found errors are too nasty :(
$SIG{__DIE__} = \&pandora_crash;

# Prevent alarm from bombing the main thread when called within a thread
$SIG{'ALRM'} = 'IGNORE';

# Initialize
pandora_init (\%Config, 'Pandora FMS Server');
pandora_load_config (\%Config);

# Daemonize and put in background
if ($Config{'daemon'} == 1) {
    print_message (\%Config, " [*] Backgrounding Pandora FMS Server process.\n", 1);
    pandora_daemonize (\%Config);
}

# Load enterprise module
if (enterprise_load (\%Config) == 0) {
    print_message (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
}
else {
    print_message (\%Config, " [*] Pandora FMS Enterprise module loaded.", 1);

    if ($Config{'policy_manager'} == 1) {
        # Start thread to patrol policy queue
        my $thr_policy_queue = threads->create ('pandora_process_policy_queue', (\%Config));
    }

    if ($Config{'event_replication'} == 1) {
        # Start thread to process event replication
        my $thr_event_replication = threads->create ('pandora_process_event_replication', (\%Config));
    }
}

# Start the servers
pandora_startup ();

# Generate 'going up' events
foreach my $server (@Servers) {
    $server->upEvent ();
}

# Main loop: one iteration per second, $counter selects the periodic tasks
my $time_ref = time ();
my $counter = 0;
while (1) {
    eval {

        # TASKS DONE EACH 5 SECONDS (Low latency tasks)
        # ---------------------------------------------
        # Server status update, fallen server detection and forced alerts
        if (($counter % 5) == 0) {

            # Update server status
            foreach my $server (@Servers) {
                die ($server->getErrStr ()) unless ($server->checkThreads () == 1);
                $server->update ();
            }

            # Update fallen servers
            db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime (time () - $Config{'keepalive'})));

            # Update forced alerts
            pandora_exec_forced_alerts (\%Config, $DBH);
        }

        # TASKS DONE EACH 30 SECONDS (Med latency tasks)
        # ---------------------------------------------
        if (($counter % 30) == 0) {

            # Update module status and fired alert counts
            my @agents = get_db_rows ($DBH, 'SELECT id_agente, nombre, update_module_count, update_alert_count FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1)');
            foreach my $agent (@agents) {
                logger (\%Config, "Updating module status and alert fired counts for agent " . $agent->{'nombre'}, 10);

                if ($agent->{'update_module_count'} == 1) {
                    pandora_update_agent_module_count ($DBH, $agent->{'id_agente'});
                }

                if ($agent->{'update_alert_count'} == 1) {
                    pandora_update_agent_alert_count ($DBH, $agent->{'id_agente'});
                }
            }

            # Keepalive module control (very DB intensive, not run frequently)
            pandora_module_keep_alive_nd (\%Config, $DBH);

            # Set the status of unknown modules
            pandora_module_unknown (\%Config, $DBH);

            # Set event storm protection
            pandora_set_event_storm_protection (pandora_get_tconfig_token ($DBH, 'event_storm_protection', 0));
        }

        # TASKS DONE EACH 60 SECONDS (High latency tasks)
        # ---------------------------------------------
        if (($counter % 60) == 0) {

            # Planned downtimes
            pandora_planned_downtime (\%Config, $DBH);

            # Realtime stats (Only master server!) - ( VERY HEAVY !)
            # Realtimestats == 1, generated by WEB Console, not by server!
            if ($Config{"pandora_master"} == 1 && defined ($Config{"realtimestats"}) && $Config{"realtimestats"} == 0) {

                # Check if I need to refresh stats
                my $last_execution_stats = get_db_value ($DBH, "SELECT MAX(utimestamp) FROM tgroup_stat");
                if (!defined ($last_execution_stats) || $last_execution_stats < (time () - $Config{"stats_interval"})) {
                    pandora_group_statistics (\%Config, $DBH);
                    pandora_server_statistics (\%Config, $DBH);
                }
            }

            # Pandora self monitoring
            if (defined ($Config{"self_monitoring"}) && $Config{"self_monitoring"} == 1) {
                pandora_self_monitoring (\%Config, $DBH);
            }
        }
    };

    # Copy $@ right away: it is a global that any later eval (for instance
    # inside the calls below) may overwrite.
    my $error = $@;

    # Restart on error or auto restart
    if ($error) {

        if ($Config{'restart'} eq '0') {
            print_message (\%Config, $error, 1);
            pandora_shutdown ();
        }

        # Generate 'restarting' events
        foreach my $server (@Servers) {
            $server->restartEvent ($error);
        }

        logger (\%Config, 'Pandora FMS Server restarting (' . $error . ') in ' . $Config{'restart_delay'} . ' seconds.', 1);
        pandora_restart ();
    }
    elsif (($Config{'auto_restart'} > 0) && (time () - $time_ref > $Config{'auto_restart'})) {
        $time_ref = time ();

        # Mute: keep a duplicate of STDOUT so it can be restored afterwards
        my $stdout_saved = open (my $old_stdout, '>&', \*STDOUT);
        open (STDOUT, '>', '/dev/null');

        # Restart
        pandora_restart ();

        # Unmute, only if there is a saved handle to restore from
        if ($stdout_saved) {
            open (STDOUT, '>&', $old_stdout);
            close ($old_stdout);
        }
    }

    # Avoid counter overflow. The wrap point is a common multiple of the task
    # periods (5, 30 and 60) so the task cadence is not disturbed by the wrap.
    $counter = ($counter + 1) % 3600;

    threads->yield;
    sleep (1);
}