pandorafms/pandora_server/bin/pandora_server

728 lines
25 KiB
Perl
Executable File

#!/usr/bin/perl
##########################################################################
# Pandora FMS Server
# Pandora FMS. the Flexible Monitoring System. http://www.pandorafms.org
##########################################################################
# Copyright (c) 2005-2011 Artica Soluciones Tecnologicas S.L
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
##########################################################################
use strict;
use warnings;
use POSIX qw(strftime);
use threads;
# Default lib dir for RPM and DEB packages
use lib '/usr/lib/perl5';
# Pandora Modules
use PandoraFMS::DB;
use PandoraFMS::Config;
use PandoraFMS::Tools;
use PandoraFMS::Core;
use PandoraFMS::DataServer;
use PandoraFMS::NetworkServer;
use PandoraFMS::SNMPServer;
use PandoraFMS::DiscoveryServer;
use PandoraFMS::WMIServer;
use PandoraFMS::PluginServer;
use PandoraFMS::PredictionServer;
# Constants for Win32 services.
use constant WIN32_SERVICE_STOPPED => 0x01;
use constant WIN32_SERVICE_RUNNING => 0x04;
# Global vars
my %Config :shared;
my @Servers;
my $DBH;
my $RUN :shared = 1;
my $MainThread = threads->self;
########################################################################################
# Server shutdown. Handler to do a controlled shutdown.
########################################################################################
sub pandora_shutdown () {
my $signal = shift;
logger (\%Config, $Config{'rb_product_name'} . ' Server \'' . $Config{'servername'} . '\' Caught SIG' . $signal . ' by thread(' . threads->self()->tid() . ')', 10);
if (!threads->self->equal($MainThread)) {
# deliver signal to the main thread since no other threads than main thread
# could disconnet $DBH properly
$MainThread->kill($signal);
return;
}
logger (\%Config, $Config{'rb_product_name'} . ' Server \'' . $Config{'servername'} . '\' Shutdown by signal ', 1);
# Stop servers
foreach my $server (@Servers) {
$server->downEvent ();
$server->stop ();
}
@Servers = ();
# Stop the netflow daemon
pandora_stop_netflow_daemon ();
print_message (\%Config, ' [*] Shutting down ' . $Config{'servername'} . "(received signal)...\n", 1);
db_disconnect ($DBH);
if ($Config{'PID'} ne "") {
unlink($Config{'PID'}) or logger (\%Config, "[W] Could not remove PID file: $!",1);
}
exit (0);
}
########################################################################################
# Server startup.
########################################################################################
sub pandora_startup () {
# Start logging
pandora_start_log (\%Config);
# Connect to the DB
$DBH = db_connect ($Config{'dbengine'}, $Config{'dbname'}, $Config{'dbhost'}, $Config{'dbport'},
$Config{'dbuser'}, $Config{'dbpass'});
# Grab config tokens shared with the console and not in the .conf
pandora_get_sharedconfig (\%Config, $DBH);
# Generate the encryption key after reading the passphrase.
$Config{"encryption_key"} = enterprise_hook('pandora_get_encryption_key', [\%Config, $Config{"encryption_passphrase"}]);
# Kill any running server threads.
stop_server_threads();
# Start the task execution thread.
start_server_thread(\&pandora_server_tasks, [\%Config]);
# Start the policy queue thread.
start_server_thread(\&pandora_process_policy_queue, [\%Config]) if ($Config{'__enterprise_enabled'} == 1 && $Config{'policy_manager'} == 1);
# Start the event replication thread. Do not start with start_server_thread, this thread may exit on its own.
threads->create(\&pandora_process_event_replication, \%Config) if($Config{'__enterprise_enabled'} == 1 && $Config{'event_replication'} == 1);
# Update the agent cache. Do not start with start_server_thread, this thread updates the agent cache and exits.
threads->create(\&enterprise_hook, 'update_agent_cache', [\%Config])->detach() if ($Config{'node_metaconsole'} == 1);
pandora_audit (\%Config, $Config{'rb_product_name'} . ' Server Daemon starting', 'SYSTEM', 'System', $DBH);
# Load servers
if (!is_metaconsole(\%Config)) {
pandora_reset_server (\%Config, $DBH);
push (@Servers, new PandoraFMS::DataServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::NetworkServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::DiscoveryServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::SNMPServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::WMIServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::PluginServer (\%Config, $DBH));
push (@Servers, new PandoraFMS::PredictionServer (\%Config, $DBH));
} else {
# Metaconsole service modules are run by the prediction server
push (@Servers, new PandoraFMS::PredictionServer (\%Config, $DBH));
}
# There are enterprise metaconsole servers!
enterprise_hook('load_enterprise_servers', [\@Servers, \%Config, $DBH]);
# Start the netflow daemon if necessary
pandora_start_netflow_daemon ();
# Remove disabled servers
@Servers = grep { defined ($_) } @Servers;
# Run
foreach my $server (@Servers) {
$server->run ();
}
}
########################################################################################
# Server restart.
########################################################################################
sub pandora_restart (;$) {
my $sleep_time = @_ > 0 ? $_[0] : $Config{'restart_delay'};
# Stop the servers
eval {
foreach my $server (@Servers) {
$server->stop ();
}
};
# Remove the servers
while (pop (@Servers)) {};
# Close STDERR, redirected by pandora_start_log
close (STDERR);
# Wait before trying to start again
sleep ($sleep_time);
# Start the servers
pandora_startup ();
}
########################################################################################
# Server crash. Handler to write in the log unhandled errors and write it to console
########################################################################################
sub pandora_crash () {
my $full_error = "";
# Avoid show messages about enterprise library loading failurem, VERY
# confussing, all of them are warnigs and not critical, and user should be
# worried about that. If perl has a more "clean" way to avoid this messages
# will be nice to replace this code, but at this time it's the only way I know
foreach my $error_line (@_) {
# Trap the XML error and exit without nasty messages
if ($error_line =~ m/XML\/Parser/) {
logger (\%Config, "Problem parsing XML file, XML file discarded: $error_line", 2);
return;
}
elsif ($error_line !~ m/Enterprise/i && $error_line !~ m/Format_XS/i && $error_line !~ m/ConfigLocal/i){
logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1);
}
else {
if ($error_line !~ m/Can\'t\slocate/) {
logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1);
}
else {
# Known errors of loading Enterprise, Format_XS and ConfigLocal
# modules, non fatal.
return;
}
}
$full_error .= $error_line;
}
logger (\%Config, $Config{'rb_product_name'} . ' Server \'' . $Config{'servername'} . '\' unhandled error.', 1);
# It's interesting show by console problems, not only in logs. This helps
# to solve stupid problems like Database credential problems for example
print_message (\%Config, ' [E] Unhandled error in "' . $Config{'servername'} . "\". See more information in logfiles at '/var/log/pandora' \n", 0);
print_message (\%Config, " Error description:\n", 0);
print_message (\%Config, $full_error, 0);
callback_stop() if ($^O eq 'MSWin32' && defined($Config{'win32_service'}));
}
########################################################################################
# Start the netflow daemon if necessary.
########################################################################################
sub pandora_start_netflow_daemon () {
my $pid_file = '/var/run/pandora_nfcapd.pid';
# Check if netflow is enabled
if ($Config{'activate_netflow'} != 1) {
logger (\%Config, " [*] Netflow daemon disabled.", 1);
print_message (\%Config, " [*] Netflow daemon disabled.", 1);
return;
}
# Stop nfcapd if it's already running
my $pid = pandora_stop_netflow_daemon ();
if (pandora_stop_netflow_daemon () != 0) {
logger (\%Config, "nfcapd (pid $pid) is already running, attempting to kill it...", 1);
print_message (\%Config, "nfcapd (pid $pid) is already running, attempting to kill it...", 1);
}
# Start nfcapd
my $command = $Config{'netflow_daemon'} . ' -D -T all -w -t ' . $Config{'netflow_interval'} . ' -P ' . $pid_file . ' -l ' . $Config{'netflow_path'};
if (system ("$command >/dev/null 2>&1") != 0) {
logger (\%Config, " [E] Could not start nfcapd: $command", 1);
print_message (\%Config, " [E] Could not start nfcapd: $command", 1);
return;
}
logger (\%Config, "[*] Netflow daemon started.", 1);
print_message (\%Config, "[*] Netflow daemon started.", 1);
}
########################################################################################
# Stop the netflow daemon if it's running.
########################################################################################
sub pandora_stop_netflow_daemon () {
my $pid_file = '/var/run/pandora_nfcapd.pid';
# Open the pid file
if ( ! (-e $pid_file && open (PIDFILE, $pid_file))) {
return 0;
}
my $pid = <PIDFILE>;
close PIDFILE;
# Check if nfcapd is running
if (kill (0, $pid) > 0) {
kill (9, $pid);
return $pid;
}
return 0;
}
########################################################################################
# Additional tasks executed periodically by the Pandora FMS Server
########################################################################################
sub pandora_server_tasks ($) {
my ($pa_config) = @_;
# Get the console DB connection
my $dbh = db_connect ($pa_config->{'dbengine'}, $pa_config->{'dbname'}, $pa_config->{'dbhost'}, $pa_config->{'dbport'},
$pa_config->{'dbuser'}, $pa_config->{'dbpass'});
my $counter = 0;
while ($THRRUN == 1) {
if (pandora_is_master($pa_config) == 1) {
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
# --------------------------------------------------
if (($counter % 5) == 0) {
# Update forced alerts
pandora_exec_forced_alerts ($pa_config, $dbh);
}
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
# ---------------------------------------------------
if (($counter % 30) == 0) {
# Update module status and fired alert counts
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_alert_count, update_secondary_groups FROM tagente WHERE (update_module_count=1 OR update_alert_count=1 OR update_secondary_groups=1)');
foreach my $agent (@agents) {
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
if ($agent->{'update_module_count'} == 1) {
pandora_update_agent_module_count ($pa_config, $dbh, $agent->{'id_agente'});
}
if ($agent->{'update_alert_count'} == 1) {
pandora_update_agent_alert_count ($pa_config, $dbh, $agent->{'id_agente'});
}
if ($agent->{'update_secondary_groups'} == 1) {
pandora_update_secondary_groups_cache ($pa_config, $dbh, $agent->{'id_agente'});
}
}
# Keepalive module control.(very DB intensive, not run frecuently
pandora_module_keep_alive_nd ($pa_config, $dbh);
# Set the status of unknown modules
pandora_module_unknown ($pa_config, $dbh);
# Check if an autodisabled agent needs to be autodisable
pandora_disable_autodisable_agents ($pa_config, $dbh);
}
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
# ----------------------------------------------------
if (($counter % 60) == 0) {
# Downtimes are executed only 30 x Server Threshold secs
pandora_planned_downtime ($pa_config, $dbh);
# Realtime stats (Only master server!) - ( VERY HEAVY !)
# Realtimestats == 1, generated by WEB Console, not by server!
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
# Check if I need to refresh stats
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
pandora_group_statistics ($pa_config, $dbh);
pandora_server_statistics ($pa_config, $dbh);
}
}
# Event auto-expiry
my $expiry_time = $pa_config->{"event_expiry_time"};
my $expiry_window = $pa_config->{"event_expiry_window"};
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
my $time_ref = time ();
my $expiry_limit = $time_ref - $expiry_time;
my $expiry_window = $time_ref - $expiry_window;
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
}
}
}
# COMMON TASKS (master and non-master)
# ---------------------------------------------------------------
# Rotate Log File
if (($counter % 30) == 0) {
pandora_rotate_logfile($pa_config);
# Set event storm protection
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
}
# Pandora self monitoring
if (defined($pa_config->{"self_monitoring"})
&& $pa_config->{"self_monitoring"} == 1
&& !is_metaconsole($pa_config)
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
pandora_self_monitoring ($pa_config, $dbh);
}
# Avoid counter overflow
if ($counter >= ~0){
$counter = 0;
}
else {
$counter++;
}
sleep (1);
}
db_disconnect($dbh);
}
################################################################################
## Install the Windows service.
################################################################################
sub win32_install_service() {
# Load Win32::Daemon.
eval "use Win32::Daemon";
die($@) if ($@);
# Configure and install the service.
my $service_path = $0;
my $service_params = "-S run \"" . $Config{'pandora_path'} ."\"";
my %service_hash = (
machine => '',
name => 'PANDORAFMSSRV',
display => 'Pandora FMS Server',
path => $service_path,
user => '',
pwd => '',
description => 'Pandora FMS Server http://pandorafms.com/',
parameters => $service_params
);
if (Win32::Daemon::CreateService(\%service_hash)) {
print "Successfully added.\n";
exit 0;
} else {
print "Failed to add service: " . Win32::FormatMessage(Win32::Daemon::GetLastError()) . "\n";
exit 1;
}
}
################################################################################
## Install the Windows service.
################################################################################
sub win32_uninstall_service() {
# Load Win32::Daemon.
eval "use Win32::Daemon";
die($@) if ($@);
# Uninstall the service.
if (Win32::Daemon::DeleteService('', 'PANDORAFMSSRV')) {
print "Successfully deleted.\n";
exit 0;
} else {
print "Failed to delete service: " . Win32::FormatMessage(Win32::Daemon::GetLastError()) . "\n";
exit 1;
}
}
################################################################################
## Windows service callback function for the running event.
################################################################################
sub callback_running {
if (Win32::Daemon::State() == WIN32_SERVICE_RUNNING) {
}
}
################################################################################
## Windows service callback function for the start event.
################################################################################
sub callback_start {
no strict;
# Accept_connections ();
my $thr = threads->create(\&main);
if (!defined($thr)) {
Win32::Daemon::State(WIN32_SERVICE_STOPPED);
Win32::Daemon::StopService();
return;
}
$thr->detach();
Win32::Daemon::State(WIN32_SERVICE_RUNNING);
}
################################################################################
## Windows service callback function for the stop event.
################################################################################
sub callback_stop {
$RUN = 0;
Win32::Daemon::State(WIN32_SERVICE_STOPPED);
Win32::Daemon::StopService();
}
################################################################################
# Run as a Windows service.
################################################################################
sub win32_service_run() {
# Load Win32::Daemon.
eval "use Win32::Daemon";
die($@) if ($@);
# Run the Pandora FMS Server as a Windows service.
Win32::Daemon::RegisterCallbacks({
start => \&callback_start,
running => \&callback_running,
stop => \&callback_stop,
});
Win32::Daemon::StartService();
}
################################################################################
## Parse command line options.
################################################################################
sub parse_service_options ($) {
my $config = shift;
# Sanity checks.
return unless defined($config->{'win32_service'});
die ("[ERROR] Windows services are only available on Win32.\n\n") if ($^O ne 'MSWin32');
# Win32 service management.
eval "use Win32::Daemon";
die($@) if ($@);
if ($config->{'win32_service'} eq 'install') {
win32_install_service();
} elsif ($config->{'win32_service'} eq 'uninstall') {
win32_uninstall_service();
} elsif ($config->{'win32_service'} eq 'run') {
} else {
die("[ERROR] Unknown action: " . $config->{'win32_service'});
}
}
################################################################
################################################################
## Main.
################################################################
################################################################
sub main() {
# Daemonize and put in background
if ($Config{'daemon'} == 1) {
print_message (\%Config, " [*] Backgrounding " . pandora_get_initial_product_name() . " Server process.\n", 1);
pandora_daemonize (\%Config);
}
# Load enterprise module
if (enterprise_load (\%Config) == 0) {
$Config{'__enterprise_enabled'} = 0;
print_message (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
logger (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
} else {
$Config{'__enterprise_enabled'} = 1;
print_message (\%Config, " [*] " . pandora_get_initial_product_name() . " Enterprise module loaded.", 1);
logger (\%Config, " [*] " . pandora_get_initial_product_name() . " Enterprise module loaded.", 1);
}
# Save the start time for warmup intervals.
$Config{'__start_utimestamp__'} = time();
# Start the servers
pandora_startup ();
if ($Config{'warmup_unknown_interval'} > 0) {
logger(\%Config, "Warmup mode for unknown modules started.", 3);
pandora_event (\%Config, "Warmup mode for unknown modules started.", 0, 0, 0, 0, 0, 'system', 0, $DBH);
}
if ($Config{'warmup_event_interval'} > 0) {
logger(\%Config, "Warmup mode for alerts started.", 3);
logger(\%Config, "Warmup mode for events started.", 3);
pandora_event (\%Config, "Warmup mode for alerts started.", 0, 0, 0, 0, 0, 'system', 0, $DBH);
pandora_event (\%Config, "Warmup mode for events started.", 0, 0, 0, 0, 0, 'system', 0, $DBH);
}
# Generate 'going up' events
foreach my $server (@Servers) {
$server->upEvent ();
}
# Check if the Data Server has too many threads
if ($Config{'dataserver_threads'} > 5) {
logger (\%Config, "[W] Server " . $Config{'servername'} . " have configured " . $Config{'dataserver_threads'}
. " threads for the data server. You should not use more than 5 threads for this server", 1);
print_message (\%Config, " [W] Server " . $Config{'servername'} . " have configured " . $Config{'dataserver_threads'}
. " threads for the data server. You should not use more than 5 threads for this server", 1);
pandora_event (\%Config, "Server " . $Config{'servername'} . " have configured "
. $Config{'dataserver_threads'} . " threads for the data server", 0, 0, 3, 0, 0, 'system', 0, $DBH);
}
# Check if the Pandora Server has too many threads
my $totalThreads = 0;
foreach my $server (@Servers) {
$totalThreads += $server->getNumThreads ();
}
if ($totalThreads > 40) {
logger (\%Config, '[W] Server ' . $Config{'servername'} . ' have configured a total of ' . $totalThreads
. ' threads. This setup is not recommended, you should reduce the number of total threads below 40', 1);
print_message (\%Config, ' [W] Server ' . $Config{'servername'} . ' have configured a total of ' . $totalThreads
. ' threads. This setup is not recommended, you should reduce the number of total threads below 40', 1);
pandora_event (\%Config, 'Server ' . $Config{'servername'} . ' have configured a total of ' . $totalThreads
. ' threads', 0, 0, 3, 0, 0, 'system', 0, $DBH);
}
# Check if the log verbosity is set to 10
if ($Config{'verbosity'} == 10) {
logger (\%Config, '[W] Log verbosity is set to 10. This will degrade the server performance. Please set to a lower value ASAP', 1);
print_message (\%Config, ' [W] Log verbosity is set to 10. This will degrade the server performance. Please set to a lower value ASAP', 1);
pandora_event (\%Config, 'Log verbosity is set to 10. This will degrade the server performance', 0, 0, 1, 0, 0, 'system', 0, $DBH);
}
# Main loop
my $time_ref = time ();
my $test_remote_interval = ($Config{'keepalive'}/$Config{'server_threshold'});
my $test_remote = 0;
while ($RUN == 1) {
eval {
# Update server status
foreach my $server (@Servers) {
die ($server->getErrStr ()) unless ($server->checkThreads () == 1);
$server->update();
}
# Make sure all server threads are running.
die("Server thread crashed.") unless (check_server_threads() == 1);
db_do ($DBH,
"UPDATE tserver SET status = 0
WHERE UNIX_TIMESTAMP(now())-UNIX_TIMESTAMP(keepalive) > 2*server_keepalive"
);
# Set the master server
pandora_set_master(\%Config, $DBH);
};
# Restart on error or auto restart
if ($@) {
if ($Config{'restart'} eq '0') {
print_message (\%Config, $@, 1);
pandora_shutdown ();
}
# Generate 'restarting' events
foreach my $server (@Servers) {
$server->restartEvent ($@);
}
logger (\%Config, $Config{'rb_product_name'} . ' Server restarting (' . $@ . ') in ' . $Config{'restart_delay'} . ' seconds.', 1);
pandora_restart ();
}
elsif (($Config{'auto_restart'} > 0) && (time () - $time_ref > $Config{'auto_restart'})) {
$time_ref = time ();
# Mute
open(OLDOUT, ">&STDOUT");
open (STDOUT, '>/dev/null');
# Restart
pandora_restart ();
# Unmute
open(STDOUT, ">&OLDOUT");
close (OLDOUT);
}
if ($test_remote >= $test_remote_interval) {
if ($Config{'remote_config'} == 1 && enterprise_hook ('pandora_remote_config_server', [\%Config])) {
# Generate 'restarting' events
foreach my $server (@Servers) {
$server->restartEvent ($@);
}
logger (\%Config, $Config{'rb_product_name'} . ' Server restarting (' . $@ . ') in 5 seconds.', 1);
pandora_load_config (\%Config);
if (enterprise_load (\%Config) == 0) {
$Config{'__enterprise_enabled'} = 0;
print_message (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
logger (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
} else {
$Config{'__enterprise_enabled'} = 1;
print_message (\%Config, " [*] " . pandora_get_initial_product_name() . " Enterprise module loaded.", 1);
logger (\%Config, " [*] " . pandora_get_initial_product_name() . " Enterprise module loaded.", 1);
}
pandora_restart (5);
}
$test_remote = 0;
}
else {
$test_remote++;
}
threads->yield;
sleep ($Config{'server_threshold'});
}
pandora_shutdown();
}
$SIG{'TERM'} = 'pandora_shutdown';
$SIG{'INT'} = 'pandora_shutdown';
# Error handler needs to be reviewed, Enterprise not found errors are too nasty :(
$SIG{__DIE__} = 'pandora_crash';
# Prevent alarm from bombing the main thread when called within a thread
$SIG{'ALRM'} = 'IGNORE';
# Initialize
pandora_init(\%Config, pandora_get_initial_product_name() . ' Server');
pandora_load_config (\%Config);
# Parse command line options.
parse_service_options(\%Config);
# Run as a regular process.
if (!defined($Config{'win32_service'})) {
main();
}
# Run as a Windows service.
else {
win32_service_run();
}
################################################################################
# Kill any scripts started by the Pandora FMS Server that are still running.
################################################################################
END {{
local $SIG{HUP} = "IGNORE";
kill("HUP", -$$);
}}