Added support for multiple master servers.

This commit is contained in:
Ramon Novoa 2015-01-13 10:54:19 +01:00
parent 9709123ec7
commit 8f9f65b482
7 changed files with 136 additions and 85 deletions

@ -264,88 +264,95 @@ sub pandora_server_tasks ($) {
my $counter = 0;
while ($RUN == 1) {
eval{
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
# --------------------------------------------------
if (($counter % 5) == 0) {
# Update forced alerts
pandora_exec_forced_alerts ($pa_config, $dbh);
if (pandora_is_master($pa_config) == 1) {
# Rotate Log File
pandora_rotate_logfile($pa_config);
}
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
# ---------------------------------------------------
if (($counter % 30) == 0) {
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
# --------------------------------------------------
if (($counter % 5) == 0) {
# Update forced alerts
pandora_exec_forced_alerts ($pa_config, $dbh);
# Update module status and fired alert counts
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_alert_count FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1)');
foreach my $agent (@agents) {
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
if ($agent->{'update_module_count'} == 1) {
pandora_update_agent_module_count ($dbh, $agent->{'id_agente'});
}
if ($agent->{'update_alert_count'} == 1) {
pandora_update_agent_alert_count ($dbh, $agent->{'id_agente'});
}
}
# Keepalive module control (very DB intensive, not run frequently)
pandora_module_keep_alive_nd ($pa_config, $dbh);
# Set the status of unknown modules
pandora_module_unknown ($pa_config, $dbh);
# Set event storm protection
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
}
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
# ----------------------------------------------------
if (($counter % 60) == 0) {
# Downtimes are executed only 30 x Server Threshold secs
pandora_planned_downtime ($pa_config, $dbh);
# Realtime stats (Only master server!) - ( VERY HEAVY !)
# Realtimestats == 1, generated by WEB Console, not by server!
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
# Check if I need to refresh stats
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
pandora_group_statistics ($pa_config, $dbh);
pandora_server_statistics ($pa_config, $dbh);
}
# Rotate Log File
pandora_rotate_logfile($pa_config);
}
# Event auto-expiry
my $expiry_time = $pa_config->{"event_expiry_time"};
my $expiry_window = $pa_config->{"event_expiry_window"};
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
my $time_ref = time ();
my $expiry_limit = $time_ref - $expiry_time;
my $expiry_window = $time_ref - $expiry_window;
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
}
}
# Pandora self monitoring
if (defined($pa_config->{"self_monitoring"})
&& $pa_config->{"self_monitoring"} == 1
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
pandora_self_monitoring ($pa_config, $dbh);
}
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
# ---------------------------------------------------
if (($counter % 30) == 0) {
# Avoid counter overflow
if ($counter > 10000){
$counter = 0;
# Update module status and fired alert counts
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_alert_count FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1)');
foreach my $agent (@agents) {
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
if ($agent->{'update_module_count'} == 1) {
pandora_update_agent_module_count ($dbh, $agent->{'id_agente'});
}
if ($agent->{'update_alert_count'} == 1) {
pandora_update_agent_alert_count ($dbh, $agent->{'id_agente'});
}
}
# Keepalive module control (very DB intensive, not run frequently)
pandora_module_keep_alive_nd ($pa_config, $dbh);
# Set the status of unknown modules
pandora_module_unknown ($pa_config, $dbh);
# Set event storm protection
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
}
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
# ----------------------------------------------------
if (($counter % 60) == 0) {
# Downtimes are executed only 30 x Server Threshold secs
pandora_planned_downtime ($pa_config, $dbh);
# Realtime stats (Only master server!) - ( VERY HEAVY !)
# Realtimestats == 1, generated by WEB Console, not by server!
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
# Check if I need to refresh stats
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
pandora_group_statistics ($pa_config, $dbh);
pandora_server_statistics ($pa_config, $dbh);
}
}
# Event auto-expiry
my $expiry_time = $pa_config->{"event_expiry_time"};
my $expiry_window = $pa_config->{"event_expiry_window"};
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
my $time_ref = time ();
my $expiry_limit = $time_ref - $expiry_time;
my $expiry_window = $time_ref - $expiry_window;
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
}
}
# Pandora self monitoring
if (defined($pa_config->{"self_monitoring"})
&& $pa_config->{"self_monitoring"} == 1
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
pandora_self_monitoring ($pa_config, $dbh);
}
# Avoid counter overflow
if ($counter > 10000){
$counter = 0;
}
else {
$counter++;
}
}
else {
$counter++;
# Do an additional sleep if we are not the master server
sleep ($pa_config->{'server_threshold'});
}
};
@ -518,7 +525,7 @@ sub main() {
pandora_startup ();
# Start thread to execute server tasks on the master server
threads->create('pandora_server_tasks', (\%Config))->detach() if ($Config{"pandora_master"} == 1);
threads->create('pandora_server_tasks', (\%Config))->detach();
# Generate 'going up' events
foreach my $server (@Servers) {
@ -568,9 +575,11 @@ sub main() {
$server->update();
}
# Not needed. The console assumes a server is down if it has not updated its status in the last 15 minutes.
## Update fallen servers
#db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'})));
# Update fallen servers
db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'})));
# Set the master server
pandora_set_master(\%Config, $DBH);
};
# Restart on error or auto restart

@ -850,8 +850,8 @@ sub pandora_load_config {
if ($pa_config->{"pandora_check"} == 1) {
print " [*] MD5 Security enabled.\n";
}
if ($pa_config->{"pandora_master"} == 1) {
print " [*] This server is running in MASTER mode.\n";
if ($pa_config->{"pandora_master"} != 0) {
print " [*] This server is running with MASTER priority " . $pa_config->{"pandora_master"} . "\n";
}
}

@ -165,6 +165,7 @@ our @EXPORT = qw(
pandora_get_module_phone_tags
pandora_get_module_email_tags
pandora_get_os
pandora_is_master
pandora_mark_agent_for_alert_update
pandora_mark_agent_for_module_update
pandora_module_keep_alive
@ -189,6 +190,7 @@ our @EXPORT = qw(
pandora_reset_server
pandora_server_keep_alive
pandora_set_event_storm_protection
pandora_set_master
pandora_update_agent
pandora_update_agent_address
pandora_update_agent_alert_count
@ -225,6 +227,9 @@ our @AlertStatus = ('Execute the alert', 'Do not execute the alert', 'Do not exe
# Event storm protection (no alerts or events)
our $EventStormProtection :shared = 0;
# Current master server
my $Master :shared = 0;
##########################################################################
# Return the agent given the IP address.
##########################################################################
@ -4221,6 +4226,43 @@ sub pandora_self_monitoring ($$) {
close (XMLFILE);
}
##########################################################################

=head2 C<< pandora_set_master (I<$pa_config>, I<$dbh>) >>

Set the current master server.

Queries C<tserver> for the running server (C<status = 1>) with the highest
non-zero C<master> priority and stores its name in the shared C<$Master>
variable, which C<pandora_is_master> compares against.

=over

=item I<$pa_config> - server configuration hash reference (passed to the logger).

=item I<$dbh> - database handle used for the query.

=back

Returns nothing. If the query yields no value, or the master has not
changed, C<$Master> is left untouched and nothing is logged.

=cut

##########################################################################
sub pandora_set_master ($$) {
	my ($pa_config, $dbh) = @_;

	# Several servers may be configured with the same master priority. Break
	# ties by name so that every server resolves the same master on every
	# poll; without a secondary sort key the row returned by LIMIT 1 is
	# unspecified and two servers could each believe they are the master.
	my $current_master = get_db_value ($dbh, 'SELECT name FROM tserver
	                                          WHERE master <> 0 AND status = 1
	                                          ORDER BY master DESC, name ASC LIMIT 1');

	# Nothing to do when no candidate was returned or the master is unchanged.
	return unless defined($current_master) and ($current_master ne $Master);

	logger($pa_config, "Server $current_master is the current master.", 1);
	$Master = $current_master;
}
##########################################################################

=head2 C<< pandora_is_master (I<$pa_config>) >>

Tell whether this server is the current master.

Compares the name stored in C<$Master> with the C<servername> entry of
I<$pa_config>. Returns 1 when they are equal, 0 otherwise.

=cut

##########################################################################
sub pandora_is_master ($) {
	my ($pa_config) = @_;

	# String comparison: $Master holds a server name once a master is set.
	return ($Master eq $pa_config->{'servername'}) ? 1 : 0;
}
##########################################################################
=head2 C<< pandora_module_unknown (I<$pa_config>, I<$dbh>) >>

@ -96,7 +96,7 @@ sub data_producer ($) {
my @rows;
my $network_filter = enterprise_hook ('get_network_filter', [$pa_config]);
if ($pa_config->{'pandora_master'} == 0) {
if (pandora_is_master($pa_config) == 0) {
@rows = get_db_rows ($dbh, 'SELECT tagente_modulo.id_agente_modulo, tagente_modulo.flag, tagente_estado.current_interval + tagente_estado.last_execution_try AS time_left, last_execution_try
FROM tagente, tagente_modulo, tagente_estado
WHERE server_name = ?

@ -96,7 +96,7 @@ sub data_producer ($) {
my @tasks;
my @rows;
if ($pa_config->{'pandora_master'} != 1) {
if (pandora_is_master($pa_config) == 0) {
@rows = get_db_rows ($dbh, 'SELECT tagente_modulo.id_agente_modulo, tagente_modulo.flag, tagente_estado.current_interval + tagente_estado.last_execution_try AS time_left, last_execution_try
FROM tagente, tagente_modulo, tagente_estado
WHERE server_name = ?

@ -92,7 +92,7 @@ sub data_producer ($) {
my @tasks;
my @rows;
if ($pa_config->{'pandora_master'} != 1) {
if (pandora_is_master($pa_config) == 0) {
@rows = get_db_rows ($dbh, 'SELECT tagente_modulo.id_agente_modulo,
tagente_modulo.flag, last_execution_try
FROM tagente, tagente_modulo, tagente_estado

@ -94,7 +94,7 @@ sub data_producer ($) {
my @tasks;
my @rows;
if ($pa_config->{'pandora_master'} != 1) {
if (pandora_is_master($pa_config) == 0) {
@rows = get_db_rows ($dbh, 'SELECT tagente_modulo.id_agente_modulo, tagente_modulo.flag, tagente_estado.current_interval + tagente_estado.last_execution_try AS time_left, last_execution_try
FROM tagente, tagente_modulo, tagente_estado
WHERE server_name = ?