From 658f0c86207187c9d549d780338b44addbe1afbe Mon Sep 17 00:00:00 2001 From: ramonn Date: Wed, 17 Apr 2013 14:58:50 +0000 Subject: [PATCH] 2013-04-17 Ramon Novoa * lib/PandoraFMS/Server.pm, bin/pandora_server: Moved server tasks to their own thread. Fixes bug #3609450. git-svn-id: https://svn.code.sf.net/p/pandora/code/trunk@8003 c3f86ba8-e40f-0410-aaad-9ba5e7f4b01f --- pandora_server/ChangeLog | 6 + pandora_server/bin/pandora_server | 190 +++++++++++++----------- pandora_server/lib/PandoraFMS/Server.pm | 6 + 3 files changed, 113 insertions(+), 89 deletions(-) diff --git a/pandora_server/ChangeLog b/pandora_server/ChangeLog index 2d595d530d..0512f69e7c 100644 --- a/pandora_server/ChangeLog +++ b/pandora_server/ChangeLog @@ -1,3 +1,9 @@ +2013-04-17 Ramon Novoa + + * lib/PandoraFMS/Server.pm, + bin/pandora_server: Moved server tasks to their own thread. Fixes + bug #3609450. + 2013-04-17 Ramon Novoa * lib/PandoraFMS/WMIServer.pm: Style fix. diff --git a/pandora_server/bin/pandora_server b/pandora_server/bin/pandora_server index cdf596b194..eae15619f9 100755 --- a/pandora_server/bin/pandora_server +++ b/pandora_server/bin/pandora_server @@ -232,6 +232,94 @@ sub pandora_stop_netflow_daemon () { return 0; } +######################################################################################## +# Additional tasks executed periodically by the Pandora FMS Server +######################################################################################## +sub pandora_server_tasks ($) { + my ($pa_config) = @_; + + # Get the console DB connection + my $dbh = db_connect ($pa_config->{'dbengine'}, $pa_config->{'dbname'}, $pa_config->{'dbhost'}, $pa_config->{'dbport'}, + $pa_config->{'dbuser'}, $pa_config->{'dbpass'}); + + my $counter = 0; + while (1) { + + # TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks) + # -------------------------------------------------- + if (($counter % 5) == 0) { + + # Update forced alerts + pandora_exec_forced_alerts ($pa_config, $dbh); + } + + # TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks) + # --------------------------------------------------- + if (($counter % 30) == 0) { + + # Update module status and fired alert counts + my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_alert_count FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1)'); + foreach my $agent (@agents) { + logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10); + + if ($agent->{'update_module_count'} == 1) { + pandora_update_agent_module_count ($dbh, $agent->{'id_agente'}); + } + + if ($agent->{'update_alert_count'} == 1) { + pandora_update_agent_alert_count ($dbh, $agent->{'id_agente'}); + } + } + + # Keepalive module control.(very DB intensive, not run frecuently + pandora_module_keep_alive_nd ($pa_config, $dbh); + + # Set the status of unknown modules + pandora_module_unknown ($pa_config, $dbh); + + # Set event storm protection + pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0)); + } + + # TASKS EXECUTED EVERY 60 SECONDS (High latency tasks) + # ---------------------------------------------------- + if (($counter % 60) == 0) { + # Downtimes are executed only 30 x Server Threshold secs + pandora_planned_downtime ($pa_config, $dbh); + + # Realtime stats (Only master server!) - ( VERY HEAVY !) + # Realtimestats == 1, generated by WEB Console, not by server! + if ($pa_config->{"pandora_master"} == 1 + && defined($pa_config->{"realtimestats"}) + && $pa_config->{"realtimestats"} == 0){ + + # Check if I need to refresh stats + my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat"); + if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){ + pandora_group_statistics ($pa_config, $dbh); + pandora_server_statistics ($pa_config, $dbh); + } + } + + # Pandora self monitoring + if (defined($pa_config->{"self_monitoring"}) + && $pa_config->{"self_monitoring"} == 1){ + pandora_self_monitoring ($pa_config, $dbh); + } + } + + # Avoid counter overflow + if ($counter > 10000){ + $counter = 0; + } + else { + $counter++; + } + + sleep (1); + } +} + $SIG{'TERM'} = 'pandora_shutdown'; $SIG{'INT'} = 'pandora_shutdown'; @@ -259,18 +347,21 @@ if (enterprise_load (\%Config) == 0) { if($Config{'policy_manager'} == 1) { # Start thread to patrol policy queue - my $thr_policy_queue = threads->create('pandora_process_policy_queue', (\%Config)); + threads->create('pandora_process_policy_queue', (\%Config))->detach(); } if($Config{'event_replication'} == 1) { # Start thread to process event replication - my $thr_event_replication = threads->create('pandora_process_event_replication', (\%Config)); + threads->create('pandora_process_event_replication', (\%Config))->detach(); } } # Start the servers pandora_startup (); +# Start thread to execute server tasks +threads->create('pandora_server_tasks', (\%Config))->detach(); + # Generate 'going up' events foreach my $server (@Servers) { $server->upEvent (); @@ -278,89 +369,19 @@ foreach my $server (@Servers) { # Main loop my $time_ref = time (); -my $counter = 0; - while (1) { eval { - # TASKS DONE EACH 5 SECONDS (Low latency tasks) - # --------------------------------------------- - # Server status update each 5 seconds - # Neightbourhood problem detection each 5 seconds - # Forced alerts each 5 seconds as well - if (($counter % 5) == 0) { - - # Update server status - foreach my $server (@Servers) { - die ($server->getErrStr ()) unless ($server->checkThreads () == 1); - $server->update (); - } - - # Update fallen servers - db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'}))); - - # Update forced alerts - pandora_exec_forced_alerts (\%Config, $DBH); - + # Update server status + foreach my $server (@Servers) { + die ($server->getErrStr ()) unless ($server->checkThreads () == 1); + $server->update(); } - - # TASKS DONE EACH 30 SECONDS (Med latency tasks) - # --------------------------------------------- - # Tasks executed only each Server Threshold x 30 secs, for low-priority tasks - if (($counter % 30) == 0) { - # Update module status and fired alert counts - my @agents = get_db_rows ($DBH, 'SELECT id_agente, nombre, update_module_count, update_alert_count FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1)'); - foreach my $agent (@agents) { - logger (\%Config, "Updating module status and alert fired counts for agent " . $agent->{'nombre'}, 10); - - if ($agent->{'update_module_count'} == 1) { - pandora_update_agent_module_count ($DBH, $agent->{'id_agente'}); - } - - if ($agent->{'update_alert_count'} == 1) { - pandora_update_agent_alert_count ($DBH, $agent->{'id_agente'}); - } - - } - - # Keepalive module control.(very DB intensive, not run frecuently - pandora_module_keep_alive_nd (\%Config, $DBH); - - # Set the status of unknown modules - pandora_module_unknown (\%Config, $DBH); - - # Set event storm protection - pandora_set_event_storm_protection (pandora_get_tconfig_token ($DBH, 'event_storm_protection', 0)); - } - - # TASKS DONE EACH 60 SECONDS (Low latency tasks) - # --------------------------------------------- - if (($counter % 60) == 0) { - # Downtimes are executed only 30 x Server Threshold secs - pandora_planned_downtime (\%Config, $DBH); - - # Realtime stats (Only master server!) - ( VERY HEAVY !) - # Realtimestats == 1, generated by WEB Console, not by server! - if ($Config{"pandora_master"} == 1 - && defined($Config{"realtimestats"}) - && $Config{"realtimestats"} == 0){ - - # Check if I need to refresh stats - my $last_execution_stats = get_db_value ($DBH, "SELECT MAX(utimestamp) FROM tgroup_stat"); - if (!defined($last_execution_stats) || $last_execution_stats < (time() - $Config{"stats_interval"})){ - pandora_group_statistics (\%Config, $DBH); - pandora_server_statistics (\%Config, $DBH); - } - } - - # Pandora self monitoring - if (defined($Config{"self_monitoring"}) - && $Config{"self_monitoring"} == 1){ - pandora_self_monitoring (\%Config, $DBH); - } - } + # Not needed. The console assumes a server is down if it has not updated its status in the last 15 minutes. + ## Update fallen servers + #db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'}))); }; # Restart on error or auto restart @@ -394,15 +415,6 @@ while (1) { close (OLDOUT); } - # Avoid counter overflow - if ($counter > 10000){ - $counter = 0; - } - else { - $counter++; - } - threads->yield; - sleep (1); - + sleep ($Config{'server_threshold'}); } diff --git a/pandora_server/lib/PandoraFMS/Server.pm b/pandora_server/lib/PandoraFMS/Server.pm index 3c04845e12..3a239908cf 100644 --- a/pandora_server/lib/PandoraFMS/Server.pm +++ b/pandora_server/lib/PandoraFMS/Server.pm @@ -215,6 +215,12 @@ sub checkThreads ($) { foreach my $tid (@{$self->{'_threads'}}) { my $thr = threads->object ($tid); + + # May happen when the server is killed + if (! defined ($thr)) { + next; + } + return 1 unless $thr->can ('is_running'); return 0 unless $thr->is_running (); }