Improve pandora_server_tasks.

* Do not let pandora_server_tasks die.
* Remove a query from self_monitoring that could freeze the
  pandora_server_tasks thread.
This commit is contained in:
Ramon Novoa 2023-02-02 11:49:09 +01:00
parent a200362ffe
commit 5106e658c3
2 changed files with 91 additions and 100 deletions

View File

@ -387,112 +387,114 @@ sub pandora_server_tasks ($) {
my $counter = 0;
my $first_run = 1;
while ($THRRUN == 1) {
if (pandora_is_master($pa_config) == 1) {
eval {
if (pandora_is_master($pa_config) == 1) {
# TASKS EXECUTED ONCE
# -------------------
if ($first_run == 1) {
$first_run = 0;
# TASKS EXECUTED ONCE
# -------------------
if ($first_run == 1) {
$first_run = 0;
# Update the agent cache.
enterprise_hook('update_agent_cache', [\%Config]);
}
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
# --------------------------------------------------
if (($counter % 5) == 0) {
# Update forced alerts
pandora_exec_forced_alerts ($pa_config, $dbh);
my @agents = get_db_rows ($dbh, 'SELECT id_agente, update_alert_count FROM tagente WHERE update_alert_count=1');
foreach my $agent (@agents) {
if ($agent->{'update_alert_count'} == 1) {
pandora_update_agent_alert_count ($pa_config, $dbh, $agent->{'id_agente'});
}
# Update the agent cache.
enterprise_hook('update_agent_cache', [\%Config]);
}
}
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
# ---------------------------------------------------
if (($counter % 30) == 0) {
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
# --------------------------------------------------
if (($counter % 5) == 0) {
# Update module status and fired alert counts
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_secondary_groups FROM tagente WHERE (update_module_count=1 OR update_secondary_groups=1)');
foreach my $agent (@agents) {
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
# Update forced alerts
pandora_exec_forced_alerts ($pa_config, $dbh);
if ($agent->{'update_module_count'} == 1) {
pandora_update_agent_module_count ($pa_config, $dbh, $agent->{'id_agente'});
}
if ($agent->{'update_secondary_groups'} == 1) {
pandora_update_secondary_groups_cache ($pa_config, $dbh, $agent->{'id_agente'});
my @agents = get_db_rows ($dbh, 'SELECT id_agente, update_alert_count FROM tagente WHERE update_alert_count=1');
foreach my $agent (@agents) {
if ($agent->{'update_alert_count'} == 1) {
pandora_update_agent_alert_count ($pa_config, $dbh, $agent->{'id_agente'});
}
}
}
# Keepalive module control.(very DB intensive, not run frecuently
pandora_module_keep_alive_nd ($pa_config, $dbh);
# Set the status of unknown modules
pandora_module_unknown ($pa_config, $dbh);
# Check if an autodisabled agent needs to be autodisable
pandora_disable_autodisable_agents ($pa_config, $dbh);
}
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
# ----------------------------------------------------
if (($counter % 60) == 0) {
# Downtimes are executed only 30 x Server Threshold secs
pandora_planned_downtime ($pa_config, $dbh);
# Realtime stats (Only master server!) - ( VERY HEAVY !)
# Realtimestats == 1, generated by WEB Console, not by server!
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
# ---------------------------------------------------
if (($counter % 30) == 0) {
# Update module status and fired alert counts
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_secondary_groups FROM tagente WHERE (update_module_count=1 OR update_secondary_groups=1)');
foreach my $agent (@agents) {
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
if ($agent->{'update_module_count'} == 1) {
pandora_update_agent_module_count ($pa_config, $dbh, $agent->{'id_agente'});
}
if ($agent->{'update_secondary_groups'} == 1) {
pandora_update_secondary_groups_cache ($pa_config, $dbh, $agent->{'id_agente'});
}
}
# Keepalive module control.(very DB intensive, not run frecuently
pandora_module_keep_alive_nd ($pa_config, $dbh);
# Check if I need to refresh stats
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
pandora_group_statistics ($pa_config, $dbh);
pandora_server_statistics ($pa_config, $dbh);
}
# Set the status of unknown modules
pandora_module_unknown ($pa_config, $dbh);
# Check if an autodisabled agent needs to be autodisable
pandora_disable_autodisable_agents ($pa_config, $dbh);
}
# Check if snmptrapd is freeze.
pandora_snmptrapd_still_working ($pa_config, $dbh);
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
# ----------------------------------------------------
if (($counter % 60) == 0) {
# Downtimes are executed only 30 x Server Threshold secs
pandora_planned_downtime ($pa_config, $dbh);
# Realtime stats (Only master server!) - ( VERY HEAVY !)
# Realtimestats == 1, generated by WEB Console, not by server!
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
# Check if I need to refresh stats
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
pandora_group_statistics ($pa_config, $dbh);
pandora_server_statistics ($pa_config, $dbh);
}
}
# Check if snmptrapd is freeze.
pandora_snmptrapd_still_working ($pa_config, $dbh);
# Event auto-expiry
my $expiry_time = $pa_config->{"event_expiry_time"};
my $expiry_window = $pa_config->{"event_expiry_window"};
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
my $time_ref = time ();
my $expiry_limit = $time_ref - $expiry_time;
my $expiry_window = $time_ref - $expiry_window;
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
# Event auto-expiry
my $expiry_time = $pa_config->{"event_expiry_time"};
my $expiry_window = $pa_config->{"event_expiry_window"};
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
my $time_ref = time ();
my $expiry_limit = $time_ref - $expiry_time;
my $expiry_window = $time_ref - $expiry_window;
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
}
}
}
}
# COMMON TASKS (master and non-master)
# ---------------------------------------------------------------
if (($counter % 30) == 0) {
# Update configuration options from the console.
pandora_get_sharedconfig ($pa_config, $dbh);
# COMMON TASKS (master and non-master)
# ---------------------------------------------------------------
if (($counter % 30) == 0) {
# Update configuration options from the console.
pandora_get_sharedconfig ($pa_config, $dbh);
# Rotate the log file.
pandora_rotate_logfile($pa_config);
# Set event storm protection
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
}
# Pandora self monitoring
if (defined($pa_config->{"self_monitoring"})
&& $pa_config->{"self_monitoring"} == 1
&& !is_metaconsole($pa_config)
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
pandora_self_monitoring ($pa_config, $dbh);
}
# Rotate the log file.
pandora_rotate_logfile($pa_config);
# Set event storm protection
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
}
# Pandora self monitoring
if (defined($pa_config->{"self_monitoring"})
&& $pa_config->{"self_monitoring"} == 1
&& !is_metaconsole($pa_config)
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
pandora_self_monitoring ($pa_config, $dbh);
}
};
# Avoid counter overflow
if ($counter >= ~0){

View File

@ -6005,10 +6005,6 @@ sub pandora_self_monitoring ($$) {
$pandoradb = 1;
}
my $start_performance = time;
get_db_value($dbh, "SELECT COUNT(*) FROM tagente_datos");
my $read_speed = int((time - $start_performance) * 1e6);
my $elasticsearch_perfomance = enterprise_hook("elasticsearch_performance", [$pa_config, $dbh]);
$xml_output .= $elasticsearch_perfomance if defined($elasticsearch_perfomance);
@ -6055,13 +6051,6 @@ sub pandora_self_monitoring ($$) {
$xml_output .=" </module>";
}
$xml_output .=" <module>";
$xml_output .=" <name>Execution_Time</name>";
$xml_output .=" <type>generic_data</type>";
$xml_output .=" <unit>us</unit>";
$xml_output .=" <data>$read_speed</data>";
$xml_output .=" </module>";
$xml_output .= "</agent_data>";
my $filename = $pa_config->{"incomingdir"}."/".$pa_config->{'servername'}.".self.".$utimestamp.".data";