Improve pandora_server_tasks.
* Do not let pandora_server_tasks die.
* Remove a query from self_monitoring that could freeze the pandora_server_tasks thread.
This commit is contained in:
parent
a200362ffe
commit
5106e658c3
|
@ -387,112 +387,114 @@ sub pandora_server_tasks ($) {
|
|||
my $counter = 0;
|
||||
my $first_run = 1;
|
||||
while ($THRRUN == 1) {
|
||||
if (pandora_is_master($pa_config) == 1) {
|
||||
eval {
|
||||
if (pandora_is_master($pa_config) == 1) {
|
||||
|
||||
# TASKS EXECUTED ONCE
|
||||
# -------------------
|
||||
if ($first_run == 1) {
|
||||
$first_run = 0;
|
||||
# TASKS EXECUTED ONCE
|
||||
# -------------------
|
||||
if ($first_run == 1) {
|
||||
$first_run = 0;
|
||||
|
||||
# Update the agent cache.
|
||||
enterprise_hook('update_agent_cache', [\%Config]);
|
||||
}
|
||||
|
||||
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
|
||||
# --------------------------------------------------
|
||||
if (($counter % 5) == 0) {
|
||||
|
||||
# Update forced alerts
|
||||
pandora_exec_forced_alerts ($pa_config, $dbh);
|
||||
|
||||
my @agents = get_db_rows ($dbh, 'SELECT id_agente, update_alert_count FROM tagente WHERE update_alert_count=1');
|
||||
foreach my $agent (@agents) {
|
||||
if ($agent->{'update_alert_count'} == 1) {
|
||||
pandora_update_agent_alert_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
# Update the agent cache.
|
||||
enterprise_hook('update_agent_cache', [\%Config]);
|
||||
}
|
||||
}
|
||||
|
||||
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
|
||||
# ---------------------------------------------------
|
||||
if (($counter % 30) == 0) {
|
||||
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
|
||||
# --------------------------------------------------
|
||||
if (($counter % 5) == 0) {
|
||||
|
||||
# Update module status and fired alert counts
|
||||
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_secondary_groups FROM tagente WHERE (update_module_count=1 OR update_secondary_groups=1)');
|
||||
foreach my $agent (@agents) {
|
||||
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
|
||||
# Update forced alerts
|
||||
pandora_exec_forced_alerts ($pa_config, $dbh);
|
||||
|
||||
if ($agent->{'update_module_count'} == 1) {
|
||||
pandora_update_agent_module_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
|
||||
if ($agent->{'update_secondary_groups'} == 1) {
|
||||
pandora_update_secondary_groups_cache ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
my @agents = get_db_rows ($dbh, 'SELECT id_agente, update_alert_count FROM tagente WHERE update_alert_count=1');
|
||||
foreach my $agent (@agents) {
|
||||
if ($agent->{'update_alert_count'} == 1) {
|
||||
pandora_update_agent_alert_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Keepalive module control.(very DB intensive, not run frecuently
|
||||
pandora_module_keep_alive_nd ($pa_config, $dbh);
|
||||
|
||||
# Set the status of unknown modules
|
||||
pandora_module_unknown ($pa_config, $dbh);
|
||||
|
||||
# Check if an autodisabled agent needs to be autodisable
|
||||
pandora_disable_autodisable_agents ($pa_config, $dbh);
|
||||
}
|
||||
|
||||
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
|
||||
# ----------------------------------------------------
|
||||
if (($counter % 60) == 0) {
|
||||
# Downtimes are executed only 30 x Server Threshold secs
|
||||
pandora_planned_downtime ($pa_config, $dbh);
|
||||
|
||||
# Realtime stats (Only master server!) - ( VERY HEAVY !)
|
||||
# Realtimestats == 1, generated by WEB Console, not by server!
|
||||
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
|
||||
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
|
||||
# ---------------------------------------------------
|
||||
if (($counter % 30) == 0) {
|
||||
|
||||
# Update module status and fired alert counts
|
||||
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_secondary_groups FROM tagente WHERE (update_module_count=1 OR update_secondary_groups=1)');
|
||||
foreach my $agent (@agents) {
|
||||
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
|
||||
|
||||
if ($agent->{'update_module_count'} == 1) {
|
||||
pandora_update_agent_module_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
|
||||
if ($agent->{'update_secondary_groups'} == 1) {
|
||||
pandora_update_secondary_groups_cache ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
}
|
||||
|
||||
# Keepalive module control.(very DB intensive, not run frecuently
|
||||
pandora_module_keep_alive_nd ($pa_config, $dbh);
|
||||
|
||||
# Check if I need to refresh stats
|
||||
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
|
||||
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
|
||||
pandora_group_statistics ($pa_config, $dbh);
|
||||
pandora_server_statistics ($pa_config, $dbh);
|
||||
}
|
||||
# Set the status of unknown modules
|
||||
pandora_module_unknown ($pa_config, $dbh);
|
||||
|
||||
# Check if an autodisabled agent needs to be autodisable
|
||||
pandora_disable_autodisable_agents ($pa_config, $dbh);
|
||||
}
|
||||
|
||||
# Check if snmptrapd is freeze.
|
||||
pandora_snmptrapd_still_working ($pa_config, $dbh);
|
||||
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
|
||||
# ----------------------------------------------------
|
||||
if (($counter % 60) == 0) {
|
||||
# Downtimes are executed only 30 x Server Threshold secs
|
||||
pandora_planned_downtime ($pa_config, $dbh);
|
||||
|
||||
# Realtime stats (Only master server!) - ( VERY HEAVY !)
|
||||
# Realtimestats == 1, generated by WEB Console, not by server!
|
||||
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
|
||||
|
||||
# Check if I need to refresh stats
|
||||
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
|
||||
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
|
||||
pandora_group_statistics ($pa_config, $dbh);
|
||||
pandora_server_statistics ($pa_config, $dbh);
|
||||
}
|
||||
}
|
||||
|
||||
# Check if snmptrapd is freeze.
|
||||
pandora_snmptrapd_still_working ($pa_config, $dbh);
|
||||
|
||||
# Event auto-expiry
|
||||
my $expiry_time = $pa_config->{"event_expiry_time"};
|
||||
my $expiry_window = $pa_config->{"event_expiry_window"};
|
||||
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
|
||||
my $time_ref = time ();
|
||||
my $expiry_limit = $time_ref - $expiry_time;
|
||||
my $expiry_window = $time_ref - $expiry_window;
|
||||
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
|
||||
# Event auto-expiry
|
||||
my $expiry_time = $pa_config->{"event_expiry_time"};
|
||||
my $expiry_window = $pa_config->{"event_expiry_window"};
|
||||
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
|
||||
my $time_ref = time ();
|
||||
my $expiry_limit = $time_ref - $expiry_time;
|
||||
my $expiry_window = $time_ref - $expiry_window;
|
||||
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# COMMON TASKS (master and non-master)
|
||||
# ---------------------------------------------------------------
|
||||
if (($counter % 30) == 0) {
|
||||
# Update configuration options from the console.
|
||||
pandora_get_sharedconfig ($pa_config, $dbh);
|
||||
# COMMON TASKS (master and non-master)
|
||||
# ---------------------------------------------------------------
|
||||
if (($counter % 30) == 0) {
|
||||
# Update configuration options from the console.
|
||||
pandora_get_sharedconfig ($pa_config, $dbh);
|
||||
|
||||
# Rotate the log file.
|
||||
pandora_rotate_logfile($pa_config);
|
||||
|
||||
# Set event storm protection
|
||||
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
|
||||
}
|
||||
# Pandora self monitoring
|
||||
if (defined($pa_config->{"self_monitoring"})
|
||||
&& $pa_config->{"self_monitoring"} == 1
|
||||
&& !is_metaconsole($pa_config)
|
||||
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
|
||||
pandora_self_monitoring ($pa_config, $dbh);
|
||||
}
|
||||
# Rotate the log file.
|
||||
pandora_rotate_logfile($pa_config);
|
||||
|
||||
# Set event storm protection
|
||||
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
|
||||
}
|
||||
# Pandora self monitoring
|
||||
if (defined($pa_config->{"self_monitoring"})
|
||||
&& $pa_config->{"self_monitoring"} == 1
|
||||
&& !is_metaconsole($pa_config)
|
||||
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
|
||||
pandora_self_monitoring ($pa_config, $dbh);
|
||||
}
|
||||
};
|
||||
|
||||
# Avoid counter overflow
|
||||
if ($counter >= ~0){
|
||||
|
|
|
@ -6005,10 +6005,6 @@ sub pandora_self_monitoring ($$) {
|
|||
$pandoradb = 1;
|
||||
}
|
||||
|
||||
my $start_performance = time;
|
||||
get_db_value($dbh, "SELECT COUNT(*) FROM tagente_datos");
|
||||
my $read_speed = int((time - $start_performance) * 1e6);
|
||||
|
||||
my $elasticsearch_perfomance = enterprise_hook("elasticsearch_performance", [$pa_config, $dbh]);
|
||||
|
||||
$xml_output .= $elasticsearch_perfomance if defined($elasticsearch_perfomance);
|
||||
|
@ -6055,13 +6051,6 @@ sub pandora_self_monitoring ($$) {
|
|||
$xml_output .=" </module>";
|
||||
}
|
||||
|
||||
$xml_output .=" <module>";
|
||||
$xml_output .=" <name>Execution_Time</name>";
|
||||
$xml_output .=" <type>generic_data</type>";
|
||||
$xml_output .=" <unit>us</unit>";
|
||||
$xml_output .=" <data>$read_speed</data>";
|
||||
$xml_output .=" </module>";
|
||||
|
||||
$xml_output .= "</agent_data>";
|
||||
|
||||
my $filename = $pa_config->{"incomingdir"}."/".$pa_config->{'servername'}.".self.".$utimestamp.".data";
|
||||
|
|
Loading…
Reference in New Issue