Check and restart server threads if necessary.
Ref pandora_enterprise#2593.
This commit is contained in:
parent
506beefca4
commit
9a9956296f
|
@ -101,8 +101,21 @@ sub pandora_startup () {
|
|||
# Generate the encryption key after reading the passphrase.
|
||||
$Config{"encryption_key"} = enterprise_hook('pandora_get_encryption_key', [\%Config, $Config{"encryption_passphrase"}]);
|
||||
|
||||
# Update the agent cache.
|
||||
threads->create('enterprise_hook', ('update_agent_cache', [\%Config]))->detach() if ($Config{'node_metaconsole'} == 1);
|
||||
# Kill any running server threads.
|
||||
stop_server_threads();
|
||||
|
||||
# Start the task execution thread.
|
||||
start_server_thread(\&pandora_server_tasks, [\%Config]);
|
||||
|
||||
# Start the policy queue thread.
|
||||
start_server_thread(\&pandora_process_policy_queue, [\%Config]) if ($Config{'__enterprise_enabled'} == 1 && $Config{'policy_manager'} == 1);
|
||||
|
||||
# Start the event replication thread. Do not start with start_server_thread, this thread may exit on its own.
|
||||
threads->create(\&pandora_process_event_replication, [\%Config]) if($Config{'__enterprise_enabled'} == 1 && $Config{'event_replication'} == 1);
|
||||
|
||||
# Update the agent cache. Do not start with start_server_thread, this thread updates the agent cache and exits.
|
||||
threads->create(\&enterprise_hook, ['update_agent_cache', [\%Config]])->detach() if ($Config{'node_metaconsole'} == 1);
|
||||
|
||||
pandora_audit (\%Config, $Config{'rb_product_name'} . ' Server Daemon starting', 'SYSTEM', 'System', $DBH);
|
||||
|
||||
# Load servers
|
||||
|
@ -143,9 +156,11 @@ sub pandora_restart (;$) {
|
|||
my $sleep_time = @_ > 0 ? $_[0] : $Config{'restart_delay'};
|
||||
|
||||
# Stop the servers
|
||||
foreach my $server (@Servers) {
|
||||
$server->stop ();
|
||||
}
|
||||
eval {
|
||||
foreach my $server (@Servers) {
|
||||
$server->stop ();
|
||||
}
|
||||
};
|
||||
|
||||
# Remove the servers
|
||||
while (pop (@Servers)) {};
|
||||
|
@ -274,111 +289,110 @@ sub pandora_server_tasks ($) {
|
|||
# Get the console DB connection
|
||||
my $dbh = db_connect ($pa_config->{'dbengine'}, $pa_config->{'dbname'}, $pa_config->{'dbhost'}, $pa_config->{'dbport'},
|
||||
$pa_config->{'dbuser'}, $pa_config->{'dbpass'});
|
||||
|
||||
my $counter = 0;
|
||||
while ($RUN == 1) {
|
||||
eval{
|
||||
if (pandora_is_master($pa_config) == 1) {
|
||||
while ($THRRUN == 1) {
|
||||
if (pandora_is_master($pa_config) == 1) {
|
||||
|
||||
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
|
||||
# --------------------------------------------------
|
||||
if (($counter % 5) == 0) {
|
||||
# TASKS EXECUTED EVERY 5 SECONDS (Low latency tasks)
|
||||
# --------------------------------------------------
|
||||
if (($counter % 5) == 0) {
|
||||
|
||||
# Update forced alerts
|
||||
pandora_exec_forced_alerts ($pa_config, $dbh);
|
||||
}
|
||||
|
||||
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
|
||||
# ---------------------------------------------------
|
||||
if (($counter % 30) == 0) {
|
||||
|
||||
# Update module status and fired alert counts
|
||||
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_alert_count, update_secondary_groups FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1 OR update_secondary_groups=1)');
|
||||
foreach my $agent (@agents) {
|
||||
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
|
||||
|
||||
if ($agent->{'update_module_count'} == 1) {
|
||||
pandora_update_agent_module_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
|
||||
if ($agent->{'update_alert_count'} == 1) {
|
||||
pandora_update_agent_alert_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
|
||||
if ($agent->{'update_secondary_groups'} == 1) {
|
||||
pandora_update_secondary_groups_cache ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
}
|
||||
|
||||
# Keepalive module control (very DB intensive, not run frequently).
|
||||
pandora_module_keep_alive_nd ($pa_config, $dbh);
|
||||
|
||||
# Set the status of unknown modules
|
||||
pandora_module_unknown ($pa_config, $dbh);
|
||||
|
||||
# Check if an autodisabled agent needs to be autodisable
|
||||
pandora_disable_autodisable_agents ($pa_config, $dbh);
|
||||
}
|
||||
|
||||
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
|
||||
# ----------------------------------------------------
|
||||
if (($counter % 60) == 0) {
|
||||
# Downtimes are executed only 30 x Server Threshold secs
|
||||
pandora_planned_downtime ($pa_config, $dbh);
|
||||
|
||||
# Realtime stats (Only master server!) - ( VERY HEAVY !)
|
||||
# Realtimestats == 1, generated by WEB Console, not by server!
|
||||
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
|
||||
|
||||
# Check if I need to refresh stats
|
||||
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
|
||||
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
|
||||
pandora_group_statistics ($pa_config, $dbh);
|
||||
pandora_server_statistics ($pa_config, $dbh);
|
||||
}
|
||||
}
|
||||
|
||||
# Event auto-expiry
|
||||
my $expiry_time = $pa_config->{"event_expiry_time"};
|
||||
my $expiry_window = $pa_config->{"event_expiry_window"};
|
||||
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
|
||||
my $time_ref = time ();
|
||||
my $expiry_limit = $time_ref - $expiry_time;
|
||||
my $expiry_window = $time_ref - $expiry_window;
|
||||
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
|
||||
}
|
||||
}
|
||||
# Update forced alerts
|
||||
pandora_exec_forced_alerts ($pa_config, $dbh);
|
||||
}
|
||||
|
||||
# COMMON TASKS (master and non-master)
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
# Rotate Log File
|
||||
|
||||
# TASKS EXECUTED EVERY 30 SECONDS (Mid latency tasks)
|
||||
# ---------------------------------------------------
|
||||
if (($counter % 30) == 0) {
|
||||
pandora_rotate_logfile($pa_config);
|
||||
|
||||
# Set event storm protection
|
||||
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
|
||||
}
|
||||
# Pandora self monitoring
|
||||
if (defined($pa_config->{"self_monitoring"})
|
||||
&& $pa_config->{"self_monitoring"} == 1
|
||||
&& !is_metaconsole($pa_config)
|
||||
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
|
||||
pandora_self_monitoring ($pa_config, $dbh);
|
||||
}
|
||||
|
||||
# Avoid counter overflow
|
||||
if ($counter > 10000){
|
||||
$counter = 0;
|
||||
# Update module status and fired alert counts
|
||||
my @agents = get_db_rows ($dbh, 'SELECT id_agente, nombre, update_module_count, update_alert_count, update_secondary_groups FROM tagente WHERE disabled = 0 AND (update_module_count=1 OR update_alert_count=1 OR update_secondary_groups=1)');
|
||||
foreach my $agent (@agents) {
|
||||
logger ($pa_config, "Updating module status and fired alert counts for agent " . $agent->{'nombre'}, 10);
|
||||
|
||||
if ($agent->{'update_module_count'} == 1) {
|
||||
pandora_update_agent_module_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
|
||||
if ($agent->{'update_alert_count'} == 1) {
|
||||
pandora_update_agent_alert_count ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
|
||||
if ($agent->{'update_secondary_groups'} == 1) {
|
||||
pandora_update_secondary_groups_cache ($pa_config, $dbh, $agent->{'id_agente'});
|
||||
}
|
||||
}
|
||||
|
||||
# Keepalive module control (very DB intensive, not run frequently).
|
||||
pandora_module_keep_alive_nd ($pa_config, $dbh);
|
||||
|
||||
# Set the status of unknown modules
|
||||
pandora_module_unknown ($pa_config, $dbh);
|
||||
|
||||
# Check if an autodisabled agent needs to be autodisable
|
||||
pandora_disable_autodisable_agents ($pa_config, $dbh);
|
||||
}
|
||||
else {
|
||||
$counter++;
|
||||
|
||||
# TASKS EXECUTED EVERY 60 SECONDS (High latency tasks)
|
||||
# ----------------------------------------------------
|
||||
if (($counter % 60) == 0) {
|
||||
# Downtimes are executed only 30 x Server Threshold secs
|
||||
pandora_planned_downtime ($pa_config, $dbh);
|
||||
|
||||
# Realtime stats (Only master server!) - ( VERY HEAVY !)
|
||||
# Realtimestats == 1, generated by WEB Console, not by server!
|
||||
if (defined($pa_config->{"realtimestats"}) && $pa_config->{"realtimestats"} == 0){
|
||||
|
||||
# Check if I need to refresh stats
|
||||
my $last_execution_stats = get_db_value ($dbh, "SELECT MAX(utimestamp) FROM tgroup_stat");
|
||||
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $pa_config->{"stats_interval"})){
|
||||
pandora_group_statistics ($pa_config, $dbh);
|
||||
pandora_server_statistics ($pa_config, $dbh);
|
||||
}
|
||||
}
|
||||
|
||||
# Event auto-expiry
|
||||
my $expiry_time = $pa_config->{"event_expiry_time"};
|
||||
my $expiry_window = $pa_config->{"event_expiry_window"};
|
||||
if ($expiry_time > 0 && $expiry_window > 0 && $expiry_window > $expiry_time) {
|
||||
my $time_ref = time ();
|
||||
my $expiry_limit = $time_ref - $expiry_time;
|
||||
my $expiry_window = $time_ref - $expiry_window;
|
||||
db_do ($dbh, 'UPDATE tevento SET estado=1, ack_utimestamp=? WHERE estado=0 AND utimestamp < ? AND utimestamp > ?', $time_ref, $expiry_limit, $expiry_window);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
# COMMON TASKS (master and non-master)
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
# Rotate Log File
|
||||
if (($counter % 30) == 0) {
|
||||
pandora_rotate_logfile($pa_config);
|
||||
|
||||
# Set event storm protection
|
||||
pandora_set_event_storm_protection (pandora_get_tconfig_token ($dbh, 'event_storm_protection', 0));
|
||||
}
|
||||
# Pandora self monitoring
|
||||
if (defined($pa_config->{"self_monitoring"})
|
||||
&& $pa_config->{"self_monitoring"} == 1
|
||||
&& !is_metaconsole($pa_config)
|
||||
&& $counter % $pa_config->{'self_monitoring_interval'} == 0) {
|
||||
pandora_self_monitoring ($pa_config, $dbh);
|
||||
}
|
||||
|
||||
# Avoid counter overflow
|
||||
if ($counter > 10000){
|
||||
$counter = 0;
|
||||
}
|
||||
else {
|
||||
$counter++;
|
||||
}
|
||||
|
||||
sleep (1);
|
||||
}
|
||||
|
||||
db_disconnect($dbh);
|
||||
}
|
||||
|
||||
################################################################################
|
||||
|
@ -525,21 +539,13 @@ sub main() {
|
|||
|
||||
# Load enterprise module
|
||||
if (enterprise_load (\%Config) == 0) {
|
||||
$Config{'__enterprise_enabled'} = 0;
|
||||
print_message (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
|
||||
logger (\%Config, " [*] Pandora FMS Enterprise module not available.", 1);
|
||||
} else {
|
||||
$Config{'__enterprise_enabled'} = 1;
|
||||
print_message (\%Config, " [*] " . pandora_get_initial_product_name() . " Enterprise module loaded.", 1);
|
||||
logger (\%Config, " [*] " . pandora_get_initial_product_name() . " Enterprise module loaded.", 1);
|
||||
|
||||
if($Config{'policy_manager'} == 1) {
|
||||
# Start thread to patrol policy queue
|
||||
threads->create('pandora_process_policy_queue', (\%Config))->detach();
|
||||
}
|
||||
|
||||
if($Config{'event_replication'} == 1) {
|
||||
# Start thread to process event replication
|
||||
threads->create('pandora_process_event_replication', (\%Config))->detach();
|
||||
}
|
||||
}
|
||||
|
||||
# Save the start time for warmup intervals.
|
||||
|
@ -559,9 +565,6 @@ sub main() {
|
|||
pandora_event (\%Config, "Warmup mode for events started.", 0, 0, 0, 0, 0, 'system', 0, $DBH);
|
||||
}
|
||||
|
||||
# Start thread to execute server tasks on the master server
|
||||
threads->create('pandora_server_tasks', (\%Config))->detach();
|
||||
|
||||
# Generate 'going up' events
|
||||
foreach my $server (@Servers) {
|
||||
$server->upEvent ();
|
||||
|
@ -612,6 +615,9 @@ sub main() {
|
|||
$server->update();
|
||||
}
|
||||
|
||||
# Make sure all server threads are running.
|
||||
die("Server thread crashed.") unless (check_server_threads() == 1);
|
||||
|
||||
db_do ($DBH,
|
||||
"UPDATE tserver SET status = 0
|
||||
WHERE UNIX_TIMESTAMP(now())-UNIX_TIMESTAMP(keepalive) > 2*server_keepalive"
|
||||
|
|
|
@ -4557,7 +4557,7 @@ sub pandora_process_event_replication ($) {
|
|||
|
||||
logger($pa_config, "Starting replication events process.", 1);
|
||||
|
||||
while(1) {
|
||||
while($THRRUN == 1) {
|
||||
|
||||
# If we are not the master server sleep and check again.
|
||||
if (pandora_is_master($pa_config) == 0) {
|
||||
|
@ -4569,6 +4569,8 @@ sub pandora_process_event_replication ($) {
|
|||
sleep ($replication_interval);
|
||||
enterprise_hook('pandora_replicate_copy_events',[$pa_config, $dbh, $dbh_metaconsole, $metaconsole_server_id, $replication_mode]);
|
||||
}
|
||||
|
||||
db_disconnect($dbh);
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
|
@ -4588,7 +4590,7 @@ sub pandora_process_policy_queue ($) {
|
|||
|
||||
logger($pa_config, "Starting policy queue patrol process.", 1);
|
||||
|
||||
while(1) {
|
||||
while($THRRUN == 1) {
|
||||
|
||||
# If we are not the master server sleep and check again.
|
||||
if (pandora_is_master($pa_config) == 0) {
|
||||
|
@ -4615,7 +4617,9 @@ sub pandora_process_policy_queue ($) {
|
|||
}
|
||||
|
||||
enterprise_hook('pandora_finish_queue_operation', [$dbh, $operation->{'id'}]);
|
||||
}
|
||||
}
|
||||
|
||||
db_disconnect($dbh);
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
|
|
|
@ -77,6 +77,7 @@ our @EXPORT = qw(
|
|||
MODULE_WARNING
|
||||
MODULE_UNKNOWN
|
||||
MODULE_NOTINIT
|
||||
$THRRUN
|
||||
api_call_url
|
||||
cron_get_closest_in_range
|
||||
cron_next_execution
|
||||
|
@ -115,6 +116,9 @@ our @EXPORT = qw(
|
|||
valid_regex
|
||||
set_file_permissions
|
||||
uri_encode
|
||||
check_server_threads
|
||||
start_server_thread
|
||||
stop_server_threads
|
||||
);
|
||||
|
||||
# ID of the different servers
|
||||
|
@ -307,6 +311,12 @@ while (my ($ent, $chr) = each(%ENT2CHR)) {
|
|||
$CHR2ENT{$chr} = "&" . $ent . ";";
|
||||
}
|
||||
|
||||
# Threads started by the Pandora FMS Server.
|
||||
my @ServerThreads;
|
||||
|
||||
# Keep threads running.
|
||||
our $THRRUN :shared = 1;
|
||||
|
||||
###############################################################################
|
||||
# Sets user:group owner for the given file
|
||||
###############################################################################
|
||||
|
@ -1740,6 +1750,48 @@ sub api_call_url {
|
|||
return undef;
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Start a server thread and keep track of it.
|
||||
################################################################################
|
||||
sub start_server_thread {
	# $fn:   code reference the new thread will execute.
	# $args: array reference with the arguments handed to $fn.
	my ($fn, $args) = @_;

	# Raise the shared run flag before spawning; the server thread loops
	# keep iterating only while $THRRUN == 1.
	$THRRUN = 1;

	# Create the thread through a scalar assignment (the thread's context is
	# fixed at creation time) and remember its handle so it can be checked
	# and stopped later.
	my $thread = threads->create($fn, @$args);
	push @ServerThreads, $thread;
}
|
||||
|
||||
################################################################################
|
||||
# Check the status of server threads. Returns 1 if all are running, 0 otherwise.
|
||||
################################################################################
|
||||
sub check_server_threads {
	# Takes no arguments. Returns 1 when every tracked server thread is still
	# running (or when no thread is tracked), 0 as soon as one is not.

	foreach my $thr (@ServerThreads) {

		# An undefined entry means the thread could not be created; report it
		# as not running instead of dying on a method call on undef.
		return 0 unless (defined($thr) && $thr->is_running());
	}

	return 1;
}
|
||||
|
||||
################################################################################
|
||||
# Stop all server threads.
|
||||
################################################################################
|
||||
sub stop_server_threads {
	# Takes no arguments. Asks every tracked server thread to finish and
	# forgets about them.

	# Signal the threads to exit: their main loops run while $THRRUN == 1.
	$THRRUN = 0;

	foreach my $thr (@ServerThreads) {

		# Skip entries whose creation failed and handles that are already
		# detached; calling detach() on either would die and leave the
		# remaining threads tracked.
		next unless defined($thr);
		next if $thr->is_detached();

		# Detach instead of join so the caller does not block waiting for
		# the thread to notice the flag.
		$thr->detach();
	}

	# Start from a clean list the next time threads are launched.
	@ServerThreads = ();
}
|
||||
|
||||
# End of function declaration
|
||||
# End of defined Code
|
||||
|
||||
|
|
Loading…
Reference in New Issue