2010-02-18 Sancho Lerena <slerena@artica.es>
* conf/pandora_server.conf: New option, self_monitoring. * lib/PandoraFMS/Config.pm: Support for new option self_monitoring. * lib/PandoraFMS/Core.pm: Access update is not refreshed by network modules anymore (gaining a lot of performance here). Added new functions for internal statistics and automonitoring. * lib/PandoraFMS/Tools.pm: New functions for self_monitoring. * bin/pandora_server: Modified error handler to avoid showing info about bad load of some optional libraries. Modified general loop of maintenance tasks in three blocks: high, medium and low latency. Placed here in fixed steps (5 seconds, 30 seconds and one minute) all tasks pending before by server_threshold. Added self_monitoring and internal statistic mode (for group and servers). * util/pandora_db.pl: Update tconfig with last time of database maintenance time git-svn-id: https://svn.code.sf.net/p/pandora/code/trunk@2377 c3f86ba8-e40f-0410-aaad-9ba5e7f4b01f
This commit is contained in:
parent
a0c493b80c
commit
888a284249
|
@ -1,3 +1,25 @@
|
|||
2010-02-18 Sancho Lerena <slerena@artica.es>
|
||||
|
||||
* conf/pandora_server.conf: New option, self_monitoring.
|
||||
|
||||
* lib/PandoraFMS/Config.pm: Support for new option self_monitoring.
|
||||
|
||||
* lib/PandoraFMS/Core.pm: Access update is not refreshed by network
|
||||
modules anymore (gaining a lot of performance here). Added new functions
|
||||
for internal statistics and automonitoring.
|
||||
|
||||
* lib/PandoraFMS/Tools.pm: New functions for self_monitoring.
|
||||
|
||||
* bin/pandora_server: Modified error handler to avoid showing info about bad
|
||||
load of some optional libraries.
|
||||
Modified general loop of maintenance tasks in three
|
||||
blocks: high, medium and low latency. Placed here in fixed steps (5, 30 and
|
||||
one minute) all tasks pending before by server_threshold. Added
|
||||
self_monitoring and internal statistic mode (for group and servers).
|
||||
|
||||
* util/pandora_db.pl: Update tconfig with last time of database maintenance
|
||||
time
|
||||
|
||||
2010-02-15 Pablo de la Concepción <pablo.concepcion@artica.es>
|
||||
|
||||
* conf/pandora_server.conf, lib/PandoraFMS/Config.pm: New configuration
|
||||
|
@ -85,7 +107,7 @@
|
|||
|
||||
* NetworkServer.pm: Implemented support for SNMP v3.
|
||||
|
||||
* util/pandora_db.pm: Very important upgrade to this script. Now will
|
||||
* util/pandora_db.pl: Very important upgrade to this script. Now will
|
||||
delete all huge tables (tagente_datos, tagente_datos_string and
|
||||
tagent_acccess) using several independent blocks (by default 100) avoiding
|
||||
mysql locks which happen in the past.
|
||||
|
|
|
@ -124,31 +124,36 @@ sub pandora_restart () {
|
|||
sub pandora_crash () {
|
||||
|
||||
my $full_error = "";
|
||||
my $show_error = 0;
|
||||
|
||||
# Avoid showing messages about enterprise library loading failures, VERY
|
||||
# confusing.
|
||||
# confusing; all of them are warnings and not critical, and the user should not be
|
||||
# worried about that. If perl has a more "clean" way to avoid these messages
|
||||
# it would be nice to replace this code, but at this time it's the only way I know
|
||||
|
||||
foreach my $error_line (@_) {
|
||||
if ($error_line !~ m/Enterprise/i && $error_line !~ m/ConfigLocal/i){
|
||||
if ($error_line !~ m/Enterprise/i && $error_line !~ m/Format_XS/i && $error_line !~ m/ConfigLocal/i){
|
||||
logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1);
|
||||
} else {
|
||||
$show_error = 1;
|
||||
if ($error_line !~ m/Can\'t\slocate/) {
|
||||
logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1);
|
||||
} else {
|
||||
# Known errors of loading Enterprise, Format_XS and ConfigLocal
|
||||
# modules, non fatal.
|
||||
return;
|
||||
}
|
||||
}
|
||||
$full_error .= $error_line;
|
||||
}
|
||||
|
||||
if ($show_error == 1){
|
||||
logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' unhandled error.', 1);
|
||||
}
|
||||
logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' unhandled error.', 1);
|
||||
|
||||
# It's useful to show problems on the console, not only in logs. This helps
|
||||
# to solve stupid problems like Database credential problems for example
|
||||
|
||||
if ($full_error !~ m/Enterprise/i && $full_error !~ m/ConfigLocal/i) {
|
||||
print_message (\%Config, ' [E] Unhandled error in "' . $Config{'servername'} . "\". See more information in logfiles at '/var/log/pandora' \n", 0);
|
||||
print_message (\%Config, " Error description:\n", 0);
|
||||
print_message (\%Config, $full_error, 0);
|
||||
}
|
||||
print_message (\%Config, ' [E] Unhandled error in "' . $Config{'servername'} . "\". See more information in logfiles at '/var/log/pandora' \n", 0);
|
||||
print_message (\%Config, " Error description:\n", 0);
|
||||
print_message (\%Config, $full_error, 0);
|
||||
|
||||
}
|
||||
|
||||
$SIG{'TERM'} = 'pandora_shutdown';
|
||||
|
@ -187,39 +192,71 @@ foreach my $server (@Servers) {
|
|||
|
||||
# Main loop
|
||||
my $time_ref = time ();
|
||||
my $task_timer = 0;
|
||||
my $counter = 0;
|
||||
|
||||
while (1) {
|
||||
|
||||
eval {
|
||||
eval {
|
||||
|
||||
# TASKS DONE EACH 5 SECONDS (Low latency tasks)
|
||||
# ---------------------------------------------
|
||||
# Server status update each 5 seconds
|
||||
# Neighbourhood problem detection each 5 seconds
|
||||
# Forced alerts each 5 seconds as well
|
||||
if (($counter % 5) == 0) {
|
||||
|
||||
# Update server status
|
||||
foreach my $server (@Servers) {
|
||||
die ($server->getErrStr ()) unless ($server->checkThreads () == 1);
|
||||
$server->update ();
|
||||
# Update server status
|
||||
foreach my $server (@Servers) {
|
||||
die ($server->getErrStr ()) unless ($server->checkThreads () == 1);
|
||||
$server->update ();
|
||||
}
|
||||
|
||||
# Update fallen servers
|
||||
db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'})));
|
||||
|
||||
# Update forced alerts
|
||||
pandora_exec_forced_alerts (\%Config, $DBH);
|
||||
|
||||
}
|
||||
|
||||
# Update fallen servers
|
||||
db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'})));
|
||||
|
||||
pandora_exec_forced_alerts (\%Config, $DBH);
|
||||
|
||||
pandora_module_keep_alive_nd (\%Config, $DBH);
|
||||
|
||||
# TASKS DONE EACH 30 SECONDS (Med latency tasks)
|
||||
# ---------------------------------------------
|
||||
# Tasks executed only each Server Threshold x 30 secs, for low-priority tasks
|
||||
if (($counter % 30) == 0) {
|
||||
|
||||
# Keepalive module control (very DB intensive, not run frequently)
|
||||
pandora_module_keep_alive_nd (\%Config, $DBH);
|
||||
|
||||
if ($task_timer > 30){
|
||||
# Multicast status report each 30 x Server Threshold secs
|
||||
enterprise_hook('mcast_status_report', [\%Config, $DBH]);
|
||||
$task_timer = 0;
|
||||
}
|
||||
|
||||
# TASKS DONE EACH 60 SECONDS (High latency tasks)
|
||||
# ---------------------------------------------
|
||||
if (($counter % 60) == 0) {
|
||||
# Downtimes are executed only 30 x Server Threshold secs
|
||||
pandora_planned_downtime (\%Config, $DBH);
|
||||
|
||||
# Update fallen servers
|
||||
db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'})));
|
||||
# Realtime stats (Only master server!) - ( VERY HEAVY !)
|
||||
# Realtimestats == 1, generated by WEB Console, not by server!
|
||||
if ($Config{"pandora_master"} == 1
|
||||
&& defined($Config{"realtimestats"})
|
||||
&& $Config{"realtimestats"} == 0){
|
||||
|
||||
# Check if I need to refresh stats
|
||||
my $last_execution_stats = get_db_value ($DBH, "SELECT MAX(utimestamp) FROM tgroup_stat");
|
||||
if (!defined($last_execution_stats) || $last_execution_stats < (time() - $Config{"stats_interval"})){
|
||||
pandora_group_statistics (\%Config, $DBH);
|
||||
pandora_server_statistics (\%Config, $DBH);
|
||||
}
|
||||
|
||||
# Pandora self monitoring
|
||||
if (defined($Config{"self_monitoring"})
|
||||
&& $Config{"self_monitoring"} == 1){
|
||||
pandora_self_monitoring (\%Config, $DBH);
|
||||
}
|
||||
}
|
||||
}
|
||||
$task_timer++;
|
||||
};
|
||||
|
||||
# Restart on error or auto restart
|
||||
|
@ -252,6 +289,14 @@ while (1) {
|
|||
close (OLDOUT);
|
||||
}
|
||||
|
||||
# Avoid counter overflow
|
||||
if ($counter > 10000){
|
||||
$counter = 0;
|
||||
} else {
|
||||
$counter++;
|
||||
}
|
||||
|
||||
threads->yield;
|
||||
sleep ($Config{'server_threshold'});
|
||||
sleep (1);
|
||||
|
||||
}
|
||||
|
|
|
@ -240,7 +240,6 @@ max_queue_files 250
|
|||
# Radius of the Error in meters to consider two gis locations as the same location.
|
||||
# location_error 50
|
||||
|
||||
|
||||
# Recon reverse geolocation mode [disabled, sql, file]
|
||||
# * disabled: The recon task doesn't try to geolocate the ip discovered.
|
||||
# * sql: The recon task tries to query the SQL database to geolocate the ip discovered
|
||||
|
@ -257,3 +256,6 @@ max_queue_files 250
|
|||
# The center of the circle is guessed by geolocating the IP.
|
||||
#recon_location_scatter_radius 1000
|
||||
|
||||
|
||||
# Pandora Server self-monitoring (embedded agent) (by default disabled)
|
||||
# self_monitoring 1
|
||||
|
|
|
@ -38,7 +38,7 @@ our @EXPORT = qw(
|
|||
|
||||
# version: Defines actual version of Pandora Server for this module only
|
||||
my $pandora_version = "3.1-dev";
|
||||
my $pandora_build = "100209";
|
||||
my $pandora_build = "100218";
|
||||
our $VERSION = $pandora_version." ".$pandora_build;
|
||||
|
||||
# Setup hash
|
||||
|
@ -274,6 +274,8 @@ sub pandora_load_config {
|
|||
# Restart server on error
|
||||
$pa_config->{'restart'} = 0;
|
||||
|
||||
# Self monitoring
|
||||
$pa_config->{'self_monitoring'} = 0;
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# These values are not stored in .conf files.
|
||||
|
@ -571,6 +573,9 @@ sub pandora_load_config {
|
|||
elsif ($parametro =~ m/^recon_location_scatter_radius\s+(\d+)/i) {
|
||||
$pa_config->{'recon_location_scatter_radius'} = clean_blank($1);
|
||||
}
|
||||
elsif ($parametro =~ m/^self_monitoring\s([0-1])/i) {
|
||||
$pa_config->{'self_monitoring'} = clean_blank($1);
|
||||
}
|
||||
} # end of loop for parameter #
|
||||
|
||||
|
||||
|
|
|
@ -140,6 +140,9 @@ our @EXPORT = qw(
|
|||
pandora_update_agent
|
||||
pandora_update_module_on_error
|
||||
pandora_update_server
|
||||
pandora_group_statistics
|
||||
pandora_server_statistics
|
||||
pandora_self_monitoring
|
||||
@ServerTypes
|
||||
);
|
||||
|
||||
|
@ -860,7 +863,13 @@ sub pandora_update_agent ($$$$$$$;$$$$$) {
|
|||
|
||||
my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime());
|
||||
|
||||
pandora_access_update ($pa_config, $agent_id, $dbh);
|
||||
|
||||
# No access update for data without interval.
|
||||
# Single modules from network server, for example. This could be very
|
||||
# Heavy for Pandora FMS
|
||||
if ($agent_interval != -1){
|
||||
pandora_access_update ($pa_config, $agent_id, $dbh);
|
||||
}
|
||||
|
||||
# No update for interval, timezone and position fields (some old agents don't support it)
|
||||
if ($agent_interval == -1){
|
||||
|
@ -1073,6 +1082,7 @@ sub pandora_event ($$$$$$$$$$) {
|
|||
my $utimestamp = time ();
|
||||
my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime ($utimestamp));
|
||||
$id_agentmodule = 0 unless defined ($id_agentmodule);
|
||||
|
||||
db_do ($dbh, 'INSERT INTO tevento (`id_agente`, `id_grupo`, `evento`, `timestamp`, `estado`, `utimestamp`, `event_type`, `id_agentmodule`, `id_alert_am`, `criticity`)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', $id_agente, $id_grupo, $evento, $timestamp, $event_status, $utimestamp, $event_type, $id_agentmodule, $id_alert_am, $severity);
|
||||
}
|
||||
|
@ -1676,27 +1686,237 @@ sub save_agent_position($$$$$$$$) {
|
|||
|
||||
}
|
||||
|
||||
|
||||
|
||||
##########################################################################
# Process server statistics for statistics table
#
# Refreshes the statistics columns (lag_time, lag_modules,
# total_modules_running, my_modules, stat_utimestamp) of every tserver row
# whose name matches this server; each server only refreshes its own rows.
#
# Parameters:
#   $pa_config  Configuration hash reference (only 'servername' is read).
#   $dbh        Database handle handed to the DB helper functions.
#
# The return value is not meaningful.
##########################################################################
sub pandora_server_statistics ($$) {
	my ($pa_config, $dbh) = @_;

	# Get all servers with my name (each server only refreshes its own stats)
	my @servers = get_db_rows ($dbh, 'SELECT * FROM tserver WHERE name = "'.$pa_config->{'servername'}.'"');

	foreach my $server (@servers) {
		my $id_server = $server->{'id_server'};
		my $server_type = $server->{'server_type'};
		my ($modules, $modules_total, $module_lag, $lag);

		if ($server_type != 3) {

			# Enabled modules of enabled agents executed by this server row.
			$modules = get_db_value ($dbh, "SELECT count(tagente_estado.id_agente_modulo) FROM tagente_estado, tagente_modulo, tagente WHERE tagente.disabled=0 AND tagente_modulo.id_agente = tagente.id_agente AND tagente_modulo.disabled = 0 AND tagente_modulo.id_agente_modulo = tagente_estado.id_agente_modulo AND tagente_estado.running_by = $id_server");

			# Same count, but for every server of the same type.
			$modules_total = get_db_value ($dbh, "SELECT count(tagente_estado.id_agente_modulo) FROM tserver, tagente_estado, tagente_modulo, tagente WHERE tagente.disabled=0 AND tagente_modulo.id_agente = tagente.id_agente AND tagente_modulo.disabled = 0 AND tagente_modulo.id_agente_modulo = tagente_estado.id_agente_modulo AND tagente_estado.running_by = tserver.id_server AND tserver.server_type = $server_type");

			# Local/Dataserver (type 0) LAG calculation only looks at module
			# types below 5 and tolerates a 10% delay; every other server
			# type counts a module as delayed as soon as its interval is over.
			my $late_filter = ($server_type != 0)
				? 'AND (UNIX_TIMESTAMP() - utimestamp) > current_interval'
				: 'AND tagente_modulo.id_tipo_modulo < 5 AND (UNIX_TIMESTAMP() - utimestamp) > (current_interval * 1.1)';

			# Modules delayed more than 10 intervals are left out of the lag.
			my $lag_row = get_db_single_row ($dbh, "SELECT COUNT(tagente_modulo.id_agente_modulo) AS module_lag, AVG(UNIX_TIMESTAMP() - utimestamp - current_interval) AS lag FROM tagente_estado, tagente_modulo
				WHERE utimestamp > 0
				AND tagente_modulo.disabled = 0
				AND tagente_modulo.id_agente_modulo = tagente_estado.id_agente_modulo
				AND current_interval > 0
				AND running_by = $id_server
				AND (UNIX_TIMESTAMP() - utimestamp) < (current_interval * 10)
				$late_filter");

			if (ref ($lag_row) eq 'HASH') {
				$module_lag = $lag_row->{'module_lag'};
				$lag = $lag_row->{'lag'};
			}

		} else {
			# Recon server only

			# Total jobs running on this recon server
			$modules = get_db_value ($dbh, "SELECT COUNT(id_rt) FROM trecon_task WHERE id_recon_server = $id_server");

			# Total recon jobs (all servers)
			$modules_total = get_db_value ($dbh, "SELECT COUNT(status) FROM trecon_task");

			# Lag: average active time of all overdue tasks. AVG() is needed
			# here, otherwise only the first matching task would be reported.
			$lag = get_db_value ($dbh, "SELECT AVG(UNIX_TIMESTAMP() - utimestamp) FROM trecon_task WHERE UNIX_TIMESTAMP() > (utimestamp + interval_sweep) AND id_recon_server = $id_server");

			# Number of overdue recon tasks
			$module_lag = get_db_value ($dbh, "SELECT COUNT(id_rt) FROM trecon_task WHERE UNIX_TIMESTAMP() > (utimestamp + interval_sweep) AND id_recon_server = $id_server");
		}

		# Check that all values are defined and set to 0 if not
		foreach my $value ($lag, $module_lag, $modules_total, $modules) {
			$value = 0 unless defined ($value);
		}

		# Update server record
		db_do ($dbh, 'UPDATE tserver SET lag_time = ?, lag_modules = ?, total_modules_running = ?, my_modules = ?, stat_utimestamp = UNIX_TIMESTAMP() WHERE id_server = ?',
			$lag, $module_lag, $modules_total, $modules, $id_server);
	}
}
|
||||
|
||||
|
||||
##########################################################################
# Process system statistics for statistics table
#
# Rebuilds the tgroup_stat row of every enabled group with id_grupo > 1:
# number of agents and modules, modules per status, alerts defined and
# alerts fired.
#
# Parameters:
#   $pa_config  Configuration hash reference (not read by this function).
#   $dbh        Database handle handed to the DB helper functions.
#
# The return value is not meaningful.
##########################################################################
sub pandora_group_statistics ($$) {
	my ($pa_config, $dbh) = @_;

	# Run a counting query; an undefined result is reported as 0 so that the
	# INSERT below always receives a number.
	my $count = sub {
		my ($sql) = @_;
		my $value = get_db_value ($dbh, $sql);
		return defined ($value) ? $value : 0;
	};

	# Get all groups
	my @groups = get_db_rows ($dbh, 'SELECT id_grupo FROM tgrupo WHERE disabled = 0 AND id_grupo > 1');

	# For each valid group get the stats
	foreach my $group_row (@groups) {
		my $group = $group_row->{'id_grupo'};

		# Common part of all module counters: enabled modules that belong
		# to enabled agents of this group.
		my $module_count = "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0";

		# Common part of the alert counters.
		my $alert_count = "SELECT COUNT(talert_template_modules.id) FROM talert_template_modules, tagente_modulo, tagente_estado, tagente WHERE tagente.id_grupo = $group AND tagente_modulo.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND tagente.disabled = 0 AND talert_template_modules.id_agent_module = tagente_modulo.id_agente_modulo";

		# Module types 21, 22, 23 and 100 are excluded from the unknown and
		# non-init counters.
		my $type_filter = 'tagente_modulo.id_tipo_modulo NOT IN (21,22,23,100)';

		# Agents without contact for more than two intervals. Date
		# arithmetic must use INTERVAL: "NOW() - n" would be evaluated as a
		# plain number and give wrong results.
		# NOTE(review): assumes tagente.intervalo is expressed in seconds.
		my $agents_unknown = $count->("SELECT COUNT(*) FROM tagente WHERE id_grupo = $group AND disabled = 0 AND ultimo_contacto < NOW() - INTERVAL (intervalo * 2) SECOND");

		my $agents = $count->("SELECT COUNT(*) FROM tagente WHERE id_grupo = $group AND disabled = 0");

		my $modules = $count->($module_count);

		# By default status (ok, warning, critical) is considered separate
		# from unknown, so these three counters also include modules that
		# are unknown. The alternative (currently disabled) is to append:
		#   AND (($type_filter AND utimestamp > (UNIX_TIMESTAMP() - (current_interval * 2)))
		#        OR (tagente_modulo.id_tipo_modulo IN (21,22,23,100)))
		my $normal = $count->("$module_count AND estado = 0");
		my $critical = $count->("$module_count AND estado = 1");
		my $warning = $count->("$module_count AND estado = 2");

		# Modules without data for more than two intervals.
		my $unknown = $count->("$module_count AND $type_filter AND utimestamp < ( UNIX_TIMESTAMP() - (current_interval * 2))");

		# Modules that never received data.
		my $non_init = $count->("$module_count AND $type_filter AND tagente_estado.utimestamp = 0");

		my $alerts = $count->($alert_count);
		my $alerts_fired = $count->("$alert_count AND times_fired > 0");

		# Update the record.
		db_do ($dbh, 'DELETE FROM tgroup_stat WHERE id_group = ?', $group);

		db_do ($dbh, 'INSERT INTO tgroup_stat (id_group, modules, normal, critical, warning, unknown, `non-init`, alerts, alerts_fired, agents, agents_unknown, utimestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, UNIX_TIMESTAMP())',
			$group, $modules, $normal, $critical, $warning, $unknown, $non_init, $alerts, $alerts_fired, $agents, $agents_unknown);
	}
}
|
||||
|
||||
|
||||
##########################################################################
# Pandora self monitoring process
#
# Builds an agent XML data file describing this server (status, queued
# modules, unknown agents, system load, free RAM and free disk space of the
# incoming directory) and drops it into the incoming directory.
#
# Parameters:
#   $pa_config  Configuration hash reference; 'version', 'servername',
#               'stats_interval' and 'incomingdir' are read.
#   $dbh        Database handle handed to the DB helper functions.
#
# Dies if the XML file cannot be opened or written.
##########################################################################
sub pandora_self_monitoring ($$) {
	my ($pa_config, $dbh) = @_;

	# TODO: timezone offset is not reported yet.
	my $utimestamp = time ();
	my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime ($utimestamp));
	my $servername = $pa_config->{'servername'};

	# The system helpers may return the raw output of an external command;
	# strip the trailing newline so it does not end up inside <data>.
	my $clean = sub {
		my ($value) = @_;
		return '' unless defined ($value);
		$value =~ s/\s+\z//;
		return $value;
	};

	# Render one <module> element.
	my $module_xml = sub {
		my ($name, $type, $data) = @_;
		return " <module>"
			. " <name>$name</name>"
			. " <type>$type</type>"
			. " <data>$data</data>"
			. " </module>";
	};

	my $load_average = $clean->(load_average ());
	my $free_mem = $clean->(free_mem ());
	my $free_disk_spool = $clean->(disk_free ($pa_config->{"incomingdir"}));

	my $my_data_server = get_db_value ($dbh, "SELECT id_server FROM tserver WHERE server_type = 0 AND name = '".$servername."'");

	# Agents with at least one module of this data server without data for
	# more than two intervals. Without a data server row the query would be
	# invalid SQL, so it is skipped and 0 is reported.
	my $agents_unknown = 0;
	if (defined ($my_data_server)) {
		$agents_unknown = get_db_value ($dbh, "SELECT COUNT(DISTINCT tagente_estado.id_agente) FROM tagente_estado, tagente WHERE tagente.disabled = 0 AND tagente.id_agente = tagente_estado.id_agente AND running_by = $my_data_server AND utimestamp < (UNIX_TIMESTAMP() - (current_interval * 2))");
		$agents_unknown = 0 unless defined ($agents_unknown);
	}

	my $queued_modules = get_db_value ($dbh, "SELECT SUM(queued_modules) FROM tserver WHERE name = '".$servername."'");
	$queued_modules = 0 unless defined ($queued_modules);

	my $xml_output = "<agent_data os_name='Linux' os_version='".$pa_config->{'version'}."' agent_name='".$servername."' interval='".$pa_config->{"stats_interval"}."' timestamp='".$timestamp."' >";
	$xml_output .= $module_xml->('Status', 'generic_proc', 1);
	$xml_output .= $module_xml->('Queued_Modules', 'generic_data', $queued_modules);
	$xml_output .= $module_xml->('Agents_Unknown', 'generic_data', $agents_unknown);
	$xml_output .= $module_xml->('System_Load_AVG', 'generic_data', $load_average);
	$xml_output .= $module_xml->('Free_RAM', 'generic_data', $free_mem);
	$xml_output .= $module_xml->('FreeDisk_SpoolDir', 'generic_data', $free_disk_spool);
	$xml_output .= "</agent_data>";

	my $filename = $pa_config->{"incomingdir"}."/".$servername.".".$utimestamp.".data";

	# Three-argument open with a lexical handle: the file name can never be
	# taken for an open mode and the handle does not leak as a global.
	open (my $xml_fh, '>>', $filename) or die "[FATAL] Could not open internal monitoring XML file for deploying monitorization at '$filename'";
	print {$xml_fh} $xml_output;

	# Buffered write errors only show up when the handle is closed.
	close ($xml_fh) or die "[FATAL] Could not write internal monitoring XML file '$filename': $!";
}
|
||||
|
||||
|
||||
# End of function declaration
|
||||
# End of defined Code
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
L<DBI>, L<XML::Simple>, L<HTML::Entities>, L<Time::Local>, L<POSIX>, L<PandoraFMS::DB>, L<PandoraFMS::Config>, L<PandoraFMS::Tools>, L<PandoraFMS::GIS>
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
This is released under the GNU Lesser General Public License.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<DBI>, L<XML::Simple>, L<HTML::Entities>, L<Time::Local>, L<POSIX>, L<PandoraFMS::DB>, L<PandoraFMS::Config>, L<PandoraFMS::Tools>, L<PandoraFMS::GIS>
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright (c) 2005-2010 Artica Soluciones Tecnologicas S.L
|
||||
|
||||
|
||||
=cut
|
||||
|
|
|
@ -44,6 +44,9 @@ our @EXPORT = qw(
|
|||
enterprise_load
|
||||
print_message
|
||||
get_tag_value
|
||||
disk_free
|
||||
load_average
|
||||
free_mem
|
||||
);
|
||||
|
||||
##########################################################################
|
||||
|
@ -396,6 +399,30 @@ sub get_tag_value ($$$) {
|
|||
return $def_value;
|
||||
}
|
||||
|
||||
##############################################################################
|
||||
# Below some "internal" functions for automonitoring feature
|
||||
# TODO: Implement the same for other systems like Solaris or BSD
|
||||
##############################################################################
|
||||
|
||||
# Free space, in megabytes, of the filesystem that holds $target.
#
# Parameter:
#   $target  Path of a file or directory.
#
# Returns the "available" column of "df -k -P" divided by 1024, or an empty
# string when the path is missing or df gives no usable answer.
sub disk_free ($) {
	my $target = $_[0];

	return '' unless defined ($target) && $target ne '';

	# List-form pipe open: df is executed directly, without a shell, so
	# spaces or shell metacharacters in $target cannot run other commands.
	# "--" stops option parsing for paths that start with a dash.
	open (my $df, '-|', 'df', '-k', '-P', '--', $target) or return '';
	my @lines = <$df>;
	close ($df);

	return '' unless @lines;

	# POSIX output format: the fourth column of the last line is the
	# available space in 1024-byte blocks.
	my @fields = split (' ', $lines[-1]);
	return '' unless defined ($fields[3]) && $fields[3] =~ /^\d+$/;

	return $fields[3] / 1024;
}
|
||||
|
||||
# One-minute system load average.
#
# Returns the first field of /proc/loadavg, without a trailing newline, or
# an empty string when that file cannot be read (it is only provided by
# Linux).
sub load_average {
	# Read the file directly instead of forking "cat" and "awk".
	open (my $loadavg, '<', '/proc/loadavg') or return '';
	my $line = <$loadavg>;
	close ($loadavg);

	return '' unless defined ($line);

	my ($one_minute) = split (' ', $line);
	return defined ($one_minute) ? $one_minute : '';
}
|
||||
|
||||
# Free physical memory, in kilobytes.
#
# Returns the MemFree value of /proc/meminfo, without a trailing newline,
# or an empty string when it is not available (the file is only provided
# by Linux). This is the figure "free" prints in the "free" column of its
# "Mem:" line, read without depending on the locale or output layout of
# that command.
sub free_mem {
	open (my $meminfo, '<', '/proc/meminfo') or return '';

	my $free_kb = '';
	while (my $line = <$meminfo>) {
		if ($line =~ /^MemFree:\s+(\d+)/) {
			$free_kb = $1;
			last;
		}
	}
	close ($meminfo);

	return $free_kb;
}
|
||||
|
||||
# End of function declaration
|
||||
# End of defined Code
|
||||
|
||||
|
|
|
@ -404,5 +404,10 @@ sub pandoradb_main ($$$) {
|
|||
pandora_compactdb ($conf, defined ($history_dbh) ? $history_dbh : $dbh);
|
||||
}
|
||||
|
||||
# Update tconfig with the time of the last database maintenance (now)
|
||||
|
||||
db_do ($dbh, "DELETE FROM tconfig WHERE token = 'db_maintance'");
|
||||
db_do ($dbh, "INSERT INTO tconfig (token, value) VALUES ('db_maintance', '".time()."')");
|
||||
|
||||
print "Ending at ". strftime ("%Y-%m-%d %H:%M:%S", localtime()) . "\n";
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue