From 888a284249dcaa7a6307ee6576650b53fdf2fcb8 Mon Sep 17 00:00:00 2001 From: slerena Date: Thu, 18 Feb 2010 17:21:40 +0000 Subject: [PATCH] 2010-02-18 Sancho Lerena * conf/pandora_server.conf: New option, self_monitoring. * lib/PandoraFMS/Config.pm: Support for new option self_monitoring. * lib/PandoraFMS/Core.pm: Access update is not refreshed by network modules anymore (gaining a lot of performance here). Added new functions for internal statistics and automonitoring. * lib/PandoraFMS/Tools.pm: New funtions for self_monitoring. * bin/pandora_server: Modified error handler to avoid show info about bad load of some optional libraries. Modified general loop of maintance tasks in three blocks: high, medium and low latency. Placed here in fixed steps (5, 30 and one minute) all tasks pending before by server_threshold. Added self_monitoring and internal statistic mode (for group and servers). * util/pandora_db.pl: Update tconfig with last time of database maintance time git-svn-id: https://svn.code.sf.net/p/pandora/code/trunk@2377 c3f86ba8-e40f-0410-aaad-9ba5e7f4b01f --- pandora_server/ChangeLog | 24 ++- pandora_server/bin/pandora_server | 107 +++++++--- pandora_server/conf/pandora_server.conf | 4 +- pandora_server/lib/PandoraFMS/Config.pm | 7 +- pandora_server/lib/PandoraFMS/Core.pm | 260 ++++++++++++++++++++++-- pandora_server/lib/PandoraFMS/Tools.pm | 27 +++ pandora_server/util/pandora_db.pl | 5 + 7 files changed, 380 insertions(+), 54 deletions(-) diff --git a/pandora_server/ChangeLog b/pandora_server/ChangeLog index f82d4506b6..85ccc06af0 100644 --- a/pandora_server/ChangeLog +++ b/pandora_server/ChangeLog @@ -1,3 +1,25 @@ +2010-02-18 Sancho Lerena + + * conf/pandora_server.conf: New option, self_monitoring. + + * lib/PandoraFMS/Config.pm: Support for new option self_monitoring. + + * lib/PandoraFMS/Core.pm: Access update is not refreshed by network + modules anymore (gaining a lot of performance here). Added new functions + for internal statistics and automonitoring. + + * lib/PandoraFMS/Tools.pm: New funtions for self_monitoring. + + * bin/pandora_server: Modified error handler to avoid show info about bad + load of some optional libraries. + Modified general loop of maintance tasks in three + blocks: high, medium and low latency. Placed here in fixed steps (5, 30 and + one minute) all tasks pending before by server_threshold. Added + self_monitoring and internal statistic mode (for group and servers). + + * util/pandora_db.pl: Update tconfig with last time of database maintance + time + 2010-02-15 Pablo de la Concepción * conf/pandora_server.conf, lib/PandoraFMS/Config.pm: New configuration @@ -85,7 +107,7 @@ * NetworkServer.pm: Implemented support for SNMP v3. - * util/pandora_db.pm: Very important upgrade to this script. Now will + * util/pandora_db.pl: Very important upgrade to this script. Now will delete all huge tables (tagente_datos, tagente_datos_string and tagent_acccess) using several independent blocks (by default 100) avoiding mysql locks which happen in the past. diff --git a/pandora_server/bin/pandora_server b/pandora_server/bin/pandora_server index f77081facd..19adf5aed3 100755 --- a/pandora_server/bin/pandora_server +++ b/pandora_server/bin/pandora_server @@ -124,31 +124,36 @@ sub pandora_restart () { sub pandora_crash () { my $full_error = ""; - my $show_error = 0; # Avoid show messages about enterprise library loading failurem, VERY - # confussing. + # confussing, all of them are warnigs and not critical, and user should be + # worried about that. If perl has a more "clean" way to avoid this messages + # will be nice to replace this code, but at this time it's the only way I know + foreach my $error_line (@_) { - if ($error_line !~ m/Enterprise/i && $error_line !~ m/ConfigLocal/i){ + if ($error_line !~ m/Enterprise/i && $error_line !~ m/Format_XS/i && $error_line !~ m/ConfigLocal/i){ logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1); } else { - $show_error = 1; + if ($error_line !~ m/Can\'t\slocate/) { + logger (\%Config, '[E] \'' . $Config{'servername'} . "': $error_line", 1); + } else { + # Known errors of loading Enterprise, Format_XS and ConfigLocal + # modules, non fatal. + return; + } } $full_error .= $error_line; } - if ($show_error == 1){ - logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' unhandled error.', 1); - } + logger (\%Config, 'Pandora FMS Server \'' . $Config{'servername'} . '\' unhandled error.', 1); # It's interesting show by console problems, not only in logs. This helps # to solve stupid problems like Database credential problems for example - if ($full_error !~ m/Enterprise/i && $full_error !~ m/ConfigLocal/i) { - print_message (\%Config, ' [E] Unhandled error in "' . $Config{'servername'} . "\". See more information in logfiles at '/var/log/pandora' \n", 0); - print_message (\%Config, " Error description:\n", 0); - print_message (\%Config, $full_error, 0); - } + print_message (\%Config, ' [E] Unhandled error in "' . $Config{'servername'} . "\". See more information in logfiles at '/var/log/pandora' \n", 0); + print_message (\%Config, " Error description:\n", 0); + print_message (\%Config, $full_error, 0); + } $SIG{'TERM'} = 'pandora_shutdown'; @@ -187,39 +192,71 @@ foreach my $server (@Servers) { # Main loop my $time_ref = time (); -my $task_timer = 0; +my $counter = 0; while (1) { - eval { + eval { + + # TASKS DONE EACH 5 SECONDS (Low latency tasks) + # --------------------------------------------- + # Server status update each 5 seconds + # Neightbourhood problem detection each 5 seconds + # Forced alerts each 5 seconds as well + if (($counter % 5) == 0) { - # Update server status - foreach my $server (@Servers) { - die ($server->getErrStr ()) unless ($server->checkThreads () == 1); - $server->update (); + # Update server status + foreach my $server (@Servers) { + die ($server->getErrStr ()) unless ($server->checkThreads () == 1); + $server->update (); + } + + # Update fallen servers + db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'}))); + + # Update forced alerts + pandora_exec_forced_alerts (\%Config, $DBH); + } - # Update fallen servers - db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'}))); - - pandora_exec_forced_alerts (\%Config, $DBH); - - pandora_module_keep_alive_nd (\%Config, $DBH); - + # TASKS DONE EACH 30 SECONDS (Med latency tasks) + # --------------------------------------------- # Tasks executed only each Server Threshold x 30 secs, for low-priority tasks + if (($counter % 30) == 0) { + + # Keepalive module control.(very DB intensive, not run frecuently + pandora_module_keep_alive_nd (\%Config, $DBH); - if ($task_timer > 30){ # Multicast status report each 30 x Server Threshold secs enterprise_hook('mcast_status_report', [\%Config, $DBH]); - $task_timer = 0; + } + # TASKS DONE EACH 60 SECONDS (Low latency tasks) + # --------------------------------------------- + if (($counter % 60) == 0) { # Downtimes are executed only 30 x Server Threshold secs pandora_planned_downtime (\%Config, $DBH); - # Update fallen servers - db_do ($DBH, "UPDATE tserver SET status = 0 WHERE keepalive < ?", strftime ("%Y-%m-%d %H:%M:%S", localtime(time() - $Config{'keepalive'}))); + # Realtime stats (Only master server!) - ( VERY HEAVY !) + # Realtimestats == 1, generated by WEB Console, not by server! + if ($Config{"pandora_master"} == 1 + && defined($Config{"realtimestats"}) + && $Config{"realtimestats"} == 0){ + + # Check if I need to refresh stats + my $last_execution_stats = get_db_value ($DBH, "SELECT MAX(utimestamp) FROM tgroup_stat"); + if (!defined($last_execution_stats) || $last_execution_stats < (time() - $Config{"stats_interval"})){ + pandora_group_statistics (\%Config, $DBH); + pandora_server_statistics (\%Config, $DBH); + } + + # Pandora self monitoring + if (defined($Config{"self_monitoring"}) + && $Config{"self_monitoring"} == 1){ + pandora_self_monitoring (\%Config, $DBH); + } + } } - $task_timer++; }; # Restart on error or auto restart @@ -252,6 +289,14 @@ while (1) { close (OLDOUT); } + # Avoid counter overflow + if ($counter > 10000){ + $counter = 0; + } else { + $counter++; + } + threads->yield; - sleep ($Config{'server_threshold'}); + sleep (1); + } diff --git a/pandora_server/conf/pandora_server.conf b/pandora_server/conf/pandora_server.conf index 888c040197..73b7f04ecb 100755 --- a/pandora_server/conf/pandora_server.conf +++ b/pandora_server/conf/pandora_server.conf @@ -240,7 +240,6 @@ max_queue_files 250 # Radius of the Error in meters to consider two gis locations as the same location. # location_error 50 - # Recon reverse geolocation mode [disabled, sql, file] # * disabled: The recon task doesn't try to geolocate the ip discovered. # * sql: The recon task trys to query the SQL database to geolocate the ip discovered @@ -257,3 +256,6 @@ max_queue_files 250 # The center of the cicle is guessed by geolocating the IP. #recon_location_scatter_radius 1000 + +# Pandora Server self-monitoring (embedded agent) (by default disabled) +# self_monitoring 1 diff --git a/pandora_server/lib/PandoraFMS/Config.pm b/pandora_server/lib/PandoraFMS/Config.pm index a60ff02524..ef054fb117 100644 --- a/pandora_server/lib/PandoraFMS/Config.pm +++ b/pandora_server/lib/PandoraFMS/Config.pm @@ -38,7 +38,7 @@ our @EXPORT = qw( # version: Defines actual version of Pandora Server for this module only my $pandora_version = "3.1-dev"; -my $pandora_build = "100209"; +my $pandora_build = "100218"; our $VERSION = $pandora_version." ".$pandora_build; # Setup hash @@ -274,6 +274,8 @@ sub pandora_load_config { # Restart server on error $pa_config->{'restart'} = 0; + # Self monitoring + $pa_config->{'self_monitoring'} = 0; # ------------------------------------------------------------------------- # This values are not stored in .conf files. @@ -571,6 +573,9 @@ sub pandora_load_config { elsif ($parametro =~ m/^recon_location_scatter_radius\s+(\d+)/i) { $pa_config->{'recon_location_scatter_radius'} = clean_blank($1); } + elsif ($parametro =~ m/^self_monitoring\s([0-1])/i) { + $pa_config->{'self_monitoring'} = clean_blank($1); + } } # end of loop for parameter # diff --git a/pandora_server/lib/PandoraFMS/Core.pm b/pandora_server/lib/PandoraFMS/Core.pm index 6685b12816..1c6f5afc3d 100644 --- a/pandora_server/lib/PandoraFMS/Core.pm +++ b/pandora_server/lib/PandoraFMS/Core.pm @@ -140,6 +140,9 @@ our @EXPORT = qw( pandora_update_agent pandora_update_module_on_error pandora_update_server + pandora_group_statistics + pandora_server_statistics + pandora_self_monitoring @ServerTypes ); @@ -860,7 +863,13 @@ sub pandora_update_agent ($$$$$$$;$$$$$) { my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime()); - pandora_access_update ($pa_config, $agent_id, $dbh); + + # No access update for data without interval. + # Single modules from network server, for example. This could be very + # Heavy for Pandora FMS + if ($agent_interval != -1){ + pandora_access_update ($pa_config, $agent_id, $dbh); + } # No update for interval, timezone and position fields (some old agents don't support it) if ($agent_interval == -1){ @@ -1073,6 +1082,7 @@ sub pandora_event ($$$$$$$$$$) { my $utimestamp = time (); my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime ($utimestamp)); $id_agentmodule = 0 unless defined ($id_agentmodule); + db_do ($dbh, 'INSERT INTO tevento (`id_agente`, `id_grupo`, `evento`, `timestamp`, `estado`, `utimestamp`, `event_type`, `id_agentmodule`, `id_alert_am`, `criticity`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', $id_agente, $id_grupo, $evento, $timestamp, $event_status, $utimestamp, $event_type, $id_agentmodule, $id_alert_am, $severity); } @@ -1676,27 +1686,237 @@ sub save_agent_position($$$$$$$$) { } + + +########################################################################## +# Process server statistics for statistics table +########################################################################## +sub pandora_server_statistics ($$) { + my ($pa_config, $dbh) = @_; + + my $lag_time= 0; + my $lag_modules = 0; + my $total_modules_running = 0; + my $my_modules = 0; + my $stat_utimestamp = 0; + my $lag_row; + + # Get all servers with my name (each server only refresh it's own stats) + my @servers = get_db_rows ($dbh, 'SELECT * FROM tserver WHERE name = "'.$pa_config->{'servername'}.'"'); + + # For each server, update stats: Simple. + foreach my $server (@servers) { + if ($server->{"server_type"} !=3) { + + # Get LAG + $server->{"modules"} = get_db_value ($dbh, "SELECT count(tagente_estado.id_agente_modulo) FROM tagente_estado, tagente_modulo, tagente WHERE tagente.disabled=0 AND tagente_modulo.id_agente = tagente.id_agente AND tagente_modulo.disabled = 0 AND tagente_modulo.id_agente_modulo = tagente_estado.id_agente_modulo AND tagente_estado.running_by = ".$server->{"id_server"}); + + $server->{"modules_total"} = get_db_value ($dbh,"SELECT count(tagente_estado.id_agente_modulo) FROM tserver, tagente_estado, tagente_modulo, tagente WHERE tagente.disabled=0 AND tagente_modulo.id_agente = tagente.id_agente AND tagente_modulo.disabled = 0 AND tagente_modulo.id_agente_modulo = tagente_estado.id_agente_modulo AND tagente_estado.running_by = tserver.id_server AND tserver.server_type = ".$server->{"server_type"}); + + if ($server->{"server_type"} != 0){ + $lag_row = get_db_single_row ($dbh, "SELECT COUNT(tagente_modulo.id_agente_modulo) AS module_lag, AVG(UNIX_TIMESTAMP() - utimestamp - current_interval) AS lag FROM tagente_estado, tagente_modulo + WHERE utimestamp > 0 + AND tagente_modulo.disabled = 0 + AND tagente_modulo.id_agente_modulo = tagente_estado.id_agente_modulo + AND current_interval > 0 + AND running_by = ".$server->{"id_server"}." + AND (UNIX_TIMESTAMP() - utimestamp) < ( current_interval * 10) + AND (UNIX_TIMESTAMP() - utimestamp) > current_interval"); + } else { + # Local/Dataserver server LAG calculation: + $lag_row = get_db_single_row ($dbh, "SELECT COUNT(tagente_modulo.id_agente_modulo) AS module_lag, AVG(UNIX_TIMESTAMP() - utimestamp - current_interval) AS lag FROM tagente_estado, tagente_modulo + WHERE utimestamp > 0 + AND tagente_modulo.disabled = 0 + AND tagente_modulo.id_tipo_modulo < 5 + AND tagente_modulo.id_agente_modulo = tagente_estado.id_agente_modulo + AND current_interval > 0 + AND (UNIX_TIMESTAMP() - utimestamp) < ( current_interval * 10) + AND running_by = ".$server->{"id_server"}." + AND (UNIX_TIMESTAMP() - utimestamp) > (current_interval * 1.1)"); + } + + $server->{"module_lag"} = $lag_row->{'module_lag'}; + $server->{"lag"} = $lag_row->{'lag'}; + + } else { + # Recon server only + + # Total jobs running on this recon server + $server->{"modules"} = get_db_value ($dbh, "SELECT COUNT(id_rt) FROM trecon_task WHERE id_recon_server = ".$server->{"id_server"}); + + # Total recon jobs (all servers) + $server->{"modules_total"} = get_db_value ($dbh, "SELECT COUNT(status) FROM trecon_task"); + + # Lag (take average active time of all active tasks) + + $server->{"lag"} = get_db_value ($dbh, "SELECT UNIX_TIMESTAMP() - utimestamp from trecon_task WHERE UNIX_TIMESTAMP() > (utimestamp + interval_sweep) AND id_recon_server = ".$server->{"id_server"}); + + $server->{"module_lag"} = get_db_value ($dbh, "SELECT COUNT(id_rt) FROM trecon_task WHERE UNIX_TIMESTAMP() > (utimestamp + interval_sweep) AND id_recon_server = ".$server->{"id_server"}); + + } + + # Check that all values are defined and set to 0 if not + + if (!defined($server->{"lag"})){ + $server->{"lag"} = 0; + } + + if (!defined($server->{"module_lag"})){ + $server->{"module_lag"} = 0; + } + + if (!defined($server->{"modules_total"})){ + $server->{"modules_total"} = 0; + } + + if (!defined($server->{"modules"})){ + $server->{"modules"} = 0; + } + + # Update server record + db_do ($dbh, "UPDATE tserver SET lag_time = '".$server->{"lag"}."', lag_modules = '".$server->{"module_lag"}."', total_modules_running = '".$server->{"modules_total"}."', my_modules = '".$server->{"modules"}."' , stat_utimestamp = UNIX_TIMESTAMP() WHERE id_server = " . $server->{"id_server"} ); + } +} + + +########################################################################## +# Process system statistics for statistics table +########################################################################## +sub pandora_group_statistics ($$) { + my ($pa_config, $dbh) = @_; + + # Variable init + my $modules = 0; + my $normal = 0; + my $critical = 0; + my $warning = 0; + my $unknown = 0; + my $non_init = 0; + my $alerts = 0; + my $alerts_fired = 0; + my $agents = 0; + my $agents_unknown = 0; + my $utimestamp = 0; + my $group = 0; + + # Get all groups + my @groups = get_db_rows ($dbh, 'SELECT id_grupo FROM tgrupo WHERE disabled = 0 AND id_grupo > 1'); + + # For each valid group get the stats: Simple uh? + foreach my $group_row (@groups) { + + $group = $group_row->{'id_grupo'}; + + $agents_unknown = get_db_value ($dbh, "SELECT COUNT(*) FROM tagente WHERE id_grupo = $group AND disabled = 0 AND ultimo_contacto < NOW() - (intervalo *2)"); + + $agents = get_db_value ($dbh, "SELECT COUNT(*) FROM tagente WHERE id_grupo = $group AND disabled = 0"); + + $modules = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0"); + + # Following threelines gets critical/warning modules, skipping the unknown. By default + # we consider status (ok, warning, critical) as a separate status from unknown. + +# $normal = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND estado = 0 AND ((tagente_modulo.id_tipo_modulo NOT IN (21,22,23,100) AND utimestamp > ( UNIX_TIMESTAMP() - (current_interval * 2))) OR (tagente_modulo.id_tipo_modulo IN (21,22,23,100)))"); + +# $critical = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND estado = 1 AND((tagente_modulo.id_tipo_modulo NOT IN (21,22,23,100) AND utimestamp > ( UNIX_TIMESTAMP() - (current_interval * 2))) OR (tagente_modulo.id_tipo_modulo IN (21,22,23,100)))"); + +# $warning = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND estado = 2 AND ((tagente_modulo.id_tipo_modulo NOT IN (21,22,23,100) AND utimestamp > ( UNIX_TIMESTAMP() - (current_interval * 2))) OR (tagente_modulo.id_tipo_modulo IN (21,22,23,100)))"); + + $normal = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND estado = 0"); + + $critical = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND estado = 1"); + + $warning = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND estado = 2 "); + + $unknown = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente.id_agente = tagente_estado.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND tagente_modulo.id_tipo_modulo NOT IN (21,22,23,100) AND utimestamp < ( UNIX_TIMESTAMP() - (current_interval * 2))"); + + $non_init = get_db_value ($dbh, "SELECT COUNT(tagente_estado.id_agente_estado) FROM tagente_estado, tagente, tagente_modulo WHERE tagente.id_grupo = $group AND tagente.disabled = 0 AND tagente_estado.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 + AND tagente_modulo.id_tipo_modulo NOT IN (21,22,23,100) AND tagente_estado.utimestamp = 0"); + + $alerts = get_db_value ($dbh, "SELECT COUNT(talert_template_modules.id) FROM talert_template_modules, tagente_modulo, tagente_estado, tagente WHERE tagente.id_grupo = $group AND tagente_modulo.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND tagente.disabled = 0 AND talert_template_modules.id_agent_module = tagente_modulo.id_agente_modulo"); + + $alerts_fired = get_db_value ($dbh, "SELECT COUNT(talert_template_modules.id) FROM talert_template_modules, tagente_modulo, tagente_estado, tagente WHERE tagente.id_grupo = $group AND tagente_modulo.id_agente = tagente.id_agente AND tagente_estado.id_agente_modulo = tagente_modulo.id_agente_modulo AND tagente_modulo.disabled = 0 AND tagente.disabled = 0 AND talert_template_modules.id_agent_module = tagente_modulo.id_agente_modulo AND times_fired > 0"); + + # Update the record. + + db_do ($dbh, "DELETE FROM tgroup_stat WHERE id_group = $group"); + + db_do ($dbh, "INSERT INTO tgroup_stat (id_group, modules, normal, critical, warning, unknown, `non-init`, alerts, alerts_fired, agents, agents_unknown, utimestamp) VALUES ($group, $modules, $normal, $critical, $warning, $unknown, $non_init, $alerts, $alerts_fired, $agents, $agents_unknown, UNIX_TIMESTAMP())"); + + } + +} + + +########################################################################## +# Pandora self monitoring process +########################################################################## +sub pandora_self_monitoring ($$) { + my ($pa_config, $dbh) = @_; + my $timezone_offset = 0; # PENDING (TODO) ! + my $utimestamp = time (); + my $timestamp = strftime ("%Y-%m-%d %H:%M:%S", localtime()); + + my $xml_output = ""; + + $xml_output = ""; + $xml_output .=" "; + $xml_output .=" Status"; + $xml_output .=" generic_proc"; + $xml_output .=" 1"; + $xml_output .=" "; + + my $load_average = load_average(); + my $free_mem = free_mem();; + my $free_disk_spool = disk_free ($pa_config->{"incomingdir"}); + my $my_data_server = get_db_value ($dbh, "SELECT id_server FROM tserver WHERE server_type = 0 AND name = '".$pa_config->{"servername"}."'"); + + my $agents_unknown = get_db_value ($dbh, "SELECT * FROM tagente_estado, tagente WHERE tagente.disabled =0 AND tagente.id_agente = tagente_estado.id_agente AND running_by = $my_data_server AND utimestamp < NOW() - (current_interval * 2) limit 10;"); + + my $queued_modules = get_db_value ($dbh, "SELECT SUM(queued_modules) FROM tserver WHERE name = '".$pa_config->{"servername"}."'"); + + $xml_output .=" "; + $xml_output .=" Queued_Modules"; + $xml_output .=" generic_data"; + $xml_output .=" $queued_modules"; + $xml_output .=" "; + + $xml_output .=" "; + $xml_output .=" Agents_Unknown"; + $xml_output .=" generic_data"; + $xml_output .=" $agents_unknown"; + $xml_output .=" "; + + $xml_output .=" "; + $xml_output .=" System_Load_AVG"; + $xml_output .=" generic_data"; + $xml_output .=" $load_average"; + $xml_output .=" "; + + $xml_output .=" "; + $xml_output .=" Free_RAM"; + $xml_output .=" generic_data"; + $xml_output .=" $free_mem"; + $xml_output .=" "; + + $xml_output .=" "; + $xml_output .=" FreeDisk_SpoolDir"; + $xml_output .=" generic_data"; + $xml_output .=" $free_disk_spool"; + $xml_output .=" "; + + $xml_output .= ""; + + my $filename = $pa_config->{"incomingdir"}."/".$pa_config->{'servername'}.".".$utimestamp.".data"; + + open (XMLFILE, ">> $filename") or die "[FATAL] Could not open internal monitoring XML file for deploying monitorization at '$filename'"; + print XMLFILE $xml_output; + close (XMLFILE); +} + + # End of function declaration # End of defined Code 1; __END__ - -=head1 DEPENDENCIES - -L, L, L, L, L, L, L, L, L - -=head1 LICENSE - -This is released under the GNU Lesser General Public License. - -=head1 SEE ALSO - -L, L, L, L, L, L, L, L, L - -=head1 COPYRIGHT - -Copyright (c) 2005-2010 Artica Soluciones Tecnologicas S.L - - -=cut diff --git a/pandora_server/lib/PandoraFMS/Tools.pm b/pandora_server/lib/PandoraFMS/Tools.pm index 39c1031dc8..e8b43bc8df 100644 --- a/pandora_server/lib/PandoraFMS/Tools.pm +++ b/pandora_server/lib/PandoraFMS/Tools.pm @@ -44,6 +44,9 @@ our @EXPORT = qw( enterprise_load print_message get_tag_value + disk_free + load_average + free_mem ); ########################################################################## @@ -396,6 +399,30 @@ sub get_tag_value ($$$) { return $def_value; } +############################################################################## +# Below some "internal" functions for automonitoring feature +# TODO: Implement the same for other systems like Solaris or BSD +############################################################################## + +sub disk_free ($) { + my $target = $_[0]; + + # Try to use df command with Posix parameters... + my $command = "df -k -P ".$target." | tail -1 | awk '{ print \$4/1024}'"; + my $output = `$command`; + return $output; +} + +sub load_average { + my $load_average = `cat /proc/loadavg | awk '{ print \$1 }'`; + return $load_average; +} + +sub free_mem { + my $free_mem = `free | grep Mem | awk '{ print \$4 }'`; + return $free_mem; +} + # End of function declaration # End of defined Code diff --git a/pandora_server/util/pandora_db.pl b/pandora_server/util/pandora_db.pl index 453ab78d2a..9dd5fb82a3 100755 --- a/pandora_server/util/pandora_db.pl +++ b/pandora_server/util/pandora_db.pl @@ -404,5 +404,10 @@ sub pandoradb_main ($$$) { pandora_compactdb ($conf, defined ($history_dbh) ? $history_dbh : $dbh); } + # Update tconfig with last time of database maintance time (now) + + db_do ($dbh, "DELETE FROM tconfig WHERE token = 'db_maintance'"); + db_do ($dbh, "INSERT INTO tconfig (token, value) VALUES ('db_maintance', '".time()."')"); + print "Ending at ". strftime ("%Y-%m-%d %H:%M:%S", localtime()) . "\n"; }