Fix check statistics mixing host and service checks.

Fixes #6313
This commit is contained in:
Michael Friedrich 2014-05-26 20:56:34 +02:00
parent b5681b5e38
commit e070db65c8
10 changed files with 193 additions and 63 deletions

View File

@ -240,7 +240,8 @@ void CheckerComponent::ResultTimerHandler(void)
{
boost::mutex::scoped_lock lock(m_Mutex);
msgbuf << "Pending checkables: " << m_PendingCheckables.size() << "; Idle checkables: " << m_IdleCheckables.size() << "; Checks/s: " << CIB::GetActiveChecksStatistics(5) / 5.0;
msgbuf << "Pending checkables: " << m_PendingCheckables.size() << "; Idle checkables: " << m_IdleCheckables.size() << "; Checks/s: "
<< (CIB::GetActiveHostChecksStatistics(5) + CIB::GetActiveServiceChecksStatistics(5)) / 5.0;
}
Log(LogNotice, "checker", msgbuf.str());

View File

@ -803,8 +803,10 @@ void StatusDataWriter::StatusTimerHandler(void)
"\t" "enable_flap_detection=" << (IcingaApplication::GetInstance()->GetEnableFlapping() ? 1 : 0) << "\n"
"\t" "enable_failure_prediction=0" "\n"
"\t" "process_performance_data=" << (IcingaApplication::GetInstance()->GetEnablePerfdata() ? 1 : 0) << "\n"
"\t" "active_scheduled_service_check_stats=" << CIB::GetActiveChecksStatistics(60) << "," << CIB::GetActiveChecksStatistics(5 * 60) << "," << CIB::GetActiveChecksStatistics(15 * 60) << "\n"
"\t" "passive_service_check_stats=" << CIB::GetPassiveChecksStatistics(60) << "," << CIB::GetPassiveChecksStatistics(5 * 60) << "," << CIB::GetPassiveChecksStatistics(15 * 60) << "\n"
"\t" "active_scheduled_host_check_stats=" << CIB::GetActiveHostChecksStatistics(60) << "," << CIB::GetActiveHostChecksStatistics(5 * 60) << "," << CIB::GetActiveHostChecksStatistics(15 * 60) << "\n"
"\t" "passive_host_check_stats=" << CIB::GetPassiveHostChecksStatistics(60) << "," << CIB::GetPassiveHostChecksStatistics(5 * 60) << "," << CIB::GetPassiveHostChecksStatistics(15 * 60) << "\n"
"\t" "active_scheduled_service_check_stats=" << CIB::GetActiveServiceChecksStatistics(60) << "," << CIB::GetActiveServiceChecksStatistics(5 * 60) << "," << CIB::GetActiveServiceChecksStatistics(15 * 60) << "\n"
"\t" "passive_service_check_stats=" << CIB::GetPassiveServiceChecksStatistics(60) << "," << CIB::GetPassiveServiceChecksStatistics(5 * 60) << "," << CIB::GetPassiveServiceChecksStatistics(15 * 60) << "\n"
"\t" "next_downtime_id=" << Service::GetNextDowntimeID() << "\n"
"\t" "next_comment_id=" << Service::GetNextCommentID() << "\n";

View File

@ -50,8 +50,8 @@ void StatusTable::AddColumns(Table *table, const String& prefix,
table->AddColumn(prefix + "service_checks", Column(&StatusTable::ServiceChecksAccessor, objectAccessor));
table->AddColumn(prefix + "service_checks_rate", Column(&StatusTable::ServiceChecksRateAccessor, objectAccessor));
table->AddColumn(prefix + "host_checks", Column(&Table::ZeroAccessor, objectAccessor));
table->AddColumn(prefix + "host_checks_rate", Column(&Table::ZeroAccessor, objectAccessor));
table->AddColumn(prefix + "host_checks", Column(&StatusTable::HostChecksAccessor, objectAccessor));
table->AddColumn(prefix + "host_checks_rate", Column(&StatusTable::HostChecksRateAccessor, objectAccessor));
table->AddColumn(prefix + "forks", Column(&Table::ZeroAccessor, objectAccessor));
table->AddColumn(prefix + "forks_rate", Column(&Table::ZeroAccessor, objectAccessor));
@ -126,16 +126,28 @@ Value StatusTable::ConnectionsRateAccessor(const Value&)
return (LivestatusListener::GetConnections() / (Utility::GetTime() - Application::GetStartTime()));
}
/**
 * Accessor behind the "host_checks" status column.
 *
 * Asks the CIB for the active host check count over a window equal to the
 * time elapsed since Application::GetStartTime(), truncated to whole units
 * by the cast to long.
 *
 * NOTE(review): the CIB ring buffers are constructed with 15 * 60 slots;
 * how GetValues() treats a longer window is not visible here - confirm.
 *
 * @param row unused
 * @return value of CIB::GetActiveHostChecksStatistics() for that window
 */
Value StatusTable::HostChecksAccessor(const Value&)
{
	const long uptime_window = static_cast<long>(Utility::GetTime() - Application::GetStartTime());

	return CIB::GetActiveHostChecksStatistics(uptime_window);
}
/**
 * Accessor behind the "host_checks_rate" status column.
 *
 * Divides the active host check count gathered since application start by
 * the elapsed time since start. The count is queried with the elapsed time
 * truncated to long; the divisor is the untruncated difference, read from
 * the clock a second time.
 *
 * @param row unused
 * @return active host checks divided by elapsed time since start
 */
Value StatusTable::HostChecksRateAccessor(const Value&)
{
	const long uptime_window = static_cast<long>(Utility::GetTime() - Application::GetStartTime());
	const int host_check_count = CIB::GetActiveHostChecksStatistics(uptime_window);

	return (host_check_count / (Utility::GetTime() - Application::GetStartTime()));
}
Value StatusTable::ServiceChecksAccessor(const Value&)
{
long timespan = static_cast<long>(Utility::GetTime() - Application::GetStartTime());
return CIB::GetActiveChecksStatistics(timespan);
return CIB::GetActiveServiceChecksStatistics(timespan);
}
Value StatusTable::ServiceChecksRateAccessor(const Value&)
{
long timespan = static_cast<long>(Utility::GetTime() - Application::GetStartTime());
return (CIB::GetActiveChecksStatistics(timespan) / (Utility::GetTime() - Application::GetStartTime()));
return (CIB::GetActiveServiceChecksStatistics(timespan) / (Utility::GetTime() - Application::GetStartTime()));
}
Value StatusTable::ExternalCommandsAccessor(const Value&)

View File

@ -49,6 +49,8 @@ protected:
static Value ConnectionsRateAccessor(const Value& row);
static Value ServiceChecksAccessor(const Value& row);
static Value ServiceChecksRateAccessor(const Value& row);
static Value HostChecksAccessor(const Value& row);
static Value HostChecksRateAccessor(const Value& row);
static Value ExternalCommandsAccessor(const Value& row);
static Value ExternalCommandsRateAccessor(const Value& row);
static Value NagiosPidAccessor(const Value& row);

View File

@ -349,7 +349,16 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
if (new_state != ServiceOK)
TriggerDowntimes();
Checkable::UpdateStatistics(cr);
Host::Ptr host;
Service::Ptr service;
tie(host, service) = GetHostService(GetSelf());
CheckableType checkable_type = CheckableHost;
if (service)
checkable_type = CheckableService;
/* statistics for external tools */
Checkable::UpdateStatistics(cr, checkable_type);
bool in_downtime = IsInDowntime();
bool send_notification = hardChange && notification_reachable && !in_downtime && !IsAcknowledged();
@ -402,9 +411,6 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
/* signal status updates to for example db_ido */
OnStateChanged(GetSelf());
Host::Ptr host;
Service::Ptr service;
tie(host, service) = GetHostService(GetSelf());
String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));
@ -480,14 +486,23 @@ void Checkable::ExecuteCheck(void)
GetCheckCommand()->Execute(GetSelf(), result);
}
void Checkable::UpdateStatistics(const CheckResult::Ptr& cr)
void Checkable::UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type)
{
time_t ts = cr->GetScheduleEnd();
if (cr->GetActive())
CIB::UpdateActiveChecksStatistics(ts, 1);
else
CIB::UpdatePassiveChecksStatistics(ts, 1);
if (type == CheckableHost) {
if (cr->GetActive())
CIB::UpdateActiveHostChecksStatistics(ts, 1);
else
CIB::UpdatePassiveHostChecksStatistics(ts, 1);
} else if (type == CheckableService) {
if (cr->GetActive())
CIB::UpdateActiveServiceChecksStatistics(ts, 1);
else
CIB::UpdatePassiveServiceChecksStatistics(ts, 1);
} else {
Log(LogWarning, "icinga", "Unknown checkable type for statistic update.");
}
}
double Checkable::CalculateExecutionTime(const CheckResult::Ptr& cr)

View File

@ -132,7 +132,7 @@ public:
bool GetForceNextCheck(void) const;
void SetForceNextCheck(bool forced, const MessageOrigin& origin = MessageOrigin());
static void UpdateStatistics(const CheckResult::Ptr& cr);
static void UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type);
void ExecuteCheck(void);
void ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin& origin = MessageOrigin());

View File

@ -18,6 +18,7 @@
******************************************************************************/
#include "icinga/cib.hpp"
#include "icinga/host.hpp"
#include "icinga/service.hpp"
#include "base/objectlock.hpp"
#include "base/utility.hpp"
@ -27,30 +28,101 @@
using namespace icinga;
RingBuffer CIB::m_ActiveChecksStatistics(15 * 60);
RingBuffer CIB::m_PassiveChecksStatistics(15 * 60);
RingBuffer CIB::m_ActiveHostChecksStatistics(15 * 60);
RingBuffer CIB::m_ActiveServiceChecksStatistics(15 * 60);
RingBuffer CIB::m_PassiveHostChecksStatistics(15 * 60);
RingBuffer CIB::m_PassiveServiceChecksStatistics(15 * 60);
void CIB::UpdateActiveChecksStatistics(long tv, int num)
void CIB::UpdateActiveHostChecksStatistics(long tv, int num)
{
m_ActiveChecksStatistics.InsertValue(tv, num);
m_ActiveHostChecksStatistics.InsertValue(tv, num);
}
int CIB::GetActiveChecksStatistics(long timespan)
void CIB::UpdateActiveServiceChecksStatistics(long tv, int num)
{
return m_ActiveChecksStatistics.GetValues(timespan);
m_ActiveServiceChecksStatistics.InsertValue(tv, num);
}
void CIB::UpdatePassiveChecksStatistics(long tv, int num)
int CIB::GetActiveHostChecksStatistics(long timespan)
{
m_PassiveChecksStatistics.InsertValue(tv, num);
return m_ActiveHostChecksStatistics.GetValues(timespan);
}
int CIB::GetPassiveChecksStatistics(long timespan)
int CIB::GetActiveServiceChecksStatistics(long timespan)
{
return m_PassiveChecksStatistics.GetValues(timespan);
return m_ActiveServiceChecksStatistics.GetValues(timespan);
}
ServiceCheckStatistics CIB::CalculateServiceCheckStats(void)
void CIB::UpdatePassiveHostChecksStatistics(long tv, int num)
{
m_PassiveServiceChecksStatistics.InsertValue(tv, num);
}
/**
 * Records passive service check activity by forwarding (tv, num) to
 * InsertValue() on the passive service ring buffer.
 *
 * @param tv  time slot for the sample (the visible caller passes the check
 *            result's schedule end time)
 * @param num amount to record for that slot (the visible caller passes 1)
 */
void CIB::UpdatePassiveServiceChecksStatistics(long tv, int num)
{
m_PassiveServiceChecksStatistics.InsertValue(tv, num);
}
/**
 * Queries the passive host ring buffer.
 *
 * @param timespan window handed to RingBuffer::GetValues(); callers in this
 *                 change pass values such as 60, 5 * 60 and 15 * 60
 * @return whatever GetValues() reports for that window, as an int
 */
int CIB::GetPassiveHostChecksStatistics(long timespan)
{
return m_PassiveHostChecksStatistics.GetValues(timespan);
}
/**
 * Queries the passive service ring buffer.
 *
 * @param timespan window handed to RingBuffer::GetValues(); callers in this
 *                 change pass values such as 60, 5 * 60 and 15 * 60
 * @return whatever GetValues() reports for that window, as an int
 */
int CIB::GetPassiveServiceChecksStatistics(long timespan)
{
return m_PassiveServiceChecksStatistics.GetValues(timespan);
}
/**
 * Aggregates latency and execution time over the last check result of every
 * Host object returned by DynamicType::GetObjects<Host>().
 *
 * For each metric the minimum, maximum and arithmetic mean are reported.
 * When no host is visited the minimum keeps its -1 sentinel, the maximum is
 * 0 and the mean is reported as 0 (instead of the NaN an unguarded 0 / 0
 * division would produce).
 *
 * NOTE(review): the last check result is passed on without a null check;
 * presumably CalculateLatency()/CalculateExecutionTime() cope with hosts
 * that have never been checked - confirm.
 *
 * @return the aggregated statistics
 */
CheckableCheckStatistics CIB::CalculateHostCheckStats(void)
{
	double min_latency = -1, max_latency = 0, sum_latency = 0;
	int count_latency = 0;
	double min_execution_time = -1, max_execution_time = 0, sum_execution_time = 0;
	int count_execution_time = 0;

	BOOST_FOREACH(const Host::Ptr& host, DynamicType::GetObjects<Host>()) {
		ObjectLock olock(host);

		CheckResult::Ptr cr = host->GetLastCheckResult();

		/* latency */
		double latency = Host::CalculateLatency(cr);

		/* -1 marks "no sample seen yet", so the first sample always becomes the minimum */
		if (min_latency == -1 || latency < min_latency)
			min_latency = latency;

		if (latency > max_latency)
			max_latency = latency;

		sum_latency += latency;
		count_latency++;

		/* execution_time */
		double execution_time = Host::CalculateExecutionTime(cr);

		if (min_execution_time == -1 || execution_time < min_execution_time)
			min_execution_time = execution_time;

		if (execution_time > max_execution_time)
			max_execution_time = execution_time;

		sum_execution_time += execution_time;
		count_execution_time++;
	}

	CheckableCheckStatistics ccs;

	ccs.min_latency = min_latency;
	ccs.max_latency = max_latency;
	/* guard the mean: with zero hosts the division would be 0.0 / 0 == NaN */
	ccs.avg_latency = (count_latency > 0) ? (sum_latency / count_latency) : 0;
	ccs.min_execution_time = min_execution_time;
	ccs.max_execution_time = max_execution_time;
	ccs.avg_execution_time = (count_execution_time > 0) ? (sum_execution_time / count_execution_time) : 0;

	return ccs;
}
CheckableCheckStatistics CIB::CalculateServiceCheckStats(void)
{
double min_latency = -1, max_latency = 0, sum_latency = 0;
int count_latency = 0;
@ -87,16 +159,16 @@ ServiceCheckStatistics CIB::CalculateServiceCheckStats(void)
count_execution_time++;
}
ServiceCheckStatistics scs;
CheckableCheckStatistics ccs;
scs.min_latency = min_latency;
scs.max_latency = max_latency;
scs.avg_latency = sum_latency / count_latency;
scs.min_execution_time = min_execution_time;
scs.max_execution_time = max_execution_time;
scs.avg_execution_time = sum_execution_time / count_execution_time;
ccs.min_latency = min_latency;
ccs.max_latency = max_latency;
ccs.avg_latency = sum_latency / count_latency;
ccs.min_execution_time = min_execution_time;
ccs.max_execution_time = max_execution_time;
ccs.avg_execution_time = sum_execution_time / count_execution_time;
return scs;
return ccs;
}
ServiceStatistics CIB::CalculateServiceStats(void)

View File

@ -27,7 +27,7 @@
namespace icinga
{
struct ServiceCheckStatistics {
struct CheckableCheckStatistics {
double min_latency;
double max_latency;
double avg_latency;
@ -67,15 +67,22 @@ struct HostStatistics {
class I2_ICINGA_API CIB
{
public:
static void UpdateActiveChecksStatistics(long tv, int num);
static int GetActiveChecksStatistics(long timespan);
static void UpdateActiveHostChecksStatistics(long tv, int num);
static int GetActiveHostChecksStatistics(long timespan);
static void UpdatePassiveChecksStatistics(long tv, int num);
static int GetPassiveChecksStatistics(long timespan);
static void UpdateActiveServiceChecksStatistics(long tv, int num);
static int GetActiveServiceChecksStatistics(long timespan);
static ServiceCheckStatistics CalculateServiceCheckStats(void);
static ServiceStatistics CalculateServiceStats(void);
static void UpdatePassiveHostChecksStatistics(long tv, int num);
static int GetPassiveHostChecksStatistics(long timespan);
static void UpdatePassiveServiceChecksStatistics(long tv, int num);
static int GetPassiveServiceChecksStatistics(long timespan);
static CheckableCheckStatistics CalculateHostCheckStats(void);
static CheckableCheckStatistics CalculateServiceCheckStats(void);
static HostStatistics CalculateHostStats(void);
static ServiceStatistics CalculateServiceStats(void);
static std::pair<Dictionary::Ptr, Dictionary::Ptr> GetFeatureStats(void);
@ -83,8 +90,10 @@ private:
CIB(void);
static boost::mutex m_Mutex;
static RingBuffer m_ActiveChecksStatistics;
static RingBuffer m_PassiveChecksStatistics;
static RingBuffer m_ActiveHostChecksStatistics;
static RingBuffer m_PassiveHostChecksStatistics;
static RingBuffer m_ActiveServiceChecksStatistics;
static RingBuffer m_PassiveServiceChecksStatistics;
};
}

View File

@ -85,17 +85,25 @@ Dictionary::Ptr IcingaStatusWriter::GetStatusData(void)
if (interval > 60)
interval = 60;
icinga_stats->Set("active_checks", CIB::GetActiveChecksStatistics(interval) / interval);
icinga_stats->Set("passive_checks", CIB::GetPassiveChecksStatistics(interval) / interval);
icinga_stats->Set("active_host_checks", CIB::GetActiveHostChecksStatistics(interval) / interval);
icinga_stats->Set("passive_host_checks", CIB::GetPassiveHostChecksStatistics(interval) / interval);
icinga_stats->Set("active_host_checks_1min", CIB::GetActiveHostChecksStatistics(60));
icinga_stats->Set("passive_host_checks_1min", CIB::GetPassiveHostChecksStatistics(60));
icinga_stats->Set("active_host_checks_5min", CIB::GetActiveHostChecksStatistics(60 * 5));
icinga_stats->Set("passive_host_checks_5min", CIB::GetPassiveHostChecksStatistics(60 * 5));
icinga_stats->Set("active_host_checks_15min", CIB::GetActiveHostChecksStatistics(60 * 15));
icinga_stats->Set("passive_host_checks_15min", CIB::GetPassiveHostChecksStatistics(60 * 15));
icinga_stats->Set("active_checks_1min", CIB::GetActiveChecksStatistics(60));
icinga_stats->Set("passive_checks_1min", CIB::GetPassiveChecksStatistics(60));
icinga_stats->Set("active_checks_5min", CIB::GetActiveChecksStatistics(60 * 5));
icinga_stats->Set("passive_checks_5min", CIB::GetPassiveChecksStatistics(60 * 5));
icinga_stats->Set("active_checks_15min", CIB::GetActiveChecksStatistics(60 * 15));
icinga_stats->Set("passive_checks_15min", CIB::GetPassiveChecksStatistics(60 * 15));
icinga_stats->Set("active_service_checks", CIB::GetActiveServiceChecksStatistics(interval) / interval);
icinga_stats->Set("passive_service_checks", CIB::GetPassiveServiceChecksStatistics(interval) / interval);
icinga_stats->Set("active_service_checks_1min", CIB::GetActiveServiceChecksStatistics(60));
icinga_stats->Set("passive_service_checks_1min", CIB::GetPassiveServiceChecksStatistics(60));
icinga_stats->Set("active_service_checks_5min", CIB::GetActiveServiceChecksStatistics(60 * 5));
icinga_stats->Set("passive_service_checks_5min", CIB::GetPassiveServiceChecksStatistics(60 * 5));
icinga_stats->Set("active_service_checks_15min", CIB::GetActiveServiceChecksStatistics(60 * 15));
icinga_stats->Set("passive_service_checks_15min", CIB::GetPassiveServiceChecksStatistics(60 * 15));
ServiceCheckStatistics scs = CIB::CalculateServiceCheckStats();
CheckableCheckStatistics scs = CIB::CalculateServiceCheckStats();
icinga_stats->Set("min_latency", scs.min_latency);
icinga_stats->Set("max_latency", scs.max_latency);

View File

@ -39,17 +39,26 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& service, const CheckResul
interval = 60;
Dictionary::Ptr perfdata = make_shared<Dictionary>();
perfdata->Set("active_checks", CIB::GetActiveChecksStatistics(interval) / interval);
perfdata->Set("passive_checks", CIB::GetPassiveChecksStatistics(interval) / interval);
perfdata->Set("active_checks_1min", CIB::GetActiveChecksStatistics(60));
perfdata->Set("passive_checks_1min", CIB::GetPassiveChecksStatistics(60));
perfdata->Set("active_checks_5min", CIB::GetActiveChecksStatistics(60 * 5));
perfdata->Set("passive_checks_5min", CIB::GetPassiveChecksStatistics(60 * 5));
perfdata->Set("active_checks_15min", CIB::GetActiveChecksStatistics(60 * 15));
perfdata->Set("passive_checks_15min", CIB::GetPassiveChecksStatistics(60 * 15));
perfdata->Set("active_host_checks", CIB::GetActiveHostChecksStatistics(interval) / interval);
perfdata->Set("passive_host_checks", CIB::GetPassiveHostChecksStatistics(interval) / interval);
perfdata->Set("active_host_checks_1min", CIB::GetActiveHostChecksStatistics(60));
perfdata->Set("passive_host_checks_1min", CIB::GetPassiveHostChecksStatistics(60));
perfdata->Set("active_host_checks_5min", CIB::GetActiveHostChecksStatistics(60 * 5));
perfdata->Set("passive_host_checks_5min", CIB::GetPassiveHostChecksStatistics(60 * 5));
perfdata->Set("active_host_checks_15min", CIB::GetActiveHostChecksStatistics(60 * 15));
perfdata->Set("passive_host_checks_15min", CIB::GetPassiveHostChecksStatistics(60 * 15));
ServiceCheckStatistics scs = CIB::CalculateServiceCheckStats();
perfdata->Set("active_service_checks", CIB::GetActiveServiceChecksStatistics(interval) / interval);
perfdata->Set("passive_service_checks", CIB::GetPassiveServiceChecksStatistics(interval) / interval);
perfdata->Set("active_service_checks_1min", CIB::GetActiveServiceChecksStatistics(60));
perfdata->Set("passive_service_checks_1min", CIB::GetPassiveServiceChecksStatistics(60));
perfdata->Set("active_service_checks_5min", CIB::GetActiveServiceChecksStatistics(60 * 5));
perfdata->Set("passive_service_checks_5min", CIB::GetPassiveServiceChecksStatistics(60 * 5));
perfdata->Set("active_service_checks_15min", CIB::GetActiveServiceChecksStatistics(60 * 15));
perfdata->Set("passive_service_checks_15min", CIB::GetPassiveServiceChecksStatistics(60 * 15));
CheckableCheckStatistics scs = CIB::CalculateServiceCheckStats();
perfdata->Set("min_latency", scs.min_latency);
perfdata->Set("max_latency", scs.max_latency);