Merge branch 'feature/cluster-status-json-5444' into next

Fixes #5444
This commit is contained in:
Michael Friedrich 2014-02-13 19:24:09 +01:00
commit fc56782695
8 changed files with 210 additions and 42 deletions

View File

@ -38,4 +38,5 @@ install(TARGETS cluster RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR} LIBRARY DES
#install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/lib/icinga2/cluster\")")
install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/lib/icinga2/cluster/config\")")
install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/lib/icinga2/cluster/log\")")
install(CODE "file(MAKE_DIRECTORY \"\$ENV{DESTDIR}${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/icinga2/cluster\")")

View File

@ -34,7 +34,10 @@ type ClusterListener {
%attribute array "peers" {
%attribute name(Endpoint) "*"
}
},
%attribute string "status_path",
%attribute number "status_update_interval"
}
type Endpoint {

View File

@ -37,52 +37,55 @@ REGISTER_SCRIPTFUNCTION(ClusterCheck, &ClusterCheckTask::ScriptFunc);
/**
 * Check task that reports cluster connectivity as a CheckResult: output text,
 * state (OK or Critical), performance data and the local node name as check source.
 *
 * NOTE(review): this listing appears to interleave the removed and the added
 * lines of a diff. connected_endpoints / not_connected_endpoints are declared
 * both as std::vector<String> and as String, `output` is declared twice, two
 * `if` headers are stacked, and SetPerformanceData() is called twice. Confirm
 * which variant is intended before building this block.
 */
CheckResult::Ptr ClusterCheckTask::ScriptFunc(const Service::Ptr&)
{
/* Seconds since application start, capped at 60. Not read again in the lines shown here. */
double interval = Utility::GetTime() - Application::GetStartTime();
if (interval > 60)
interval = 60;
/* Vector-based bookkeeping (one of the two interleaved variants). */
double count_endpoints = 0;
std::vector<String> not_connected_endpoints;
std::vector<String> connected_endpoints;
/* Status dictionary obtained from the cluster listener (the other variant).
 * NOTE(review): it is only assigned inside the loop below; if no ClusterListener
 * object exists it presumably stays null and the status->Get() calls further
 * down would dereference it -- confirm a listener is always configured. */
Dictionary::Ptr status;
BOOST_FOREACH(const ClusterListener::Ptr& cluster_listener, DynamicType::GetObjects<ClusterListener>()) {
String identity = cluster_listener->GetIdentity();
/* Sort every endpoint except the one named like our own identity into connected / not connected. */
BOOST_FOREACH(const Endpoint::Ptr& endpoint, DynamicType::GetObjects<Endpoint>()) {
count_endpoints++;
if(!endpoint->IsConnected() && endpoint->GetName() != identity)
not_connected_endpoints.push_back(endpoint->GetName());
else if(endpoint->IsConnected() && endpoint->GetName() != identity)
connected_endpoints.push_back(endpoint->GetName());
}
/* XXX there's only one cluster listener */
status = cluster_listener->GetClusterStatus();
}
std::sort(not_connected_endpoints.begin(), not_connected_endpoints.end());
std::sort(connected_endpoints.begin(), connected_endpoints.end());
/* Comma-separated endpoint names taken from the status dictionary.
 * NOTE(review): these re-declare the two vector names from above. */
String connected_endpoints = FormatArray(status->Get("conn_endpoints"));
String not_connected_endpoints = FormatArray(status->Get("not_conn_endpoints"));
/* remove unneeded perfdata */
status->Set("conn_endpoints", Empty);
status->Set("not_conn_endpoints", Empty);
/* Default result is OK; it is switched to Critical below when endpoints are not connected. */
ServiceState state = StateOK;
String output = "Icinga 2 Cluster is running: Connected Endpoints: "+ Convert::ToString(connected_endpoints.size()) + " (" +
boost::algorithm::join(connected_endpoints, ",") + ").";
String output = "Icinga 2 Cluster is running: Connected Endpoints: "+ Convert::ToString(status->Get("num_conn_endpoints")) + " (" +
connected_endpoints + ").";
/* NOTE(review): two alternative conditions for the same branch follow (vector size vs. status counter). */
if (not_connected_endpoints.size() > 0) {
if (status->Get("num_not_conn_endpoints") > 0) {
state = StateCritical;
output = "Icinga 2 Cluster Problem: " + Convert::ToString(not_connected_endpoints.size()) +
" Endpoints (" + boost::algorithm::join(not_connected_endpoints, ",") + ") not connected.";
output = "Icinga 2 Cluster Problem: " + Convert::ToString(status->Get("num_not_conn_endpoints")) +
" Endpoints (" + not_connected_endpoints + ") not connected.";
}
/* Hand-built perfdata dictionary (vector-based variant). */
Dictionary::Ptr perfdata = make_shared<Dictionary>();
perfdata->Set("num_endpoints", count_endpoints);
perfdata->Set("num_conn_endpoints", connected_endpoints.size());
perfdata->Set("num_not_conn_endpoints", not_connected_endpoints.size());
/* Assemble the check result. */
CheckResult::Ptr cr = make_shared<CheckResult>();
cr->SetOutput(output);
/* NOTE(review): performance data is set twice; the second call receives the status dictionary. */
cr->SetPerformanceData(perfdata);
cr->SetPerformanceData(status);
cr->SetState(state);
cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());
return cr;
}
/**
 * Joins the elements of an array into one comma-separated string.
 *
 * @param arr The array to format; may be a null pointer.
 * @returns Every element converted with Convert::ToString and separated by ","
 *          (no trailing separator); an empty string when arr is null or empty.
 */
String ClusterCheckTask::FormatArray(const Array::Ptr& arr)
{
	String joined;

	/* Nothing to format without an array. */
	if (!arr)
		return joined;

	/* Hold the array's lock for the whole iteration. */
	ObjectLock guard(arr);
	bool needSeparator = false;

	BOOST_FOREACH(const Value& item, arr) {
		/* The separator goes in front of every element but the first. */
		if (needSeparator)
			joined += ",";

		joined += Convert::ToString(item);
		needSeparator = true;
	}

	return joined;
}

View File

@ -37,6 +37,7 @@ public:
private:
ClusterCheckTask(void);
static String FormatArray(const Array::Ptr& arr);
};
}

View File

@ -19,6 +19,7 @@
#include "cluster/clusterlistener.h"
#include "cluster/endpoint.h"
#include "icinga/cib.h"
#include "icinga/domain.h"
#include "icinga/icingaapplication.h"
#include "base/netstring.h"
@ -119,6 +120,12 @@ void ClusterListener::Start(void)
}
}
}
m_StatusTimer = make_shared<Timer>();
m_StatusTimer->SetInterval(GetStatusUpdateInterval());
m_StatusTimer->OnTimerExpired.connect(boost::bind(&ClusterListener::StatusTimerHandler, this));
m_StatusTimer->Start();
m_StatusTimer->Reschedule(0);
}
/**
@ -1558,3 +1565,141 @@ bool ClusterListener::SupportsNotifications(void)
return !type->GetObjects().empty() && IcingaApplication::GetInstance()->GetEnableNotifications();
}
bool ClusterListener::SupportsFeature(const String& name)
{
DynamicType::Ptr type = DynamicType::GetByName(name);
if (!type)
return false;
return !type->GetObjects().empty();
}
/**
 * Timer callback: serializes the current cluster status as JSON and stores it
 * at the configured status path.
 *
 * The data is first written to "<status path>.tmp" and then renamed over the
 * real file, so the existing file is only replaced by a completely written one.
 *
 * @throws posix_error when the temporary file cannot be opened or written, or
 *         when the rename fails.
 */
void ClusterListener::StatusTimerHandler(void)
{
	Log(LogInformation, "cluster", "Writing cluster.json file");

	String statuspath = GetStatusPath();
	String statuspathtmp = statuspath + ".tmp"; /* XXX make this a global definition */

	std::ofstream statusfp;
	statusfp.open(statuspathtmp.CStr(), std::ofstream::out | std::ofstream::trunc);

	/* Without a usable temp file there is nothing to rename; report the real cause. */
	if (!statusfp) {
		BOOST_THROW_EXCEPTION(posix_error()
		    << boost::errinfo_api_function("open")
		    << boost::errinfo_errno(errno)
		    << boost::errinfo_file_name(statuspathtmp));
	}

	statusfp << std::fixed;
	statusfp << JsonSerialize(GetClusterStatus());
	statusfp.close();

	/* A failed write or close (e.g. disk full) leaves a truncated temp file.
	 * Stop here so that it never replaces the last good status file. */
	if (statusfp.fail()) {
		BOOST_THROW_EXCEPTION(posix_error()
		    << boost::errinfo_api_function("write")
		    << boost::errinfo_errno(errno)
		    << boost::errinfo_file_name(statuspathtmp));
	}

#ifdef _WIN32
	/* The existing target is removed first on Windows, before the rename below. */
	_unlink(statuspath.CStr());
#endif /* _WIN32 */

	if (rename(statuspathtmp.CStr(), statuspath.CStr()) < 0) {
		BOOST_THROW_EXCEPTION(posix_error()
		    << boost::errinfo_api_function("rename")
		    << boost::errinfo_errno(errno)
		    << boost::errinfo_file_name(statuspathtmp));
	}

	Log(LogInformation, "cluster", "Finished writing cluster.json file");
}
/**
 * Collects a flat dictionary describing the cluster and the local instance.
 *
 * The dictionary contains:
 *  - node name and cluster identity,
 *  - endpoint counters plus the sorted names of connected / not connected
 *    endpoints (the endpoint named like our own identity is counted in
 *    "num_endpoints" but listed in neither array),
 *  - one "feature_<Type>" entry (1 or 0) per known feature type,
 *  - check rate, latency and execution time statistics,
 *  - service and host state counters.
 *
 * @returns A newly created dictionary with the values listed above.
 */
Dictionary::Ptr ClusterListener::GetClusterStatus(void)
{
	Dictionary::Ptr bag = make_shared<Dictionary>();

	/* Fetch the identity once; it is compared against every endpoint below. */
	const String identity = GetIdentity();

	/* cluster stats */
	bag->Set("node", IcingaApplication::GetInstance()->GetNodeName());
	bag->Set("identity", identity);

	double count_endpoints = 0;
	Array::Ptr not_connected_endpoints = make_shared<Array>();
	Array::Ptr connected_endpoints = make_shared<Array>();

	BOOST_FOREACH(const Endpoint::Ptr& endpoint, DynamicType::GetObjects<Endpoint>()) {
		count_endpoints++;

		/* Our own endpoint is counted but never reported as (not) connected. */
		if (endpoint->GetName() != identity) {
			/* Query the connection state once so that every foreign endpoint
			 * ends up in exactly one of the two lists. */
			if (endpoint->IsConnected())
				connected_endpoints->Add(endpoint->GetName());
			else
				not_connected_endpoints->Add(endpoint->GetName());
		}
	}

	std::sort(not_connected_endpoints->Begin(), not_connected_endpoints->End());
	std::sort(connected_endpoints->Begin(), connected_endpoints->End());

	bag->Set("num_endpoints", count_endpoints);
	bag->Set("num_conn_endpoints", connected_endpoints->GetLength());
	bag->Set("num_not_conn_endpoints", not_connected_endpoints->GetLength());
	bag->Set("conn_endpoints", connected_endpoints);
	bag->Set("not_conn_endpoints", not_connected_endpoints);

	/* features */
	bag->Set("feature_CheckerComponent", SupportsChecks() ? 1 : 0);
	bag->Set("feature_NotificationComponent", SupportsNotifications() ? 1 : 0);

	/* XXX find a more generic way of getting features as a list */
	bag->Set("feature_IdoMysqlConnection", SupportsFeature("IdoMysqlConnection") ? 1 : 0);
	bag->Set("feature_IdoPgsqlConnection", SupportsFeature("IdoPgsqlConnection") ? 1 : 0);
	bag->Set("feature_StatusDataWriter", SupportsFeature("StatusDataWriter") ? 1 : 0);
	bag->Set("feature_CompatLogger", SupportsFeature("CompatLogger") ? 1 : 0);
	bag->Set("feature_ExternalCommandListener", SupportsFeature("ExternalCommandListener") ? 1 : 0);
	bag->Set("feature_CheckResultReader", SupportsFeature("CheckResultReader") ? 1 : 0);
	bag->Set("feature_LivestatusListener", SupportsFeature("LivestatusListener") ? 1 : 0);
	bag->Set("feature_GraphiteWriter", SupportsFeature("GraphiteWriter") ? 1 : 0);
	bag->Set("feature_PerfdataWriter", SupportsFeature("PerfdataWriter") ? 1 : 0);
	bag->Set("feature_FileLogger", SupportsFeature("FileLogger") ? 1 : 0);
	bag->Set("feature_SyslogLogger", SupportsFeature("SyslogLogger") ? 1 : 0);

	/* icinga stats */
	/* Per-second rates are averaged over the uptime, capped at 60 seconds. */
	double interval = Utility::GetTime() - Application::GetStartTime();

	if (interval > 60)
		interval = 60;

	bag->Set("active_checks", CIB::GetActiveChecksStatistics(interval) / interval);
	bag->Set("passive_checks", CIB::GetPassiveChecksStatistics(interval) / interval);

	bag->Set("active_checks_1min", CIB::GetActiveChecksStatistics(60));
	bag->Set("passive_checks_1min", CIB::GetPassiveChecksStatistics(60));
	bag->Set("active_checks_5min", CIB::GetActiveChecksStatistics(60 * 5));
	bag->Set("passive_checks_5min", CIB::GetPassiveChecksStatistics(60 * 5));
	bag->Set("active_checks_15min", CIB::GetActiveChecksStatistics(60 * 15));
	bag->Set("passive_checks_15min", CIB::GetPassiveChecksStatistics(60 * 15));

	ServiceCheckStatistics scs = CIB::CalculateServiceCheckStats();

	bag->Set("min_latency", scs.min_latency);
	bag->Set("max_latency", scs.max_latency);
	bag->Set("avg_latency", scs.avg_latency);
	/* Execution times come from their own fields; they used to be filled
	 * with the latency values by mistake. */
	bag->Set("min_execution_time", scs.min_execution_time);
	bag->Set("max_execution_time", scs.max_execution_time);
	bag->Set("avg_execution_time", scs.avg_execution_time);

	ServiceStatistics ss = CIB::CalculateServiceStats();

	bag->Set("num_services_ok", ss.services_ok);
	bag->Set("num_services_warning", ss.services_warning);
	bag->Set("num_services_critical", ss.services_critical);
	bag->Set("num_services_unknown", ss.services_unknown);
	bag->Set("num_services_pending", ss.services_pending);
	bag->Set("num_services_unreachable", ss.services_unreachable);
	bag->Set("num_services_flapping", ss.services_flapping);
	bag->Set("num_services_in_downtime", ss.services_in_downtime);
	bag->Set("num_services_acknowledged", ss.services_acknowledged);

	HostStatistics hs = CIB::CalculateHostStats();

	bag->Set("num_hosts_up", hs.hosts_up);
	bag->Set("num_hosts_down", hs.hosts_down);
	bag->Set("num_hosts_unreachable", hs.hosts_unreachable);
	bag->Set("num_hosts_flapping", hs.hosts_flapping);
	bag->Set("num_hosts_in_downtime", hs.hosts_in_downtime);
	bag->Set("num_hosts_acknowledged", hs.hosts_acknowledged);

	return bag;
}

View File

@ -51,6 +51,8 @@ public:
shared_ptr<SSL_CTX> GetSSLContext(void) const;
String GetClusterDir(void) const;
Dictionary::Ptr GetClusterStatus(void);
private:
shared_ptr<SSL_CTX> m_SSLContext;
@ -61,6 +63,9 @@ private:
Timer::Ptr m_ClusterTimer;
void ClusterTimerHandler(void);
Timer::Ptr m_StatusTimer;
void StatusTimerHandler(void);
std::set<TcpSocket::Ptr> m_Servers;
void AddListener(const String& service);
@ -107,6 +112,7 @@ private:
static bool SupportsChecks(void);
static bool SupportsNotifications(void);
static bool SupportsFeature(const String& name);
void SetSecurityInfo(const Dictionary::Ptr& message, const DynamicObject::Ptr& object, int privs);

View File

@ -1,4 +1,5 @@
#include "base/dynamicobject.h"
#include "base/application.h"
namespace icinga
{
@ -14,6 +15,12 @@ class ClusterListener : DynamicObject
[config] Array::Ptr peers;
[state] double log_message_timestamp;
String identity;
[config] String status_path {
default {{{ return Application::GetLocalStateDir() + "/cache/icinga2/cluster/cluster.json"; }}}
};
[config] double status_update_interval {
default {{{ return 15; }}}
};
};
}

View File

@ -851,15 +851,17 @@ Example:
Attributes:
Name |Description
----------------|----------------
cert\_path |**Required.** Path to the public key.
key\_path |**Required.** Path to the private key.
ca\_path |**Required.** Path to the CA certificate file.
crl\_path |**Optional.** Path to the CRL file.
bind\_host |**Optional.** The IP address the cluster listener should be bound to.
bind\_port |**Optional.** The port the cluster listener should be bound to.
peers |**Optional.** A list of
Name |Description
--------------------------|--------------------------
cert\_path |**Required.** Path to the public key.
key\_path |**Required.** Path to the private key.
ca\_path |**Required.** Path to the CA certificate file.
crl\_path |**Optional.** Path to the CRL file.
bind\_host |**Optional.** The IP address the cluster listener should be bound to.
bind\_port |**Optional.** The port the cluster listener should be bound to.
peers |**Optional.** A list of
status\_path |**Optional.** Path to the cluster status file. Defaults to IcingaLocalStateDir + "/cache/icinga2/cluster/cluster.json".
status\_update\_interval |**Optional.** The interval at which the cluster status file is updated. Defaults to 15 seconds.
### <a id="objecttype-endpoint"></a> Endpoint