Add Cluster Health Check.

Fixes #5438
This commit is contained in:
Michael Friedrich 2014-02-07 14:03:53 +01:00
parent 7d9dc1a36b
commit e534f9bc1b
8 changed files with 171 additions and 5 deletions

View File

@ -273,6 +273,7 @@ You can find the state file in `/var/lib/icinga2/icinga2.state`. Before copying
the state file you should make sure that all your cluster nodes are properly shut
down.
### <a id="assign-services-to-cluster-nodes"></a> Assign Services to Cluster Nodes
By default all services are distributed among the cluster nodes with the `Checker`
@ -295,6 +296,29 @@ attribute. Required Endpoints must be defined as array.
> services based on their location, inheriting from a global service template
> defining the authorities.
### <a id="cluster-health-check"></a> Cluster Health Check
The Icinga 2 [ITL](#itl) ships an internal check command that checks all configured
`Endpoint` objects in the cluster setup. The check result will become critical if
one or more configured nodes are not connected.
Example:
object Host "icinga2a" inherits "generic-host" {
services["cluster"] = {
templates = [ "generic-service" ],
check_interval = 1m,
check_command = "cluster",
authorities = [ "icinga2a" ]
},
}
> **Note**
>
> Each cluster node should execute its own local cluster health check to
> get an idea about network-related connection problems from different
> points of view. Use the `authorities` attribute to assign the service
> check to the configured node.
## <a id="dependencies"></a> Dependencies

View File

@ -238,3 +238,6 @@ object CheckCommand "snmp-uptime" inherits "snmp" {
/* Check command "icinga": defines nothing itself, everything is inherited from the "icinga-check-command" template. */
object CheckCommand "icinga" inherits "icinga-check-command" {
}
/* Check command "cluster": defines nothing itself, everything is inherited from the "cluster-check-command" template. */
object CheckCommand "cluster" inherits "cluster-check-command" {
}

View File

@ -25,6 +25,12 @@ template CheckCommand "icinga-check-command" {
}
}
/* Template for cluster check commands: selects the "ClusterCheck" method as the execute handler. */
template CheckCommand "cluster-check-command" {
methods = {
execute = "ClusterCheck"
}
}
template CheckCommand "plugin-check-command" {
methods = {
execute = "PluginCheck"

View File

@ -765,8 +765,8 @@ void Application::MakeVariablesConstant(void)
ScriptVariable::GetByName("IcingaSysconfDir")->SetConstant(true);
ScriptVariable::GetByName("IcingaLocalStateDir")->SetConstant(true);
ScriptVariable::GetByName("IcingaPkgDataDir")->SetConstant(true);
ScriptVariable::GetByName("IcingaStatePath")->SetConstant(true);
ScriptVariable::GetByName("IcingaPidPath")->SetConstant(true);
ScriptVariable::GetByName("IcingaStatePath")->SetConstant(false);
ScriptVariable::GetByName("IcingaPidPath")->SetConstant(false);
ScriptVariable::GetByName("ApplicationType")->SetConstant(true);
}

View File

@ -96,7 +96,7 @@ Dictionary::Ptr IcingaApplication::GetMacros(void) const
String IcingaApplication::GetNodeName(void) const
{
return ScriptVariable::Get("IcingaNodeName");
return ScriptVariable::Get("IcingaNodeName");
}
bool IcingaApplication::ResolveMacro(const String& macro, const CheckResult::Ptr&, String *result) const

View File

@ -16,12 +16,12 @@
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
add_library(methods SHARED
icingachecktask.cpp nullchecktask.cpp nulleventtask.cpp
clusterchecktask.cpp icingachecktask.cpp nullchecktask.cpp nulleventtask.cpp
pluginchecktask.cpp plugineventtask.cpp pluginnotificationtask.cpp
randomchecktask.cpp timeperiodtask.cpp
)
target_link_libraries(methods ${Boost_LIBRARIES} base config icinga)
target_link_libraries(methods ${Boost_LIBRARIES} base config icinga cluster)
set_target_properties (
methods PROPERTIES

View File

@ -0,0 +1,88 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-present Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/
#include "methods/clusterchecktask.h"
#include "cluster/endpoint.h"
#include "cluster/clusterlistener.h"
#include "icinga/cib.h"
#include "icinga/service.h"
#include "icinga/icingaapplication.h"
#include "base/application.h"
#include "base/objectlock.h"
#include "base/convert.h"
#include "base/utility.h"
#include "base/scriptfunction.h"
#include "base/dynamictype.h"
#include <boost/algorithm/string/join.hpp>
using namespace icinga;
REGISTER_SCRIPTFUNCTION(ClusterCheck, &ClusterCheckTask::ScriptFunc);
/**
 * Executes the cluster health check.
 *
 * For every ClusterListener object, all Endpoint objects are visited and -
 * except for the endpoint named like the listener's own identity - sorted
 * into a "connected" and a "not connected" list. The check is critical as
 * soon as at least one endpoint is not connected, otherwise it is OK.
 *
 * The service argument is not used by this check.
 *
 * @returns A check result carrying the state, the output text, the
 *          performance data (num_endpoints, num_conn_endpoints,
 *          num_not_conn_endpoints) and the local node name as check source.
 */
CheckResult::Ptr ClusterCheckTask::ScriptFunc(const Service::Ptr&)
{
	double count_endpoints = 0;
	std::vector<String> not_connected_endpoints;
	std::vector<String> connected_endpoints;

	BOOST_FOREACH(const ClusterListener::Ptr& cluster_listener, DynamicType::GetObjects<ClusterListener>()) {
		String identity = cluster_listener->GetIdentity();

		BOOST_FOREACH(const Endpoint::Ptr& endpoint, DynamicType::GetObjects<Endpoint>()) {
			/* Every endpoint is counted, including the listener's own one. */
			count_endpoints++;

			String name = endpoint->GetName();

			/* The listener's own endpoint is reported in neither list. */
			if (name != identity) {
				if (endpoint->IsConnected())
					connected_endpoints.push_back(name);
				else
					not_connected_endpoints.push_back(name);
			}
		}
	}

	/* Sort the names so that the output text is stable between check runs. */
	std::sort(not_connected_endpoints.begin(), not_connected_endpoints.end());
	std::sort(connected_endpoints.begin(), connected_endpoints.end());

	ServiceState state = StateOK;
	String output = "Icinga 2 Cluster is running: Connected Endpoints: "+ Convert::ToString(connected_endpoints.size()) + " (" +
	    boost::algorithm::join(connected_endpoints, ",") + ").";

	/* A single missing endpoint is enough to turn the check critical. */
	if (not_connected_endpoints.size() > 0) {
		state = StateCritical;
		output = "Icinga 2 Cluster Problem: " + Convert::ToString(not_connected_endpoints.size()) +
		    " Endpoints (" + boost::algorithm::join(not_connected_endpoints, ",") + ") not connected.";
	}

	Dictionary::Ptr perfdata = make_shared<Dictionary>();
	perfdata->Set("num_endpoints", count_endpoints);
	perfdata->Set("num_conn_endpoints", connected_endpoints.size());
	perfdata->Set("num_not_conn_endpoints", not_connected_endpoints.size());

	CheckResult::Ptr cr = make_shared<CheckResult>();
	cr->SetOutput(output);
	cr->SetPerformanceData(perfdata);
	cr->SetState(state);
	cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());

	return cr;
}

View File

@ -0,0 +1,45 @@
/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012-present Icinga Development Team (http://www.icinga.org) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/
#ifndef CLUSTERCHECKTASK_H
#define CLUSTERCHECKTASK_H
#include "methods/i2-methods.h"
#include "icinga/service.h"
namespace icinga
{
/**
 * Cluster check type.
 *
 * Only exposes a static entry point; the sole constructor is private.
 *
 * @ingroup methods
 */
class I2_METHODS_API ClusterCheckTask
{
public:
/**
 * Entry point of the cluster check.
 *
 * @param service The service the check is executed for.
 * @returns The result of the check.
 */
static CheckResult::Ptr ScriptFunc(const Service::Ptr& service);
private:
/* Declared private so that code outside the class cannot create instances. */
ClusterCheckTask(void);
};
}
#endif /* CLUSTERCHECKTASK_H */