Implemented service dependencies.

This commit is contained in:
Gunnar Beutner 2012-07-03 14:18:46 +02:00
parent d577406a66
commit 734ec2f5fb
11 changed files with 269 additions and 43 deletions

View File

@ -4,6 +4,7 @@ using namespace icinga;
int CIB::m_Types;
Ringbuffer CIB::m_TaskStatistics(15 * 60);
boost::signal<void (const ServiceStatusMessage&)> CIB::OnServiceStatusUpdate;
void CIB::RequireInformation(InformationType types)
{

View File

@ -20,6 +20,8 @@ public:
static void UpdateTaskStatistics(long tv, int num);
static int GetTaskStatistics(long timespan);
static boost::signal<void (const ServiceStatusMessage&)> OnServiceStatusUpdate;
private:
static int m_Types;

View File

@ -22,8 +22,6 @@
#include "service.h"
#include "servicegroup.h"
#include "cib.h"
#include "macroprocessor.h"
#include "checkresult.h"
#include "checktask.h"
@ -31,4 +29,6 @@
#include "servicestatusmessage.h"
#include "cib.h"
#endif /* I2CIB_H */

View File

@ -214,6 +214,12 @@ bool NagiosCheckTask::RunTask(void)
exitcode = WEXITSTATUS(status);
#else /* _MSC_VER */
exitcode = status;
/* cmd.exe returns error code 1 (warning) when the plugin
* could not be executed - change the exit status to "unknown"
* when we have no plugin output. */
if (output.empty())
exitcode = 128;
#endif /* _MSC_VER */
ServiceState state;

View File

@ -2,6 +2,8 @@
using namespace icinga;
bool Service::m_DependencyCacheValid = false;
string Service::GetAlias(void) const
{
string value;
@ -105,6 +107,142 @@ Dictionary::Ptr Service::GetCheckers(void) const
return value;
}
/**
 * Resolves the services this service depends on.
 *
 * Walks the dictionary returned by GetDependencies() and looks up each
 * entry's value through Service::GetByName().
 *
 * @returns The parent services; empty when GetDependencies() yields a
 *          null pointer.
 */
vector<Service> Service::GetParents(void) const
{
	vector<Service> result;

	Dictionary::Ptr deps = GetDependencies();

	/* nothing configured - there are no parents to resolve */
	if (!deps)
		return result;

	for (Dictionary::Iterator entry = deps->Begin(); entry != deps->End(); ++entry)
		result.push_back(Service::GetByName(entry->second));

	return result;
}
/**
 * Resolves the services that depend on this service.
 *
 * Refreshes the dependency cache first (UpdateDependencyCache() is a no-op
 * while the cache is still valid) and then reads this service's
 * "dependency_children" tag, looking up every entry's value through
 * Service::GetByName().
 *
 * @returns The child services; empty when the tag holds no dictionary.
 */
vector<Service> Service::GetChildren(void) const
{
	/* the tag read below is only meaningful once the cache is current */
	UpdateDependencyCache();

	Dictionary::Ptr cache;
	GetConfigObject()->GetTag("dependency_children", &cache);

	vector<Service> result;

	if (!cache)
		return result;

	for (Dictionary::Iterator entry = cache->Begin(); entry != cache->End(); ++entry)
		result.push_back(Service::GetByName(entry->second));

	return result;
}
void Service::UpdateDependencyCache(void)
{
static long cacheTx = 0;
if (m_DependencyCacheValid)
return;
cacheTx++;
ConfigObject::TMap::Range range = ConfigObject::GetObjects("service");
ConfigObject::TMap::Iterator it;
for (it = range.first; it != range.second; it++) {
Service child = it->second;
vector<Service> parents = child.GetParents();
vector<Service>::iterator st;
for (st = parents.begin(); st != parents.end(); st++) {
Service parent = *st;
long tx = 0;
parent.GetConfigObject()->GetTag("dependency_cache_tx", &tx);
Dictionary::Ptr children;
/* rather than resetting the dependency dictionary in a separate loop we use the cache_tx
* tag to check if the dictionary is from this cache update run. */
if (tx != cacheTx) {
children = boost::make_shared<Dictionary>();
parent.GetConfigObject()->SetTag("dependency_children", children);
parent.GetConfigObject()->SetTag("dependency_cache_tx", cacheTx);
} else {
parent.GetConfigObject()->GetTag("dependency_children", &children);
assert(children);
}
children->AddUnnamedProperty(child.GetName());
}
}
m_DependencyCacheValid = true;
}
/**
 * Marks the dependency cache as out of date.
 *
 * Clears m_DependencyCacheValid so that the next UpdateDependencyCache()
 * call performs a rebuild instead of returning early.
 */
void Service::InvalidateDependencyCache(void)
{
	Service::m_DependencyCacheValid = false;
}
/**
 * Derives a status message for a service from the states of the given
 * services.
 *
 * A service in `parents` counts as failed when its state is neither StateOK
 * nor StateWarning.
 *
 * @param input Optional status message. When no service has failed and
 *              input is non-NULL, a copy of *input is returned unchanged.
 * @param parents The services whose states are combined.
 * @returns *input (see above), or a newly built message: StateOK when no
 *          service has failed, StateUnreachable otherwise; hard state type,
 *          check attempt 1, next check set to the smallest GetNextCheck()
 *          value among `parents` (-1 when `parents` is empty) and a check
 *          result whose timestamps are all the current time.
 */
ServiceStatusMessage Service::CalculateCombinedStatus(ServiceStatusMessage *input, const vector<Service>& parents)
{
	bool anyFailed = false;
	time_t nextCheck = -1;

	vector<Service>::const_iterator it;
	for (it = parents.begin(); it != parents.end(); ++it) {
		/* work on a copy: GetNextCheck() is a non-const member */
		Service parent = *it;

		ServiceState parentState = parent.GetState();
		if (parentState != StateOK && parentState != StateWarning)
			anyFailed = true;

		/* track the earliest upcoming check among the given services */
		time_t parentNextCheck = parent.GetNextCheck();
		if (nextCheck == -1 || parentNextCheck < nextCheck)
			nextCheck = parentNextCheck;
	}

	string message;
	ServiceState state;

	if (!anyFailed) {
		/* nothing is blocking the service - pass the real result through */
		if (input)
			return *input;

		state = StateOK;
		message = "Dependant services are available.";
	} else {
		state = StateUnreachable;
		message = "One or more dependant services have failed.";
	}

	ServiceStatusMessage result;
	result.SetState(state);
	result.SetStateType(StateTypeHard);
	result.SetCurrentCheckAttempt(1);
	result.SetNextCheck(nextCheck);

	time_t now;
	time(&now);

	/* synthetic check result: no check was actually run, so all timestamps are "now" */
	CheckResult cr;
	cr.SetScheduleStart(now);
	cr.SetScheduleEnd(now);
	cr.SetExecutionStart(now);
	cr.SetExecutionEnd(now);
	cr.SetOutput(message);
	cr.SetState(state);

	result.SetCheckResult(cr);

	return result;
}
void Service::SetNextCheck(time_t nextCheck)
{
GetConfigObject()->SetTag("next_check", (long)nextCheck);
@ -179,16 +317,23 @@ ServiceStateType Service::GetStateType(void) const
return static_cast<ServiceStateType>(value);
}
void Service::SetLastCheckResult(const Dictionary::Ptr& result)
void Service::SetLastCheckResult(const CheckResult& result)
{
GetConfigObject()->SetTag("last_result", result);
GetConfigObject()->SetTag("last_result", result.GetDictionary());
}
Dictionary::Ptr Service::GetLastCheckResult(void) const
bool Service::HasLastCheckResult(void) const
{
Dictionary::Ptr value;
GetConfigObject()->GetTag("last_result", &value);
return value;
return GetConfigObject()->GetTag("last_result", &value) && value;
}
/**
 * Returns the check result stored in the "last_result" tag.
 *
 * Uses the same condition as HasLastCheckResult(): the tag must exist and
 * must hold a non-null dictionary.
 *
 * @returns A CheckResult constructed from the stored dictionary.
 * @throws invalid_argument when the tag is missing or holds a null
 *         dictionary.
 */
CheckResult Service::GetLastCheckResult(void) const
{
	Dictionary::Ptr value;

	/* also reject a present-but-null tag so that a CheckResult is never
	 * built around a null dictionary */
	if (!GetConfigObject()->GetTag("last_result", &value) || !value)
		throw invalid_argument("Service has no last check result.");

	return CheckResult(value);
}
void Service::SetLastStateChange(time_t ts)
@ -315,4 +460,3 @@ bool Service::IsAllowedChecker(const string& checker) const
return false;
}

View File

@ -21,6 +21,7 @@ enum ServiceStateType
};
class CheckResult;
class ServiceStatusMessage;
class I2_CIB_API Service : public ConfigObjectAdapter
{
@ -44,6 +45,13 @@ public:
Dictionary::Ptr GetGroups(void) const;
Dictionary::Ptr GetCheckers(void) const;
vector<Service> GetParents(void) const;
vector<Service> GetChildren(void) const;
static void UpdateDependencyCache(void);
static void InvalidateDependencyCache(void);
static ServiceStatusMessage CalculateCombinedStatus(ServiceStatusMessage *input, const vector<Service>& parents);
void SetNextCheck(time_t nextCheck);
time_t GetNextCheck(void);
void UpdateNextCheck(void);
@ -62,8 +70,9 @@ public:
void SetStateType(ServiceStateType type);
ServiceStateType GetStateType(void) const;
void SetLastCheckResult(const Dictionary::Ptr& result);
Dictionary::Ptr GetLastCheckResult(void) const;
bool HasLastCheckResult(void) const;
void SetLastCheckResult(const CheckResult& result);
CheckResult GetLastCheckResult(void) const;
void SetLastStateChange(time_t ts);
time_t GetLastStateChange(void) const;
@ -78,6 +87,9 @@ public:
static ServiceStateType StateTypeFromString(const string& state);
static string StateTypeToString(ServiceStateType state);
private:
static bool m_DependencyCacheValid;
};
}

View File

@ -79,25 +79,27 @@ void CIBSyncComponent::Stop(void)
void CIBSyncComponent::ServiceStatusRequestHandler(const Endpoint::Ptr& sender, const RequestMessage& request)
{
MessagePart params;
ServiceStatusMessage params;
if (!request.GetParams(&params))
return;
CIB::OnServiceStatusUpdate(params);
string svcname;
if (!params.GetProperty("service", &svcname))
if (!params.GetService(&svcname))
return;
Service service = Service::GetByName(svcname);
long nextCheck;
if (params.GetProperty("next_check", &nextCheck))
time_t nextCheck;
if (params.GetNextCheck(&nextCheck))
service.SetNextCheck(nextCheck);
long state, stateType;
if (params.GetProperty("state", &state) && params.GetProperty("state_type", &stateType)) {
long old_state, old_stateType;
old_state = service.GetState();
old_stateType = service.GetStateType();
ServiceState state;
ServiceStateType stateType;
if (params.GetState(&state) && params.GetStateType(&stateType)) {
ServiceState old_state = service.GetState();
ServiceStateType old_stateType = service.GetStateType();
if (state != old_state) {
time_t now;
@ -109,16 +111,16 @@ void CIBSyncComponent::ServiceStatusRequestHandler(const Endpoint::Ptr& sender,
service.SetLastHardStateChange(now);
}
service.SetState(static_cast<ServiceState>(state));
service.SetStateType(static_cast<ServiceStateType>(stateType));
service.SetState(state);
service.SetStateType(stateType);
}
long attempt;
if (params.GetProperty("current_attempt", &attempt))
if (params.GetCurrentCheckAttempt(&attempt))
service.SetCurrentCheckAttempt(attempt);
Dictionary::Ptr cr;
if (params.GetProperty("result", &cr))
CheckResult cr;
if (params.GetCheckResult(&cr))
service.SetLastCheckResult(cr);
time_t now;

View File

@ -94,24 +94,22 @@ void CompatComponent::DumpHostObject(ofstream& fp, Host host)
void CompatComponent::DumpServiceStatus(ofstream& fp, Service service)
{
Dictionary::Ptr cr;
cr = service.GetLastCheckResult();
string output;
string perfdata;
long schedule_start = -1, schedule_end = -1;
long execution_start = -1, execution_end = -1;
if (cr) {
cr->GetProperty("output", &output);
cr->GetProperty("schedule_start", &schedule_start);
cr->GetProperty("schedule_end", &schedule_end);
cr->GetProperty("execution_start", &execution_start);
cr->GetProperty("execution_end", &execution_end);
cr->GetProperty("performance_data_raw", &perfdata);
time_t schedule_start = -1, schedule_end = -1;
time_t execution_start = -1, execution_end = -1;
if (service.HasLastCheckResult()) {
CheckResult cr = service.GetLastCheckResult();
output = cr.GetOutput();
schedule_start = cr.GetScheduleStart();
schedule_end = cr.GetScheduleEnd();
execution_start = cr.GetExecutionStart();
execution_end = cr.GetExecutionEnd();
perfdata = cr.GetPerformanceDataRaw();
}
long execution_time = (execution_end - execution_start);
long latency = (schedule_end - schedule_start) - execution_time;
time_t execution_time = (execution_end - execution_start);
time_t latency = (schedule_end - schedule_start) - execution_time;
int state = service.GetState();
@ -123,7 +121,7 @@ void CompatComponent::DumpServiceStatus(ofstream& fp, Service service)
<< "\t" << "service_description=" << service.GetAlias() << "\n"
<< "\t" << "check_interval=" << service.GetCheckInterval() / 60.0 << "\n"
<< "\t" << "retry_interval=" << service.GetRetryInterval() / 60.0 << "\n"
<< "\t" << "has_been_checked=" << (cr ? 1 : 0) << "\n"
<< "\t" << "has_been_checked=" << (service.HasLastCheckResult() ? 1 : 0) << "\n"
<< "\t" << "should_be_scheduled=1" << "\n"
<< "\t" << "check_execution_time=" << execution_time << "\n"
<< "\t" << "check_latency=" << latency << "\n"

View File

@ -297,7 +297,7 @@ void DelegationComponent::CheckResultRequestHandler(const Endpoint::Ptr& sender,
return;
string svcname;
if (params.GetService(&svcname))
if (!params.GetService(&svcname))
return;
Service service = Service::GetByName(svcname);
@ -306,12 +306,39 @@ void DelegationComponent::CheckResultRequestHandler(const Endpoint::Ptr& sender,
if (!service.IsAllowedChecker(sender->GetIdentity()))
return;
/* TODO: send state update for dependant services */
vector<Service> children = service.GetChildren();
vector<Service>::iterator it;
for (it = children.begin(); it != children.end(); it++) {
Service child = *it;
vector<Service> affectedServices = child.GetParents();
affectedServices.push_back(child);
ServiceStatusMessage statusmsg = Service::CalculateCombinedStatus(NULL, affectedServices);
statusmsg.SetService(child.GetName());
ServiceState state = StateUnreachable;
statusmsg.GetState(&state);
if (child.GetState() == StateUnreachable || state == StateUnreachable) {
RequestMessage rm;
rm.SetMethod("delegation::ServiceStatus");
rm.SetParams(statusmsg);
EndpointManager::GetInstance()->SendMulticastMessage(m_Endpoint, rm);
}
}
/* send state update */
RequestMessage rm;
rm.SetMethod("delegation::ServiceStatus");
rm.SetParams(params);
vector<Service> parents = service.GetParents();
ServiceStatusMessage statusmsg = Service::CalculateCombinedStatus(&params, parents);
statusmsg.SetService(service.GetName());
rm.SetParams(statusmsg);
EndpointManager::GetInstance()->SendMulticastMessage(m_Endpoint, rm);
}

View File

@ -1,4 +1,12 @@
local object application "icinga" {
cert = "icinga-c1.pem",
ca = "ca.crt",
node = "192.168.2.235",
service = 7777
}
local object component "discovery" {
}
@ -10,6 +18,23 @@ local object component "delegation" {
}
local object endpoint "icinga-c2" {
roles = { "all" }
}
local object endpoint "icinga-c3" {
roles = { "all" }
}
local object endpoint "icinga-c4" {
roles = { "all" }
}
local object role "all" {
publications = { "*" },
subscriptions = { "*" }
}
object host "localhost" {
}
@ -41,5 +66,7 @@ object service "localhost-ping2" inherits "ping" {
macros += {
address = "localhost"
}
},
dependencies = { "localhost-ping1" }
}

View File

@ -41,6 +41,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "demo", "components\demo\dem
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "discovery", "components\discovery\discovery.vcxproj", "{EAD41628-BB96-4F99-9070-8A9676801295}"
ProjectSection(ProjectDependencies) = postProject
{538D7F53-A6A0-459A-AE4F-70DB135BC9AF} = {538D7F53-A6A0-459A-AE4F-70DB135BC9AF}
{C1FC77E1-04A4-481B-A78B-2F7AF489C2F8} = {C1FC77E1-04A4-481B-A78B-2F7AF489C2F8}
EndProjectSection
EndProject
@ -78,6 +79,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cibsync", "components\cibsy
{C1FC77E1-04A4-481B-A78B-2F7AF489C2F8} = {C1FC77E1-04A4-481B-A78B-2F7AF489C2F8}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compat", "components\compat\compat.vcxproj", "{2BD1C70C-43DB-4F44-B66B-67CF5C7044AA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
@ -140,6 +143,10 @@ Global
{704DDD8E-9E6D-4C22-80BD-6DE10F3A5E1C}.Debug|Win32.Build.0 = Debug|Win32
{704DDD8E-9E6D-4C22-80BD-6DE10F3A5E1C}.Release|Win32.ActiveCfg = Release|Win32
{704DDD8E-9E6D-4C22-80BD-6DE10F3A5E1C}.Release|Win32.Build.0 = Release|Win32
{2BD1C70C-43DB-4F44-B66B-67CF5C7044AA}.Debug|Win32.ActiveCfg = Debug|Win32
{2BD1C70C-43DB-4F44-B66B-67CF5C7044AA}.Debug|Win32.Build.0 = Debug|Win32
{2BD1C70C-43DB-4F44-B66B-67CF5C7044AA}.Release|Win32.ActiveCfg = Release|Win32
{2BD1C70C-43DB-4F44-B66B-67CF5C7044AA}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE