icinga2/lib/icinga/service-check.cpp

756 lines
16 KiB
C++
Raw Normal View History

/******************************************************************************
* Icinga 2 *
* Copyright (C) 2012 Icinga Development Team (http://www.icinga.org/) *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License *
* as published by the Free Software Foundation; either version 2 *
* of the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the Free Software Foundation *
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
******************************************************************************/
#include "i2-icinga.h"
using namespace icinga;
/* Fallback values returned by the getters when the matching attribute is unset. */
const int Service::DefaultMaxCheckAttempts = 3;
const double Service::DefaultCheckInterval = 5 * 60;

/* The default retry interval is the check interval divided by this value. */
const double Service::CheckIntervalDivisor = 5.0;

/* Signals whose handlers receive the affected service as their only argument. */
signals2::signal<void (const Service::Ptr&)> Service::OnCheckerChanged;
signals2::signal<void (const Service::Ptr&)> Service::OnNextCheckChanged;
2013-03-02 09:07:47 +01:00
/**
 * Retrieves the value stored in the check command attribute.
 *
 * @returns The check command value.
 * @threadsafety Always.
 */
Value Service::GetCheckCommand(void) const
{
    return this->m_CheckCommand;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
long Service::GetMaxCheckAttempts(void) const
{
2013-02-26 10:13:54 +01:00
if (m_MaxCheckAttempts.IsEmpty())
return DefaultMaxCheckAttempts;
2013-02-26 10:13:54 +01:00
return m_MaxCheckAttempts;
}
2013-03-13 16:04:53 +01:00
/**
 * Looks up the time period object named by the check period attribute.
 *
 * @returns Whatever TimePeriod::GetByName() yields for that name.
 * @threadsafety Always.
 */
TimePeriod::Ptr Service::GetCheckPeriod(void) const
{
    TimePeriod::Ptr period = TimePeriod::GetByName(m_CheckPeriod);

    return period;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
double Service::GetCheckInterval(void) const
{
2013-02-26 10:13:54 +01:00
if (m_CheckInterval.IsEmpty())
return DefaultCheckInterval;
2013-02-26 10:13:54 +01:00
return m_CheckInterval;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
double Service::GetRetryInterval(void) const
{
2013-02-26 10:13:54 +01:00
if (m_RetryInterval.IsEmpty())
return GetCheckInterval() / CheckIntervalDivisor;
2013-02-26 10:13:54 +01:00
return m_RetryInterval;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-03-14 12:17:46 +01:00
Array::Ptr Service::GetCheckers(void) const
{
2013-02-26 10:13:54 +01:00
return m_Checkers;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetSchedulingOffset(long offset)
{
2013-02-26 10:13:54 +01:00
m_SchedulingOffset = offset;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
long Service::GetSchedulingOffset(void)
{
2013-02-26 10:13:54 +01:00
return m_SchedulingOffset;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetNextCheck(double nextCheck)
{
2013-02-26 10:13:54 +01:00
m_NextCheck = nextCheck;
Touch("next_check");
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
double Service::GetNextCheck(void)
{
2013-02-26 10:13:54 +01:00
return m_NextCheck;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::UpdateNextCheck(void)
{
2013-03-02 09:07:47 +01:00
ObjectLock olock(this);
double interval;
if (GetStateType() == StateTypeSoft)
interval = GetRetryInterval();
else
interval = GetCheckInterval();
double now = Utility::GetTime();
double adj = 0;
if (interval > 1)
2013-03-07 09:48:00 +01:00
adj = fmod(now * 1000 + GetSchedulingOffset(), interval * 1000) / 1000.0;
SetNextCheck(now - adj + interval);
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-02-26 10:13:54 +01:00
void Service::SetCurrentChecker(const String& checker)
{
2013-02-26 10:13:54 +01:00
m_CurrentChecker = checker;
Touch("current_checker");
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-02-26 10:13:54 +01:00
String Service::GetCurrentChecker(void) const
{
2013-02-26 10:13:54 +01:00
return m_CurrentChecker;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetCurrentCheckAttempt(long attempt)
{
2013-02-26 10:13:54 +01:00
m_CheckAttempt = attempt;
Touch("check_attempt");
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
long Service::GetCurrentCheckAttempt(void) const
{
2013-02-26 10:13:54 +01:00
if (m_CheckAttempt.IsEmpty())
return 1;
2013-02-26 10:13:54 +01:00
return m_CheckAttempt;
}
2013-03-02 09:07:47 +01:00
/**
 * Stores the service state (as a long) and calls Touch() for the "state"
 * attribute.
 *
 * @param state The new state.
 * @threadsafety Always.
 */
void Service::SetState(ServiceState state)
{
    const long rawState = static_cast<long>(state);

    m_State = rawState;
    Touch("state");
}
2013-03-02 09:07:47 +01:00
/**
 * Retrieves the service state.
 *
 * @returns The stored state, or StateUnknown when the attribute is empty.
 * @threadsafety Always.
 */
ServiceState Service::GetState(void) const
{
    if (!m_State.IsEmpty())
        return static_cast<ServiceState>(static_cast<int>(m_State));

    return StateUnknown;
}
2013-03-07 12:04:20 +01:00
/**
 * Stores the previous service state (as a long) and calls Touch() for the
 * "last_state" attribute.
 *
 * @param state The state to remember as the last state.
 */
void Service::SetLastState(ServiceState state)
{
    const long rawState = static_cast<long>(state);

    m_LastState = rawState;
    Touch("last_state");
}
/**
 * Retrieves the previous service state.
 *
 * @returns The stored last state, or StateUnknown when the attribute is
 *          empty.
 */
ServiceState Service::GetLastState(void) const
{
    if (!m_LastState.IsEmpty())
        return static_cast<ServiceState>(static_cast<int>(m_LastState));

    return StateUnknown;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-03-07 12:04:20 +01:00
void Service::SetStateType(StateType type)
{
2013-02-26 10:13:54 +01:00
m_StateType = static_cast<long>(type);
Touch("state_type");
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-03-07 12:04:20 +01:00
StateType Service::GetStateType(void) const
{
2013-02-26 10:13:54 +01:00
if (m_StateType.IsEmpty())
return StateTypeSoft;
2013-02-26 10:13:54 +01:00
int ivalue = static_cast<int>(m_StateType);
2013-03-07 12:04:20 +01:00
return static_cast<StateType>(ivalue);
}
/**
 * Stores the previous state type (as a long) and calls Touch() for the
 * "last_state_type" attribute.
 *
 * @param type The state type to remember as the last state type.
 * @threadsafety Always.
 */
void Service::SetLastStateType(StateType type)
{
    const long rawType = static_cast<long>(type);

    m_LastStateType = rawType;
    Touch("last_state_type");
}
/**
 * Retrieves the previous state type.
 *
 * @returns The stored last state type, or StateTypeSoft when the attribute
 *          is empty.
 * @threadsafety Always.
 */
StateType Service::GetLastStateType(void) const
{
    if (!m_LastStateType.IsEmpty())
        return static_cast<StateType>(static_cast<int>(m_LastStateType));

    return StateTypeSoft;
}
2013-03-02 09:07:47 +01:00
/**
 * Stores the most recent check result and calls Touch() for the
 * "last_result" attribute.
 *
 * @param result The check result dictionary.
 * @threadsafety Always.
 */
void Service::SetLastCheckResult(const Dictionary::Ptr& result)
{
    this->m_LastResult = result;
    this->Touch("last_result");
}
2013-03-02 09:07:47 +01:00
/**
 * Retrieves the most recent check result.
 *
 * @returns The value of the last result attribute.
 * @threadsafety Always.
 */
Dictionary::Ptr Service::GetLastCheckResult(void) const
{
    return this->m_LastResult;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetLastStateChange(double ts)
{
2013-02-26 10:13:54 +01:00
m_LastStateChange = ts;
Touch("last_state_change");
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
double Service::GetLastStateChange(void) const
{
2013-02-26 10:13:54 +01:00
if (m_LastStateChange.IsEmpty())
return IcingaApplication::GetInstance()->GetStartTime();
2013-02-26 10:13:54 +01:00
return m_LastStateChange;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetLastHardStateChange(double ts)
{
2013-02-26 10:13:54 +01:00
m_LastHardStateChange = ts;
Touch("last_hard_state_change");
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
double Service::GetLastHardStateChange(void) const
{
2013-02-26 10:13:54 +01:00
if (m_LastHardStateChange.IsEmpty())
return IcingaApplication::GetInstance()->GetStartTime();
2013-02-26 10:13:54 +01:00
return m_LastHardStateChange;
}
2013-03-02 09:07:47 +01:00
/**
 * Tells whether active checks are enabled.
 *
 * @returns The stored flag; true when the attribute is empty.
 * @threadsafety Always.
 */
bool Service::GetEnableActiveChecks(void) const
{
    if (!m_EnableActiveChecks.IsEmpty())
        return m_EnableActiveChecks;

    return true;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetEnableActiveChecks(bool enabled)
{
2013-02-26 10:13:54 +01:00
m_EnableActiveChecks = enabled ? 1 : 0;
Touch("enable_active_checks");
}
2013-03-02 09:07:47 +01:00
/**
 * Tells whether passive checks are enabled.
 *
 * @returns The stored flag; true when the attribute is empty.
 * @threadsafety Always.
 */
bool Service::GetEnablePassiveChecks(void) const
{
    if (!m_EnablePassiveChecks.IsEmpty())
        return m_EnablePassiveChecks;

    return true;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetEnablePassiveChecks(bool enabled)
{
2013-02-26 10:13:54 +01:00
m_EnablePassiveChecks = enabled ? 1 : 0;
Touch("enable_passive_checks");
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
bool Service::GetForceNextCheck(void) const
{
2013-02-26 10:13:54 +01:00
if (m_ForceNextCheck.IsEmpty())
return false;
2013-02-26 10:13:54 +01:00
return static_cast<bool>(m_ForceNextCheck);
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
void Service::SetForceNextCheck(bool forced)
{
2013-02-26 10:13:54 +01:00
m_ForceNextCheck = forced ? 1 : 0;
Touch("force_next_check");
}
2013-03-02 09:07:47 +01:00
/**
 * Applies a check result to this service.
 *
 * Updates state, state type and attempt counter, records state-change
 * timestamps, clears acknowledgements where applicable, reschedules parent
 * services and parent hosts' check services on a state change, triggers
 * downtimes for non-OK states, updates the check statistics, stores the
 * result, multicasts a "checker::CheckResult" message and finally requests
 * notifications when a hard change happened on a reachable service that is
 * neither in downtime nor acknowledged.
 *
 * The caller must not hold this object's lock.
 *
 * @param cr The check result; its "state" entry is read, and a "macros"
 *           entry is added before the dictionary is sealed.
 * @threadsafety Always.
 */
void Service::ProcessCheckResult(const Dictionary::Ptr& cr)
{
    /* Reachability is determined before the object lock is taken. */
    bool reachable = IsReachable();

    ASSERT(!OwnsLock());
    ObjectLock olock(this);

    ServiceState previousState = GetState();
    StateType previousStateType = GetStateType();
    bool hardChange = false;
    bool recovery;

    /* The BeginExecuteCheck function already sets the old state, but we need to do it again
     * in case this was a passive check result. */
    SetLastState(previousState);
    SetLastStateType(previousStateType);

    long attempt = GetCurrentCheckAttempt();

    if (cr->Get("state") == StateOK) {
        const bool wasOK = (previousState == StateOK);
        const bool wasSoft = (previousStateType == StateTypeSoft);

        if (!wasOK && previousStateType == StateTypeHard)
            SetStateType(StateTypeSoft); // HARD NON-OK -> SOFT OK

        if (wasOK && wasSoft)
            hardChange = true; // SOFT OK -> HARD OK

        if (wasOK || wasSoft)
            SetStateType(StateTypeHard); // SOFT OK -> HARD OK or SOFT NON-OK -> HARD OK

        attempt = 1;
        recovery = true;
    } else {
        if (attempt >= GetMaxCheckAttempts()) {
            /* Out of attempts: the problem becomes a hard state. */
            SetStateType(StateTypeHard);
            attempt = 1;
            hardChange = true;
        } else if (GetStateType() == StateTypeSoft || GetState() == StateOK) {
            SetStateType(StateTypeSoft);
            attempt++;
        }

        recovery = false;
    }

    SetCurrentCheckAttempt(attempt);

    int newState = cr->Get("state");
    SetState(static_cast<ServiceState>(newState));

    double now = Utility::GetTime();

    if (previousState != GetState()) {
        SetLastStateChange(now);

        /* remove acknowledgements */
        if (GetAcknowledgement() == AcknowledgementNormal ||
            (GetAcknowledgement() == AcknowledgementSticky && GetStateType() == StateTypeHard && GetState() == StateOK)) {
            SetAcknowledgement(AcknowledgementNone);
            SetAcknowledgementExpiry(0);
        }

        /* reschedule service dependencies */
        BOOST_FOREACH(const Service::Ptr& parent, GetParentServices()) {
            ObjectLock parentLock(parent);
            parent->SetNextCheck(Utility::GetTime());
        }

        /* reschedule host dependencies */
        BOOST_FOREACH(const Host::Ptr& parent, GetParentHosts()) {
            Service::Ptr hostCheck = parent->GetHostCheckService();

            /* Skip this service itself if it is the host's check service. */
            if (hostCheck && hostCheck->GetName() != GetName()) {
                ObjectLock hostCheckLock(hostCheck);
                hostCheck->SetNextCheck(Utility::GetTime());
            }
        }
    }

    if (hardChange)
        SetLastHardStateChange(now);

    if (GetState() != StateOK)
        TriggerDowntimes();

    Service::UpdateStatistics(cr);

    /* Decided while still holding the lock; acted upon at the very end. */
    bool send_notification = hardChange && reachable && !IsInDowntime() && !IsAcknowledged();

    olock.Unlock();

    /* Update macros - these are used by event handlers and notifications. */
    cr->Set("macros", CalculateAllMacros());
    cr->Seal();

    olock.Lock();
    SetLastCheckResult(cr);
    olock.Unlock();

    /* Flush the object so other instances see the service's
     * new state when they receive the CheckResult message */
    Flush();

    RequestMessage rm;
    rm.SetMethod("checker::CheckResult");

    /* TODO: add _old_ state to message */
    CheckResultMessage params;
    params.SetService(GetName());
    params.SetCheckResult(cr);
    rm.SetParams(params);

    EndpointManager::GetInstance()->SendMulticastMessage(rm);

    if (send_notification)
        RequestNotifications(recovery ? NotificationRecovery : NotificationProblem, cr);
}
2013-03-02 09:07:47 +01:00
/**
 * Converts an upper-case state name into a ServiceState.
 *
 * @param state One of "OK", "WARNING", "CRITICAL" or "UNCHECKABLE".
 * @returns The matching state; StateUnknown for any other string.
 * @threadsafety Always.
 */
ServiceState Service::StateFromString(const String& state)
{
    if (state == "OK")
        return StateOK;

    if (state == "WARNING")
        return StateWarning;

    if (state == "CRITICAL")
        return StateCritical;

    if (state == "UNCHECKABLE")
        return StateUncheckable;

    return StateUnknown;
}
2013-03-02 09:07:47 +01:00
/**
 * Converts a ServiceState into its upper-case name.
 *
 * @param state The state to convert.
 * @returns "OK", "WARNING", "CRITICAL" or "UNCHECKABLE"; "UNKNOWN" for
 *          StateUnknown and any other value.
 * @threadsafety Always.
 */
String Service::StateToString(ServiceState state)
{
    if (state == StateOK)
        return "OK";

    if (state == StateWarning)
        return "WARNING";

    if (state == StateCritical)
        return "CRITICAL";

    if (state == StateUncheckable)
        return "UNCHECKABLE";

    return "UNKNOWN";
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-03-07 12:04:20 +01:00
StateType Service::StateTypeFromString(const String& type)
{
2013-02-24 01:10:34 +01:00
if (type == "SOFT")
return StateTypeSoft;
else
return StateTypeHard;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-03-07 12:04:20 +01:00
String Service::StateTypeToString(StateType type)
{
if (type == StateTypeSoft)
2013-02-24 01:10:34 +01:00
return "SOFT";
else
2013-02-24 01:10:34 +01:00
return "HARD";
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
bool Service::IsAllowedChecker(const String& checker) const
{
2013-03-14 12:17:46 +01:00
Array::Ptr checkers = GetCheckers();
if (!checkers)
return true;
2013-03-02 09:07:47 +01:00
ObjectLock olock(checkers);
2013-03-14 12:17:46 +01:00
BOOST_FOREACH(const Value& pattern, checkers) {
if (Utility::Match(pattern, checker))
return true;
}
return false;
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-03-04 15:52:42 +01:00
void Service::BeginExecuteCheck(const function<void (void)>& callback)
{
ASSERT(!OwnsLock());
2013-02-24 01:10:34 +01:00
2013-03-06 11:03:50 +01:00
{
ObjectLock olock(this);
/* don't run another check if there is one pending */
if (m_CheckRunning) {
olock.Unlock();
2013-02-24 01:10:34 +01:00
2013-03-06 11:03:50 +01:00
/* we need to call the callback anyway */
callback();
2013-03-06 11:03:50 +01:00
return;
}
m_CheckRunning = true;
SetLastState(GetState());
SetLastStateType(GetLastStateType());
}
/* keep track of scheduling info in case the check type doesn't provide its own information */
2013-02-24 01:10:34 +01:00
Dictionary::Ptr checkInfo = boost::make_shared<Dictionary>();
2013-03-04 15:52:42 +01:00
checkInfo->Set("schedule_start", GetNextCheck());
2013-03-02 09:07:47 +01:00
checkInfo->Set("execution_start", Utility::GetTime());
2013-02-24 01:10:34 +01:00
2013-03-07 12:04:20 +01:00
Dictionary::Ptr macros = CalculateAllMacros();
2013-02-24 01:10:34 +01:00
checkInfo->Set("macros", macros);
2013-03-04 15:52:42 +01:00
Service::Ptr self = GetSelf();
2013-02-19 07:26:52 +01:00
vector<Value> arguments;
2013-02-24 01:10:34 +01:00
arguments.push_back(self);
arguments.push_back(macros);
2013-03-04 15:52:42 +01:00
ScriptTask::Ptr task = MakeMethodTask("check", arguments);
2013-02-24 01:10:34 +01:00
{
2013-03-06 11:03:50 +01:00
ObjectLock olock(this);
2013-02-26 10:13:54 +01:00
self->m_CurrentTask = task;
2013-02-24 01:10:34 +01:00
}
2013-02-24 01:10:34 +01:00
task->Start(boost::bind(&Service::CheckCompletedHandler, self, checkInfo, _1, callback));
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-02-24 01:10:34 +01:00
void Service::CheckCompletedHandler(const Dictionary::Ptr& checkInfo,
const ScriptTask::Ptr& task, const function<void (void)>& callback)
{
ASSERT(!OwnsLock());
2013-03-02 09:07:47 +01:00
2013-02-24 01:10:34 +01:00
checkInfo->Set("execution_end", Utility::GetTime());
checkInfo->Set("schedule_end", Utility::GetTime());
2013-03-01 12:07:52 +01:00
checkInfo->Seal();
Dictionary::Ptr result;
try {
2013-03-01 12:07:52 +01:00
Value vresult = task->GetResult();
if (vresult.IsObjectType<Dictionary>())
result = vresult;
} catch (const exception& ex) {
stringstream msgbuf;
msgbuf << "Exception occured during check for service '"
<< GetName() << "': " << diagnostic_information(ex);
String message = msgbuf.str();
Logger::Write(LogWarning, "icinga", message);
result = boost::make_shared<Dictionary>();
result->Set("state", StateUnknown);
result->Set("output", message);
}
if (result) {
if (!result->Contains("schedule_start"))
2013-02-24 01:10:34 +01:00
result->Set("schedule_start", checkInfo->Get("schedule_start"));
if (!result->Contains("schedule_end"))
2013-02-24 01:10:34 +01:00
result->Set("schedule_end", checkInfo->Get("schedule_end"));
if (!result->Contains("execution_start"))
2013-02-24 01:10:34 +01:00
result->Set("execution_start", checkInfo->Get("execution_start"));
if (!result->Contains("execution_end"))
2013-02-24 01:10:34 +01:00
result->Set("execution_end", checkInfo->Get("execution_end"));
if (!result->Contains("macros"))
result->Set("macros", checkInfo->Get("macros"));
if (!result->Contains("active"))
result->Set("active", 1);
2013-02-26 10:13:54 +01:00
if (!result->Contains("current_checker")) {
2013-02-24 01:10:34 +01:00
EndpointManager::Ptr em = EndpointManager::GetInstance();
2013-02-26 10:13:54 +01:00
result->Set("current_checker", em->GetIdentity());
2013-02-24 01:10:34 +01:00
}
}
2013-03-02 09:07:47 +01:00
if (result)
ProcessCheckResult(result);
2013-03-07 12:04:20 +01:00
/* figure out when the next check is for this service; the call to
* ProcessCheckResult() should've already done this but lets do it again
* just in case there was no check result. */
UpdateNextCheck();
2013-02-24 01:10:34 +01:00
{
ObjectLock olock(this);
2013-02-26 10:13:54 +01:00
m_CurrentTask.reset();
2013-03-06 11:03:50 +01:00
m_CheckRunning = false;
2013-02-24 01:10:34 +01:00
}
2013-03-02 09:07:47 +01:00
callback();
}
2013-03-02 09:07:47 +01:00
/**
 * Counts a check result in the active or passive check statistics.
 *
 * The result's "schedule_end" value is used as the timestamp, falling back
 * to the current time when it is empty. A result whose "active" entry is
 * empty or non-zero counts as active, otherwise as passive.
 *
 * @param cr The check result.
 * @threadsafety Always.
 */
void Service::UpdateStatistics(const Dictionary::Ptr& cr)
{
    Value scheduleEnd = cr->Get("schedule_end");

    const time_t ts = scheduleEnd.IsEmpty()
        ? static_cast<time_t>(Utility::GetTime())
        : static_cast<time_t>(scheduleEnd);

    Value active = cr->Get("active");
    const bool passive = !active.IsEmpty() && !static_cast<long>(active);

    if (passive)
        CIB::UpdatePassiveChecksStatistics(ts, 1);
    else
        CIB::UpdateActiveChecksStatistics(ts, 1);
}
2013-02-24 01:10:34 +01:00
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-02-24 01:10:34 +01:00
double Service::CalculateExecutionTime(const Dictionary::Ptr& cr)
{
double execution_start = 0, execution_end = 0;
if (cr) {
if (!cr->Contains("execution_start") || !cr->Contains("execution_end"))
return 0;
execution_start = cr->Get("execution_start");
execution_end = cr->Get("execution_end");
}
return (execution_end - execution_start);
}
2013-03-02 09:07:47 +01:00
/**
* @threadsafety Always.
*/
2013-02-24 01:10:34 +01:00
double Service::CalculateLatency(const Dictionary::Ptr& cr)
{
double schedule_start = 0, schedule_end = 0;
if (cr) {
if (!cr->Contains("schedule_start") || !cr->Contains("schedule_end"))
return 0;
schedule_start = cr->Get("schedule_start");
schedule_end = cr->Get("schedule_end");
}
return (schedule_end - schedule_start) - CalculateExecutionTime(cr);
}