icinga2/lib/icinga/checkable-check.cpp
Yonas Habteab f049f5258a Checkable: update next_check ts in ExecuteCheck only if it's needed
Since the scheduler accounts for already running checks, we need to
update the `next_check` timestamp in `Checkable::ExecuteCheck()` only
where it actually makes sense to do so, and for local checks this
doesn't make sense at all. There are only two cases where we need to update
the next check beforehand:

1) The execute command event is sent to a connected remote endpoint, so
   we need to set the next check to a time in the future until we actually
   receive the check result back from the remote endpoint. However, it must
   not be too far in the future to avoid that the check is not re-run for
   too long in case the remote endpoint never responds.
2) The check is a remote check, but either the endpoint is currently syncing
   replay logs or not connected at all, and we are within the magical 5min
   cold startup window. In these cases, the check is effectively skipped and
   no check result will come in for it, so we manually update the next
   check as if the check had been executed.

In the other cases, either the check is executed locally, which means the
`m_RunningCheck` flag already prevents the scheduler from re-running the check,
or this is a remote check and the endpoint is not connected, but we are outside
the cold startup window, in which case we also don't do anything as we've already
called `Checkable::ProcessCheckResult()` with an appropriate error state, which
in turn will call `Checkable::UpdateNextCheck()`.
2025-09-24 10:15:41 +02:00

737 lines
24 KiB
C++

/* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
#include "icinga/checkable.hpp"
#include "icinga/service.hpp"
#include "icinga/host.hpp"
#include "icinga/checkcommand.hpp"
#include "icinga/icingaapplication.hpp"
#include "icinga/cib.hpp"
#include "icinga/clusterevents.hpp"
#include "remote/messageorigin.hpp"
#include "remote/apilistener.hpp"
#include "base/objectlock.hpp"
#include "base/logger.hpp"
#include "base/convert.hpp"
#include "base/utility.hpp"
#include "base/context.hpp"
#include <shared_mutex>
using namespace icinga;
/* Definitions of the static signals and counters declared on Checkable. */
/* Emitted by ProcessCheckResult() once a check result has been applied to the checkable. */
boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, const MessageOrigin::Ptr&)> Checkable::OnNewCheckResult;
/* Emitted by ProcessCheckResult() with StateTypeHard or StateTypeSoft when a hard or soft state change is detected. */
boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, StateType, const MessageOrigin::Ptr&)> Checkable::OnStateChange;
/* Emitted by ProcessCheckResult() with the set of children when a state change may affect their reachability. */
boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, std::set<Checkable::Ptr>, const MessageOrigin::Ptr&)> Checkable::OnReachabilityChanged;
/* Emitted by ProcessCheckResult() to request a notification of the given type (problem, recovery, flapping start/end). */
boost::signals2::signal<void (const Checkable::Ptr&, NotificationType, const CheckResult::Ptr&, const String&, const String&, const MessageOrigin::Ptr&)> Checkable::OnNotificationsRequested;
/* NOTE(review): not emitted anywhere in this file; presumably fired when the next check time changes - confirm. */
boost::signals2::signal<void (const Checkable::Ptr&)> Checkable::OnNextCheckUpdated;
/* NOTE(review): only defined (initialised to 0) in this file; presumably counts currently executing checks - confirm. */
Atomic<uint_fast64_t> Checkable::CurrentConcurrentChecks (0);
/* Guards m_PendingChecks (locked by all the *PendingCheck* functions in this file). */
std::mutex Checkable::m_StatsMutex;
/* Counter modified by IncreasePendingChecks(), DecreasePendingChecks() and AquirePendingCheckSlot(). */
int Checkable::m_PendingChecks = 0;
/* Notified by DecreasePendingChecks(); waited on by AquirePendingCheckSlot(). */
std::condition_variable Checkable::m_PendingChecksCV;
/**
 * Returns the check command of this checkable.
 *
 * @return The object returned by NavigateCheckCommandRaw(), cast to a CheckCommand.
 */
CheckCommand::Ptr Checkable::GetCheckCommand() const
{
	auto rawCommand = NavigateCheckCommandRaw();

	return dynamic_pointer_cast<CheckCommand>(rawCommand);
}
/**
 * Returns the time period of this checkable.
 *
 * @return The TimePeriod looked up by the name stored in the raw check period attribute.
 */
TimePeriod::Ptr Checkable::GetCheckPeriod() const
{
	const auto periodName = GetCheckPeriodRaw();

	return TimePeriod::GetByName(periodName);
}
void Checkable::SetSchedulingOffset(long offset)
{
m_SchedulingOffset = offset;
}
long Checkable::GetSchedulingOffset() const
{
return m_SchedulingOffset;
}
/**
 * Computes the next check time from the current time, the effective interval and the
 * scheduling offset, logs it at debug level and stores it via SetNextCheck().
 *
 * @param origin Message origin that is forwarded to SetNextCheck().
 */
void Checkable::UpdateNextCheck(const MessageOrigin::Ptr& origin)
{
	/* A SOFT state with an existing check result is re-checked at the retry interval. */
	const bool useRetryInterval = GetStateType() == StateTypeSoft && GetLastCheckResult() != nullptr;
	const double interval = useRetryInterval ? GetRetryInterval() : GetCheckInterval();

	const double now = Utility::GetTime();
	double adjustment = 0;

	/* The adjustment is only computed for intervals above one second. */
	if (interval > 1) {
		adjustment = fmod(now * 100 + GetSchedulingOffset(), interval * 100) / 100.0;

		/* A non-zero adjustment is capped by an offset-dependent upper bound. */
		if (adjustment != 0.0) {
			adjustment = std::min(0.5 + fmod(GetSchedulingOffset(), interval * 5) / 100.0, adjustment);
		}
	}

	const double nextCheck = now - adjustment + interval;
	const double lastCheck = GetLastCheck();

	/* GetLastCheck() yields -1 when there is no check result yet; display that as the epoch. */
	const double lastCheckForDisplay = (lastCheck < 0 ? 0 : lastCheck);

	Log(LogDebug, "Checkable")
		<< std::fixed << std::setprecision(0)
		<< "Update checkable '" << GetName() << "' with check interval '" << GetCheckInterval()
		<< "' from last check time at " << Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastCheckForDisplay)
		<< " (" << lastCheck << ") to next check time at "
		<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", nextCheck) << " (" << nextCheck << ").";

	SetNextCheck(nextCheck, false, origin);
}
bool Checkable::HasBeenChecked() const
{
return GetLastCheckResult() != nullptr;
}
/**
 * Tells whether a check is currently flagged as running for this checkable.
 *
 * @return The current value of the m_CheckRunning flag.
 */
bool Checkable::HasRunningCheck() const
{
	const bool running = m_CheckRunning;

	return running;
}
double Checkable::GetLastCheck() const
{
CheckResult::Ptr cr = GetLastCheckResult();
double schedule_end = -1;
if (cr)
schedule_end = cr->GetScheduleEnd();
return schedule_end;
}
/**
 * Applies a check result to this checkable: fills in missing timestamps and sources, updates the
 * state, state type, attempt counter, acknowledgement and flapping status, reschedules the next
 * check and emits the related signals (new check result, state change, notifications, reachability).
 *
 * @param cr The check result to process; must not be null (enforced via VERIFY).
 * @param producer Wait group that is try-locked in shared mode; must not be null (enforced via VERIFY).
 * @param origin Origin of the check result. A null or local origin makes this node the scheduling
 *               source; the next check is only recomputed when origin is null.
 * @return Result::Ok when the result was processed (or forwarded to the command endpoint for agent
 *         checks), Result::CheckableInactive when it was discarded because the producer lock could
 *         not be acquired or the checkable is not active, Result::NewerCheckResultPresent when the
 *         stored check result is newer than the given one.
 */
Checkable::ProcessingResult Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const WaitGroup::Ptr& producer, const MessageOrigin::Ptr& origin)
{
using Result = Checkable::ProcessingResult;
VERIFY(cr);
VERIFY(producer);
ObjectLock olock(this);
/* A result arrived, so the check is no longer flagged as running. */
m_CheckRunning.store(false);
double now = Utility::GetTime();
/* Default every timestamp the check result doesn't provide to the current time. */
if (cr->GetScheduleStart() == 0)
cr->SetScheduleStart(now);
if (cr->GetScheduleEnd() == 0)
cr->SetScheduleEnd(now);
if (cr->GetExecutionStart() == 0)
cr->SetExecutionStart(now);
if (cr->GetExecutionEnd() == 0)
cr->SetExecutionEnd(now);
if (!origin || origin->IsLocal())
cr->SetSchedulingSource(IcingaApplication::GetInstance()->GetNodeName());
Endpoint::Ptr command_endpoint = GetCommandEndpoint();
if (cr->GetCheckSource().IsEmpty()) {
if ((!origin || origin->IsLocal()))
cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());
/* override check source if command_endpoint was defined */
if (command_endpoint && !GetExtension("agent_check"))
cr->SetCheckSource(command_endpoint->GetName());
}
/* Non-blocking shared lock attempt on the producer; failure means the result is dropped below. */
std::shared_lock producerLock (*producer, std::try_to_lock);
if (!producerLock) {
// Discard the check result to not delay the current reload.
// We'll re-run the check immediately after the reload.
return Result::CheckableInactive;
}
/* agent checks go through the api */
if (command_endpoint && GetExtension("agent_check")) {
ApiListener::Ptr listener = ApiListener::GetInstance();
if (listener) {
/* send message back to its origin */
Dictionary::Ptr message = ClusterEvents::MakeCheckResultMessage(this, cr);
listener->SyncSendMessage(command_endpoint, message);
}
return Result::Ok;
}
if (!IsActive())
return Result::CheckableInactive;
bool reachable = IsReachable();
bool notification_reachable = IsReachable(DependencyNotification);
// Cache whether the previous state of this Checkable affects its children before overwriting the last check result.
// This will be used to determine whether the on reachability changed event should be triggered.
bool affectsPreviousStateChildren(reachable && AffectsChildren());
/* Snapshot the state as it was before this check result is applied. */
CheckResult::Ptr old_cr = GetLastCheckResult();
ServiceState old_state = GetStateRaw();
StateType old_stateType = GetStateType();
long old_attempt = GetCheckAttempt();
bool recovery = false;
/* When we have a check result already (not after fresh start),
* prevent accepting old check results and allow overrides for
* CRs that happened in the future.
*/
if (old_cr) {
double currentCRTimestamp = old_cr->GetExecutionStart();
double newCRTimestamp = cr->GetExecutionStart();
/* Our current timestamp may be from the future (wrong server time adjusted again). Allow overrides here. */
if (currentCRTimestamp > now) {
/* our current CR is from the future, let the new CR override it. */
Log(LogDebug, "Checkable")
<< std::fixed << std::setprecision(6) << "Processing check result for checkable '" << GetName() << "' from "
<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", newCRTimestamp) << " (" << newCRTimestamp
<< "). Overriding since ours is from the future at "
<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", currentCRTimestamp) << " (" << currentCRTimestamp << ").";
} else {
/* Current timestamp is from the past, but the new timestamp is even more in the past. Skip it. */
if (newCRTimestamp < currentCRTimestamp) {
Log(LogDebug, "Checkable")
<< std::fixed << std::setprecision(6) << "Skipping check result for checkable '" << GetName() << "' from "
<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", newCRTimestamp) << " (" << newCRTimestamp
<< "). It is in the past compared to ours at "
<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", currentCRTimestamp) << " (" << currentCRTimestamp << ").";
return Result::NewerCheckResultPresent;
}
}
}
/* The ExecuteCheck function already sets the old state, but we need to do it again
* in case this was a passive check result. */
SetLastStateRaw(old_state);
SetLastStateType(old_stateType);
SetLastReachable(reachable);
Host::Ptr host;
Service::Ptr service;
tie(host, service) = GetHostService(this);
/* The checkable is treated as a service if GetHostService() yields one, otherwise as a host. */
CheckableType checkableType = CheckableHost;
if (service)
checkableType = CheckableService;
long attempt = 1;
std::set<Checkable::Ptr> children = GetChildren();
/* Determine the new state type and attempt counter from the old and the new state. */
if (IsStateOK(cr->GetState())) {
SetStateType(StateTypeHard); // NOT-OK -> HARD OK
if (!IsStateOK(old_state))
recovery = true;
ResetNotificationNumbers();
SaveLastState(ServiceOK, cr->GetExecutionEnd());
} else {
/* OK -> NOT-OK change, first SOFT state. Reset attempt counter. */
if (IsStateOK(old_state)) {
SetStateType(StateTypeSoft);
attempt = 1;
}
/* SOFT state change, increase attempt counter. */
if (old_stateType == StateTypeSoft && !IsStateOK(old_state)) {
SetStateType(StateTypeSoft);
attempt = old_attempt + 1;
}
/* HARD state change (e.g. previously 2/3 and this next attempt). Reset attempt counter. */
if (attempt >= GetMaxCheckAttempts()) {
SetStateType(StateTypeHard);
attempt = 1;
}
if (!IsStateOK(cr->GetState())) {
SaveLastState(cr->GetState(), cr->GetExecutionEnd());
}
}
if (!reachable)
SetLastStateUnreachable(cr->GetExecutionEnd());
SetCheckAttempt(attempt);
ServiceState new_state = cr->GetState();
SetStateRaw(new_state);
bool stateChange;
/* Exception on state change calculation for hosts. */
if (checkableType == CheckableService)
stateChange = (old_state != new_state);
else
stateChange = (Host::CalculateState(old_state) != Host::CalculateState(new_state));
/* Store the current last state change for the next iteration. */
SetPreviousStateChange(GetLastStateChange());
if (stateChange) {
SetLastStateChange(cr->GetExecutionEnd());
/* remove acknowledgements */
if (GetAcknowledgement() == AcknowledgementNormal ||
(GetAcknowledgement() == AcknowledgementSticky && IsStateOK(new_state))) {
ClearAcknowledgement("");
}
}
/* Acknowledgement comments are removed further below if no acknowledgement is set at this point. */
bool remove_acknowledgement_comments = false;
if (GetAcknowledgement() == AcknowledgementNone)
remove_acknowledgement_comments = true;
/* A hard change is either SOFT -> HARD or a state change between two HARD states. */
bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft);
if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard)
hardChange = true;
bool is_volatile = GetVolatile();
if (hardChange || is_volatile) {
SetLastHardStateRaw(new_state);
SetLastHardStateChange(cr->GetExecutionEnd());
/* Shift the stored value down by two decimal digits and put the new state into the hundreds. */
SetLastHardStatesRaw(GetLastHardStatesRaw() / 100u + new_state * 100u);
}
if (stateChange) {
/* Same decimal packing as for the hard states above. */
SetLastSoftStatesRaw(GetLastSoftStatesRaw() / 100u + new_state * 100u);
}
/* The lower two decimal digits of the packed hard states are handed to the check result as previous hard state. */
cr->SetPreviousHardState(ServiceState(GetLastHardStatesRaw() % 100u));
if (!IsStateOK(new_state))
TriggerDowntimes(cr->GetExecutionEnd());
/* statistics for external tools */
Checkable::UpdateStatistics(cr, checkableType);
bool in_downtime = IsInDowntime();
bool send_notification = false;
bool suppress_notification = !notification_reachable || in_downtime || IsAcknowledged();
/* Send notifications when a hard state change occurred. */
if (hardChange && !(old_stateType == StateTypeSoft && IsStateOK(new_state)))
send_notification = true;
/* Or if the checkable is volatile and in a HARD state. */
else if (is_volatile && GetStateType() == StateTypeHard)
send_notification = true;
if (IsStateOK(old_state) && old_stateType == StateTypeSoft)
send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */
if (is_volatile && IsStateOK(old_state) && IsStateOK(new_state))
send_notification = false; /* Don't send notifications for volatile OK -> OK changes. */
if (remove_acknowledgement_comments)
RemoveAckComments(String(), cr->GetExecutionEnd());
/* Record the resulting state in the check result; the previous result's "after" becomes this one's "before". */
Dictionary::Ptr vars_after = new Dictionary({
{ "state", new_state },
{ "state_type", GetStateType() },
{ "attempt", GetCheckAttempt() },
{ "reachable", reachable }
});
if (old_cr)
cr->SetVarsBefore(old_cr->GetVarsAfter());
cr->SetVarsAfter(vars_after);
if (service) {
SetLastCheckResult(cr);
} else {
/* For hosts, tell all of their services if storing the result changed the host's problem status. */
bool wasProblem = GetProblem();
SetLastCheckResult(cr);
if (GetProblem() != wasProblem) {
auto services = host->GetServices();
for (auto& service : services) {
Service::OnHostProblemChanged(service, cr, origin);
}
}
}
bool was_flapping = IsFlapping();
UpdateFlappingStatus(cr->GetState());
bool is_flapping = IsFlapping();
// Don't recompute the next check when the current check isn't generated by this endpoint. When the check is
// remotely generated we should've already received the "SetNextCheck" event before the "event::CheckResult"
// cluster event. Otherwise, the next check received before this check will be invalidated and cause the Checkable
// "next_check/next_update" in an HA setup to always be different from the other endpoint as the "m_SchedulingOffset"
// is randomly initialised on each node.
if (!origin) {
if (cr->GetActive()) {
UpdateNextCheck();
} else {
/* Reschedule the next check for external passive check results. The side effect of
* this is that for as long as we receive results for a service we
* won't execute any active checks. */
double offset;
double ttl = cr->GetTtl();
if (ttl > 0)
offset = ttl;
else
offset = GetCheckInterval();
SetNextCheck(Utility::GetTime() + offset);
}
}
#ifdef I2_DEBUG /* I2_DEBUG */
Log(LogDebug, "Checkable")
<< "Flapping: Checkable " << GetName()
<< " was: " << was_flapping
<< " is: " << is_flapping
<< " threshold low: " << GetFlappingThresholdLow()
<< " threshold high: " << GetFlappingThresholdHigh()
<< "% current: " << GetFlappingCurrent() << "%.";
#endif /* I2_DEBUG */
OnNewCheckResult(this, cr, origin);
/* signal status updates to for example db_ido */
OnStateChanged(this);
/* Hosts are logged with their host state (via Host::CalculateState()), services with the service state. */
String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));
/* Whether a hard state change or a volatile state change except OK -> OK happened. */
if (hardChange || (is_volatile && !(IsStateOK(old_state) && IsStateOK(new_state)))) {
OnStateChange(this, cr, StateTypeHard, origin);
Log(LogNotice, "Checkable")
<< "State Change: Checkable '" << GetName() << "' hard state change from " << old_state_str << " to " << new_state_str << " detected." << (is_volatile ? " Checkable is volatile." : "");
}
/* Whether a state change happened or the state type is SOFT (must be logged too). */
else if (stateChange || GetStateType() == StateTypeSoft) {
OnStateChange(this, cr, StateTypeSoft, origin);
Log(LogNotice, "Checkable")
<< "State Change: Checkable '" << GetName() << "' soft state change from " << old_state_str << " to " << new_state_str << " detected.";
}
if (GetStateType() == StateTypeSoft || hardChange || recovery ||
(is_volatile && !(IsStateOK(old_state) && IsStateOK(new_state))))
ExecuteEventHandler();
/* Bitmask of notification types that were due but suppressed while processing this result. */
int suppressed_types = 0;
/* Flapping start/end notifications */
if (!was_flapping && is_flapping) {
/* FlappingStart notifications happen on state changes, not in downtimes */
if (!IsPaused()) {
if (in_downtime) {
suppressed_types |= NotificationFlappingStart;
} else {
OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "", nullptr);
}
}
Log(LogNotice, "Checkable")
<< "Flapping Start: Checkable '" << GetName() << "' started flapping (Current flapping value "
<< GetFlappingCurrent() << "% > high threshold " << GetFlappingThresholdHigh() << "%).";
NotifyFlapping(origin);
} else if (was_flapping && !is_flapping) {
/* FlappingEnd notifications are independent from state changes, must not happen in downtime */
if (!IsPaused()) {
if (in_downtime) {
suppressed_types |= NotificationFlappingEnd;
} else {
OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "", nullptr);
}
}
Log(LogNotice, "Checkable")
<< "Flapping Stop: Checkable '" << GetName() << "' stopped flapping (Current flapping value "
<< GetFlappingCurrent() << "% < low threshold " << GetFlappingThresholdLow() << "%).";
NotifyFlapping(origin);
}
/* State notifications (problem/recovery) are only considered while the checkable is not flapping. */
if (send_notification && !is_flapping) {
if (!IsPaused()) {
/* If there are still some pending suppressed state notification, keep the suppression until these are
* handled by Checkable::FireSuppressedNotifications().
*/
bool pending = GetSuppressedNotifications() & (NotificationRecovery|NotificationProblem);
if (suppress_notification || pending) {
suppressed_types |= (recovery ? NotificationRecovery : NotificationProblem);
} else {
OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "", nullptr);
}
}
}
if (suppressed_types) {
/* If some notifications were suppressed, but just because of e.g. a downtime,
* stash them into a notification types bitmask for maybe re-sending later.
*/
int suppressed_types_before (GetSuppressedNotifications());
int suppressed_types_after (suppressed_types_before | suppressed_types);
const int conflict = NotificationFlappingStart | NotificationFlappingEnd;
if ((suppressed_types_after & conflict) == conflict) {
/* Flapping start and end cancel out each other. */
suppressed_types_after &= ~conflict;
}
const int stateNotifications = NotificationRecovery | NotificationProblem;
if (!(suppressed_types_before & stateNotifications) && (suppressed_types & stateNotifications)) {
/* A state-related notification is suppressed for the first time, store the previous state. When
* notifications are no longer suppressed, this can be compared with the current state to determine
* if a notification must be sent. This is done differently compared to flapping notifications just above
* as for state notifications, problem and recovery don't always cancel each other. For example,
* WARNING -> OK -> CRITICAL generates both types once, but there should still be a notification.
*/
SetStateBeforeSuppression(old_stateType == StateTypeHard ? old_state : ServiceOK);
}
if (suppressed_types_after != suppressed_types_before) {
SetSuppressedNotifications(suppressed_types_after);
}
}
/* update reachability for child objects */
if ((stateChange || hardChange) && !children.empty() && (affectsPreviousStateChildren || AffectsChildren()))
OnReachabilityChanged(this, cr, children, origin);
/* Release this object's lock before rescheduling children and parents, which are locked individually below. */
olock.Unlock();
if (recovery) {
/* On recovery, pull forward the next check of actively checked children that are in a problem state. */
for (auto& child : children) {
if (child->GetProblem() && child->GetEnableActiveChecks()) {
/* Candidate time is now plus Utility::Random() % 60; it is only applied if earlier than the child's next check. */
auto nextCheck (now + Utility::Random() % 60);
ObjectLock oLock (child);
if (nextCheck < child->GetNextCheck()) {
child->SetNextCheck(nextCheck);
}
}
}
}
if (stateChange) {
/* reschedule direct parents */
for (const Checkable::Ptr& parent : GetParents()) {
if (parent.get() == this)
continue;
if (!parent->GetEnableActiveChecks())
continue;
/* Only check the parent right away if its next check is at least one retry interval away. */
if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) {
ObjectLock olock(parent);
parent->SetNextCheck(now);
}
}
}
return Result::Ok;
}
/**
 * Runs the check command for this checkable with already resolved macros.
 *
 * A fresh check result is pre-filled with the current next check time as schedule start and the
 * current time as execution start before it is handed to the check command.
 *
 * @param producer Wait group that is passed through to the check command.
 * @param resolvedMacros Macros that are passed through to the check command.
 */
void Checkable::ExecuteRemoteCheck(const WaitGroup::Ptr& producer, const Dictionary::Ptr& resolvedMacros)
{
	CONTEXT("Executing remote check for object '" << GetName() << "'");

	const double scheduledStart = GetNextCheck();
	const double executionStart = Utility::GetTime();

	CheckResult::Ptr result = new CheckResult();
	result->SetScheduleStart(scheduledStart);
	result->SetExecutionStart(executionStart);

	CheckCommand::Ptr command = GetCheckCommand();
	command->Execute(this, result, producer, resolvedMacros, true);
}
/**
 * Executes the check for this checkable, either locally or on its command endpoint.
 *
 * The function returns immediately if a check is already flagged as running. Otherwise it
 * remembers the current state as "last" state and then:
 *  - for local checks, runs the check command;
 *  - for remote checks on a connected endpoint, sends an "event::ExecuteCommand" message and
 *    silently moves the next check into the future until the result arrives;
 *  - for remote checks on an unconnected endpoint that is not syncing, outside the 5min cold
 *    startup window, processes an UNKNOWN check result;
 *  - otherwise skips the check and just schedules the next one.
 * For remote checks the running flag is cleared again before returning.
 *
 * @param producer Wait group that is passed to the check command and to ProcessCheckResult().
 */
void Checkable::ExecuteCheck(const WaitGroup::Ptr& producer)
{
	CONTEXT("Executing check for object '" << GetName() << "'");

	/* don't run another check if there is one pending */
	if (m_CheckRunning.exchange(true))
		return; // Should never happen as the checker already takes care of this.

	/* keep track of scheduling info in case the check type doesn't provide its own information */
	double scheduled_start = GetNextCheck();
	double before_check = Utility::GetTime();

	SetLastCheckStarted(Utility::GetTime());

	bool reachable = IsReachable();

	{
		ObjectLock olock(this);

		/*
		 * Remember the current state as the "last" one before the check runs. The state type must be
		 * taken from the current state type, just like the raw state is taken from the current raw
		 * state; copying the last state type onto itself would leave it unchanged.
		 */
		SetLastStateRaw(GetStateRaw());
		SetLastStateType(GetStateType());
		SetLastReachable(reachable);
	}

	CheckResult::Ptr cr = new CheckResult();

	cr->SetScheduleStart(scheduled_start);
	cr->SetExecutionStart(before_check);

	Endpoint::Ptr endpoint = GetCommandEndpoint();
	bool local = !endpoint || endpoint == Endpoint::GetLocalEndpoint();

	if (local) {
		GetCheckCommand()->Execute(this, cr, producer, nullptr, false);
	} else {
		/* The check command is invoked with a macro dictionary, which is then sent to the remote endpoint. */
		Dictionary::Ptr macros = new Dictionary();
		GetCheckCommand()->Execute(this, cr, producer, macros, false);

		if (endpoint->GetConnected()) {
			/* perform check on remote endpoint */
			Dictionary::Ptr message = new Dictionary();
			message->Set("jsonrpc", "2.0");
			message->Set("method", "event::ExecuteCommand");

			Host::Ptr host;
			Service::Ptr service;
			tie(host, service) = GetHostService(this);

			Dictionary::Ptr params = new Dictionary();
			message->Set("params", params);
			params->Set("command_type", "check_command");
			params->Set("command", GetCheckCommand()->GetName());
			params->Set("host", host->GetName());

			if (service)
				params->Set("service", service->GetShortName());

			double checkTimeout = GetCheckCommand()->GetTimeout();

			/*
			 * If the host/service object specifies the 'check_timeout' attribute,
			 * forward this to the remote endpoint to limit the command execution time.
			 */
			if (auto ckCheckTimeout(GetCheckTimeout()); !ckCheckTimeout.IsEmpty()) {
				checkTimeout = Convert::ToDouble(ckCheckTimeout);
				params->Set("check_timeout", ckCheckTimeout);
			}

			params->Set("macros", macros);

			ApiListener::Ptr listener = ApiListener::GetInstance();

			if (listener)
				listener->SyncSendMessage(endpoint, message);

			/*
			 * Let the checker use a dummy next check time until we actually receive the check result from the
			 * remote endpoint. This should be sufficiently far in the future to avoid excessive CPU load by
			 * constantly re-running the check, but not too far in the future to avoid that the check is not
			 * re-run for too long in case the remote endpoint never responds. We add a small grace period
			 * to the check command timeout to account for network latency and processing time on the remote
			 * endpoint. So, we only need to silently update this without notifying any listeners, and once
			 * this function returns, the checker is going to access it via GetNextCheck() again.
			 */
			SetNextCheck(Utility::GetTime() + checkTimeout + 30, true);

		/*
		 * Let the user know that there was a problem with the check if
		 * 1) The endpoint is not syncing (replay log, etc.)
		 * 2) Outside of the cold startup window (5min)
		 */
		} else if (!endpoint->GetSyncing() && Application::GetInstance()->GetStartTime() < Utility::GetTime() - 300) {
			/* fail to perform check on unconnected endpoint */
			cr->SetState(ServiceUnknown);

			String output = "Remote Icinga instance '" + endpoint->GetName() + "' is not connected to ";

			Endpoint::Ptr localEndpoint = Endpoint::GetLocalEndpoint();

			if (localEndpoint)
				output += "'" + localEndpoint->GetName() + "'";
			else
				output += "this instance";

			cr->SetOutput(output);

			ProcessCheckResult(cr, producer);
		} else {
			/**
			 * The endpoint is currently either syncing its state or not connected yet and we are within
			 * the magical 5min cold startup window. In both cases, we just don't do anything and wait for
			 * the next check interval to re-try the check again. So, this check is effectively skipped.
			 */
			UpdateNextCheck();
		}

		/**
		 * If this is a remote check, we don't know when the check result will be received and processed.
		 * Therefore, we must mark the check as no longer running here, otherwise, no further checks
		 * would be executed for this checkable as it would always appear as having a running check
		 * (see the check at the start of this function).
		 */
		m_CheckRunning.store(false);
	}
}
/**
 * Records one processed check in the matching CIB statistics counter.
 *
 * The counter is chosen by checkable type (host or service) and by whether the check result is
 * active or passive. Any other checkable type only produces a warning.
 *
 * @param cr The check result; its schedule end is used as the statistics timestamp.
 * @param type The type of the checkable the result belongs to.
 */
void Checkable::UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type)
{
	time_t timestamp = cr->GetScheduleEnd();

	switch (type) {
		case CheckableHost:
			if (cr->GetActive()) {
				CIB::UpdateActiveHostChecksStatistics(timestamp, 1);
			} else {
				CIB::UpdatePassiveHostChecksStatistics(timestamp, 1);
			}
			break;

		case CheckableService:
			if (cr->GetActive()) {
				CIB::UpdateActiveServiceChecksStatistics(timestamp, 1);
			} else {
				CIB::UpdatePassiveServiceChecksStatistics(timestamp, 1);
			}
			break;

		default:
			Log(LogWarning, "Checkable", "Unknown checkable type for statistic update.");
			break;
	}
}
/**
 * Increments the pending checks counter while holding the statistics mutex.
 */
void Checkable::IncreasePendingChecks()
{
	std::lock_guard<std::mutex> guard(m_StatsMutex);

	++m_PendingChecks;
}
/**
 * Decrements the pending checks counter while holding the statistics mutex and wakes up one
 * thread waiting in AquirePendingCheckSlot().
 */
void Checkable::DecreasePendingChecks()
{
	std::lock_guard<std::mutex> guard(m_StatsMutex);

	--m_PendingChecks;

	/* The notification is sent while the mutex is still held. */
	m_PendingChecksCV.notify_one();
}
/**
 * Reads the pending checks counter while holding the statistics mutex.
 *
 * @return The current number of pending checks.
 */
int Checkable::GetPendingChecks()
{
	std::lock_guard<std::mutex> guard(m_StatsMutex);

	return m_PendingChecks;
}
/**
 * Blocks until the number of pending checks is below the given limit, then claims a slot by
 * incrementing the counter.
 *
 * @param maxPendingChecks The limit the pending checks counter has to be below before a slot is taken.
 */
void Checkable::AquirePendingCheckSlot(int maxPendingChecks)
{
	std::unique_lock<std::mutex> lock(m_StatsMutex);

	/* Waits (re-checking after every wake-up) until a slot is free. */
	m_PendingChecksCV.wait(lock, [maxPendingChecks] {
		return m_PendingChecks < maxPendingChecks;
	});

	++m_PendingChecks;
}