Introduce OnRescheduleCheck signal & make use of it where appropriate

This commit introduces a new kinda special `OnRescheduleCheck` signal
that is emitted whenever we want to inform the checker to reschedule the
checkable at a specific timestamp without actually changing the next
check time. Previously, we called `SetNextCheck` with some random
timestamp just to enforce the checker to either pick it up immediately
or at a specific time. Then at some point in time, subscribing to the
`OnNextCheckChanged` signal became effectively unusable for any other
purpose than to inform the checker about a new next check time. Thus,
it resulted in introducing a new signal that is solely responsible for
informing the Icigna DB and IDO about a new next check time in places
where calling `SetNextCheck` did make sense. This commit does quite the
opposite: it replaces all calls to `SetNextCheck` that were only used to
inform the checker about a new next check time wit `OnRescheduleCheck`
calls. Only places where we actually wanted to change the next check time
still call `SetNextCheck` and thus inform the checker and all other listeners
about the new next check time. And as a bonus point, we now got rid of the
two object locks for child and parent at the same time.
This commit is contained in:
Yonas Habteab 2025-09-23 18:10:57 +02:00
parent f049f5258a
commit fc8de9e087
5 changed files with 70 additions and 16 deletions

View File

@ -55,6 +55,9 @@ void CheckerComponent::OnConfigLoaded()
Checkable::OnNextCheckChanged.connect([this](const Checkable::Ptr& checkable, const Value&) {
NextCheckChangedHandler(checkable);
});
Checkable::OnRescheduleCheck.connect([this](const Checkable::Ptr& checkable, double nextCheck) {
NextCheckChangedHandler(checkable, nextCheck);
});
}
void CheckerComponent::Start(bool runtimeCreated)
@ -341,7 +344,7 @@ CheckableScheduleInfo CheckerComponent::GetCheckableScheduleInfo(const Checkable
return csi;
}
void CheckerComponent::NextCheckChangedHandler(const Checkable::Ptr& checkable)
void CheckerComponent::NextCheckChangedHandler(const Checkable::Ptr& checkable, double nextCheck)
{
std::unique_lock<std::mutex> lock(m_Mutex);
@ -356,7 +359,13 @@ void CheckerComponent::NextCheckChangedHandler(const Checkable::Ptr& checkable)
idx.erase(checkable);
CheckableScheduleInfo csi = GetCheckableScheduleInfo(checkable);
CheckableScheduleInfo csi;
if (nextCheck < 0) {
csi = GetCheckableScheduleInfo(checkable);
} else {
csi.NextCheck = nextCheck;
csi.Object = checkable;
}
idx.insert(csi);
m_CV.notify_all();

View File

@ -93,7 +93,7 @@ private:
void AdjustCheckTimer();
void ObjectHandler(const ConfigObject::Ptr& object);
void NextCheckChangedHandler(const Checkable::Ptr& checkable);
void NextCheckChangedHandler(const Checkable::Ptr& checkable, double nextCheck = -1);
void RescheduleCheckTimer();

View File

@ -23,6 +23,7 @@ boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, St
boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, std::set<Checkable::Ptr>, const MessageOrigin::Ptr&)> Checkable::OnReachabilityChanged;
boost::signals2::signal<void (const Checkable::Ptr&, NotificationType, const CheckResult::Ptr&, const String&, const String&, const MessageOrigin::Ptr&)> Checkable::OnNotificationsRequested;
boost::signals2::signal<void (const Checkable::Ptr&)> Checkable::OnNextCheckUpdated;
boost::signals2::signal<void (const Checkable::Ptr&, double)> Checkable::OnRescheduleCheck;
Atomic<uint_fast64_t> Checkable::CurrentConcurrentChecks (0);
@ -50,7 +51,17 @@ long Checkable::GetSchedulingOffset() const
return m_SchedulingOffset;
}
void Checkable::UpdateNextCheck(const MessageOrigin::Ptr& origin)
/**
* Update the next check time of this checkable based on its check interval and last check time.
*
* If onlyReschedule is true, the next check time is not actually updated, but the @c Checkable::OnRescheduleCheck
* signal is emitted with the new calculated next check time. Otherwise, the next check time is updated
* and the @c Checkable::OnNextCheckChanged signal is emitted accordingly.
*
* @param origin The origin of the message triggering this update, can be nullptr.
* @param onlyReschedule If true, only emit @c OnRescheduleCheck without updating the next check time.
*/
void Checkable::UpdateNextCheck(const MessageOrigin::Ptr& origin, bool onlyReschedule)
{
double interval;
@ -78,7 +89,14 @@ void Checkable::UpdateNextCheck(const MessageOrigin::Ptr& origin)
<< " (" << lastCheck << ") to next check time at "
<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", nextCheck) << " (" << nextCheck << ").";
SetNextCheck(nextCheck, false, origin);
if (onlyReschedule) {
// Someone requested to only reschedule the next check without actually changing it.
// So, just tell the checker about this new timestamp and return.
OnRescheduleCheck(this, nextCheck);
} else {
// Otherwise, set the next check to the newly calculated timestamp and inform all its listeners.
SetNextCheck(nextCheck, false, origin);
}
}
bool Checkable::HasBeenChecked() const
@ -518,12 +536,15 @@ Checkable::ProcessingResult Checkable::ProcessCheckResult(const CheckResult::Ptr
if (recovery) {
for (auto& child : children) {
if (child->GetProblem() && child->GetEnableActiveChecks()) {
auto nextCheck (now + Utility::Random() % 60);
ObjectLock oLock (child);
if (nextCheck < child->GetNextCheck()) {
child->SetNextCheck(nextCheck);
if (auto nextCheck (now + Utility::Random() % 60); nextCheck < child->GetNextCheck()) {
/**
* We only want to enforce the checker to pick this up sooner, and no need to actually change
* the timesatmp. Plus, no other listeners should be informed about this other than the checker,
* so we emit the OnRescheduleCheck signal directly. In case our checker isn't responsible for
* this child object, we've already broadcasted the `CheckResult` event which will cause on the
* responsible node to enter this exact branch and do the rescheduling for its own checker.
*/
OnRescheduleCheck(child, nextCheck);
}
}
}
@ -539,8 +560,8 @@ Checkable::ProcessingResult Checkable::ProcessCheckResult(const CheckResult::Ptr
continue;
if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) {
ObjectLock olock(parent);
parent->SetNextCheck(now);
// See comment above for children. We want to just enforce an immediate check by our checker.
OnRescheduleCheck(parent, now);
}
}
}

View File

@ -88,14 +88,26 @@ void Checkable::Start(bool runtimeCreated)
auto cr (GetLastCheckResult());
if (GetLastCheckStarted() > (cr ? cr->GetExecutionEnd() : 0.0)) {
SetNextCheck(GetLastCheckStarted());
/**
* Each node performs a check immediately after startup before the object authority is determined.
* Meaning, for runtime created objects we should not enter this branch and for non-runtime created
* objects we're going to call `ApiListener::UpdateObjectAuthority()` after all objects have been
* started, so we can safely assume that this timestamp is only relevant for our own checker.
*
* However, until the authority is determined, by default all objects are paused and thus triggering
* `OnRescheduleCheck` would have no effect, as the checker component ignores paused objects. Therefore,
* our only option is to set this dummy ts silently here, so that when the object is unpaused later,
* the checker component will pick it up and schedule the next check properly.
*/
SetNextCheck(GetLastCheckStarted(), true);
}
}
if (GetNextCheck() < now + 60) {
double delta = std::min(GetCheckInterval(), 60.0);
delta *= (double)std::rand() / RAND_MAX;
SetNextCheck(now + delta);
// We only want to jitter the next check a bit, and inform the scheduler about it, so not setting it directly.
SetNextCheck(now + delta, true);
}
ObjectImpl<Checkable>::Start(runtimeCreated);

View File

@ -104,7 +104,7 @@ public:
long GetSchedulingOffset() const;
void SetSchedulingOffset(long offset);
void UpdateNextCheck(const MessageOrigin::Ptr& origin = nullptr);
void UpdateNextCheck(const MessageOrigin::Ptr& origin = nullptr, bool onlyReschedule = false);
static String StateTypeToString(StateType type);
@ -148,6 +148,18 @@ public:
static boost::signals2::signal<void (const Checkable::Ptr&, const String&, double, const MessageOrigin::Ptr&)> OnAcknowledgementCleared;
static boost::signals2::signal<void (const Checkable::Ptr&, double)> OnFlappingChange;
static boost::signals2::signal<void (const Checkable::Ptr&)> OnNextCheckUpdated;
/**
* Think again! Are you really sure you want to subscribe to this signal?
*
* This signal is a very special and noisy one. It is emitted whenever someone wants to enforce the
* LOCAL scheduler to reschedule the next check of a checkable at the given timestamp. This can be
* due to a number of reasons, for example: Icinga 2 was interrupted while a check was being executed,
* and the checkable needs to be rescheduled on startup; or a parent host changed its state and all its
* children need to be rescheduled to reflect the new reachability state. There are other cases as well,
* but the point is: this signal is meant to only be used by the local @c CheckerComponent to update its
* internal queues. If you want to use this for something else, think twice!!
*/
static boost::signals2::signal<void (const Checkable::Ptr&, double)> OnRescheduleCheck;
static boost::signals2::signal<void (const Checkable::Ptr&)> OnEventCommandExecuted;
static Atomic<uint_fast64_t> CurrentConcurrentChecks;