Merge pull request #10011 from Icinga/next-check-cluster-sync-issue

Checkable: Don't recalculate `next_check` for remotely generated `cr`
This commit is contained in:
Julian Brost 2024-08-30 13:37:41 +02:00 committed by GitHub
commit 4c6b93d617
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 20 additions and 13 deletions

View File

@@ -360,21 +360,28 @@ Checkable::ProcessingResult Checkable::ProcessCheckResult(const CheckResult::Ptr
bool is_flapping = IsFlapping();
if (cr->GetActive()) {
UpdateNextCheck(origin);
} else {
/* Reschedule the next check for external passive check results. The side effect of
* this is that for as long as we receive results for a service we
* won't execute any active checks. */
double offset;
double ttl = cr->GetTtl();
// Don't recompute the next check when the check result wasn't generated by this endpoint. For a remotely
// generated check result we should have already received the "SetNextCheck" event before the "event::CheckResult"
// cluster event. Recomputing it here would invalidate that previously received next check and cause the Checkable's
// "next_check/next_update" in an HA setup to always differ from the other endpoint, because "m_SchedulingOffset"
// is randomly initialised on each node.
if (!origin) {
if (cr->GetActive()) {
UpdateNextCheck();
} else {
/* Reschedule the next check for external passive check results. The side effect of
* this is that for as long as we receive results for a service we
* won't execute any active checks. */
double offset;
double ttl = cr->GetTtl();
if (ttl > 0)
offset = ttl;
else
offset = GetCheckInterval();
if (ttl > 0)
offset = ttl;
else
offset = GetCheckInterval();
SetNextCheck(Utility::GetTime() + offset, false, origin);
SetNextCheck(Utility::GetTime() + offset);
}
}
olock.Unlock();