From 485e287d281e0eb181c070e23aacb454f14cc811 Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Thu, 17 Feb 2022 16:13:25 +0100 Subject: [PATCH] Prevent deadlock in ProcessCheckResult Without this commit, children and parents of a checkable were rescheduled on a state change while holding the lock for the current checkable. If both ends of a dependency are checked at the same time and both change state, they could end up in a deadlock waiting for each other. This commit fixes this problem by changing the code so that other checkables are rescheduled only after releasing the lock for the current checkable. --- lib/icinga/checkable-check.cpp | 58 ++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/lib/icinga/checkable-check.cpp b/lib/icinga/checkable-check.cpp index 4c6ce2ca1..78d61e507 100644 --- a/lib/icinga/checkable-check.cpp +++ b/lib/icinga/checkable-check.cpp @@ -242,20 +242,6 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig OnReachabilityChanged(this, cr, children, origin); } - if (recovery) { - for (auto& child : children) { - if (child->GetProblem() && child->GetEnableActiveChecks()) { - auto nextCheck (now + Utility::Random() % 60); - - ObjectLock oLock (child); - - if (nextCheck < child->GetNextCheck()) { - child->SetNextCheck(nextCheck); - } - } - } - } - if (!reachable) SetLastStateUnreachable(Utility::GetTime()); @@ -283,20 +269,6 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig (GetAcknowledgement() == AcknowledgementSticky && IsStateOK(new_state))) { ClearAcknowledgement(""); } - - /* reschedule direct parents */ - for (const Checkable::Ptr& parent : GetParents()) { - if (parent.get() == this) - continue; - - if (!parent->GetEnableActiveChecks()) - continue; - - if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) { - ObjectLock olock(parent); - parent->SetNextCheck(now); - } - } } bool remove_acknowledgement_comments = false; @@ -416,6 +388,36 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig << "% current: " << GetFlappingCurrent() << "%."; #endif /* I2_DEBUG */ + if (recovery) { + for (auto& child : children) { + if (child->GetProblem() && child->GetEnableActiveChecks()) { + auto nextCheck (now + Utility::Random() % 60); + + ObjectLock oLock (child); + + if (nextCheck < child->GetNextCheck()) { + child->SetNextCheck(nextCheck); + } + } + } + } + + if (stateChange) { + /* reschedule direct parents */ + for (const Checkable::Ptr& parent : GetParents()) { + if (parent.get() == this) + continue; + + if (!parent->GetEnableActiveChecks()) + continue; + + if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) { + ObjectLock olock(parent); + parent->SetNextCheck(now); + } + } + } + OnNewCheckResult(this, cr, origin); /* signal status updates to for example db_ido */