Prevent deadlock in ProcessCheckResult

Without this commit, children and parents of a checkable were rescheduled on a
state change while holding the lock for the current checkable. If both ends of
a dependency are checked at the same time and both change state, each thread
holds the lock for its own checkable while trying to acquire the lock for the
other one, so they could end up in a deadlock waiting for each other.

This commit fixes this problem by changing the code so that other checkables
are rescheduled only after releasing the lock for the current checkable.
This commit is contained in:
Julian Brost 2022-02-17 16:13:25 +01:00
parent 8016b013ac
commit 3bb9cdb8cc

View File

@ -239,20 +239,6 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
}
}
if (recovery) {
for (auto& child : children) {
if (child->GetProblem() && child->GetEnableActiveChecks()) {
auto nextCheck (now + Utility::Random() % 60);
ObjectLock oLock (child);
if (nextCheck < child->GetNextCheck()) {
child->SetNextCheck(nextCheck);
}
}
}
}
if (!reachable)
SetLastStateUnreachable(cr->GetExecutionEnd());
@ -280,20 +266,6 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
(GetAcknowledgement() == AcknowledgementSticky && IsStateOK(new_state))) {
ClearAcknowledgement("");
}
/* reschedule direct parents */
for (const Checkable::Ptr& parent : GetParents()) {
if (parent.get() == this)
continue;
if (!parent->GetEnableActiveChecks())
continue;
if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) {
ObjectLock olock(parent);
parent->SetNextCheck(now);
}
}
}
bool remove_acknowledgement_comments = false;
@ -415,6 +387,36 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
<< "% current: " << GetFlappingCurrent() << "%.";
#endif /* I2_DEBUG */
if (recovery) {
for (auto& child : children) {
if (child->GetProblem() && child->GetEnableActiveChecks()) {
auto nextCheck (now + Utility::Random() % 60);
ObjectLock oLock (child);
if (nextCheck < child->GetNextCheck()) {
child->SetNextCheck(nextCheck);
}
}
}
}
if (stateChange) {
/* reschedule direct parents */
for (const Checkable::Ptr& parent : GetParents()) {
if (parent.get() == this)
continue;
if (!parent->GetEnableActiveChecks())
continue;
if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) {
ObjectLock olock(parent);
parent->SetNextCheck(now);
}
}
}
OnNewCheckResult(this, cr, origin);
/* signal status updates to for example db_ido */