diff --git a/lib/icinga/checkable-notification.cpp b/lib/icinga/checkable-notification.cpp index 43b4589fa..59d20fda4 100644 --- a/lib/icinga/checkable-notification.cpp +++ b/lib/icinga/checkable-notification.cpp @@ -10,6 +10,7 @@ #include "base/exception.hpp" #include "base/context.hpp" #include "base/convert.hpp" +#include "base/lazy-init.hpp" #include "remote/apilistener.hpp" using namespace icinga; @@ -145,73 +146,96 @@ static void FireSuppressedNotifications(Checkable* checkable) int subtract = 0; - for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) { - if (suppressed_types & type) { - bool still_applies; + { + LazyInit wasLastParentRecoveryRecent ([&checkable]() { auto cr (checkable->GetLastCheckResult()); - switch (type) { - case NotificationProblem: - still_applies = cr && !checkable->IsStateOK(cr->GetState()) && checkable->GetStateType() == StateTypeHard; - break; - case NotificationRecovery: - still_applies = cr && checkable->IsStateOK(cr->GetState()); - break; - case NotificationFlappingStart: - still_applies = checkable->IsFlapping(); - break; - case NotificationFlappingEnd: - still_applies = !checkable->IsFlapping(); - break; - default: - break; + if (!cr) { + return true; } - if (still_applies) { - bool still_suppressed; + auto threshold (cr->GetExecutionStart()); + + for (auto& dep : checkable->GetDependencies()) { + auto parent (dep->GetParent()); + ObjectLock oLock (parent); + + if (!parent->GetProblem() && parent->GetLastStateChange() >= threshold) { + return true; + } + } + + return false; + }); + + for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) { + if (suppressed_types & type) { + bool still_applies; + auto cr (checkable->GetLastCheckResult()); switch (type) { case NotificationProblem: - /* Fall through. */ + still_applies = cr && !checkable->IsStateOK(cr->GetState()) && checkable->GetStateType() == StateTypeHard; + break; case NotificationRecovery: - still_suppressed = !checkable->IsReachable(DependencyNotification) || checkable->IsInDowntime() || checkable->IsAcknowledged(); + still_applies = cr && checkable->IsStateOK(cr->GetState()); break; case NotificationFlappingStart: - /* Fall through. */ + still_applies = checkable->IsFlapping(); + break; case NotificationFlappingEnd: - still_suppressed = checkable->IsInDowntime(); + still_applies = !checkable->IsFlapping(); break; default: break; } - if (!still_suppressed && checkable->GetEnableActiveChecks()) { - /* If e.g. the downtime just ended, but the service is still not ok, we would re-send the stashed problem notification. - * But if the next check result recovers the service soon, we would send a recovery notification soon after the problem one. - * This is not desired, especially for lots of services at once. - * Because of that if there's likely to be a check result soon, - * we delay the re-sending of the stashed notification until the next check. - * That check either doesn't change anything and we finally re-send the stashed problem notification - * or recovers the service and we drop the stashed notification. */ + if (still_applies) { + bool still_suppressed; - /* One minute unless the check interval is too short so the next check will always run during the next minute. */ - auto threshold (checkable->GetCheckInterval() - 10); + switch (type) { + case NotificationProblem: + /* Fall through. */ + case NotificationRecovery: + still_suppressed = !checkable->IsReachable(DependencyNotification) || checkable->IsInDowntime() || checkable->IsAcknowledged(); + break; + case NotificationFlappingStart: + /* Fall through. */ + case NotificationFlappingEnd: + still_suppressed = checkable->IsInDowntime(); + break; + default: + break; + } - if (threshold > 60) - threshold = 60; - else if (threshold < 0) - threshold = 0; + if (!still_suppressed && checkable->GetEnableActiveChecks()) { + /* If e.g. the downtime just ended, but the service is still not ok, we would re-send the stashed problem notification. + * But if the next check result recovers the service soon, we would send a recovery notification soon after the problem one. + * This is not desired, especially for lots of services at once. + * Because of that if there's likely to be a check result soon, + * we delay the re-sending of the stashed notification until the next check. + * That check either doesn't change anything and we finally re-send the stashed problem notification + * or recovers the service and we drop the stashed notification. */ - still_suppressed = checkable->GetNextCheck() <= Utility::GetTime() + threshold; - } + /* One minute unless the check interval is too short so the next check will always run during the next minute. */ + auto threshold (checkable->GetCheckInterval() - 10); - if (!still_suppressed) { - Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr); + if (threshold > 60) + threshold = 60; + else if (threshold < 0) + threshold = 0; + still_suppressed = checkable->GetNextCheck() <= Utility::GetTime() + threshold; + } + + if (!still_suppressed && !wasLastParentRecoveryRecent.Get()) { + Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr); + + subtract |= type; + } + } else { subtract |= type; } - } else { - subtract |= type; } } }