diff --git a/doc/19-technical-concepts.md b/doc/19-technical-concepts.md index 9fc0edeed..5e46985c8 100644 --- a/doc/19-technical-concepts.md +++ b/doc/19-technical-concepts.md @@ -1465,6 +1465,39 @@ Message updates will be dropped when: * Checkable does not exist. * Origin endpoint's zone is not allowed to access this checkable. +#### event::SetSuppressedNotificationTypes + +> Location: `clusterevents.cpp` + +##### Message Body + +Key | Value +----------|--------- +jsonrpc | 2.0 +method | event::SetSuppressedNotificationTypes +params | Dictionary + +##### Params + +Key | Type | Description +-------------------------|--------|------------------ +notification | String | Notification name +suppressed\_notifications | Number | Bitmask for suppressed notifications. + +##### Functions + +Event Sender: `Notification::OnSuppressedNotificationsChanged` +Event Receiver: `SuppressedNotificationTypesChangedAPIHandler` + +##### Permissions + +The receiver will not process messages from not configured endpoints. + +Message updates will be dropped when: + +* Notification does not exist. +* Origin endpoint's zone is not allowed to access this notification. 
+ #### event::SetNextNotification diff --git a/lib/icinga/checkable-notification.cpp b/lib/icinga/checkable-notification.cpp index 43b4589fa..8dc0dfd58 100644 --- a/lib/icinga/checkable-notification.cpp +++ b/lib/icinga/checkable-notification.cpp @@ -147,25 +147,7 @@ static void FireSuppressedNotifications(Checkable* checkable) for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) { if (suppressed_types & type) { - bool still_applies; - auto cr (checkable->GetLastCheckResult()); - - switch (type) { - case NotificationProblem: - still_applies = cr && !checkable->IsStateOK(cr->GetState()) && checkable->GetStateType() == StateTypeHard; - break; - case NotificationRecovery: - still_applies = cr && checkable->IsStateOK(cr->GetState()); - break; - case NotificationFlappingStart: - still_applies = checkable->IsFlapping(); - break; - case NotificationFlappingEnd: - still_applies = !checkable->IsFlapping(); - break; - default: - break; - } + bool still_applies = checkable->NotificationReasonApplies(type); if (still_applies) { bool still_suppressed; @@ -185,28 +167,8 @@ static void FireSuppressedNotifications(Checkable* checkable) break; } - if (!still_suppressed && checkable->GetEnableActiveChecks()) { - /* If e.g. the downtime just ended, but the service is still not ok, we would re-send the stashed problem notification. - * But if the next check result recovers the service soon, we would send a recovery notification soon after the problem one. - * This is not desired, especially for lots of services at once. - * Because of that if there's likely to be a check result soon, - * we delay the re-sending of the stashed notification until the next check. - * That check either doesn't change anything and we finally re-send the stashed problem notification - * or recovers the service and we drop the stashed notification. 
*/ - - /* One minute unless the check interval is too short so the next check will always run during the next minute. */ - auto threshold (checkable->GetCheckInterval() - 10); - - if (threshold > 60) - threshold = 60; - else if (threshold < 0) - threshold = 0; - - still_suppressed = checkable->GetNextCheck() <= Utility::GetTime() + threshold; - } - - if (!still_suppressed) { - Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr); + if (!still_suppressed && !checkable->IsLikelyToBeCheckedSoon()) { + Checkable::OnNotificationsRequested(checkable, type, checkable->GetLastCheckResult(), "", "", nullptr); subtract |= type; } @@ -241,3 +203,62 @@ void Checkable::FireSuppressedNotifications(const Timer * const&) ::FireSuppressedNotifications(service.get()); } } + +/** + * Returns whether a notification of the given type, if sent right now, would correctly represent the current state of *this. + * + * @param type The type of notification to send (or not to send). + * + * @return Whether the reason for a notification of that type currently applies. + */ +bool Checkable::NotificationReasonApplies(NotificationType type) +{ + switch (type) { + case NotificationProblem: + { + auto cr (GetLastCheckResult()); + return cr && !IsStateOK(cr->GetState()) && GetStateType() == StateTypeHard; + } + case NotificationRecovery: + { + auto cr (GetLastCheckResult()); + return cr && IsStateOK(cr->GetState()); + } + case NotificationFlappingStart: + return IsFlapping(); + case NotificationFlappingEnd: + return !IsFlapping(); + default: + VERIFY(!"Checkable#NotificationReasonApplies(): given type not implemented"); + return false; + } +} + +/** + * E.g. we're going to re-send a stashed problem notification as *this is still not ok. + * But if the next check result recovers *this soon, we would send a recovery notification soon after the problem one. + * This is not desired, especially for lots of checkables at once. 
+ * Because of that if there's likely to be a check result soon, + * we delay the re-sending of the stashed notification until the next check. + * That check either doesn't change anything and we finally re-send the stashed problem notification + * or recovers *this and we drop the stashed notification. + * + * @return Whether *this is likely to be checked soon + */ +bool Checkable::IsLikelyToBeCheckedSoon() +{ + if (!GetEnableActiveChecks()) { + return false; + } + + // One minute unless the check interval is too short so the next check will always run during the next minute. + auto threshold (GetCheckInterval() - 10); + + if (threshold > 60) { + threshold = 60; + } else if (threshold < 0) { + threshold = 0; + } + + return GetNextCheck() <= Utility::GetTime() + threshold; +} diff --git a/lib/icinga/checkable.hpp b/lib/icinga/checkable.hpp index 0eb1c5950..dc782aca5 100644 --- a/lib/icinga/checkable.hpp +++ b/lib/icinga/checkable.hpp @@ -171,6 +171,9 @@ public: void ValidateRetryInterval(const Lazy& lvalue, const ValidationUtils& value) final; void ValidateMaxCheckAttempts(const Lazy& lvalue, const ValidationUtils& value) final; + bool NotificationReasonApplies(NotificationType type); + bool IsLikelyToBeCheckedSoon(); + static void IncreasePendingChecks(); static void DecreasePendingChecks(); static int GetPendingChecks(); diff --git a/lib/icinga/clusterevents.cpp b/lib/icinga/clusterevents.cpp index 37809a8c6..c9112eb0d 100644 --- a/lib/icinga/clusterevents.cpp +++ b/lib/icinga/clusterevents.cpp @@ -26,6 +26,7 @@ REGISTER_APIFUNCTION(CheckResult, event, &ClusterEvents::CheckResultAPIHandler); REGISTER_APIFUNCTION(SetNextCheck, event, &ClusterEvents::NextCheckChangedAPIHandler); REGISTER_APIFUNCTION(SetLastCheckStarted, event, &ClusterEvents::LastCheckStartedChangedAPIHandler); REGISTER_APIFUNCTION(SetSuppressedNotifications, event, &ClusterEvents::SuppressedNotificationsChangedAPIHandler); +REGISTER_APIFUNCTION(SetSuppressedNotificationTypes, event, 
&ClusterEvents::SuppressedNotificationTypesChangedAPIHandler); REGISTER_APIFUNCTION(SetNextNotification, event, &ClusterEvents::NextNotificationChangedAPIHandler); REGISTER_APIFUNCTION(SetForceNextCheck, event, &ClusterEvents::ForceNextCheckChangedAPIHandler); REGISTER_APIFUNCTION(SetForceNextNotification, event, &ClusterEvents::ForceNextNotificationChangedAPIHandler); @@ -42,6 +43,7 @@ void ClusterEvents::StaticInitialize() Checkable::OnNextCheckChanged.connect(&ClusterEvents::NextCheckChangedHandler); Checkable::OnLastCheckStartedChanged.connect(&ClusterEvents::LastCheckStartedChangedHandler); Checkable::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationsChangedHandler); + Notification::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationTypesChangedHandler); Notification::OnNextNotificationChanged.connect(&ClusterEvents::NextNotificationChangedHandler); Checkable::OnForceNextCheckChanged.connect(&ClusterEvents::ForceNextCheckChangedHandler); Checkable::OnForceNextNotificationChanged.connect(&ClusterEvents::ForceNextNotificationChangedHandler); @@ -360,6 +362,52 @@ Value ClusterEvents::SuppressedNotificationsChangedAPIHandler(const MessageOrigi return Empty; } +void ClusterEvents::SuppressedNotificationTypesChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin) +{ + ApiListener::Ptr listener = ApiListener::GetInstance(); + + if (!listener) + return; + + Dictionary::Ptr params = new Dictionary(); + params->Set("notification", notification->GetName()); + params->Set("suppressed_notifications", notification->GetSuppressedNotifications()); + + Dictionary::Ptr message = new Dictionary(); + message->Set("jsonrpc", "2.0"); + message->Set("method", "event::SetSuppressedNotificationTypes"); + message->Set("params", params); + + listener->RelayMessage(origin, notification, message, true); +} + +Value ClusterEvents::SuppressedNotificationTypesChangedAPIHandler(const MessageOrigin::Ptr& 
origin, const Dictionary::Ptr& params) +{ + Endpoint::Ptr endpoint = origin->FromClient->GetEndpoint(); + + if (!endpoint) { + Log(LogNotice, "ClusterEvents") + << "Discarding 'suppressed notification types changed' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed)."; + return Empty; + } + + auto notification (Notification::GetByName(params->Get("notification"))); + + if (!notification) + return Empty; + + if (origin->FromZone && !origin->FromZone->CanAccessObject(notification)) { + Log(LogNotice, "ClusterEvents") + << "Discarding 'suppressed notification types changed' message for notification '" << notification->GetName() + << "' from '" << origin->FromClient->GetIdentity() << "': Unauthorized access."; + return Empty; + } + + notification->SetSuppressedNotifications(params->Get("suppressed_notifications"), false, origin); + + return Empty; +} + void ClusterEvents::NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin) { ApiListener::Ptr listener = ApiListener::GetInstance(); diff --git a/lib/icinga/clusterevents.hpp b/lib/icinga/clusterevents.hpp index 539dd961f..174a03d44 100644 --- a/lib/icinga/clusterevents.hpp +++ b/lib/icinga/clusterevents.hpp @@ -32,6 +32,9 @@ public: static void SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin); static Value SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params); + static void SuppressedNotificationTypesChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin); + static Value SuppressedNotificationTypesChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params); + static void NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin); static Value NextNotificationChangedAPIHandler(const MessageOrigin::Ptr& origin, const 
Dictionary::Ptr& params); diff --git a/lib/icinga/notification.cpp b/lib/icinga/notification.cpp index 9c2ae7ec8..7555b457c 100644 --- a/lib/icinga/notification.cpp +++ b/lib/icinga/notification.cpp @@ -234,6 +234,39 @@ void Notification::BeginExecuteNotification(NotificationType type, const CheckRe Log(LogNotice, "Notification") << "Not sending " << (reminder ? "reminder " : "") << "notifications for notification object '" << notificationName << "': not in timeperiod '" << tp->GetName() << "'"; + + if (!reminder) { + switch (type) { + case NotificationProblem: + case NotificationRecovery: + case NotificationFlappingStart: + case NotificationFlappingEnd: + { + /* If a non-reminder notification was suppressed, but just because of its time period, + * stash it into a notification types bitmask for maybe re-sending later. + */ + + ObjectLock olock (this); + int suppressedTypesBefore (GetSuppressedNotifications()); + int suppressedTypesAfter (suppressedTypesBefore | type); + + for (int conflict : {NotificationProblem | NotificationRecovery, NotificationFlappingStart | NotificationFlappingEnd}) { + /* E.g. problem and recovery notifications neutralize each other. 
*/ + + if ((suppressedTypesAfter & conflict) == conflict) { + suppressedTypesAfter &= ~conflict; + } + } + + if (suppressedTypesAfter != suppressedTypesBefore) { + SetSuppressedNotifications(suppressedTypesAfter); + } + } + default: + ; // Cheating the compiler on "5 enumeration values not handled in switch" + } + } + return; } diff --git a/lib/icinga/notification.ti b/lib/icinga/notification.ti index a283bbb84..e76a4f775 100644 --- a/lib/icinga/notification.ti +++ b/lib/icinga/notification.ti @@ -86,6 +86,10 @@ class Notification : CustomVarObject < NotificationNameComposer [state] int notification_number; [state] Timestamp last_problem_notification; + [state, no_user_view, no_user_modify] int suppressed_notifications { + default {{{ return 0; }}} + }; + [config, navigation] name(Endpoint) command_endpoint (CommandEndpointRaw) { navigate {{{ return Endpoint::GetByName(GetCommandEndpointRaw()); diff --git a/lib/notification/notificationcomponent.cpp b/lib/notification/notificationcomponent.cpp index aa9601201..3bc4b5a63 100644 --- a/lib/notification/notificationcomponent.cpp +++ b/lib/notification/notificationcomponent.cpp @@ -56,6 +56,69 @@ void NotificationComponent::Stop(bool runtimeRemoved) ObjectImpl::Stop(runtimeRemoved); } +static inline +void SubtractSuppressedNotificationTypes(const Notification::Ptr& notification, int types) +{ + ObjectLock olock (notification); + + int suppressedTypesBefore (notification->GetSuppressedNotifications()); + int suppressedTypesAfter (suppressedTypesBefore & ~types); + + if (suppressedTypesAfter != suppressedTypesBefore) { + notification->SetSuppressedNotifications(suppressedTypesAfter); + } +} + +static inline +void FireSuppressedNotifications(const Notification::Ptr& notification) +{ + int suppressedTypes (notification->GetSuppressedNotifications()); + if (!suppressedTypes) + return; + + int subtract = 0; + auto checkable (notification->GetCheckable()); + + for (auto type : {NotificationProblem, NotificationRecovery, 
NotificationFlappingStart, NotificationFlappingEnd}) { + if ((suppressedTypes & type) && !checkable->NotificationReasonApplies(type)) { + subtract |= type; + suppressedTypes &= ~type; + } + } + + if (suppressedTypes) { + auto tp (notification->GetPeriod()); + + if ((!tp || tp->IsInside(Utility::GetTime())) && !checkable->IsLikelyToBeCheckedSoon()) { + for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) { + if (!(suppressedTypes & type)) + continue; + + auto notificationName (notification->GetName()); + + Log(LogNotice, "NotificationComponent") + << "Attempting to re-send previously suppressed notification '" << notificationName << "'."; + + subtract |= type; + SubtractSuppressedNotificationTypes(notification, subtract); + subtract = 0; + + try { + notification->BeginExecuteNotification(type, checkable->GetLastCheckResult(), false, false); + } catch (const std::exception& ex) { + Log(LogWarning, "NotificationComponent") + << "Exception occurred during notification for object '" + << notificationName << "': " << DiagnosticInformation(ex, false); + } + } + } + } + + if (subtract) { + SubtractSuppressedNotificationTypes(notification, subtract); + } +} + /** * Periodically sends notifications. 
* @@ -104,37 +167,41 @@ void NotificationComponent::NotificationTimerHandler() bool reachable = checkable->IsReachable(DependencyNotification); if (reachable) { - Array::Ptr unstashedNotifications = new Array(); - { - auto stashedNotifications (notification->GetStashedNotifications()); - ObjectLock olock(stashedNotifications); + Array::Ptr unstashedNotifications = new Array(); - stashedNotifications->CopyTo(unstashedNotifications); - stashedNotifications->Clear(); - } + { + auto stashedNotifications (notification->GetStashedNotifications()); + ObjectLock olock(stashedNotifications); - ObjectLock olock(unstashedNotifications); + stashedNotifications->CopyTo(unstashedNotifications); + stashedNotifications->Clear(); + } - for (Dictionary::Ptr unstashedNotification : unstashedNotifications) { - try { - Log(LogNotice, "NotificationComponent") - << "Attempting to send stashed notification '" << notificationName << "'."; + ObjectLock olock(unstashedNotifications); - notification->BeginExecuteNotification( - (NotificationType)(int)unstashedNotification->Get("type"), - (CheckResult::Ptr)unstashedNotification->Get("cr"), - (bool)unstashedNotification->Get("force"), - (bool)unstashedNotification->Get("reminder"), - (String)unstashedNotification->Get("author"), - (String)unstashedNotification->Get("text") - ); - } catch (const std::exception& ex) { - Log(LogWarning, "NotificationComponent") - << "Exception occurred during notification for object '" - << notificationName << "': " << DiagnosticInformation(ex, false); + for (Dictionary::Ptr unstashedNotification : unstashedNotifications) { + try { + Log(LogNotice, "NotificationComponent") + << "Attempting to send stashed notification '" << notificationName << "'."; + + notification->BeginExecuteNotification( + (NotificationType)(int)unstashedNotification->Get("type"), + (CheckResult::Ptr)unstashedNotification->Get("cr"), + (bool)unstashedNotification->Get("force"), + (bool)unstashedNotification->Get("reminder"), + 
(String)unstashedNotification->Get("author"), + (String)unstashedNotification->Get("text") + ); + } catch (const std::exception& ex) { + Log(LogWarning, "NotificationComponent") + << "Exception occurred during notification for object '" + << notificationName << "': " << DiagnosticInformation(ex, false); + } } } + + FireSuppressedNotifications(notification); } if (notification->GetInterval() <= 0 && notification->GetNoMoreNotifications()) {