Merge pull request #7816 from Icinga/feature/notification-timeperiod-6167

Re-send notifications previously suppressed by their time periods
This commit is contained in:
Eric Lippmann 2020-08-03 10:04:27 +02:00 committed by GitHub
commit e8745f7e96
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 277 additions and 65 deletions

View File

@ -1465,6 +1465,39 @@ Message updates will be dropped when:
* Checkable does not exist. * Checkable does not exist.
* Origin endpoint's zone is not allowed to access this checkable. * Origin endpoint's zone is not allowed to access this checkable.
#### event::SetSuppressedNotificationTypes <a id="technical-concepts-json-rpc-messages-event-setsuppressednotificationtypes"></a>
> Location: `clusterevents.cpp`
##### Message Body
Key | Value
----------|---------
jsonrpc | 2.0
method | event::SetSuppressedNotificationTypes
params | Dictionary
##### Params
Key | Type | Description
-------------------------|--------|------------------
notification | String | Notification name
suppressed\_notifications | Number | Bitmask for suppressed notifications.
##### Functions
Event Sender: `Notification::OnSuppressedNotificationsChanged`
Event Receiver: `SuppressedNotificationTypesChangedAPIHandler`
##### Permissions
The receiver will not process messages from not configured endpoints.
Message updates will be dropped when:
* Notification does not exist.
* Origin endpoint's zone is not allowed to access this notification.
#### event::SetNextNotification <a id="technical-concepts-json-rpc-messages-event-setnextnotification"></a> #### event::SetNextNotification <a id="technical-concepts-json-rpc-messages-event-setnextnotification"></a>

View File

@ -147,25 +147,7 @@ static void FireSuppressedNotifications(Checkable* checkable)
for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) { for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) {
if (suppressed_types & type) { if (suppressed_types & type) {
bool still_applies; bool still_applies = checkable->NotificationReasonApplies(type);
auto cr (checkable->GetLastCheckResult());
switch (type) {
case NotificationProblem:
still_applies = cr && !checkable->IsStateOK(cr->GetState()) && checkable->GetStateType() == StateTypeHard;
break;
case NotificationRecovery:
still_applies = cr && checkable->IsStateOK(cr->GetState());
break;
case NotificationFlappingStart:
still_applies = checkable->IsFlapping();
break;
case NotificationFlappingEnd:
still_applies = !checkable->IsFlapping();
break;
default:
break;
}
if (still_applies) { if (still_applies) {
bool still_suppressed; bool still_suppressed;
@ -185,28 +167,8 @@ static void FireSuppressedNotifications(Checkable* checkable)
break; break;
} }
if (!still_suppressed && checkable->GetEnableActiveChecks()) { if (!still_suppressed && !checkable->IsLikelyToBeCheckedSoon()) {
/* If e.g. the downtime just ended, but the service is still not ok, we would re-send the stashed problem notification. Checkable::OnNotificationsRequested(checkable, type, checkable->GetLastCheckResult(), "", "", nullptr);
* But if the next check result recovers the service soon, we would send a recovery notification soon after the problem one.
* This is not desired, especially for lots of services at once.
* Because of that if there's likely to be a check result soon,
* we delay the re-sending of the stashed notification until the next check.
* That check either doesn't change anything and we finally re-send the stashed problem notification
* or recovers the service and we drop the stashed notification. */
/* One minute unless the check interval is too short so the next check will always run during the next minute. */
auto threshold (checkable->GetCheckInterval() - 10);
if (threshold > 60)
threshold = 60;
else if (threshold < 0)
threshold = 0;
still_suppressed = checkable->GetNextCheck() <= Utility::GetTime() + threshold;
}
if (!still_suppressed) {
Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr);
subtract |= type; subtract |= type;
} }
@ -241,3 +203,62 @@ void Checkable::FireSuppressedNotifications(const Timer * const&)
::FireSuppressedNotifications(service.get()); ::FireSuppressedNotifications(service.get());
} }
} }
/**
 * Returns whether sending a notification of the given type right now would represent *this' current state correctly.
 *
 * - NotificationProblem: a last check result exists, its state is not OK and the state type is hard.
 * - NotificationRecovery: a last check result exists and its state is OK.
 * - NotificationFlappingStart: *this is currently flapping.
 * - NotificationFlappingEnd: *this is currently not flapping.
 *
 * Any other type is considered a programming error: a failing condition is handed to VERIFY()
 * and false is returned should execution continue afterwards.
 *
 * @param type The type of notification to send (or not to send).
 *
 * @return Whether to send the notification.
 */
bool Checkable::NotificationReasonApplies(NotificationType type)
{
	switch (type) {
		case NotificationProblem:
			{
				auto cr (GetLastCheckResult());
				return cr && !IsStateOK(cr->GetState()) && GetStateType() == StateTypeHard;
			}
		case NotificationRecovery:
			{
				auto cr (GetLastCheckResult());
				return cr && IsStateOK(cr->GetState());
			}
		case NotificationFlappingStart:
			return IsFlapping();
		case NotificationFlappingEnd:
			return !IsFlapping();
		default:
			// The message carries this method's real name so a failed assertion can be traced back to it.
			VERIFY(!"Checkable#NotificationReasonApplies(): given type not implemented");
			return false;
	}
}
/**
 * Tells whether a new check result for *this can be expected shortly.
 *
 * Background: when a stashed problem notification is about to be re-sent because *this is still not ok,
 * a check that recovers *this right afterwards would trigger a recovery notification
 * immediately after the problem one. That is undesirable, especially for lots of checkables at once.
 * Callers therefore postpone re-sending while a check is imminent: the next check either
 * changes nothing (and the stashed notification is finally re-sent) or recovers *this
 * (and the stashed notification is dropped).
 *
 * "Soon" means within (check interval - 10), limited to the range from 0 to 60.
 * With active checks disabled the answer is always false.
 *
 * @return Whether *this is likely to be checked soon
 */
bool Checkable::IsLikelyToBeCheckedSoon()
{
	if (GetEnableActiveChecks()) {
		// At most one minute; less if the check interval is so short that
		// the next check would always fall into the upcoming minute.
		auto lookahead (GetCheckInterval() - 10);

		if (lookahead < 0) {
			lookahead = 0;
		} else if (lookahead > 60) {
			lookahead = 60;
		}

		return GetNextCheck() <= Utility::GetTime() + lookahead;
	}

	return false;
}

View File

@ -171,6 +171,9 @@ public:
void ValidateRetryInterval(const Lazy<double>& lvalue, const ValidationUtils& value) final; void ValidateRetryInterval(const Lazy<double>& lvalue, const ValidationUtils& value) final;
void ValidateMaxCheckAttempts(const Lazy<int>& lvalue, const ValidationUtils& value) final; void ValidateMaxCheckAttempts(const Lazy<int>& lvalue, const ValidationUtils& value) final;
bool NotificationReasonApplies(NotificationType type);
bool IsLikelyToBeCheckedSoon();
static void IncreasePendingChecks(); static void IncreasePendingChecks();
static void DecreasePendingChecks(); static void DecreasePendingChecks();
static int GetPendingChecks(); static int GetPendingChecks();

View File

@ -26,6 +26,7 @@ REGISTER_APIFUNCTION(CheckResult, event, &ClusterEvents::CheckResultAPIHandler);
REGISTER_APIFUNCTION(SetNextCheck, event, &ClusterEvents::NextCheckChangedAPIHandler); REGISTER_APIFUNCTION(SetNextCheck, event, &ClusterEvents::NextCheckChangedAPIHandler);
REGISTER_APIFUNCTION(SetLastCheckStarted, event, &ClusterEvents::LastCheckStartedChangedAPIHandler); REGISTER_APIFUNCTION(SetLastCheckStarted, event, &ClusterEvents::LastCheckStartedChangedAPIHandler);
REGISTER_APIFUNCTION(SetSuppressedNotifications, event, &ClusterEvents::SuppressedNotificationsChangedAPIHandler); REGISTER_APIFUNCTION(SetSuppressedNotifications, event, &ClusterEvents::SuppressedNotificationsChangedAPIHandler);
REGISTER_APIFUNCTION(SetSuppressedNotificationTypes, event, &ClusterEvents::SuppressedNotificationTypesChangedAPIHandler);
REGISTER_APIFUNCTION(SetNextNotification, event, &ClusterEvents::NextNotificationChangedAPIHandler); REGISTER_APIFUNCTION(SetNextNotification, event, &ClusterEvents::NextNotificationChangedAPIHandler);
REGISTER_APIFUNCTION(SetForceNextCheck, event, &ClusterEvents::ForceNextCheckChangedAPIHandler); REGISTER_APIFUNCTION(SetForceNextCheck, event, &ClusterEvents::ForceNextCheckChangedAPIHandler);
REGISTER_APIFUNCTION(SetForceNextNotification, event, &ClusterEvents::ForceNextNotificationChangedAPIHandler); REGISTER_APIFUNCTION(SetForceNextNotification, event, &ClusterEvents::ForceNextNotificationChangedAPIHandler);
@ -42,6 +43,7 @@ void ClusterEvents::StaticInitialize()
Checkable::OnNextCheckChanged.connect(&ClusterEvents::NextCheckChangedHandler); Checkable::OnNextCheckChanged.connect(&ClusterEvents::NextCheckChangedHandler);
Checkable::OnLastCheckStartedChanged.connect(&ClusterEvents::LastCheckStartedChangedHandler); Checkable::OnLastCheckStartedChanged.connect(&ClusterEvents::LastCheckStartedChangedHandler);
Checkable::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationsChangedHandler); Checkable::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationsChangedHandler);
Notification::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationTypesChangedHandler);
Notification::OnNextNotificationChanged.connect(&ClusterEvents::NextNotificationChangedHandler); Notification::OnNextNotificationChanged.connect(&ClusterEvents::NextNotificationChangedHandler);
Checkable::OnForceNextCheckChanged.connect(&ClusterEvents::ForceNextCheckChangedHandler); Checkable::OnForceNextCheckChanged.connect(&ClusterEvents::ForceNextCheckChangedHandler);
Checkable::OnForceNextNotificationChanged.connect(&ClusterEvents::ForceNextNotificationChangedHandler); Checkable::OnForceNextNotificationChanged.connect(&ClusterEvents::ForceNextNotificationChangedHandler);
@ -360,6 +362,52 @@ Value ClusterEvents::SuppressedNotificationsChangedAPIHandler(const MessageOrigi
return Empty; return Empty;
} }
/**
 * Relays the suppressed notification types bitmask of a Notification object
 * as an event::SetSuppressedNotificationTypes JSON-RPC message.
 *
 * Does nothing if no ApiListener instance is available.
 *
 * @param notification The notification object whose bitmask is sent.
 * @param origin The message origin, passed through to ApiListener#RelayMessage().
 */
void ClusterEvents::SuppressedNotificationTypesChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin)
{
	ApiListener::Ptr listener = ApiListener::GetInstance();

	if (listener) {
		// Payload: which notification object, and its current bitmask.
		Dictionary::Ptr payload = new Dictionary();
		payload->Set("notification", notification->GetName());
		payload->Set("suppressed_notifications", notification->GetSuppressedNotifications());

		// JSON-RPC envelope around the payload.
		Dictionary::Ptr envelope = new Dictionary();
		envelope->Set("jsonrpc", "2.0");
		envelope->Set("method", "event::SetSuppressedNotificationTypes");
		envelope->Set("params", payload);

		listener->RelayMessage(origin, notification, envelope, true);
	}
}
/**
 * Receives an event::SetSuppressedNotificationTypes message and applies the
 * transmitted bitmask to the addressed Notification object.
 *
 * The message is dropped (and Empty returned) when:
 * - the sending client has no endpoint,
 * - the named notification object does not exist,
 * - the origin zone is not allowed to access the notification object.
 *
 * @param origin The origin of the message (client and zone).
 * @param params The message params; "notification" and "suppressed_notifications" are read.
 *
 * @return Always Empty.
 */
Value ClusterEvents::SuppressedNotificationTypesChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
{
	Endpoint::Ptr endpoint = origin->FromClient->GetEndpoint();

	if (!endpoint) {
		// Uses the same message name as the log line below, so this discard can be told apart
		// from the one of the Checkable-level 'suppressed notifications changed' message.
		Log(LogNotice, "ClusterEvents")
			<< "Discarding 'suppressed notification types changed' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed).";
		return Empty;
	}

	auto notification (Notification::GetByName(params->Get("notification")));

	if (!notification)
		return Empty;

	if (origin->FromZone && !origin->FromZone->CanAccessObject(notification)) {
		Log(LogNotice, "ClusterEvents")
			<< "Discarding 'suppressed notification types changed' message for notification '" << notification->GetName()
			<< "' from '" << origin->FromClient->GetIdentity() << "': Unauthorized access.";
		return Empty;
	}

	notification->SetSuppressedNotifications(params->Get("suppressed_notifications"), false, origin);

	return Empty;
}
void ClusterEvents::NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin) void ClusterEvents::NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin)
{ {
ApiListener::Ptr listener = ApiListener::GetInstance(); ApiListener::Ptr listener = ApiListener::GetInstance();

View File

@ -32,6 +32,9 @@ public:
static void SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin); static void SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin);
static Value SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params); static Value SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
static void SuppressedNotificationTypesChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin);
static Value SuppressedNotificationTypesChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
static void NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin); static void NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin);
static Value NextNotificationChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params); static Value NextNotificationChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);

View File

@ -234,6 +234,39 @@ void Notification::BeginExecuteNotification(NotificationType type, const CheckRe
Log(LogNotice, "Notification") Log(LogNotice, "Notification")
<< "Not sending " << (reminder ? "reminder " : "") << "notifications for notification object '" << notificationName << "Not sending " << (reminder ? "reminder " : "") << "notifications for notification object '" << notificationName
<< "': not in timeperiod '" << tp->GetName() << "'"; << "': not in timeperiod '" << tp->GetName() << "'";
if (!reminder) {
switch (type) {
case NotificationProblem:
case NotificationRecovery:
case NotificationFlappingStart:
case NotificationFlappingEnd:
{
/* If a non-reminder notification was suppressed, but just because of its time period,
* stash it into a notification types bitmask for maybe re-sending later.
*/
ObjectLock olock (this);
int suppressedTypesBefore (GetSuppressedNotifications());
int suppressedTypesAfter (suppressedTypesBefore | type);
for (int conflict : {NotificationProblem | NotificationRecovery, NotificationFlappingStart | NotificationFlappingEnd}) {
/* E.g. problem and recovery notifications neutralize each other. */
if ((suppressedTypesAfter & conflict) == conflict) {
suppressedTypesAfter &= ~conflict;
}
}
if (suppressedTypesAfter != suppressedTypesBefore) {
SetSuppressedNotifications(suppressedTypesAfter);
}
}
default:
; // Cheating the compiler on "5 enumeration values not handled in switch"
}
}
return; return;
} }

View File

@ -86,6 +86,10 @@ class Notification : CustomVarObject < NotificationNameComposer
[state] int notification_number; [state] int notification_number;
[state] Timestamp last_problem_notification; [state] Timestamp last_problem_notification;
[state, no_user_view, no_user_modify] int suppressed_notifications {
default {{{ return 0; }}}
};
[config, navigation] name(Endpoint) command_endpoint (CommandEndpointRaw) { [config, navigation] name(Endpoint) command_endpoint (CommandEndpointRaw) {
navigate {{{ navigate {{{
return Endpoint::GetByName(GetCommandEndpointRaw()); return Endpoint::GetByName(GetCommandEndpointRaw());

View File

@ -56,6 +56,69 @@ void NotificationComponent::Stop(bool runtimeRemoved)
ObjectImpl<NotificationComponent>::Stop(runtimeRemoved); ObjectImpl<NotificationComponent>::Stop(runtimeRemoved);
} }
/**
 * Clears the given notification types from a notification object's suppressed types bitmask.
 *
 * The read-modify-write happens while holding an ObjectLock on the notification object.
 * The setter is only called if the bitmask actually changes.
 *
 * @param notification The notification object to update.
 * @param types Bitmask of the notification types to clear.
 */
static inline
void SubtractSuppressedNotificationTypes(const Notification::Ptr& notification, int types)
{
	ObjectLock olock (notification);

	const int current (notification->GetSuppressedNotifications());
	const int remaining (current & ~types);

	if (remaining == current) {
		return;
	}

	notification->SetSuppressedNotifications(remaining);
}
/**
 * Processes the notification types previously stashed on a notification object.
 *
 * 1. Stashed types whose reason no longer applies to the checkable are scheduled for removal.
 * 2. If types remain, the notification's time period (if any) currently matches and
 *    the checkable is not likely to be checked soon, every remaining type is removed
 *    from the stash and re-sent via Notification#BeginExecuteNotification().
 * 3. Whatever is still scheduled for removal is finally cleared from the stash.
 *
 * @param notification The notification object to process.
 */
static inline
void FireSuppressedNotifications(const Notification::Ptr& notification)
{
	int pendingTypes (notification->GetSuppressedNotifications());

	if (pendingTypes == 0) {
		return;
	}

	int typesToClear = 0;
	auto checkable (notification->GetCheckable());

	// Step 1: forget everything the checkable's current state doesn't justify anymore.
	for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) {
		if (!(pendingTypes & type)) {
			continue;
		}

		if (checkable->NotificationReasonApplies(type)) {
			continue;
		}

		typesToClear |= type;
		pendingTypes &= ~type;
	}

	// Step 2: re-send what is left, provided now is a good moment.
	if (pendingTypes != 0) {
		auto period (notification->GetPeriod());
		bool insidePeriod = !period || period->IsInside(Utility::GetTime());

		if (insidePeriod && !checkable->IsLikelyToBeCheckedSoon()) {
			for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) {
				if (pendingTypes & type) {
					auto notificationName (notification->GetName());

					Log(LogNotice, "NotificationComponent")
						<< "Attempting to re-send previously suppressed notification '" << notificationName << "'.";

					// The stash is updated BEFORE sending. NOTE(review): presumably so that a type
					// stashed again during BeginExecuteNotification() is not wiped afterwards -- confirm.
					typesToClear |= type;
					SubtractSuppressedNotificationTypes(notification, typesToClear);
					typesToClear = 0;

					try {
						notification->BeginExecuteNotification(type, checkable->GetLastCheckResult(), false, false);
					} catch (const std::exception& ex) {
						Log(LogWarning, "NotificationComponent")
							<< "Exception occurred during notification for object '"
							<< notificationName << "': " << DiagnosticInformation(ex, false);
					}
				}
			}
		}
	}

	// Step 3: persist removals collected in step 1 that step 2 didn't already write.
	if (typesToClear != 0) {
		SubtractSuppressedNotificationTypes(notification, typesToClear);
	}
}
/** /**
* Periodically sends notifications. * Periodically sends notifications.
* *
@ -104,6 +167,7 @@ void NotificationComponent::NotificationTimerHandler()
bool reachable = checkable->IsReachable(DependencyNotification); bool reachable = checkable->IsReachable(DependencyNotification);
if (reachable) { if (reachable) {
{
Array::Ptr unstashedNotifications = new Array(); Array::Ptr unstashedNotifications = new Array();
{ {
@ -137,6 +201,9 @@ void NotificationComponent::NotificationTimerHandler()
} }
} }
FireSuppressedNotifications(notification);
}
if (notification->GetInterval() <= 0 && notification->GetNoMoreNotifications()) { if (notification->GetInterval() <= 0 && notification->GetNoMoreNotifications()) {
Log(LogNotice, "NotificationComponent") Log(LogNotice, "NotificationComponent")
<< "Reminder notification '" << notificationName << "': Notification was sent out once and interval=0 disables reminder notifications."; << "Reminder notification '" << notificationName << "': Notification was sent out once and interval=0 disables reminder notifications.";