mirror of https://github.com/Icinga/icinga2.git
Merge pull request #7816 from Icinga/feature/notification-timeperiod-6167
Re-send notifications previously suppressed by their time periods
This commit is contained in:
commit
e8745f7e96
|
@ -1465,6 +1465,39 @@ Message updates will be dropped when:
|
|||
* Checkable does not exist.
|
||||
* Origin endpoint's zone is not allowed to access this checkable.
|
||||
|
||||
#### event::SetSuppressedNotificationTypes <a id="technical-concepts-json-rpc-messages-event-setsuppressednotificationtypes"></a>
|
||||
|
||||
> Location: `clusterevents.cpp`
|
||||
|
||||
##### Message Body
|
||||
|
||||
Key | Value
|
||||
----------|---------
|
||||
jsonrpc | 2.0
|
||||
method | event::SetSuppressedNotificationTypes
|
||||
params | Dictionary
|
||||
|
||||
##### Params
|
||||
|
||||
Key | Type | Description
|
||||
-------------------------|--------|------------------
|
||||
notification | String | Notification name
|
||||
suppressed\_notifications | Number | Bitmask for suppressed notifications.
|
||||
|
||||
##### Functions
|
||||
|
||||
Event Sender: `Notification::OnSuppressedNotificationsChanged`
|
||||
Event Receiver: `SuppressedNotificationTypesChangedAPIHandler`
|
||||
|
||||
##### Permissions
|
||||
|
||||
The receiver will not process messages from not configured endpoints.
|
||||
|
||||
Message updates will be dropped when:
|
||||
|
||||
* Notification does not exist.
|
||||
* Origin endpoint's zone is not allowed to access this notification.
|
||||
|
||||
|
||||
#### event::SetNextNotification <a id="technical-concepts-json-rpc-messages-event-setnextnotification"></a>
|
||||
|
||||
|
|
|
@ -147,25 +147,7 @@ static void FireSuppressedNotifications(Checkable* checkable)
|
|||
|
||||
for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) {
|
||||
if (suppressed_types & type) {
|
||||
bool still_applies;
|
||||
auto cr (checkable->GetLastCheckResult());
|
||||
|
||||
switch (type) {
|
||||
case NotificationProblem:
|
||||
still_applies = cr && !checkable->IsStateOK(cr->GetState()) && checkable->GetStateType() == StateTypeHard;
|
||||
break;
|
||||
case NotificationRecovery:
|
||||
still_applies = cr && checkable->IsStateOK(cr->GetState());
|
||||
break;
|
||||
case NotificationFlappingStart:
|
||||
still_applies = checkable->IsFlapping();
|
||||
break;
|
||||
case NotificationFlappingEnd:
|
||||
still_applies = !checkable->IsFlapping();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
bool still_applies = checkable->NotificationReasonApplies(type);
|
||||
|
||||
if (still_applies) {
|
||||
bool still_suppressed;
|
||||
|
@ -185,28 +167,8 @@ static void FireSuppressedNotifications(Checkable* checkable)
|
|||
break;
|
||||
}
|
||||
|
||||
if (!still_suppressed && checkable->GetEnableActiveChecks()) {
|
||||
/* If e.g. the downtime just ended, but the service is still not ok, we would re-send the stashed problem notification.
|
||||
* But if the next check result recovers the service soon, we would send a recovery notification soon after the problem one.
|
||||
* This is not desired, especially for lots of services at once.
|
||||
* Because of that if there's likely to be a check result soon,
|
||||
* we delay the re-sending of the stashed notification until the next check.
|
||||
* That check either doesn't change anything and we finally re-send the stashed problem notification
|
||||
* or recovers the service and we drop the stashed notification. */
|
||||
|
||||
/* One minute unless the check interval is too short so the next check will always run during the next minute. */
|
||||
auto threshold (checkable->GetCheckInterval() - 10);
|
||||
|
||||
if (threshold > 60)
|
||||
threshold = 60;
|
||||
else if (threshold < 0)
|
||||
threshold = 0;
|
||||
|
||||
still_suppressed = checkable->GetNextCheck() <= Utility::GetTime() + threshold;
|
||||
}
|
||||
|
||||
if (!still_suppressed) {
|
||||
Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr);
|
||||
if (!still_suppressed && !checkable->IsLikelyToBeCheckedSoon()) {
|
||||
Checkable::OnNotificationsRequested(checkable, type, checkable->GetLastCheckResult(), "", "", nullptr);
|
||||
|
||||
subtract |= type;
|
||||
}
|
||||
|
@ -241,3 +203,62 @@ void Checkable::FireSuppressedNotifications(const Timer * const&)
|
|||
::FireSuppressedNotifications(service.get());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether sending a notification of type type right now would represent *this' current state correctly.
|
||||
*
|
||||
* @param type The type of notification to send (or not to send).
|
||||
*
|
||||
* @return Whether to send the notification.
|
||||
*/
|
||||
bool Checkable::NotificationReasonApplies(NotificationType type)
|
||||
{
|
||||
switch (type) {
|
||||
case NotificationProblem:
|
||||
{
|
||||
auto cr (GetLastCheckResult());
|
||||
return cr && !IsStateOK(cr->GetState()) && GetStateType() == StateTypeHard;
|
||||
}
|
||||
case NotificationRecovery:
|
||||
{
|
||||
auto cr (GetLastCheckResult());
|
||||
return cr && IsStateOK(cr->GetState());
|
||||
}
|
||||
case NotificationFlappingStart:
|
||||
return IsFlapping();
|
||||
case NotificationFlappingEnd:
|
||||
return !IsFlapping();
|
||||
default:
|
||||
VERIFY(!"Checkable#NotificationReasonStillApplies(): given type not implemented");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* E.g. we're going to re-send a stashed problem notification as *this is still not ok.
|
||||
* But if the next check result recovers *this soon, we would send a recovery notification soon after the problem one.
|
||||
* This is not desired, especially for lots of checkables at once.
|
||||
* Because of that if there's likely to be a check result soon,
|
||||
* we delay the re-sending of the stashed notification until the next check.
|
||||
* That check either doesn't change anything and we finally re-send the stashed problem notification
|
||||
* or recovers *this and we drop the stashed notification.
|
||||
*
|
||||
* @return Whether *this is likely to be checked soon
|
||||
*/
|
||||
bool Checkable::IsLikelyToBeCheckedSoon()
|
||||
{
|
||||
if (!GetEnableActiveChecks()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// One minute unless the check interval is too short so the next check will always run during the next minute.
|
||||
auto threshold (GetCheckInterval() - 10);
|
||||
|
||||
if (threshold > 60) {
|
||||
threshold = 60;
|
||||
} else if (threshold < 0) {
|
||||
threshold = 0;
|
||||
}
|
||||
|
||||
return GetNextCheck() <= Utility::GetTime() + threshold;
|
||||
}
|
||||
|
|
|
@ -171,6 +171,9 @@ public:
|
|||
void ValidateRetryInterval(const Lazy<double>& lvalue, const ValidationUtils& value) final;
|
||||
void ValidateMaxCheckAttempts(const Lazy<int>& lvalue, const ValidationUtils& value) final;
|
||||
|
||||
bool NotificationReasonApplies(NotificationType type);
|
||||
bool IsLikelyToBeCheckedSoon();
|
||||
|
||||
static void IncreasePendingChecks();
|
||||
static void DecreasePendingChecks();
|
||||
static int GetPendingChecks();
|
||||
|
|
|
@ -26,6 +26,7 @@ REGISTER_APIFUNCTION(CheckResult, event, &ClusterEvents::CheckResultAPIHandler);
|
|||
REGISTER_APIFUNCTION(SetNextCheck, event, &ClusterEvents::NextCheckChangedAPIHandler);
|
||||
REGISTER_APIFUNCTION(SetLastCheckStarted, event, &ClusterEvents::LastCheckStartedChangedAPIHandler);
|
||||
REGISTER_APIFUNCTION(SetSuppressedNotifications, event, &ClusterEvents::SuppressedNotificationsChangedAPIHandler);
|
||||
REGISTER_APIFUNCTION(SetSuppressedNotificationTypes, event, &ClusterEvents::SuppressedNotificationTypesChangedAPIHandler);
|
||||
REGISTER_APIFUNCTION(SetNextNotification, event, &ClusterEvents::NextNotificationChangedAPIHandler);
|
||||
REGISTER_APIFUNCTION(SetForceNextCheck, event, &ClusterEvents::ForceNextCheckChangedAPIHandler);
|
||||
REGISTER_APIFUNCTION(SetForceNextNotification, event, &ClusterEvents::ForceNextNotificationChangedAPIHandler);
|
||||
|
@ -42,6 +43,7 @@ void ClusterEvents::StaticInitialize()
|
|||
Checkable::OnNextCheckChanged.connect(&ClusterEvents::NextCheckChangedHandler);
|
||||
Checkable::OnLastCheckStartedChanged.connect(&ClusterEvents::LastCheckStartedChangedHandler);
|
||||
Checkable::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationsChangedHandler);
|
||||
Notification::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationTypesChangedHandler);
|
||||
Notification::OnNextNotificationChanged.connect(&ClusterEvents::NextNotificationChangedHandler);
|
||||
Checkable::OnForceNextCheckChanged.connect(&ClusterEvents::ForceNextCheckChangedHandler);
|
||||
Checkable::OnForceNextNotificationChanged.connect(&ClusterEvents::ForceNextNotificationChangedHandler);
|
||||
|
@ -360,6 +362,52 @@ Value ClusterEvents::SuppressedNotificationsChangedAPIHandler(const MessageOrigi
|
|||
return Empty;
|
||||
}
|
||||
|
||||
/**
 * Builds an event::SetSuppressedNotificationTypes JSON-RPC message carrying the
 * notification's name and its current suppressed notification types bitmask
 * and hands it to the API listener for relaying.
 *
 * Does nothing if there is no API listener instance.
 *
 * @param notification The notification object whose bitmask changed.
 * @param origin The origin of the change, passed through to RelayMessage().
 */
void ClusterEvents::SuppressedNotificationTypesChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin)
{
	ApiListener::Ptr apiListener = ApiListener::GetInstance();

	if (apiListener) {
		Dictionary::Ptr payload = new Dictionary();
		payload->Set("notification", notification->GetName());
		payload->Set("suppressed_notifications", notification->GetSuppressedNotifications());

		Dictionary::Ptr envelope = new Dictionary();
		envelope->Set("jsonrpc", "2.0");
		envelope->Set("method", "event::SetSuppressedNotificationTypes");
		envelope->Set("params", payload);

		apiListener->RelayMessage(origin, notification, envelope, true);
	}
}
|
||||
|
||||
/**
 * Handles an incoming event::SetSuppressedNotificationTypes message.
 *
 * The message is discarded (returning Empty) when
 * - the sending client has no endpoint,
 * - the named notification object does not exist, or
 * - the origin zone is set and may not access that notification object.
 *
 * Otherwise the notification's suppressed notification types bitmask is set
 * to the received value, passing the origin along.
 *
 * @param origin Where the message came from.
 * @param params Message params; "notification" and "suppressed_notifications" are read.
 *
 * @return Always Empty.
 */
Value ClusterEvents::SuppressedNotificationTypesChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
{
	Endpoint::Ptr endpoint = origin->FromClient->GetEndpoint();

	if (!endpoint) {
		/* Same event wording as in the unauthorized access case below, so both log lines can be told apart
		 * from the ones of the checkable-level 'suppressed notifications changed' handler.
		 */
		Log(LogNotice, "ClusterEvents")
			<< "Discarding 'suppressed notification types changed' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed).";
		return Empty;
	}

	auto notification (Notification::GetByName(params->Get("notification")));

	if (!notification)
		return Empty;

	if (origin->FromZone && !origin->FromZone->CanAccessObject(notification)) {
		Log(LogNotice, "ClusterEvents")
			<< "Discarding 'suppressed notification types changed' message for notification '" << notification->GetName()
			<< "' from '" << origin->FromClient->GetIdentity() << "': Unauthorized access.";
		return Empty;
	}

	notification->SetSuppressedNotifications(params->Get("suppressed_notifications"), false, origin);

	return Empty;
}
|
||||
|
||||
void ClusterEvents::NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin)
|
||||
{
|
||||
ApiListener::Ptr listener = ApiListener::GetInstance();
|
||||
|
|
|
@ -32,6 +32,9 @@ public:
|
|||
static void SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin);
|
||||
static Value SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
||||
|
||||
static void SuppressedNotificationTypesChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin);
|
||||
static Value SuppressedNotificationTypesChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
||||
|
||||
static void NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin);
|
||||
static Value NextNotificationChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
||||
|
||||
|
|
|
@ -234,6 +234,39 @@ void Notification::BeginExecuteNotification(NotificationType type, const CheckRe
|
|||
Log(LogNotice, "Notification")
|
||||
<< "Not sending " << (reminder ? "reminder " : "") << "notifications for notification object '" << notificationName
|
||||
<< "': not in timeperiod '" << tp->GetName() << "'";
|
||||
|
||||
if (!reminder) {
|
||||
switch (type) {
|
||||
case NotificationProblem:
|
||||
case NotificationRecovery:
|
||||
case NotificationFlappingStart:
|
||||
case NotificationFlappingEnd:
|
||||
{
|
||||
/* If a non-reminder notification was suppressed, but just because of its time period,
|
||||
* stash it into a notification types bitmask for maybe re-sending later.
|
||||
*/
|
||||
|
||||
ObjectLock olock (this);
|
||||
int suppressedTypesBefore (GetSuppressedNotifications());
|
||||
int suppressedTypesAfter (suppressedTypesBefore | type);
|
||||
|
||||
for (int conflict : {NotificationProblem | NotificationRecovery, NotificationFlappingStart | NotificationFlappingEnd}) {
|
||||
/* E.g. problem and recovery notifications neutralize each other. */
|
||||
|
||||
if ((suppressedTypesAfter & conflict) == conflict) {
|
||||
suppressedTypesAfter &= ~conflict;
|
||||
}
|
||||
}
|
||||
|
||||
if (suppressedTypesAfter != suppressedTypesBefore) {
|
||||
SetSuppressedNotifications(suppressedTypesAfter);
|
||||
}
|
||||
}
|
||||
default:
|
||||
; // Cheating the compiler on "5 enumeration values not handled in switch"
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -86,6 +86,10 @@ class Notification : CustomVarObject < NotificationNameComposer
|
|||
[state] int notification_number;
|
||||
[state] Timestamp last_problem_notification;
|
||||
|
||||
[state, no_user_view, no_user_modify] int suppressed_notifications {
|
||||
default {{{ return 0; }}}
|
||||
};
|
||||
|
||||
[config, navigation] name(Endpoint) command_endpoint (CommandEndpointRaw) {
|
||||
navigate {{{
|
||||
return Endpoint::GetByName(GetCommandEndpointRaw());
|
||||
|
|
|
@ -56,6 +56,69 @@ void NotificationComponent::Stop(bool runtimeRemoved)
|
|||
ObjectImpl<NotificationComponent>::Stop(runtimeRemoved);
|
||||
}
|
||||
|
||||
static inline
|
||||
void SubtractSuppressedNotificationTypes(const Notification::Ptr& notification, int types)
|
||||
{
|
||||
ObjectLock olock (notification);
|
||||
|
||||
int suppressedTypesBefore (notification->GetSuppressedNotifications());
|
||||
int suppressedTypesAfter (suppressedTypesBefore & ~types);
|
||||
|
||||
if (suppressedTypesAfter != suppressedTypesBefore) {
|
||||
notification->SetSuppressedNotifications(suppressedTypesAfter);
|
||||
}
|
||||
}
|
||||
|
||||
static inline
|
||||
void FireSuppressedNotifications(const Notification::Ptr& notification)
|
||||
{
|
||||
int suppressedTypes (notification->GetSuppressedNotifications());
|
||||
if (!suppressedTypes)
|
||||
return;
|
||||
|
||||
int subtract = 0;
|
||||
auto checkable (notification->GetCheckable());
|
||||
|
||||
for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) {
|
||||
if ((suppressedTypes & type) && !checkable->NotificationReasonApplies(type)) {
|
||||
subtract |= type;
|
||||
suppressedTypes &= ~type;
|
||||
}
|
||||
}
|
||||
|
||||
if (suppressedTypes) {
|
||||
auto tp (notification->GetPeriod());
|
||||
|
||||
if ((!tp || tp->IsInside(Utility::GetTime())) && !checkable->IsLikelyToBeCheckedSoon()) {
|
||||
for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) {
|
||||
if (!(suppressedTypes & type))
|
||||
continue;
|
||||
|
||||
auto notificationName (notification->GetName());
|
||||
|
||||
Log(LogNotice, "NotificationComponent")
|
||||
<< "Attempting to re-send previously suppressed notification '" << notificationName << "'.";
|
||||
|
||||
subtract |= type;
|
||||
SubtractSuppressedNotificationTypes(notification, subtract);
|
||||
subtract = 0;
|
||||
|
||||
try {
|
||||
notification->BeginExecuteNotification(type, checkable->GetLastCheckResult(), false, false);
|
||||
} catch (const std::exception& ex) {
|
||||
Log(LogWarning, "NotificationComponent")
|
||||
<< "Exception occurred during notification for object '"
|
||||
<< notificationName << "': " << DiagnosticInformation(ex, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (subtract) {
|
||||
SubtractSuppressedNotificationTypes(notification, subtract);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Periodically sends notifications.
|
||||
*
|
||||
|
@ -104,37 +167,41 @@ void NotificationComponent::NotificationTimerHandler()
|
|||
bool reachable = checkable->IsReachable(DependencyNotification);
|
||||
|
||||
if (reachable) {
|
||||
Array::Ptr unstashedNotifications = new Array();
|
||||
|
||||
{
|
||||
auto stashedNotifications (notification->GetStashedNotifications());
|
||||
ObjectLock olock(stashedNotifications);
|
||||
Array::Ptr unstashedNotifications = new Array();
|
||||
|
||||
stashedNotifications->CopyTo(unstashedNotifications);
|
||||
stashedNotifications->Clear();
|
||||
}
|
||||
{
|
||||
auto stashedNotifications (notification->GetStashedNotifications());
|
||||
ObjectLock olock(stashedNotifications);
|
||||
|
||||
ObjectLock olock(unstashedNotifications);
|
||||
stashedNotifications->CopyTo(unstashedNotifications);
|
||||
stashedNotifications->Clear();
|
||||
}
|
||||
|
||||
for (Dictionary::Ptr unstashedNotification : unstashedNotifications) {
|
||||
try {
|
||||
Log(LogNotice, "NotificationComponent")
|
||||
<< "Attempting to send stashed notification '" << notificationName << "'.";
|
||||
ObjectLock olock(unstashedNotifications);
|
||||
|
||||
notification->BeginExecuteNotification(
|
||||
(NotificationType)(int)unstashedNotification->Get("type"),
|
||||
(CheckResult::Ptr)unstashedNotification->Get("cr"),
|
||||
(bool)unstashedNotification->Get("force"),
|
||||
(bool)unstashedNotification->Get("reminder"),
|
||||
(String)unstashedNotification->Get("author"),
|
||||
(String)unstashedNotification->Get("text")
|
||||
);
|
||||
} catch (const std::exception& ex) {
|
||||
Log(LogWarning, "NotificationComponent")
|
||||
<< "Exception occurred during notification for object '"
|
||||
<< notificationName << "': " << DiagnosticInformation(ex, false);
|
||||
for (Dictionary::Ptr unstashedNotification : unstashedNotifications) {
|
||||
try {
|
||||
Log(LogNotice, "NotificationComponent")
|
||||
<< "Attempting to send stashed notification '" << notificationName << "'.";
|
||||
|
||||
notification->BeginExecuteNotification(
|
||||
(NotificationType)(int)unstashedNotification->Get("type"),
|
||||
(CheckResult::Ptr)unstashedNotification->Get("cr"),
|
||||
(bool)unstashedNotification->Get("force"),
|
||||
(bool)unstashedNotification->Get("reminder"),
|
||||
(String)unstashedNotification->Get("author"),
|
||||
(String)unstashedNotification->Get("text")
|
||||
);
|
||||
} catch (const std::exception& ex) {
|
||||
Log(LogWarning, "NotificationComponent")
|
||||
<< "Exception occurred during notification for object '"
|
||||
<< notificationName << "': " << DiagnosticInformation(ex, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FireSuppressedNotifications(notification);
|
||||
}
|
||||
|
||||
if (notification->GetInterval() <= 0 && notification->GetNoMoreNotifications()) {
|
||||
|
|
Loading…
Reference in New Issue