diff --git a/lib/icinga/checkable-check.cpp b/lib/icinga/checkable-check.cpp
index cab1557ea..90c6395e1 100644
--- a/lib/icinga/checkable-check.cpp
+++ b/lib/icinga/checkable-check.cpp
@@ -309,15 +309,14 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
 	bool in_downtime = IsInDowntime();
 
 	bool send_notification = false;
+	bool suppress_notification = !notification_reachable || in_downtime || IsAcknowledged();
 
-	if (notification_reachable && !in_downtime && !IsAcknowledged()) {
-		/* Send notifications whether when a hard state change occurred. */
-		if (hardChange && !(old_stateType == StateTypeSoft && IsStateOK(new_state)))
-			send_notification = true;
-		/* Or if the checkable is volatile and in a HARD state. */
-		else if (is_volatile && GetStateType() == StateTypeHard)
-			send_notification = true;
-	}
+	/* Send notifications when a hard state change occurred. */
+	if (hardChange && !(old_stateType == StateTypeSoft && IsStateOK(new_state)))
+		send_notification = true;
+	/* Or if the checkable is volatile and in a HARD state. */
+	else if (is_volatile && GetStateType() == StateTypeHard)
+		send_notification = true;
 
 	if (IsStateOK(old_state) && old_stateType == StateTypeSoft)
 		send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */
@@ -405,21 +404,33 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
 	    (is_volatile && !(IsStateOK(old_state) && IsStateOK(new_state))))
 		ExecuteEventHandler();
 
+	int suppressed_types = 0;
+
 	/* Flapping start/end notifications */
-	if (!in_downtime && !was_flapping && is_flapping) {
+	if (!was_flapping && is_flapping) {
 		/* FlappingStart notifications happen on state changes, not in downtimes */
-		if (!IsPaused())
-			OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "", nullptr);
+		if (!IsPaused()) {
+			if (in_downtime) {
+				suppressed_types |= NotificationFlappingStart;
+			} else {
+				OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "", nullptr);
+			}
+		}
 
 		Log(LogNotice, "Checkable")
 			<< "Flapping Start: Checkable '" << GetName() << "' started flapping (Current flapping value "
 			<< GetFlappingCurrent() << "% > high threshold " << GetFlappingThresholdHigh() << "%).";
 
 		NotifyFlapping(origin);
-	} else if (!in_downtime && was_flapping && !is_flapping) {
+	} else if (was_flapping && !is_flapping) {
 		/* FlappingEnd notifications are independent from state changes, must not happen in downtine */
-		if (!IsPaused())
-			OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "", nullptr);
+		if (!IsPaused()) {
+			if (in_downtime) {
+				suppressed_types |= NotificationFlappingEnd;
+			} else {
+				OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "", nullptr);
+			}
+		}
 
 		Log(LogNotice, "Checkable")
 			<< "Flapping Stop: Checkable '" << GetName() << "' stopped flapping (Current flapping value "
@@ -429,8 +440,35 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig
 	}
 
 	if (send_notification && !is_flapping) {
-		if (!IsPaused())
-			OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "", nullptr);
+		if (!IsPaused()) {
+			if (suppress_notification) {
+				suppressed_types |= (recovery ? NotificationRecovery : NotificationProblem);
+			} else {
+				OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "", nullptr);
+			}
+		}
+	}
+
+	if (suppressed_types) {
+		/* If some notifications were suppressed, but just because of e.g. a downtime,
+		 * stash them into a notification types bitmask for maybe re-sending later.
+		 */
+
+		ObjectLock olock (this);
+		int suppressed_types_before (GetSuppressedNotifications());
+		int suppressed_types_after (suppressed_types_before | suppressed_types);
+
+		for (int conflict : {NotificationProblem | NotificationRecovery, NotificationFlappingStart | NotificationFlappingEnd}) {
+			/* E.g. problem and recovery notifications neutralize each other, so drop a pair only if both of its bits are set. */
+
+			if ((suppressed_types_after & conflict) == conflict) {
+				suppressed_types_after &= ~conflict;
+			}
+		}
+
+		if (suppressed_types_after != suppressed_types_before) {
+			SetSuppressedNotifications(suppressed_types_after);
+		}
 	}
 }
 
diff --git a/lib/icinga/checkable-notification.cpp b/lib/icinga/checkable-notification.cpp
index 568ff6c52..c00c32370 100644
--- a/lib/icinga/checkable-notification.cpp
+++ b/lib/icinga/checkable-notification.cpp
@@ -1,7 +1,9 @@
 /* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
 
 #include "icinga/checkable.hpp"
+#include "icinga/host.hpp"
 #include "icinga/icingaapplication.hpp"
+#include "icinga/service.hpp"
 #include "base/objectlock.hpp"
 #include "base/logger.hpp"
 #include "base/exception.hpp"
@@ -84,3 +86,102 @@ void Checkable::UnregisterNotification(const Notification::Ptr& notification)
 	boost::mutex::scoped_lock lock(m_NotificationMutex);
 	m_Notifications.erase(notification);
 }
+
+/**
+ * Re-evaluates the notification types stashed in a checkable's suppressed
+ * notifications bitmask. A type whose reason still applies and which isn't
+ * suppressed anymore is requested via OnNotificationsRequested and removed
+ * from the bitmask. A type whose reason doesn't apply anymore is removed
+ * without notifying. Everything else stays stashed.
+ *
+ * @param checkable The checkable to process. Nothing happens if it is inactive,
+ *                  paused or has notifications disabled.
+ */
+static void FireSuppressedNotifications(Checkable* checkable)
+{
+	if (!checkable->IsActive())
+		return;
+
+	if (checkable->IsPaused())
+		return;
+
+	if (!checkable->GetEnableNotifications())
+		return;
+
+	int suppressed_types (checkable->GetSuppressedNotifications());
+	if (!suppressed_types)
+		return;
+
+	int subtract = 0;
+
+	for (auto type : {NotificationProblem, NotificationRecovery, NotificationFlappingStart, NotificationFlappingEnd}) {
+		if (suppressed_types & type) {
+			bool still_applies = false;
+			auto cr (checkable->GetLastCheckResult());
+
+			/* Is the reason for this notification type still given? */
+			switch (type) {
+				case NotificationProblem:
+					still_applies = cr && !checkable->IsStateOK(cr->GetState()) && checkable->GetStateType() == StateTypeHard;
+					break;
+				case NotificationRecovery:
+					still_applies = cr && checkable->IsStateOK(cr->GetState());
+					break;
+				case NotificationFlappingStart:
+					still_applies = checkable->IsFlapping();
+					break;
+				case NotificationFlappingEnd:
+					still_applies = !checkable->IsFlapping();
+			}
+
+			if (still_applies) {
+				bool still_suppressed = true;
+
+				/* Same suppression criteria as in Checkable::ProcessCheckResult(). */
+				switch (type) {
+					case NotificationProblem:
+					case NotificationRecovery:
+						still_suppressed = !checkable->IsReachable(DependencyNotification) || checkable->IsInDowntime() || checkable->IsAcknowledged();
+						break;
+					case NotificationFlappingStart:
+					case NotificationFlappingEnd:
+						still_suppressed = checkable->IsInDowntime();
+				}
+
+				if (!still_suppressed) {
+					Checkable::OnNotificationsRequested(checkable, type, cr, "", "", nullptr);
+
+					subtract |= type;
+				}
+			} else {
+				subtract |= type;
+			}
+		}
+	}
+
+	if (subtract) {
+		ObjectLock olock (checkable);
+		int suppressed_types_before (checkable->GetSuppressedNotifications());
+		int suppressed_types_after (suppressed_types_before & ~subtract);
+
+		if (suppressed_types_after != suppressed_types_before) {
+			checkable->SetSuppressedNotifications(suppressed_types_after);
+		}
+	}
+}
+
+/**
+ * Re-sends all notifications previously suppressed by e.g. downtimes if the notification reason still applies.
+ *
+ * Walks all hosts and services. The timer argument is not used.
+ */
+void Checkable::FireSuppressedNotifications(const Timer * const&)
+{
+	for (auto& host : ConfigType::GetObjectsByType<Host>()) {
+		::FireSuppressedNotifications(host.get());
+	}
+
+	for (auto& service : ConfigType::GetObjectsByType<Service>()) {
+		::FireSuppressedNotifications(service.get());
+	}
+}
diff --git a/lib/icinga/checkable.cpp b/lib/icinga/checkable.cpp
index 0f1879dda..c4265d05f 100644
--- a/lib/icinga/checkable.cpp
+++ b/lib/icinga/checkable.cpp
@@ -7,6 +7,8 @@
 #include "base/objectlock.hpp"
 #include "base/utility.hpp"
 #include "base/exception.hpp"
+#include "base/timer.hpp"
+#include <boost/thread/once.hpp>
 
 using namespace icinga;
 
@@ -16,6 +18,8 @@ INITIALIZE_ONCE(&Checkable::StaticInitialize);
 boost::signals2::signal Checkable::OnAcknowledgementSet;
 boost::signals2::signal Checkable::OnAcknowledgementCleared;
 
+static Timer::Ptr l_CheckablesFireSuppressedNotifications;
+
 void Checkable::StaticInitialize()
 {
 	/* fixed downtime start */
@@ -65,6 +69,16 @@ void Checkable::Start(bool runtimeCreated)
 	}
 
 	ObjectImpl::Start(runtimeCreated);
+
+	/* Set up the timer re-sending suppressed notifications only once, not once per started checkable. */
+	static boost::once_flag once = BOOST_ONCE_INIT;
+
+	boost::call_once(once, []() {
+		l_CheckablesFireSuppressedNotifications = new Timer();
+		l_CheckablesFireSuppressedNotifications->SetInterval(5);
+		l_CheckablesFireSuppressedNotifications->OnTimerExpired.connect(&Checkable::FireSuppressedNotifications);
+		l_CheckablesFireSuppressedNotifications->Start();
+	});
 }
 
 void Checkable::AddGroup(const String& name)
diff --git a/lib/icinga/checkable.hpp b/lib/icinga/checkable.hpp
index fcfb3f74b..ee7212860 100644
--- a/lib/icinga/checkable.hpp
+++ b/lib/icinga/checkable.hpp
@@ -3,6 +3,7 @@
 #ifndef CHECKABLE_H
 #define CHECKABLE_H
 
+#include "base/timer.hpp"
 #include "icinga/i2-icinga.hpp"
 #include "icinga/checkable-ti.hpp"
 #include "icinga/timeperiod.hpp"
@@ -211,6 +212,8 @@ private:
 
 	static void NotifyDowntimeEnd(const Downtime::Ptr& downtime);
 
+	static void FireSuppressedNotifications(const Timer * const&);
+
 	/* Comments */
 	std::set m_Comments;
 	mutable boost::mutex m_CommentMutex;
diff --git a/lib/icinga/checkable.ti b/lib/icinga/checkable.ti
index 418236316..7969d6f46 100644
--- a/lib/icinga/checkable.ti
+++ b/lib/icinga/checkable.ti
@@ -154,6 +154,9 @@ abstract class Checkable : CustomVarObject
 	[state, no_user_view, no_user_modify] int flapping_buffer;
 	[state, no_user_view, no_user_modify] int flapping_index;
 	[state, protected] bool flapping;
+	[state, no_user_view, no_user_modify] int suppressed_notifications {
+		default {{{ return 0; }}}
+	};
 
 	[config, navigation] name(Endpoint) command_endpoint (CommandEndpointRaw) {
 		navigate {{{
diff --git a/lib/icinga/clusterevents.cpp b/lib/icinga/clusterevents.cpp
index 2c14a3550..313adb1eb 100644
--- a/lib/icinga/clusterevents.cpp
+++ b/lib/icinga/clusterevents.cpp
@@ -24,6 +24,7 @@ INITIALIZE_ONCE(&ClusterEvents::StaticInitialize);
 
 REGISTER_APIFUNCTION(CheckResult, event, &ClusterEvents::CheckResultAPIHandler);
 REGISTER_APIFUNCTION(SetNextCheck, event, &ClusterEvents::NextCheckChangedAPIHandler);
+REGISTER_APIFUNCTION(SetSuppressedNotifications, event, &ClusterEvents::SuppressedNotificationsChangedAPIHandler);
 REGISTER_APIFUNCTION(SetNextNotification, event, &ClusterEvents::NextNotificationChangedAPIHandler);
 REGISTER_APIFUNCTION(SetForceNextCheck, event, &ClusterEvents::ForceNextCheckChangedAPIHandler);
 REGISTER_APIFUNCTION(SetForceNextNotification, event, &ClusterEvents::ForceNextNotificationChangedAPIHandler);
@@ -38,6 +39,7 @@ void ClusterEvents::StaticInitialize()
 {
 	Checkable::OnNewCheckResult.connect(&ClusterEvents::CheckResultHandler);
 	Checkable::OnNextCheckChanged.connect(&ClusterEvents::NextCheckChangedHandler);
+	Checkable::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationsChangedHandler);
 	Notification::OnNextNotificationChanged.connect(&ClusterEvents::NextNotificationChangedHandler);
 	Checkable::OnForceNextCheckChanged.connect(&ClusterEvents::ForceNextCheckChangedHandler);
 	Checkable::OnForceNextNotificationChanged.connect(&ClusterEvents::ForceNextNotificationChangedHandler);
@@ -232,6 +234,80 @@ Value ClusterEvents::NextCheckChangedAPIHandler(const MessageOrigin::Ptr& origin
 	return Empty;
 }
 
+/**
+ * Relays a checkable's current suppressed notifications bitmask as an
+ * event::SetSuppressedNotifications message. Does nothing without an ApiListener.
+ */
+void ClusterEvents::SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin)
+{
+	ApiListener::Ptr listener = ApiListener::GetInstance();
+
+	if (!listener)
+		return;
+
+	Host::Ptr host;
+	Service::Ptr service;
+	tie(host, service) = GetHostService(checkable);
+
+	Dictionary::Ptr params = new Dictionary();
+	params->Set("host", host->GetName());
+	if (service)
+		params->Set("service", service->GetShortName());
+	params->Set("suppressed_notifications", checkable->GetSuppressedNotifications());
+
+	Dictionary::Ptr message = new Dictionary();
+	message->Set("jsonrpc", "2.0");
+	message->Set("method", "event::SetSuppressedNotifications");
+	message->Set("params", params);
+
+	listener->RelayMessage(origin, checkable, message, true);
+}
+
+/**
+ * Handles an incoming event::SetSuppressedNotifications message by applying the
+ * received bitmask to the referenced host or service. The message is discarded
+ * if the sender has no endpoint, if the host or service can't be found or if
+ * the sender's zone may not access the object.
+ *
+ * @returns Always Empty.
+ */
+Value ClusterEvents::SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
+{
+	Endpoint::Ptr endpoint = origin->FromClient->GetEndpoint();
+
+	if (!endpoint) {
+		Log(LogNotice, "ClusterEvents")
+			<< "Discarding 'suppressed notifications changed' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed).";
+		return Empty;
+	}
+
+	Host::Ptr host = Host::GetByName(params->Get("host"));
+
+	if (!host)
+		return Empty;
+
+	Checkable::Ptr checkable;
+
+	if (params->Contains("service"))
+		checkable = host->GetServiceByShortName(params->Get("service"));
+	else
+		checkable = host;
+
+	if (!checkable)
+		return Empty;
+
+	if (origin->FromZone && !origin->FromZone->CanAccessObject(checkable)) {
+		Log(LogNotice, "ClusterEvents")
+			<< "Discarding 'suppressed notifications changed' message for checkable '" << checkable->GetName()
+			<< "' from '" << origin->FromClient->GetIdentity() << "': Unauthorized access.";
+		return Empty;
+	}
+
+	checkable->SetSuppressedNotifications(params->Get("suppressed_notifications"), false, origin);
+
+	return Empty;
+}
+
 void ClusterEvents::NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin)
 {
 	ApiListener::Ptr listener = ApiListener::GetInstance();
diff --git a/lib/icinga/clusterevents.hpp b/lib/icinga/clusterevents.hpp
index 144155cc5..8dc6f48b9 100644
--- a/lib/icinga/clusterevents.hpp
+++ b/lib/icinga/clusterevents.hpp
@@ -26,6 +26,9 @@ public:
 	static void NextCheckChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin);
 	static Value NextCheckChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
 
+	static void SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin);
+	static Value SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
+
 	static void NextNotificationChangedHandler(const Notification::Ptr& notification, const MessageOrigin::Ptr& origin);
 	static Value NextNotificationChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
 