2019-02-25 14:48:22 +01:00
|
|
|
/* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
|
2013-02-09 15:20:10 +01:00
|
|
|
|
2014-05-25 16:23:35 +02:00
|
|
|
#include "icinga/checkable.hpp"
|
2019-07-02 11:23:16 +02:00
|
|
|
#include "icinga/host.hpp"
|
2014-05-25 16:23:35 +02:00
|
|
|
#include "icinga/icingaapplication.hpp"
|
2019-07-02 11:23:16 +02:00
|
|
|
#include "icinga/service.hpp"
|
2019-07-08 18:31:42 +02:00
|
|
|
#include "base/dictionary.hpp"
|
2014-05-25 16:23:35 +02:00
|
|
|
#include "base/objectlock.hpp"
|
2014-10-19 14:21:12 +02:00
|
|
|
#include "base/logger.hpp"
|
2014-05-25 16:23:35 +02:00
|
|
|
#include "base/exception.hpp"
|
|
|
|
#include "base/context.hpp"
|
|
|
|
#include "base/convert.hpp"
|
2020-12-01 18:22:02 +01:00
|
|
|
#include "base/lazy-init.hpp"
|
2019-07-08 18:31:42 +02:00
|
|
|
#include "remote/apilistener.hpp"
|
2013-02-09 15:20:10 +01:00
|
|
|
|
|
|
|
using namespace icinga;
|
|
|
|
|
2014-04-03 15:36:13 +02:00
|
|
|
/*
 * Definitions of the static notification signals of Checkable.
 *
 * NOTE(review): judging by the names and argument lists, the first signal covers a whole set of
 * users while the second one covers a single user (and carries one extra String argument).
 * The emitting code is not part of this file section - confirm there before relying on this.
 */
boost::signals2::signal<void (
	const Notification::Ptr&, const Checkable::Ptr&, const std::set<User::Ptr>&,
	const NotificationType&, const CheckResult::Ptr&, const String&, const String&,
	const MessageOrigin::Ptr&
)> Checkable::OnNotificationSentToAllUsers;

boost::signals2::signal<void (
	const Notification::Ptr&, const Checkable::Ptr&, const User::Ptr&,
	const NotificationType&, const CheckResult::Ptr&, const String&, const String&, const String&,
	const MessageOrigin::Ptr&
)> Checkable::OnNotificationSentToUser;
|
2014-04-03 15:36:13 +02:00
|
|
|
|
2018-01-04 04:25:35 +01:00
|
|
|
void Checkable::ResetNotificationNumbers()
|
2013-07-18 17:04:09 +02:00
|
|
|
{
|
2016-08-25 06:19:44 +02:00
|
|
|
for (const Notification::Ptr& notification : GetNotifications()) {
|
2013-07-18 17:04:09 +02:00
|
|
|
ObjectLock olock(notification);
|
|
|
|
notification->ResetNotificationNumber();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-03 15:36:13 +02:00
|
|
|
/**
 * Hands a notification event over to all notification objects of this checkable.
 *
 * The "force next notification" flag is read and then cleared. If notifications are disabled
 * (application-wide or on this checkable) and the event is not forced, nothing happens.
 * Otherwise each notification object either executes the notification right away or gets the
 * event appended to its stashed notifications (see the comments in the loop).
 *
 * @param type The notification type.
 * @param cr The check result handed to the notification objects.
 * @param author The author handed to the notification objects.
 * @param text The text handed to the notification objects.
 */
void Checkable::SendNotifications(NotificationType type, const CheckResult::Ptr& cr, const String& author, const String& text)
{
	String checkableName = GetName();

	CONTEXT("Sending notifications for object '" << checkableName << "'");

	// Consume the one-shot "force" flag before doing anything else.
	bool force = GetForceNextNotification();

	SetForceNextNotification(false);

	const bool notificationsDisabled = !IcingaApplication::GetInstance()->GetEnableNotifications() || !GetEnableNotifications();

	if (notificationsDisabled && !force) {
		Log(LogInformation, "Checkable")
			<< "Notifications are disabled for checkable '" << checkableName << "'.";

		return;
	}

	const std::set<Notification::Ptr> notifications = GetNotifications();

	String notificationTypeName = Notification::NotificationTypeToString(type);

	// Nothing to do without notification objects.
	if (notifications.empty()) {
		Log(LogNotice, "Checkable")
			<< "Skipping checkable '" << checkableName << "' which doesn't have any notification objects configured.";

		return;
	}

	Log(LogInformation, "Checkable")
		<< "Checkable '" << checkableName << "' has " << notifications.size()
		<< " notification(s). Checking filters for type '" << notificationTypeName << "', sends will be logged.";

	for (const Notification::Ptr& notification : notifications) {
		if (!ApiListener::UpdatedObjectAuthority()) {
			// Cold startup phase: object authority is not known yet, so keep the event for later.
			Log(LogNotice, "Notification")
				<< "Notification '" << notification->GetName() << "': object authority hasn't been updated, yet. Stashing notification.";

			notification->GetStashedNotifications()->Add(new Dictionary({
				{"notification_type", type},
				{"cr", cr},
				{"force", force},
				{"reminder", false},
				{"author", author},
				{"text", text}
			}));

			continue;
		}

		try {
			if (notification->IsPaused()) {
				Log(LogNotice, "Notification")
					<< "Notification '" << notification->GetName() << "': HA cluster active, this endpoint does not have the authority (paused=true). Skipping.";

				continue;
			}

			auto stashedNotifications (notification->GetStashedNotifications());

			if (!stashedNotifications->GetLength()) {
				// No backlog: execute immediately.
				notification->BeginExecuteNotification(type, cr, force, false, author, text);

				continue;
			}

			// There is a backlog from the cold startup phase: queue behind it to keep the order.
			Log(LogNotice, "Notification")
				<< "Notification '" << notification->GetName() << "': there are some stashed notifications. Stashing notification to preserve order.";

			stashedNotifications->Add(new Dictionary({
				{"notification_type", type},
				{"cr", cr},
				{"force", force},
				{"reminder", false},
				{"author", author},
				{"text", text}
			}));
		} catch (const std::exception& ex) {
			// A failure of one notification object must not keep the remaining ones from being processed.
			Log(LogWarning, "Checkable")
				<< "Exception occurred during notification '" << notification->GetName() << "' for checkable '"
				<< GetName() << "': " << DiagnosticInformation(ex, false);
		}
	}
}
|
|
|
|
|
2018-01-04 04:25:35 +01:00
|
|
|
std::set<Notification::Ptr> Checkable::GetNotifications() const
|
2013-02-27 15:23:25 +01:00
|
|
|
{
|
2021-02-02 10:16:04 +01:00
|
|
|
std::unique_lock<std::mutex> lock(m_NotificationMutex);
|
2013-08-20 11:06:04 +02:00
|
|
|
return m_Notifications;
|
2013-02-27 15:23:25 +01:00
|
|
|
}
|
|
|
|
|
2015-08-20 17:18:48 +02:00
|
|
|
/**
 * Adds a notification object to this checkable's set of notifications (under m_NotificationMutex).
 *
 * @param notification The notification object to add.
 */
void Checkable::RegisterNotification(const Notification::Ptr& notification)
{
	std::lock_guard<std::mutex> guard (m_NotificationMutex);

	m_Notifications.insert(notification);
}
|
|
|
|
|
2015-08-20 17:18:48 +02:00
|
|
|
/**
 * Removes a notification object from this checkable's set of notifications (under m_NotificationMutex).
 *
 * @param notification The notification object to remove.
 */
void Checkable::UnregisterNotification(const Notification::Ptr& notification)
{
	std::lock_guard<std::mutex> guard (m_NotificationMutex);

	m_Notifications.erase(notification);
}
|
2019-07-02 11:23:16 +02:00
|
|
|
|
2022-01-28 15:15:44 +01:00
|
|
|
void Checkable::FireSuppressedNotifications()
|
2019-07-02 11:23:16 +02:00
|
|
|
{
|
2022-01-28 15:15:44 +01:00
|
|
|
if (!IsActive())
|
2019-07-02 11:23:16 +02:00
|
|
|
return;
|
|
|
|
|
2022-01-28 15:15:44 +01:00
|
|
|
if (IsPaused())
|
2019-07-02 11:23:16 +02:00
|
|
|
return;
|
|
|
|
|
2022-01-28 15:15:44 +01:00
|
|
|
if (!GetEnableNotifications())
|
2019-07-02 11:23:16 +02:00
|
|
|
return;
|
|
|
|
|
2022-01-28 15:15:44 +01:00
|
|
|
int suppressed_types (GetSuppressedNotifications());
|
2019-07-02 11:23:16 +02:00
|
|
|
if (!suppressed_types)
|
|
|
|
return;
|
|
|
|
|
|
|
|
int subtract = 0;
|
|
|
|
|
2020-12-01 18:22:02 +01:00
|
|
|
{
|
2022-01-28 15:15:44 +01:00
|
|
|
LazyInit<bool> wasLastParentRecoveryRecent ([this]() {
|
|
|
|
auto cr (GetLastCheckResult());
|
2020-12-01 18:22:02 +01:00
|
|
|
|
|
|
|
if (!cr) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto threshold (cr->GetExecutionStart());
|
2022-04-19 15:54:51 +02:00
|
|
|
Host::Ptr host;
|
|
|
|
Service::Ptr service;
|
|
|
|
tie(host, service) = GetHostService(this);
|
|
|
|
|
|
|
|
if (service) {
|
|
|
|
ObjectLock oLock (host);
|
|
|
|
|
|
|
|
if (!host->GetProblem() && host->GetLastStateChange() >= threshold) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2020-12-01 18:22:02 +01:00
|
|
|
|
2022-01-28 15:15:44 +01:00
|
|
|
for (auto& dep : GetDependencies()) {
|
2020-12-01 18:22:02 +01:00
|
|
|
auto parent (dep->GetParent());
|
|
|
|
ObjectLock oLock (parent);
|
|
|
|
|
|
|
|
if (!parent->GetProblem() && parent->GetLastStateChange() >= threshold) {
|
|
|
|
return true;
|
2019-07-02 11:23:16 +02:00
|
|
|
}
|
2020-12-01 18:22:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
});
|
|
|
|
|
2022-01-28 15:15:38 +01:00
|
|
|
if (suppressed_types & (NotificationProblem|NotificationRecovery)) {
|
2022-01-28 15:15:44 +01:00
|
|
|
CheckResult::Ptr cr = GetLastCheckResult();
|
|
|
|
NotificationType type = cr && IsStateOK(cr->GetState()) ? NotificationRecovery : NotificationProblem;
|
|
|
|
bool state_suppressed = NotificationReasonSuppressed(NotificationProblem) || NotificationReasonSuppressed(NotificationRecovery);
|
2022-01-28 15:15:38 +01:00
|
|
|
|
|
|
|
/* Only process (i.e. send or dismiss) suppressed state notifications if the following conditions are met:
|
|
|
|
*
|
|
|
|
* 1. State notifications are not suppressed at the moment. State notifications must only be removed from
|
|
|
|
* the suppressed notifications bitset after the reason for the suppression is gone as these bits are
|
|
|
|
* used as a marker for when to set the state_before_suppression attribute.
|
|
|
|
* 2. The checkable is in a hard state. Soft states represent a state where we are not certain yet about
|
|
|
|
* the actual state and wait with sending notifications. If we want to immediately send a notification,
|
|
|
|
* we might send a recovery notification for something that just started failing or a problem
|
|
|
|
* notification which might be for an intermittent problem that would have never received a
|
|
|
|
* notification if there was no suppression as it still was in a soft state. Both cases aren't ideal so
|
|
|
|
* better wait until we are certain.
|
|
|
|
* 3. The checkable isn't likely checked soon. For example, if a downtime ended, give the checkable a
|
|
|
|
* chance to recover afterwards before sending a notification.
|
|
|
|
* 4. No parent recovered recently. Similar to the previous condition, give the checkable a chance to
|
|
|
|
* recover after one of its dependencies recovered before sending a notification.
|
|
|
|
*
|
|
|
|
* If any of these conditions is not met, processing the suppressed notification is further delayed.
|
|
|
|
*/
|
2022-01-28 15:15:44 +01:00
|
|
|
if (!state_suppressed && GetStateType() == StateTypeHard && !IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) {
|
|
|
|
if (NotificationReasonApplies(type)) {
|
|
|
|
Checkable::OnNotificationsRequested(this, type, cr, "", "", nullptr);
|
2022-01-28 15:15:38 +01:00
|
|
|
}
|
|
|
|
subtract |= NotificationRecovery|NotificationProblem;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto type : {NotificationFlappingStart, NotificationFlappingEnd}) {
|
2020-12-01 18:22:02 +01:00
|
|
|
if (suppressed_types & type) {
|
2022-01-28 15:15:44 +01:00
|
|
|
bool still_applies = NotificationReasonApplies(type);
|
2020-12-01 18:22:02 +01:00
|
|
|
|
|
|
|
if (still_applies) {
|
2022-01-28 15:15:44 +01:00
|
|
|
if (!NotificationReasonSuppressed(type) && !IsLikelyToBeCheckedSoon() && !wasLastParentRecoveryRecent.Get()) {
|
|
|
|
Checkable::OnNotificationsRequested(this, type, GetLastCheckResult(), "", "", nullptr);
|
2019-07-02 11:23:16 +02:00
|
|
|
|
2020-12-01 18:22:02 +01:00
|
|
|
subtract |= type;
|
|
|
|
}
|
|
|
|
} else {
|
2019-07-02 11:23:16 +02:00
|
|
|
subtract |= type;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (subtract) {
|
2022-01-28 15:15:44 +01:00
|
|
|
ObjectLock olock (this);
|
2019-07-10 11:51:58 +02:00
|
|
|
|
2022-01-28 15:15:44 +01:00
|
|
|
int suppressed_types_before (GetSuppressedNotifications());
|
2019-07-02 11:23:16 +02:00
|
|
|
int suppressed_types_after (suppressed_types_before & ~subtract);
|
|
|
|
|
|
|
|
if (suppressed_types_after != suppressed_types_before) {
|
2022-01-28 15:15:44 +01:00
|
|
|
SetSuppressedNotifications(suppressed_types_after);
|
2019-07-02 11:23:16 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Re-sends all notifications previously suppressed by e.g. downtimes if the notification reason still applies.
|
|
|
|
*/
|
2022-01-28 15:15:44 +01:00
|
|
|
void Checkable::FireSuppressedNotificationsTimer(const Timer * const&)
|
2019-07-02 11:23:16 +02:00
|
|
|
{
|
|
|
|
for (auto& host : ConfigType::GetObjectsByType<Host>()) {
|
2022-01-28 15:15:44 +01:00
|
|
|
host->FireSuppressedNotifications();
|
2019-07-02 11:23:16 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
for (auto& service : ConfigType::GetObjectsByType<Service>()) {
|
2022-01-28 15:15:44 +01:00
|
|
|
service->FireSuppressedNotifications();
|
2019-07-02 11:23:16 +02:00
|
|
|
}
|
|
|
|
}
|
2020-07-29 17:13:41 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns whether sending a notification of type type right now would represent *this' current state correctly.
|
|
|
|
*
|
|
|
|
* @param type The type of notification to send (or not to send).
|
|
|
|
*
|
|
|
|
* @return Whether to send the notification.
|
|
|
|
*/
|
|
|
|
bool Checkable::NotificationReasonApplies(NotificationType type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case NotificationProblem:
|
|
|
|
{
|
|
|
|
auto cr (GetLastCheckResult());
|
2022-01-28 15:15:38 +01:00
|
|
|
return cr && !IsStateOK(cr->GetState()) && cr->GetState() != GetStateBeforeSuppression();
|
2020-07-29 17:13:41 +02:00
|
|
|
}
|
|
|
|
case NotificationRecovery:
|
|
|
|
{
|
|
|
|
auto cr (GetLastCheckResult());
|
2022-01-28 15:15:38 +01:00
|
|
|
return cr && IsStateOK(cr->GetState()) && cr->GetState() != GetStateBeforeSuppression();
|
2020-07-29 17:13:41 +02:00
|
|
|
}
|
|
|
|
case NotificationFlappingStart:
|
|
|
|
return IsFlapping();
|
|
|
|
case NotificationFlappingEnd:
|
|
|
|
return !IsFlapping();
|
|
|
|
default:
|
|
|
|
VERIFY(!"Checkable#NotificationReasonStillApplies(): given type not implemented");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-27 17:03:34 +01:00
|
|
|
/**
|
2021-01-27 15:43:37 +01:00
|
|
|
* Checks if notifications of a given type should be suppressed for this Checkable at the moment.
|
2020-11-27 17:03:34 +01:00
|
|
|
*
|
2021-01-27 15:43:37 +01:00
|
|
|
* @param type The notification type for which to query the suppression status.
|
2020-11-27 17:03:34 +01:00
|
|
|
*
|
2021-01-27 15:43:37 +01:00
|
|
|
* @return true if no notification of this type should be sent.
|
2020-11-27 17:03:34 +01:00
|
|
|
*/
|
|
|
|
bool Checkable::NotificationReasonSuppressed(NotificationType type)
|
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case NotificationProblem:
|
|
|
|
case NotificationRecovery:
|
|
|
|
return !IsReachable(DependencyNotification) || IsInDowntime() || IsAcknowledged();
|
|
|
|
case NotificationFlappingStart:
|
|
|
|
case NotificationFlappingEnd:
|
|
|
|
return IsInDowntime();
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-29 17:13:41 +02:00
|
|
|
/**
|
|
|
|
* E.g. we're going to re-send a stashed problem notification as *this is still not ok.
|
|
|
|
* But if the next check result recovers *this soon, we would send a recovery notification soon after the problem one.
|
|
|
|
* This is not desired, especially for lots of checkables at once.
|
|
|
|
* Because of that if there's likely to be a check result soon,
|
|
|
|
* we delay the re-sending of the stashed notification until the next check.
|
|
|
|
* That check either doesn't change anything and we finally re-send the stashed problem notification
|
|
|
|
* or recovers *this and we drop the stashed notification.
|
|
|
|
*
|
|
|
|
* @return Whether *this is likely to be checked soon
|
|
|
|
*/
|
|
|
|
bool Checkable::IsLikelyToBeCheckedSoon()
|
|
|
|
{
|
|
|
|
if (!GetEnableActiveChecks()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// One minute unless the check interval is too short so the next check will always run during the next minute.
|
|
|
|
auto threshold (GetCheckInterval() - 10);
|
|
|
|
|
|
|
|
if (threshold > 60) {
|
|
|
|
threshold = 60;
|
|
|
|
} else if (threshold < 0) {
|
|
|
|
threshold = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return GetNextCheck() <= Utility::GetTime() + threshold;
|
|
|
|
}
|