diff --git a/doc/16-upgrading-icinga-2.md b/doc/16-upgrading-icinga-2.md index 55314d4e5..b4a1c570d 100644 --- a/doc/16-upgrading-icinga-2.md +++ b/doc/16-upgrading-icinga-2.md @@ -10,6 +10,26 @@ follow the instructions for v2.7 too. ## Upgrading to v2.12 +### Behavior changes + +The behavior of multi parent [dependencies](03-monitoring-basics.md#dependencies) was fixed to e.g. render hosts unreachable when both router uplinks are down. + +Previous behaviour: + +1) parentHost1 DOWN, parentHost2 UP => childHost **not reachable** +2) parentHost1 DOWN, parentHost2 DOWN => childHost **not reachable** + +New behavior: + +1) parentHost1 DOWN, parentHost2 UP => childHost **reachable** +2) parentHost1 DOWN, parentHost2 DOWN => childHost **not reachable** + +Please review your [Dependency](09-object-types.md#objecttype-dependency) configuration as 1) may lead to +different results for + +- `last_reachable` via REST API query +- Notifications not suppressed by faulty reachability calculation anymore + ### Breaking changes As of v2.12 our [API](12-icinga2-api.md) URL endpoint [`/v1/actions/acknowledge-problem`](12-icinga2-api.md#icinga2-api-actions-acknowledge-problem) refuses acknowledging an already acknowledged checkable by overwriting the acknowledgement. diff --git a/lib/icinga/checkable-dependency.cpp b/lib/icinga/checkable-dependency.cpp index 10e39ab58..821c883a6 100644 --- a/lib/icinga/checkable-dependency.cpp +++ b/lib/icinga/checkable-dependency.cpp @@ -72,15 +72,27 @@ bool Checkable::IsReachable(DependencyType dt, Dependency::Ptr *failedDependency } } - for (const Dependency::Ptr& dep : GetDependencies()) { + auto deps = GetDependencies(); + + int countDeps = deps.size(); + int countFailed = 0; + + for (const Dependency::Ptr& dep : deps) { if (!dep->IsAvailable(dt)) { + countFailed++; + if (failedDependency) *failedDependency = dep; - - return false; } } + /* If there are dependencies, and all of them failed, mark as unreachable. */ + if (countDeps > 0 && countFailed == countDeps) { + Log(LogDebug, "Checkable") + << "All dependencies have failed for checkable '" << GetName() << "': Marking as unreachable."; + + return false; + } if (failedDependency) *failedDependency = nullptr; diff --git a/lib/icinga/dependency.cpp b/lib/icinga/dependency.cpp index bfb91d226..7dd90f5e5 100644 --- a/lib/icinga/dependency.cpp +++ b/lib/icinga/dependency.cpp @@ -201,3 +201,13 @@ void Dependency::ValidateStates(const Lazy& lvalue, const Validation BOOST_THROW_EXCEPTION(ValidationError(this, { "states" }, "State filter is invalid for service dependency.")); } +void Dependency::SetParent(intrusive_ptr parent) +{ + m_Parent = parent; +} + +void Dependency::SetChild(intrusive_ptr child) +{ + m_Child = child; +} + diff --git a/lib/icinga/dependency.hpp b/lib/icinga/dependency.hpp index 6a80d84f5..bc3ae5388 100644 --- a/lib/icinga/dependency.hpp +++ b/lib/icinga/dependency.hpp @@ -37,6 +37,10 @@ public: static void EvaluateApplyRules(const intrusive_ptr& host); static void EvaluateApplyRules(const intrusive_ptr& service); + /* Note: Only use them for unit test mocks. Prefer OnConfigLoaded(). */ + void SetParent(intrusive_ptr parent); + void SetChild(intrusive_ptr child); + protected: void OnConfigLoaded() override; void OnAllConfigLoaded() override; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bfcd83ffd..8bb12e365 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -25,6 +25,7 @@ set(base_test_SOURCES base-value.cpp config-ops.cpp icinga-checkresult.cpp + icinga-dependencies.cpp icinga-legacytimeperiod.cpp icinga-macros.cpp icinga-notification.cpp @@ -126,6 +127,7 @@ add_boost_test(base icinga_checkresult/service_3attempts icinga_checkresult/host_flapping_notification icinga_checkresult/service_flapping_notification + icinga_dependencies/multi_parent icinga_notification/strings icinga_notification/state_filter icinga_notification/type_filter diff --git a/test/icinga-checkresult.cpp b/test/icinga-checkresult.cpp index 7db7fffb3..86eea5c49 100644 --- a/test/icinga-checkresult.cpp +++ b/test/icinga-checkresult.cpp @@ -57,6 +57,7 @@ BOOST_AUTO_TEST_CASE(host_1attempt) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "First check result (unknown)" << std::endl; @@ -64,6 +65,7 @@ BOOST_AUTO_TEST_CASE(host_1attempt) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationProblem); std::cout << "Second check result (ok)" << std::endl; @@ -71,6 +73,7 @@ BOOST_AUTO_TEST_CASE(host_1attempt) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationRecovery); std::cout << "Third check result (critical)" << std::endl; @@ -78,6 +81,7 @@ BOOST_AUTO_TEST_CASE(host_1attempt) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationProblem); std::cout << "Fourth check result (ok)" << std::endl; @@ -85,6 +89,7 @@ BOOST_AUTO_TEST_CASE(host_1attempt) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationRecovery); c.disconnect(); @@ -106,6 +111,7 @@ BOOST_AUTO_TEST_CASE(host_2attempts) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "First check result (unknown)" << std::endl; @@ -113,6 +119,7 @@ BOOST_AUTO_TEST_CASE(host_2attempts) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeSoft); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "Second check result (critical)" << std::endl; @@ -120,6 +127,7 @@ BOOST_AUTO_TEST_CASE(host_2attempts) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationProblem); std::cout << "Third check result (ok)" << std::endl; @@ -127,6 +135,7 @@ BOOST_AUTO_TEST_CASE(host_2attempts) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationRecovery); std::cout << "Fourth check result (critical)" << std::endl; @@ -134,6 +143,7 @@ BOOST_AUTO_TEST_CASE(host_2attempts) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeSoft); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "Fifth check result (ok)" << std::endl; @@ -141,6 +151,7 @@ BOOST_AUTO_TEST_CASE(host_2attempts) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); c.disconnect(); @@ -162,6 +173,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "First check result (unknown)" << std::endl; @@ -169,6 +181,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeSoft); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "Second check result (critical)" << std::endl; @@ -176,6 +189,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeSoft); BOOST_CHECK(host->GetCheckAttempt() == 2); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "Third check result (critical)" << std::endl; @@ -183,6 +197,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationProblem); std::cout << "Fourth check result (ok)" << std::endl; @@ -190,6 +205,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, true, NotificationRecovery); std::cout << "Fifth check result (critical)" << std::endl; @@ -197,6 +213,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) BOOST_CHECK(host->GetState() == HostDown); BOOST_CHECK(host->GetStateType() == StateTypeSoft); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); std::cout << "Sixth check result (ok)" << std::endl; @@ -204,6 +221,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) BOOST_CHECK(host->GetState() == HostUp); BOOST_CHECK(host->GetStateType() == StateTypeHard); BOOST_CHECK(host->GetCheckAttempt() == 1); + BOOST_CHECK(host->IsReachable() == true); CheckNotification(host, false); c.disconnect(); @@ -225,6 +243,7 @@ BOOST_AUTO_TEST_CASE(service_1attempt) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "First check result (unknown)" << std::endl; @@ -232,6 +251,7 @@ BOOST_AUTO_TEST_CASE(service_1attempt) BOOST_CHECK(service->GetState() == ServiceUnknown); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationProblem); std::cout << "Second check result (ok)" << std::endl; @@ -239,6 +259,7 @@ BOOST_AUTO_TEST_CASE(service_1attempt) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationRecovery); std::cout << "Third check result (critical)" << std::endl; @@ -246,6 +267,7 @@ BOOST_AUTO_TEST_CASE(service_1attempt) BOOST_CHECK(service->GetState() == ServiceCritical); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationProblem); std::cout << "Fourth check result (ok)" << std::endl; @@ -253,6 +275,7 @@ BOOST_AUTO_TEST_CASE(service_1attempt) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationRecovery); c.disconnect(); @@ -274,6 +297,7 @@ BOOST_AUTO_TEST_CASE(service_2attempts) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "First check result (unknown)" << std::endl; @@ -281,6 +305,7 @@ BOOST_AUTO_TEST_CASE(service_2attempts) BOOST_CHECK(service->GetState() == ServiceUnknown); BOOST_CHECK(service->GetStateType() == StateTypeSoft); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "Second check result (critical)" << std::endl; @@ -288,6 +313,7 @@ BOOST_AUTO_TEST_CASE(service_2attempts) BOOST_CHECK(service->GetState() == ServiceCritical); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationProblem); std::cout << "Third check result (ok)" << std::endl; @@ -295,6 +321,7 @@ BOOST_AUTO_TEST_CASE(service_2attempts) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationRecovery); std::cout << "Fourth check result (critical)" << std::endl; @@ -302,6 +329,7 @@ BOOST_AUTO_TEST_CASE(service_2attempts) BOOST_CHECK(service->GetState() == ServiceCritical); BOOST_CHECK(service->GetStateType() == StateTypeSoft); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "Fifth check result (ok)" << std::endl; @@ -309,6 +337,7 @@ BOOST_AUTO_TEST_CASE(service_2attempts) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); c.disconnect(); @@ -330,6 +359,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "First check result (unknown)" << std::endl; @@ -337,6 +367,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) BOOST_CHECK(service->GetState() == ServiceUnknown); BOOST_CHECK(service->GetStateType() == StateTypeSoft); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "Second check result (critical)" << std::endl; @@ -344,6 +375,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) BOOST_CHECK(service->GetState() == ServiceCritical); BOOST_CHECK(service->GetStateType() == StateTypeSoft); BOOST_CHECK(service->GetCheckAttempt() == 2); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "Third check result (critical)" << std::endl; @@ -351,6 +383,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) BOOST_CHECK(service->GetState() == ServiceCritical); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationProblem); std::cout << "Fourth check result (ok)" << std::endl; @@ -358,6 +391,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, true, NotificationRecovery); std::cout << "Fifth check result (critical)" << std::endl; @@ -365,6 +399,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) BOOST_CHECK(service->GetState() == ServiceCritical); BOOST_CHECK(service->GetStateType() == StateTypeSoft); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); std::cout << "Sixth check result (ok)" << std::endl; @@ -372,6 +407,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) BOOST_CHECK(service->GetState() == ServiceOK); BOOST_CHECK(service->GetStateType() == StateTypeHard); BOOST_CHECK(service->GetCheckAttempt() == 1); + BOOST_CHECK(service->IsReachable() == true); CheckNotification(service, false); c.disconnect(); diff --git a/test/icinga-dependencies.cpp b/test/icinga-dependencies.cpp new file mode 100644 index 000000000..b31f540b1 --- /dev/null +++ b/test/icinga-dependencies.cpp @@ -0,0 +1,89 @@ +/* Icinga 2 | (c) 2020 Icinga GmbH | GPLv2+ */ + +#include "icinga/host.hpp" +#include "icinga/dependency.hpp" +#include +#include + +using namespace icinga; + +BOOST_AUTO_TEST_SUITE(icinga_dependencies) + +BOOST_AUTO_TEST_CASE(multi_parent) +{ + /* One child host, two parent hosts. Simulate multi-parent dependencies. */ + std::cout << "Testing reachability for multi parent dependencies." << std::endl; + + /* + * Our mock requires: + * - SetParent/SetChild functions for the dependency + * - Parent objects need a CheckResult object + * - Dependencies need a StateFilter + */ + Host::Ptr parentHost1 = new Host(); + parentHost1->SetActive(true); + parentHost1->SetMaxCheckAttempts(1); + parentHost1->Activate(); + parentHost1->SetAuthority(true); + parentHost1->SetStateRaw(ServiceCritical); + parentHost1->SetStateType(StateTypeHard); + parentHost1->SetLastCheckResult(new CheckResult()); + + Host::Ptr parentHost2 = new Host(); + parentHost2->SetActive(true); + parentHost2->SetMaxCheckAttempts(1); + parentHost2->Activate(); + parentHost2->SetAuthority(true); + parentHost2->SetStateRaw(ServiceOK); + parentHost2->SetStateType(StateTypeHard); + parentHost2->SetLastCheckResult(new CheckResult()); + + Host::Ptr childHost = new Host(); + childHost->SetActive(true); + childHost->SetMaxCheckAttempts(1); + childHost->Activate(); + childHost->SetAuthority(true); + childHost->SetStateRaw(ServiceOK); + childHost->SetStateType(StateTypeHard); + + /* Build the dependency tree. */ + Dependency::Ptr dep1 = new Dependency(); + + dep1->SetParent(parentHost1); + dep1->SetChild(childHost); + dep1->SetStateFilter(StateFilterUp); + + // Reverse dependencies + childHost->AddDependency(dep1); + parentHost1->AddReverseDependency(dep1); + + Dependency::Ptr dep2 = new Dependency(); + + dep2->SetParent(parentHost2); + dep2->SetChild(childHost); + dep2->SetStateFilter(StateFilterUp); + + // Reverse dependencies + childHost->AddDependency(dep2); + parentHost2->AddReverseDependency(dep2); + + + /* Test the reachability from this point. + * parentHost1 is DOWN, parentHost2 is UP. + * Expected result: childHost is reachable. + */ + parentHost1->SetStateRaw(ServiceCritical); // parent Host 1 DOWN + parentHost2->SetStateRaw(ServiceOK); // parent Host 2 UP + + BOOST_CHECK(childHost->IsReachable() == true); + + /* parentHost1 is DOWN, parentHost2 is DOWN. + * Expected result: childHost is unreachable. + */ + parentHost1->SetStateRaw(ServiceCritical); // parent Host 1 DOWN + parentHost2->SetStateRaw(ServiceCritical); // parent Host 2 DOWN + + BOOST_CHECK(childHost->IsReachable() == false); +} + +BOOST_AUTO_TEST_SUITE_END()