Merge pull request #8218 from efuss/redundancy_group

Introduce redundancy groups for Dependency Objects
This commit is contained in:
Julian Brost 2023-04-05 18:49:58 +02:00 committed by GitHub
commit 50018c1d2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 65 additions and 11 deletions

View File

@ -2738,6 +2738,27 @@ apply Dependency "internet" to Service {
}
```
### Redundancy Groups <a id="dependencies-redundancy-groups"></a>
Sometimes you want dependencies to accumulate,
i.e. to consider the parent reachable only if no dependency is violated.
Sometimes you want them to be regarded as redundant,
i.e. to consider the parent unreachable only if no dependency is fulfilled.
Think of a host connected to both a network and a storage switch vs. a host connected to redundant routers.
Sometimes you even want a mixture of both.
Think of a service like SSH depending on both LDAP and DNS to function,
while operating redundant LDAP servers as well as redundant DNS resolvers.
Before v2.12, Icinga regarded all dependencies as cumulative.
In v2.12 and v2.13, Icinga regarded all dependencies as redundant.
The latter led to unrelated services being inadvertently regarded as redundant to each other.
v2.14 restored the former behavior and allows overriding it.
I.e. all dependencies are regarded as essential for the parent by default.
Specifying the `redundancy_group` attribute with the same value for two dependencies of a child object
causes them to be regarded as redundant (only inside that redundancy group).
<!-- Keep this for compatibility -->
<a id="dependencies-apply-custom-attríbutes"></a>

View File

@ -201,6 +201,7 @@ Configuration Attributes:
parent\_service\_name | Object name | **Optional.** The parent service. If omitted, this dependency object is treated as host dependency.
child\_host\_name | Object name | **Required.** The child host.
child\_service\_name | Object name | **Optional.** The child service. If omitted, this dependency object is treated as host dependency.
redundancy\_group | String | **Optional.** Puts the dependency into a group of [mutually redundant ones](03-monitoring-basics.md#dependencies-redundancy-groups).
disable\_checks | Boolean | **Optional.** Whether to disable checks (i.e., don't schedule active checks and drop passive results) when this dependency fails. Defaults to false.
disable\_notifications | Boolean | **Optional.** Whether to disable notifications when this dependency fails. Defaults to true.
ignore\_soft\_states | Boolean | **Optional.** Whether to ignore soft states for the reachability calculation. Defaults to true.

View File

@ -3,6 +3,7 @@
#include "icinga/service.hpp"
#include "icinga/dependency.hpp"
#include "base/logger.hpp"
#include <unordered_map>
using namespace icinga;
@ -74,25 +75,42 @@ bool Checkable::IsReachable(DependencyType dt, Dependency::Ptr *failedDependency
auto deps = GetDependencies();
int countDeps = deps.size();
int countFailed = 0;
std::unordered_map<std::string, Dependency::Ptr> violated; // key: redundancy group, value: nullptr if satisfied, violating dependency otherwise
for (const Dependency::Ptr& dep : deps) {
if (!dep->IsAvailable(dt)) {
countFailed++;
std::string redundancy_group = dep->GetRedundancyGroup();
if (failedDependency)
*failedDependency = dep;
if (!dep->IsAvailable(dt)) {
if (redundancy_group.empty()) {
Log(LogDebug, "Checkable")
<< "Non-redundant dependency '" << dep->GetName() << "' failed for checkable '" << GetName() << "': Marking as unreachable.";
if (failedDependency)
*failedDependency = dep;
return false;
}
// tentatively mark this dependency group as failed unless it is already marked;
// so it either passed before (don't overwrite) or already failed (so don't care)
// note that std::unordered_map::insert() will not overwrite an existing entry
violated.insert(std::make_pair(redundancy_group, dep));
} else if (!redundancy_group.empty()) {
violated[redundancy_group] = nullptr;
}
}
/* If there are dependencies, and all of them failed, mark as unreachable. */
if (countDeps > 0 && countFailed == countDeps) {
auto violator = std::find_if(violated.begin(), violated.end(), [](auto& v) { return v.second != nullptr; });
if (violator != violated.end()) {
Log(LogDebug, "Checkable")
<< "All dependencies have failed for checkable '" << GetName() << "': Marking as unreachable.";
<< "All dependencies in redundancy group '" << violator->first << "' have failed for checkable '" << GetName() << "': Marking as unreachable.";
if (failedDependency)
*failedDependency = violator->second;
return false;
}
if (failedDependency)
*failedDependency = nullptr;

View File

@ -77,6 +77,8 @@ class Dependency : CustomVarObject < DependencyNameComposer
}}}
};
[config] String redundancy_group;
[config, navigation] name(TimePeriod) period (PeriodRaw) {
navigate {{{
return TimePeriod::GetByName(GetPeriodRaw());

View File

@ -70,14 +70,26 @@ BOOST_AUTO_TEST_CASE(multi_parent)
/* Test the reachability from this point.
* parentHost1 is DOWN, parentHost2 is UP.
* Expected result: childHost is reachable.
* Expected result: childHost is unreachable.
*/
parentHost1->SetStateRaw(ServiceCritical); // parent Host 1 DOWN
parentHost2->SetStateRaw(ServiceOK); // parent Host 2 UP
BOOST_CHECK(childHost->IsReachable() == false);
/* The only DNS server is DOWN.
* Expected result: childHost is unreachable.
*/
dep1->SetRedundancyGroup("DNS");
BOOST_CHECK(childHost->IsReachable() == false);
/* 1/2 DNS servers is DOWN.
* Expected result: childHost is reachable.
*/
dep2->SetRedundancyGroup("DNS");
BOOST_CHECK(childHost->IsReachable() == true);
/* parentHost1 is DOWN, parentHost2 is DOWN.
/* Both DNS servers are DOWN.
* Expected result: childHost is unreachable.
*/
parentHost1->SetStateRaw(ServiceCritical); // parent Host 1 DOWN