From 0bbe7a9b2f438035b836dc97f4a330082a71c659 Mon Sep 17 00:00:00 2001
From: Alvar Penning
Date: Fri, 15 Nov 2024 12:56:45 +0100
Subject: [PATCH] IcingaDB Check: Multiple Responsible Instances

By design, only one Icinga 2 instance should be responsible in the HA
context. If this promise is broken, the Icinga 2 IcingaDB check should
report it. The code did not check for invalid data in
icingadb:telemetry:heartbeat. With this change, it will go CRITICAL with
a descriptive message and report the actual number of
icingadb_responsible_instances in the performance data.
---
 lib/icingadb/icingadbchecktask.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/icingadb/icingadbchecktask.cpp b/lib/icingadb/icingadbchecktask.cpp
index f7c596457..cee93cd33 100644
--- a/lib/icingadb/icingadbchecktask.cpp
+++ b/lib/icingadb/icingadbchecktask.cpp
@@ -227,7 +227,9 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
 		perfdata->Add(new PerfdataValue("icinga2_heartbeat_age", heartbeatLag, false, "seconds", heartbeatLagWarning, Empty, 0));
 	}
 
-	if (weResponsible) {
+	if (weResponsible && otherResponsible) {
+		critmsgs << " Both this instance and another instance are responsible!";
+	} else if (weResponsible) {
 		idbokmsgs << "\n* Responsible";
 	} else if (otherResponsible) {
 		idbokmsgs << "\n* Not responsible, but another instance is";
@@ -235,7 +237,7 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
 		critmsgs << " No instance is responsible!";
 	}
 
-	perfdata->Add(new PerfdataValue("icingadb_responsible_instances", int(weResponsible || otherResponsible), false, "", Empty, Empty, 0, 1));
+	perfdata->Add(new PerfdataValue("icingadb_responsible_instances", int(weResponsible) + int(otherResponsible), false, "", Empty, Empty, 0, 1));
 
 	const auto clockDriftWarning (5);
 	const auto clockDriftCritical (30);