2022-06-01 11:38:17 +02:00
|
|
|
/* Icinga 2 | (c) 2022 Icinga GmbH | GPLv2+ */
|
|
|
|
|
|
|
|
#include "icingadb/icingadbchecktask.hpp"
|
|
|
|
#include "icinga/host.hpp"
|
|
|
|
#include "icinga/checkcommand.hpp"
|
|
|
|
#include "icinga/macroprocessor.hpp"
|
2022-06-24 12:27:07 +02:00
|
|
|
#include "icinga/pluginutility.hpp"
|
2022-06-01 11:38:17 +02:00
|
|
|
#include "base/function.hpp"
|
|
|
|
#include "base/utility.hpp"
|
|
|
|
#include "base/perfdatavalue.hpp"
|
|
|
|
#include "base/convert.hpp"
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
using namespace icinga;
|
|
|
|
|
|
|
|
// Registers IcingadbCheckTask::ScriptFunc as "IcingadbCheck" in the "Internal" namespace; the string
// lists the names of the function's arguments, separated by colons.
// NOTE(review): the exact registration semantics come from the REGISTER_FUNCTION_NONCONST macro,
// which is not visible here (presumably base/function.hpp) - confirm there.
REGISTER_FUNCTION_NONCONST(Internal, IcingadbCheck, &IcingadbCheckTask::ScriptFunc, "checkable:cr:resolvedMacros:useResolvedMacros");
|
|
|
|
|
|
|
|
static void ReportIcingadbCheck(
|
|
|
|
const Checkable::Ptr& checkable, const CheckCommand::Ptr& commandObj,
|
|
|
|
const CheckResult::Ptr& cr, String output, ServiceState state)
|
|
|
|
{
|
|
|
|
if (Checkable::ExecuteCommandProcessFinishedHandler) {
|
|
|
|
double now = Utility::GetTime();
|
|
|
|
ProcessResult pr;
|
|
|
|
pr.PID = -1;
|
|
|
|
pr.Output = std::move(output);
|
|
|
|
pr.ExecutionStart = now;
|
|
|
|
pr.ExecutionEnd = now;
|
|
|
|
pr.ExitStatus = state;
|
|
|
|
|
|
|
|
Checkable::ExecuteCommandProcessFinishedHandler(commandObj->GetName(), pr);
|
|
|
|
} else {
|
|
|
|
cr->SetState(state);
|
|
|
|
cr->SetOutput(output);
|
|
|
|
checkable->ProcessCheckResult(cr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline
|
|
|
|
double GetXMessageTs(const Array::Ptr& xMessage)
|
|
|
|
{
|
|
|
|
return Convert::ToLong(String(xMessage->Get(0)).Split("-")[0]) / 1000.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Check function behind the Icinga DB health check.
 *
 * Resolves the icingadb_* macros, looks up the IcingaDB object named by $icingadb_name$ and
 * asks Redis (through that object's connection) for the server time, the Icinga DB heartbeat
 * and statistics streams and the first pending entry of the runtime and history streams.
 * Together with the counters of the local Redis connection this yields the state, the plugin
 * output and the performance data, which are handed over to ReportIcingadbCheck().
 *
 * Reported early, without performance data:
 * - UNKNOWN if $icingadb_name$ is empty or names no IcingaDB object,
 * - CRITICAL if there is no connected Redis connection, the queries fail or no heartbeat exists.
 *
 * @param checkable The checkable the check is executed for.
 * @param cr The check result to complete and report.
 * @param resolvedMacros Passed through to MacroProcessor::ResolveMacros().
 * @param useResolvedMacros Passed through to MacroProcessor::ResolveMacros(). If resolvedMacros
 *        is set and this flag is false, the function returns right after resolving the macros.
 */
void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr,
	const Dictionary::Ptr& resolvedMacros, bool useResolvedMacros)
{
	CheckCommand::Ptr checkCommand = CheckCommand::ExecuteOverride ? CheckCommand::ExecuteOverride : checkable->GetCheckCommand();

	Host::Ptr host;
	Service::Ptr service;
	tie(host, service) = GetHostService(checkable);

	MacroProcessor::ResolverList macroResolvers;
	String missingMacroSink;

	if (MacroResolver::OverrideMacros) {
		macroResolvers.emplace_back("override", MacroResolver::OverrideMacros);
	}

	if (service) {
		macroResolvers.emplace_back("service", service);
	}

	macroResolvers.emplace_back("host", host);
	macroResolvers.emplace_back("command", checkCommand);
	macroResolvers.emplace_back("icinga", IcingaApplication::GetInstance());

	auto resolveMacro = [&](const String& macro) {
		return MacroProcessor::ResolveMacros(macro, macroResolvers, checkable->GetLastCheckResult(),
			&missingMacroSink, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros);
	};

	struct Thresholds
	{
		Value Warning, Critical;
	};

	// The braced initializer evaluates left to right: warning macro first, critical macro second.
	auto resolveThresholds = [&resolveMacro](const String& warningMacro, const String& criticalMacro) {
		return Thresholds{resolveMacro(warningMacro), resolveMacro(criticalMacro)};
	};

	String icingadbName = resolveMacro("$icingadb_name$");

	auto dumpTakesThresholds = resolveThresholds("$icingadb_full_dump_duration_warning$", "$icingadb_full_dump_duration_critical$");
	auto syncTakesThresholds = resolveThresholds("$icingadb_full_sync_duration_warning$", "$icingadb_full_sync_duration_critical$");
	auto icingaBacklogThresholds = resolveThresholds("$icingadb_redis_backlog_warning$", "$icingadb_redis_backlog_critical$");
	auto icingadbBacklogThresholds = resolveThresholds("$icingadb_database_backlog_warning$", "$icingadb_database_backlog_critical$");

	// Only the macro resolution was requested, nothing is to be executed.
	if (resolvedMacros && !useResolvedMacros) {
		return;
	}

	if (icingadbName.IsEmpty()) {
		ReportIcingadbCheck(checkable, checkCommand, cr, "Icinga DB UNKNOWN: Attribute 'icingadb_name' must be set.", ServiceUnknown);
		return;
	}

	auto conn = IcingaDB::GetByName(icingadbName);

	if (!conn) {
		ReportIcingadbCheck(checkable, checkCommand, cr, "Icinga DB UNKNOWN: Icinga DB connection '" + icingadbName + "' does not exist.", ServiceUnknown);
		return;
	}

	auto redis = conn->GetConnection();

	if (!redis || !redis->GetConnected()) {
		ReportIcingadbCheck(checkable, checkCommand, cr, "Icinga DB CRITICAL: Not connected to Redis.", ServiceCritical);
		return;
	}

	auto now = Utility::GetTime();
	Array::Ptr redisTime, xReadHeartbeat, xReadStats, xReadRuntimeBacklog, xReadHistoryBacklog;

	try {
		// One batch of queries; the replies arrive in the order the queries are listed.
		auto replies = redis->GetResultsOfQueries(
			{
				{"TIME"},
				{"XREAD", "STREAMS", "icingadb:telemetry:heartbeat", "0-0"},
				{"XREAD", "STREAMS", "icingadb:telemetry:stats", "0-0"},
				{"XREAD", "COUNT", "1", "STREAMS", "icinga:runtime", "icinga:runtime:state", "0-0", "0-0"},
				{
					"XREAD", "COUNT", "1", "STREAMS",
					"icinga:history:stream:acknowledgement",
					"icinga:history:stream:comment",
					"icinga:history:stream:downtime",
					"icinga:history:stream:flapping",
					"icinga:history:stream:notification",
					"icinga:history:stream:state",
					"0-0", "0-0", "0-0", "0-0", "0-0", "0-0",
				}
			},
			RedisConnection::QueryPriority::Heartbeat
		);

		redisTime = std::move(replies.at(0));
		xReadHeartbeat = std::move(replies.at(1));
		xReadStats = std::move(replies.at(2));
		xReadRuntimeBacklog = std::move(replies.at(3));
		xReadHistoryBacklog = std::move(replies.at(4));
	} catch (const std::exception& ex) {
		ReportIcingadbCheck(
			checkable, checkCommand, cr,
			String("Icinga DB CRITICAL: Could not query Redis: ") + ex.what(), ServiceCritical
		);

		return;
	}

	if (!xReadHeartbeat) {
		ReportIcingadbCheck(
			checkable, checkCommand, cr,
			"Icinga DB CRITICAL: The Icinga DB daemon seems to have never run. (Missing heartbeat)",
			ServiceCritical
		);

		return;
	}

	// State of the local Icinga 2 side.
	auto redisOldestPending = redis->GetOldestPendingQueryTs();
	auto ongoingDumpStart = conn->GetOngoingDumpStart();
	auto dumpWhen = conn->GetLastdumpEnd();
	auto dumpTook = conn->GetLastdumpTook();

	// The TIME reply is combined as first element + second element / 1000000.0.
	auto redisNow = Convert::ToLong(redisTime->Get(0)) + Convert::ToLong(redisTime->Get(1)) / 1000000.0;

	// First stream of the reply -> its list of messages -> first message.
	Array::Ptr heartbeatMessage = Array::Ptr(Array::Ptr(xReadHeartbeat->Get(0))->Get(1))->Get(0);
	auto heartbeatTime = GetXMessageTs(heartbeatMessage);
	std::map<String, String> heartbeatData;

	IcingaDB::AddKvsToMap(heartbeatMessage->Get(1), heartbeatData);

	// Reads an integer heartbeat field and divides it by 1000.0
	// (presumably milliseconds to seconds - TODO confirm against the Icinga DB daemon).
	auto heartbeatSeconds = [&heartbeatData](const char *field) {
		return Convert::ToLong(heartbeatData.at(field)) / 1000.0;
	};

	String version = heartbeatData.at("version");
	// Icinga DB's clock at the heartbeat plus the time passed since then according to Redis.
	auto icingadbNow = heartbeatSeconds("time") + (redisNow - heartbeatTime);
	auto icingadbStartTime = heartbeatSeconds("start-time");
	String errMsg (heartbeatData.at("error"));
	auto errSince = heartbeatSeconds("error-since");
	String perfdataFromRedis = heartbeatData.at("performance-data");
	auto heartbeatLastReceived = heartbeatSeconds("last-heartbeat-received");
	bool weResponsible = Convert::ToLong(heartbeatData.at("ha-responsible"));
	auto weResponsibleTs = heartbeatSeconds("ha-responsible-ts");
	bool otherResponsible = Convert::ToLong(heartbeatData.at("ha-other-responsible"));
	auto syncOngoingSince = heartbeatSeconds("sync-ongoing-since");
	auto syncSuccessWhen = heartbeatSeconds("sync-success-finish");
	auto syncSuccessTook = heartbeatSeconds("sync-success-duration");

	std::ostringstream i2okmsgs, idbokmsgs, warnmsgs, critmsgs;
	Array::Ptr perfdata = new Array();

	for (auto stream : {&i2okmsgs, &idbokmsgs, &warnmsgs, &critmsgs}) {
		*stream << std::fixed << std::setprecision(3);
	}

	// Compares a duration with a pair of optional thresholds. Appends a message to critmsgs or
	// warnmsgs (critical takes precedence) and returns whether one of the thresholds was exceeded.
	auto exceedsThresholds = [&critmsgs, &warnmsgs](const char *label, double value, const Thresholds& limits) {
		if (!limits.Critical.IsEmpty() && value > limits.Critical) {
			critmsgs << label << Utility::FormatDuration(value)
				<< ", greater than CRITICAL threshold (" << Utility::FormatDuration(limits.Critical) << ")!";

			return true;
		}

		if (!limits.Warning.IsEmpty() && value > limits.Warning) {
			warnmsgs << label << Utility::FormatDuration(value)
				<< ", greater than WARNING threshold (" << Utility::FormatDuration(limits.Warning) << ").";

			return true;
		}

		return false;
	};

	// Icinga DB heartbeat age.
	const auto downForCritical = 10;
	auto downFor = redisNow - heartbeatTime;
	const bool down = downFor > downForCritical;

	if (down) {
		critmsgs << " Last seen " << Utility::FormatDuration(downFor)
			<< " ago, greater than CRITICAL threshold (" << Utility::FormatDuration(downForCritical) << ")!";
	} else {
		idbokmsgs << "\n* Last seen: " << Utility::FormatDuration(downFor) << " ago";
	}

	perfdata->Add(new PerfdataValue("icingadb_heartbeat_age", downFor, false, "seconds", Empty, downForCritical, 0));

	// Error reported by Icinga DB via the heartbeat.
	const auto errForCritical = 10;
	auto hasError = !errMsg.IsEmpty();
	auto errFor = icingadbNow - errSince;

	if (hasError) {
		if (errFor > errForCritical) {
			critmsgs << " ERROR: " << errMsg << "!";
		}

		// NOTE(review): hasError is always true in this branch, so the factor below is always 1.
		perfdata->Add(new PerfdataValue("error_for", errFor * (hasError ? 1 : -1), false, "seconds", Empty, errForCritical, 0));
	}

	// Icinga 2 heartbeat as seen by Icinga DB.
	if (!down) {
		const auto heartbeatLagWarning = 3/* Icinga DB read freq. */ + 1/* Icinga DB write freq. */ + 2/* threshold */;
		auto heartbeatLag = fmin(icingadbNow - heartbeatLastReceived, 10 * 60);

		if (!heartbeatLastReceived) {
			critmsgs << " Lost Icinga 2 heartbeat!";
		} else if (heartbeatLag > heartbeatLagWarning) {
			warnmsgs << " Icinga 2 heartbeat lag: " << Utility::FormatDuration(heartbeatLag)
				<< ", greater than WARNING threshold (" << Utility::FormatDuration(heartbeatLagWarning) << ").";
		}

		perfdata->Add(new PerfdataValue("icinga2_heartbeat_age", heartbeatLag, false, "seconds", heartbeatLagWarning, Empty, 0));
	}

	// HA responsibility.
	if (weResponsible) {
		idbokmsgs << "\n* Responsible";
	} else if (otherResponsible) {
		idbokmsgs << "\n* Not responsible, but another instance is";
	} else {
		critmsgs << " No instance is responsible!";
	}

	perfdata->Add(new PerfdataValue("icingadb_responsible_instances", int(weResponsible || otherResponsible), false, "", Empty, Empty, 0, 1));

	// Largest pairwise difference between the clocks of Icinga 2, Redis and Icinga DB.
	const auto clockDriftWarning = 5;
	const auto clockDriftCritical = 30;
	auto clockDrift = std::max({
		fabs(now - redisNow),
		fabs(redisNow - icingadbNow),
		fabs(icingadbNow - now),
	});

	if (clockDrift > clockDriftCritical) {
		critmsgs << " Icinga 2/Redis/Icinga DB clock drift: " << Utility::FormatDuration(clockDrift)
			<< ", greater than CRITICAL threshold (" << Utility::FormatDuration(clockDriftCritical) << ")!";
	} else if (clockDrift > clockDriftWarning) {
		warnmsgs << " Icinga 2/Redis/Icinga DB clock drift: " << Utility::FormatDuration(clockDrift)
			<< ", greater than WARNING threshold (" << Utility::FormatDuration(clockDriftWarning) << ").";
	}

	perfdata->Add(new PerfdataValue("clock_drift", clockDrift, false, "seconds", clockDriftWarning, clockDriftCritical, 0));

	// Full dump currently performed by Icinga 2.
	if (ongoingDumpStart) {
		auto ongoingDumpTakes = now - ongoingDumpStart;

		if (!exceedsThresholds(" Current Icinga 2 full dump already takes ", ongoingDumpTakes, dumpTakesThresholds)) {
			i2okmsgs << "\n* Current full dump running for " << Utility::FormatDuration(ongoingDumpTakes);
		}

		perfdata->Add(new PerfdataValue("icinga2_current_full_dump_duration", ongoingDumpTakes, false, "seconds",
			dumpTakesThresholds.Warning, dumpTakesThresholds.Critical, 0));
	}

	// Full sync currently performed by Icinga DB.
	if (!down && syncOngoingSince) {
		auto ongoingSyncTakes = icingadbNow - syncOngoingSince;

		if (!exceedsThresholds(" Current full sync already takes ", ongoingSyncTakes, syncTakesThresholds)) {
			idbokmsgs << "\n* Current full sync running for " << Utility::FormatDuration(ongoingSyncTakes);
		}

		perfdata->Add(new PerfdataValue("icingadb_current_full_sync_duration", ongoingSyncTakes, false, "seconds",
			syncTakesThresholds.Warning, syncTakesThresholds.Critical, 0));
	}

	// Age of the oldest query Icinga 2 has not sent to Redis yet (0 if nothing is pending).
	auto redisBacklog = now - redisOldestPending;

	if (!redisOldestPending) {
		redisBacklog = 0;
	}

	exceedsThresholds(" Icinga 2 Redis query backlog: ", redisBacklog, icingaBacklogThresholds);

	perfdata->Add(new PerfdataValue("icinga2_redis_query_backlog", redisBacklog, false, "seconds",
		icingaBacklogThresholds.Warning, icingaBacklogThresholds.Critical, 0));

	if (!down) {
		// Age (relative to the Redis clock) of the oldest first message among the given
		// XREAD reply streams, 0 if there is no reply or no positive timestamp.
		auto getBacklog = [redisNow](const Array::Ptr& streams) -> double {
			if (!streams) {
				return 0;
			}

			double oldestTs = 0;
			ObjectLock lock (streams);

			for (Array::Ptr stream : streams) {
				auto firstTs = GetXMessageTs(Array::Ptr(stream->Get(1))->Get(0));

				if (oldestTs == 0 || firstTs < oldestTs) {
					oldestTs = firstTs;
				}
			}

			return oldestTs > 0 ? redisNow - oldestTs : 0;
		};

		double historyBacklog = getBacklog(xReadHistoryBacklog);

		exceedsThresholds(" History backlog: ", historyBacklog, icingadbBacklogThresholds);

		perfdata->Add(new PerfdataValue("icingadb_history_backlog", historyBacklog, false, "seconds",
			icingadbBacklogThresholds.Warning, icingadbBacklogThresholds.Critical, 0));

		double runtimeBacklog = 0;

		if (weResponsible && !syncOngoingSince) {
			// These streams are only processed by the responsible instance after the full sync finished,
			// it's fine for some backlog to exist otherwise.
			runtimeBacklog = getBacklog(xReadRuntimeBacklog);

			exceedsThresholds(" Runtime update backlog: ", runtimeBacklog, icingadbBacklogThresholds);
		}

		// Also report the perfdata value on the standby instance or during a full sync (as 0 in this case).
		perfdata->Add(new PerfdataValue("icingadb_runtime_update_backlog", runtimeBacklog, false, "seconds",
			icingadbBacklogThresholds.Warning, icingadbBacklogThresholds.Critical, 0));
	}

	// Last completed full dump of Icinga 2.
	auto dumpAgo = now - dumpWhen;

	if (dumpWhen) {
		perfdata->Add(new PerfdataValue("icinga2_last_full_dump_ago", dumpAgo, false, "seconds", Empty, Empty, 0));
	}

	if (dumpTook) {
		perfdata->Add(new PerfdataValue("icinga2_last_full_dump_duration", dumpTook, false, "seconds", Empty, Empty, 0));
	}

	if (dumpWhen && dumpTook) {
		i2okmsgs << "\n* Last full dump: " << Utility::FormatDuration(dumpAgo)
			<< " ago, took " << Utility::FormatDuration(dumpTook);
	}

	auto icingadbUptime = icingadbNow - icingadbStartTime;

	if (!down) {
		perfdata->Add(new PerfdataValue("icingadb_uptime", icingadbUptime, false, "seconds", Empty, Empty, 0));
	}

	// Pass on the performance data Icinga DB published with its heartbeat.
	{
		Array::Ptr heartbeatPerfdata = PluginUtility::SplitPerfdata(perfdataFromRedis);
		ObjectLock lock (heartbeatPerfdata);

		for (auto& entry : heartbeatPerfdata) {
			perfdata->Add(PerfdataValue::Parse(entry));
		}
	}

	if (weResponsibleTs) {
		// Positive while this instance is responsible, negative otherwise.
		perfdata->Add(new PerfdataValue("icingadb_responsible_for",
			(weResponsible ? 1 : -1) * (icingadbNow - weResponsibleTs), false, "seconds"));
	}

	// Last successful full sync of Icinga DB.
	auto syncAgo = icingadbNow - syncSuccessWhen;

	if (syncSuccessWhen) {
		perfdata->Add(new PerfdataValue("icingadb_last_full_sync_ago", syncAgo, false, "seconds", Empty, Empty, 0));
	}

	if (syncSuccessTook) {
		perfdata->Add(new PerfdataValue("icingadb_last_full_sync_duration", syncSuccessTook, false, "seconds", Empty, Empty, 0));
	}

	if (syncSuccessWhen && syncSuccessTook) {
		idbokmsgs << "\n* Last full sync: " << Utility::FormatDuration(syncAgo)
			<< " ago, took " << Utility::FormatDuration(syncSuccessTook);
	}

	// Icinga DB statistics: one ring buffer per operation, fed from the stats stream.
	std::map<String, RingBuffer> statsPerOp;

	const char * const icingadbKnownStats[] = {
		"config_sync", "state_sync", "history_sync", "overdue_sync", "history_cleanup"
	};

	// Pre-create the known operations so that they show up even without any message.
	for (auto knownStat : icingadbKnownStats) {
		statsPerOp.emplace(std::piecewise_construct, std::forward_as_tuple(knownStat), std::forward_as_tuple(15 * 60));
	}

	if (xReadStats) {
		Array::Ptr statsMessages = Array::Ptr(xReadStats->Get(0))->Get(1);
		ObjectLock lock (statsMessages);

		for (Array::Ptr statsMessage : statsMessages) {
			auto messageTs = GetXMessageTs(statsMessage);
			std::map<String, String> opsPerSec;

			IcingaDB::AddKvsToMap(statsMessage->Get(1), opsPerSec);

			for (auto& op : opsPerSec) {
				auto pos = statsPerOp.find(op.first);

				if (pos == statsPerOp.end()) {
					pos = statsPerOp.emplace(
						std::piecewise_construct,
						std::forward_as_tuple(op.first), std::forward_as_tuple(15 * 60)
					).first;
				}

				pos->second.InsertValue(messageTs, Convert::ToLong(op.second));
			}
		}
	}

	for (auto& stat : statsPerOp) {
		perfdata->Add(new PerfdataValue("icingadb_" + stat.first + "_items_1min", stat.second.UpdateAndGetValues(now, 60), false, "", Empty, Empty, 0));
		perfdata->Add(new PerfdataValue("icingadb_" + stat.first + "_items_5mins", stat.second.UpdateAndGetValues(now, 5 * 60), false, "", Empty, Empty, 0));
		perfdata->Add(new PerfdataValue("icingadb_" + stat.first + "_items_15mins", stat.second.UpdateAndGetValues(now, 15 * 60), false, "", Empty, Empty, 0));
	}

	// Counters of the local Redis connection.
	perfdata->Add(new PerfdataValue("icinga2_redis_queries_1min", redis->GetQueryCount(60), false, "", Empty, Empty, 0));
	perfdata->Add(new PerfdataValue("icinga2_redis_queries_5mins", redis->GetQueryCount(5 * 60), false, "", Empty, Empty, 0));
	perfdata->Add(new PerfdataValue("icinga2_redis_queries_15mins", redis->GetQueryCount(15 * 60), false, "", Empty, Empty, 0));

	perfdata->Add(new PerfdataValue("icinga2_redis_pending_queries", redis->GetPendingQueryCount(), false, "", Empty, Empty, 0));

	struct {
		const char * Name;
		int (RedisConnection::* Getter)(RingBuffer::SizeType span, RingBuffer::SizeType tv);
	} const icingaWriteSubjects[] = {
		{"config_dump", &RedisConnection::GetWrittenConfigFor},
		{"state_dump", &RedisConnection::GetWrittenStateFor},
		{"history_dump", &RedisConnection::GetWrittenHistoryFor}
	};

	for (auto subject : icingaWriteSubjects) {
		perfdata->Add(new PerfdataValue(String("icinga2_") + subject.Name + "_items_1min", (redis.get()->*subject.Getter)(60, now), false, "", Empty, Empty, 0));
		perfdata->Add(new PerfdataValue(String("icinga2_") + subject.Name + "_items_5mins", (redis.get()->*subject.Getter)(5 * 60, now), false, "", Empty, Empty, 0));
		perfdata->Add(new PerfdataValue(String("icinga2_") + subject.Name + "_items_15mins", (redis.get()->*subject.Getter)(15 * 60, now), false, "", Empty, Empty, 0));
	}

	// Assemble the output: the most severe problems first, followed by the informational sections.
	auto i2okmsg = i2okmsgs.str();
	auto idbokmsg = idbokmsgs.str();
	auto warnmsg = warnmsgs.str();
	auto critmsg = critmsgs.str();

	ServiceState state;
	std::ostringstream msgbuf;

	msgbuf << "Icinga DB ";

	if (!critmsg.empty()) {
		state = ServiceCritical;
		msgbuf << "CRITICAL:" << critmsg;

		if (!warnmsg.empty()) {
			msgbuf << "\n\nWARNING:" << warnmsg;
		}
	} else if (!warnmsg.empty()) {
		state = ServiceWarning;
		msgbuf << "WARNING:" << warnmsg;
	} else {
		state = ServiceOK;
		msgbuf << "OK: Uptime: " << Utility::FormatDuration(icingadbUptime) << ". Version: " << version << ".";
	}

	if (!i2okmsg.empty()) {
		msgbuf << "\n\nIcinga 2:\n" << i2okmsg;
	}

	if (!idbokmsg.empty()) {
		msgbuf << "\n\nIcinga DB:\n" << idbokmsg;
	}

	cr->SetPerformanceData(perfdata);
	ReportIcingadbCheck(checkable, checkCommand, cr, msgbuf.str(), state);
}
|