mirror of https://github.com/Icinga/icinga2.git
Icinga DB Check: report history and runtime update backlog separately
This probably makes little difference for end users, but for support and development it is valuable to know which of the two backlogs (history or runtime update) is causing problems.
This commit is contained in:
parent
2a4605f4b7
commit
d70a27b982
|
@ -107,7 +107,7 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
|
||||||
}
|
}
|
||||||
|
|
||||||
auto now (Utility::GetTime());
|
auto now (Utility::GetTime());
|
||||||
Array::Ptr redisTime, xReadHeartbeat, xReadStats, xReadRtuHistory;
|
Array::Ptr redisTime, xReadHeartbeat, xReadStats, xReadRuntimeBacklog, xReadHistoryBacklog;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
auto replies (redis->GetResultsOfQueries(
|
auto replies (redis->GetResultsOfQueries(
|
||||||
|
@ -115,13 +115,16 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
|
||||||
{"TIME"},
|
{"TIME"},
|
||||||
{"XREAD", "STREAMS", "icingadb:telemetry:heartbeat", "0-0"},
|
{"XREAD", "STREAMS", "icingadb:telemetry:heartbeat", "0-0"},
|
||||||
{"XREAD", "STREAMS", "icingadb:telemetry:stats", "0-0"},
|
{"XREAD", "STREAMS", "icingadb:telemetry:stats", "0-0"},
|
||||||
|
{"XREAD", "COUNT", "1", "STREAMS", "icinga:runtime", "icinga:runtime:state", "0-0", "0-0"},
|
||||||
{
|
{
|
||||||
"XREAD", "COUNT", "1", "STREAMS",
|
"XREAD", "COUNT", "1", "STREAMS",
|
||||||
"icinga:runtime", "icinga:runtime:state",
|
"icinga:history:stream:acknowledgement",
|
||||||
"icinga:history:stream:acknowledgement", "icinga:history:stream:comment",
|
"icinga:history:stream:comment",
|
||||||
"icinga:history:stream:downtime", "icinga:history:stream:flapping",
|
"icinga:history:stream:downtime",
|
||||||
"icinga:history:stream:notification", "icinga:history:stream:state",
|
"icinga:history:stream:flapping",
|
||||||
"0-0", "0-0", "0-0", "0-0", "0-0", "0-0", "0-0", "0-0"
|
"icinga:history:stream:notification",
|
||||||
|
"icinga:history:stream:state",
|
||||||
|
"0-0", "0-0", "0-0", "0-0", "0-0", "0-0",
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
RedisConnection::QueryPriority::Heartbeat
|
RedisConnection::QueryPriority::Heartbeat
|
||||||
|
@ -130,7 +133,8 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
|
||||||
redisTime = std::move(replies.at(0));
|
redisTime = std::move(replies.at(0));
|
||||||
xReadHeartbeat = std::move(replies.at(1));
|
xReadHeartbeat = std::move(replies.at(1));
|
||||||
xReadStats = std::move(replies.at(2));
|
xReadStats = std::move(replies.at(2));
|
||||||
xReadRtuHistory = std::move(replies.at(3));
|
xReadRuntimeBacklog = std::move(replies.at(3));
|
||||||
|
xReadHistoryBacklog = std::move(replies.at(4));
|
||||||
} catch (const std::exception& ex) {
|
} catch (const std::exception& ex) {
|
||||||
ReportIcingadbCheck(
|
ReportIcingadbCheck(
|
||||||
checkable, commandObj, cr,
|
checkable, commandObj, cr,
|
||||||
|
@ -300,21 +304,15 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
|
||||||
icingaBacklogThresholds.Warning, icingaBacklogThresholds.Critical, 0));
|
icingaBacklogThresholds.Warning, icingaBacklogThresholds.Critical, 0));
|
||||||
|
|
||||||
if (!down) {
|
if (!down) {
|
||||||
double icingadbBacklog = 0;
|
auto getBacklog = [redisNow](const Array::Ptr& streams) -> double {
|
||||||
|
if (!streams) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (xReadRtuHistory) {
|
|
||||||
double minTs = 0;
|
double minTs = 0;
|
||||||
ObjectLock lock (xReadRtuHistory);
|
ObjectLock lock (streams);
|
||||||
|
|
||||||
for (Array::Ptr stream : xReadRtuHistory) {
|
|
||||||
if (!weResponsible) {
|
|
||||||
String name = stream->Get(0);
|
|
||||||
|
|
||||||
if (name == "icinga:runtime" || name == "icinga:runtime:state") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
for (Array::Ptr stream : streams) {
|
||||||
auto ts (GetXMessageTs(Array::Ptr(stream->Get(1))->Get(0)));
|
auto ts (GetXMessageTs(Array::Ptr(stream->Get(1))->Get(0)));
|
||||||
|
|
||||||
if (minTs == 0 || ts < minTs) {
|
if (minTs == 0 || ts < minTs) {
|
||||||
|
@ -323,19 +321,42 @@ void IcingadbCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckR
|
||||||
}
|
}
|
||||||
|
|
||||||
if (minTs > 0) {
|
if (minTs > 0) {
|
||||||
icingadbBacklog = redisNow - minTs;
|
return redisNow - minTs;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
if (!icingadbBacklogThresholds.Critical.IsEmpty() && icingadbBacklog > icingadbBacklogThresholds.Critical) {
|
double historyBacklog = getBacklog(xReadHistoryBacklog);
|
||||||
critmsgs << " Query backlog: " << Utility::FormatDuration(icingadbBacklog)
|
|
||||||
|
if (!icingadbBacklogThresholds.Critical.IsEmpty() && historyBacklog > icingadbBacklogThresholds.Critical) {
|
||||||
|
critmsgs << " History backlog: " << Utility::FormatDuration(historyBacklog)
|
||||||
<< ", greater than CRITICAL threshold (" << Utility::FormatDuration(icingadbBacklogThresholds.Critical) << ")!";
|
<< ", greater than CRITICAL threshold (" << Utility::FormatDuration(icingadbBacklogThresholds.Critical) << ")!";
|
||||||
} else if (!icingadbBacklogThresholds.Warning.IsEmpty() && icingadbBacklog > icingadbBacklogThresholds.Warning) {
|
} else if (!icingadbBacklogThresholds.Warning.IsEmpty() && historyBacklog > icingadbBacklogThresholds.Warning) {
|
||||||
warnmsgs << " Query backlog: " << Utility::FormatDuration(icingadbBacklog)
|
warnmsgs << " History backlog: " << Utility::FormatDuration(historyBacklog)
|
||||||
<< ", greater than WARNING threshold (" << Utility::FormatDuration(icingadbBacklogThresholds.Warning) << ").";
|
<< ", greater than WARNING threshold (" << Utility::FormatDuration(icingadbBacklogThresholds.Warning) << ").";
|
||||||
}
|
}
|
||||||
|
|
||||||
perfdata->Add(new PerfdataValue("database_backlog", icingadbBacklog, false, "seconds",
|
perfdata->Add(new PerfdataValue("history_backlog", historyBacklog, false, "seconds",
|
||||||
|
icingadbBacklogThresholds.Warning, icingadbBacklogThresholds.Critical, 0));
|
||||||
|
|
||||||
|
double runtimeBacklog = 0;
|
||||||
|
|
||||||
|
if (weResponsible) {
|
||||||
|
// These streams are only processed by one instance, it's fine for the other instance to have some backlog.
|
||||||
|
runtimeBacklog = getBacklog(xReadRuntimeBacklog);
|
||||||
|
|
||||||
|
if (!icingadbBacklogThresholds.Critical.IsEmpty() && runtimeBacklog > icingadbBacklogThresholds.Critical) {
|
||||||
|
critmsgs << " Runtime update backlog: " << Utility::FormatDuration(runtimeBacklog)
|
||||||
|
<< ", greater than CRITICAL threshold (" << Utility::FormatDuration(icingadbBacklogThresholds.Critical) << ")!";
|
||||||
|
} else if (!icingadbBacklogThresholds.Warning.IsEmpty() && runtimeBacklog > icingadbBacklogThresholds.Warning) {
|
||||||
|
warnmsgs << " Runtime update backlog: " << Utility::FormatDuration(runtimeBacklog)
|
||||||
|
<< ", greater than WARNING threshold (" << Utility::FormatDuration(icingadbBacklogThresholds.Warning) << ").";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also report the perfdata value on the other instance (as 0 in this case).
|
||||||
|
perfdata->Add(new PerfdataValue("runtime_backlog", runtimeBacklog, false, "seconds",
|
||||||
icingadbBacklogThresholds.Warning, icingadbBacklogThresholds.Critical, 0));
|
icingadbBacklogThresholds.Warning, icingadbBacklogThresholds.Critical, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue