icinga2/lib/perfdata/graphitewriter.cpp

393 lines
11 KiB
C++
Raw Normal View History

/* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
2013-10-14 20:12:42 +02:00
2014-05-25 16:23:35 +02:00
#include "perfdata/graphitewriter.hpp"
2018-01-18 13:50:38 +01:00
#include "perfdata/graphitewriter-ti.cpp"
2014-05-25 16:23:35 +02:00
#include "icinga/service.hpp"
2019-01-17 08:53:55 +01:00
#include "icinga/checkcommand.hpp"
2014-05-25 16:23:35 +02:00
#include "icinga/macroprocessor.hpp"
#include "icinga/icingaapplication.hpp"
#include "base/tcpsocket.hpp"
#include "base/configtype.hpp"
2014-05-25 16:23:35 +02:00
#include "base/objectlock.hpp"
2014-10-19 14:21:12 +02:00
#include "base/logger.hpp"
2014-05-25 16:23:35 +02:00
#include "base/convert.hpp"
#include "base/utility.hpp"
#include "base/perfdatavalue.hpp"
2014-05-25 16:23:35 +02:00
#include "base/application.hpp"
#include "base/stream.hpp"
#include "base/networkstream.hpp"
#include "base/exception.hpp"
#include "base/statsfunction.hpp"
#include <boost/algorithm/string.hpp>
2013-10-15 20:37:58 +02:00
#include <boost/algorithm/string/replace.hpp>
#include <utility>
2013-10-14 20:12:42 +02:00
using namespace icinga;
REGISTER_TYPE(GraphiteWriter);
REGISTER_STATSFUNCTION(GraphiteWriter, &GraphiteWriter::StatsFunc);
/**
 * Runs after the configuration for this object has been loaded.
 *
 * Names the work queue after this object and picks the HA mode depending
 * on GetEnableHa().
 */
void GraphiteWriter::OnConfigLoaded()
{
	ObjectImpl<GraphiteWriter>::OnConfigLoaded();

	m_WorkQueue.SetName("GraphiteWriter, " + GetName());

	/* With HA enabled the feature is set to HARunOnce and we are done. */
	if (GetEnableHa()) {
		SetHAMode(HARunOnce);
		return;
	}

	Log(LogDebug, "GraphiteWriter")
		<< "HA functionality disabled. Won't pause connection: " << GetName();

	SetHAMode(HARunEverywhere);
}
/**
 * Collects statistics for every configured GraphiteWriter object.
 *
 * @param status Receives a "graphitewriter" dictionary keyed by object name,
 *               each entry holding work queue length, work queue item rate
 *               and connection state.
 * @param perfdata Receives two perfdata values per writer (queue length and
 *                 queue item rate).
 */
void GraphiteWriter::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata)
{
	DictionaryData writerStats;

	for (const GraphiteWriter::Ptr& writer : ConfigType::GetObjectsByType<GraphiteWriter>()) {
		size_t queuedItems = writer->m_WorkQueue.GetLength();
		/* Task count for a span of 60 (presumably seconds), averaged per unit. */
		double queuedItemRate = writer->m_WorkQueue.GetTaskCount(60) / 60.0;

		writerStats.emplace_back(writer->GetName(), new Dictionary({
			{ "work_queue_items", queuedItems },
			{ "work_queue_item_rate", queuedItemRate },
			{ "connected", writer->GetConnected() }
		}));

		const String metricPrefix = "graphitewriter_" + writer->GetName();

		perfdata->Add(new PerfdataValue(metricPrefix + "_work_queue_items", queuedItems));
		perfdata->Add(new PerfdataValue(metricPrefix + "_work_queue_item_rate", queuedItemRate));
	}

	status->Set("graphitewriter", new Dictionary(std::move(writerStats)));
}
void GraphiteWriter::Resume()
2013-10-14 20:12:42 +02:00
{
ObjectImpl<GraphiteWriter>::Resume();
2013-10-14 20:12:42 +02:00
Log(LogInformation, "GraphiteWriter")
<< "'" << GetName() << "' resumed.";
/* Register exception handler for WQ tasks. */
m_WorkQueue.SetExceptionCallback(std::bind(&GraphiteWriter::ExceptionHandler, this, _1));
/* Timer for reconnecting */
m_ReconnectTimer = new Timer();
2013-10-14 20:12:42 +02:00
m_ReconnectTimer->SetInterval(10);
m_ReconnectTimer->OnTimerExpired.connect(std::bind(&GraphiteWriter::ReconnectTimerHandler, this));
2013-10-14 20:12:42 +02:00
m_ReconnectTimer->Start();
m_ReconnectTimer->Reschedule(0);
/* Register event handlers. */
Checkable::OnNewCheckResult.connect(std::bind(&GraphiteWriter::CheckResultHandler, this, _1, _2));
2013-10-14 20:12:42 +02:00
}
/**
 * Pause is equivalent to Stop, but with HA capabilities to resume at runtime.
 *
 * Drops the reconnect timer, then tries to (re)connect one last time so the
 * work queue can be joined before the connection is closed. If connecting
 * fails, the queue is not joined and the writer is paused right away.
 */
void GraphiteWriter::Pause()
{
	m_ReconnectTimer.reset();

	bool connectionAvailable = true;

	try {
		ReconnectInternal();
	} catch (const std::exception&) {
		connectionAvailable = false;
	}

	if (connectionAvailable) {
		/* Let pending work queue items finish, then close the connection. */
		m_WorkQueue.Join();
		DisconnectInternal();

		Log(LogInformation, "GraphiteWriter")
			<< "'" << GetName() << "' paused.";
	} else {
		Log(LogInformation, "GraphiteWriter")
			<< "'" << GetName() << "' paused. Unable to connect, not flushing buffers. Data may be lost on reload.";
	}

	ObjectImpl<GraphiteWriter>::Pause();
}
void GraphiteWriter::AssertOnWorkQueue()
2013-10-14 20:12:42 +02:00
{
ASSERT(m_WorkQueue.IsWorkerThread());
}
/**
 * Exception callback for work queue tasks.
 *
 * Logs the failure (details only at debug level) and, if the writer is
 * currently marked as connected, closes the stream and clears that flag.
 *
 * @param exp The exception raised by the failed task.
 */
void GraphiteWriter::ExceptionHandler(boost::exception_ptr exp)
{
	Log(LogCritical, "GraphiteWriter", "Exception during Graphite operation: Verify that your backend is operational!");

	Log(LogDebug, "GraphiteWriter")
		<< "Exception during Graphite operation: " << DiagnosticInformation(std::move(exp));

	if (!GetConnected())
		return;

	m_Stream->Close();
	SetConnected(false);
}
/**
 * Work queue task: (re)connects to Graphite unless the writer is paused.
 * A paused writer is only marked as disconnected.
 */
void GraphiteWriter::Reconnect()
{
	AssertOnWorkQueue();

	if (IsPaused())
		SetConnected(false);
	else
		ReconnectInternal();
}
void GraphiteWriter::ReconnectInternal()
{
double startTime = Utility::GetTime();
CONTEXT("Reconnecting to Graphite '" + GetName() + "'");
SetShouldConnect(true);
if (GetConnected())
return;
2013-10-14 20:12:42 +02:00
TcpSocket::Ptr socket = new TcpSocket();
2013-10-14 20:12:42 +02:00
2014-10-20 10:09:57 +02:00
Log(LogNotice, "GraphiteWriter")
<< "Reconnecting to Graphite on host '" << GetHost() << "' port '" << GetPort() << "'.";
try {
socket->Connect(GetHost(), GetPort());
} catch (const std::exception& ex) {
2014-10-20 10:09:57 +02:00
Log(LogCritical, "GraphiteWriter")
<< "Can't connect to Graphite on host '" << GetHost() << "' port '" << GetPort() << "'.";
throw ex;
}
2013-10-14 20:12:42 +02:00
m_Stream = new NetworkStream(socket);
SetConnected(true);
Log(LogInformation, "GraphiteWriter")
<< "Finished reconnecting to Graphite in " << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
}
/**
 * Reconnect timer callback: schedules Reconnect() on the work queue with
 * normal priority, unless the writer is paused.
 */
void GraphiteWriter::ReconnectTimerHandler()
{
	if (!IsPaused())
		m_WorkQueue.Enqueue(std::bind(&GraphiteWriter::Reconnect, this), PriorityNormal);
}
/**
 * Work queue variant of DisconnectInternal(): asserts that it runs on the
 * work queue and then closes the connection.
 */
void GraphiteWriter::Disconnect()
{
	AssertOnWorkQueue();

	DisconnectInternal();
}
/**
 * Closes the stream and clears the connected flag. Does nothing when the
 * writer is not marked as connected.
 */
void GraphiteWriter::DisconnectInternal()
{
	if (GetConnected()) {
		m_Stream->Close();
		SetConnected(false);
	}
}
2014-04-03 15:36:13 +02:00
/**
 * Handler for new check results: defers the actual processing to the work
 * queue. Check results are ignored while the writer is paused.
 *
 * @param checkable The host or service the result belongs to.
 * @param cr The new check result.
 */
void GraphiteWriter::CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
{
	if (!IsPaused())
		m_WorkQueue.Enqueue(std::bind(&GraphiteWriter::CheckResultHandlerInternal, this, checkable, cr));
}
/**
 * Work queue task that turns a check result into Graphite metrics.
 *
 * Resolves the metric prefix from the host or service name template, sends
 * the metadata metrics below "<prefix>.metadata" when GetEnableSendMetadata()
 * is set, and finally sends the performance data below "<prefix>.perfdata".
 * All metrics carry the check's execution end time as timestamp.
 *
 * @param checkable The host or service the result belongs to.
 * @param cr The check result to process.
 */
void GraphiteWriter::CheckResultHandlerInternal(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
{
	AssertOnWorkQueue();

	CONTEXT("Processing check result for '" + checkable->GetName() + "'");

	/* TODO: Deal with missing connection here. Needs refactoring
	 * into parsing the actual performance data and then putting it
	 * into a queue for re-inserting. */

	/* Perfdata must be enabled both globally and for this checkable. */
	bool perfdataEnabled = IcingaApplication::GetInstance()->GetEnablePerfdata() && checkable->GetEnablePerfdata();

	if (!perfdataEnabled)
		return;

	Host::Ptr host;
	Service::Ptr service;
	tie(host, service) = GetHostService(checkable);

	MacroProcessor::ResolverList resolvers;

	if (service)
		resolvers.emplace_back("service", service);

	resolvers.emplace_back("host", host);
	resolvers.emplace_back("icinga", IcingaApplication::GetInstance());

	/* Services and hosts use different name templates for the metric prefix. */
	const String nameTemplate = service ? GetServiceNameTemplate() : GetHostNameTemplate();

	String prefix = MacroProcessor::ResolveMacros(nameTemplate, resolvers, cr, nullptr,
		std::bind(&GraphiteWriter::EscapeMacroMetric, _1));

	String prefixPerfdata = prefix + ".perfdata";
	String prefixMetadata = prefix + ".metadata";

	double ts = cr->GetExecutionEnd();

	if (GetEnableSendMetadata()) {
		if (service)
			SendMetric(checkable, prefixMetadata, "state", service->GetState(), ts);
		else
			SendMetric(checkable, prefixMetadata, "state", host->GetState(), ts);

		SendMetric(checkable, prefixMetadata, "current_attempt", checkable->GetCheckAttempt(), ts);
		SendMetric(checkable, prefixMetadata, "max_check_attempts", checkable->GetMaxCheckAttempts(), ts);
		SendMetric(checkable, prefixMetadata, "state_type", checkable->GetStateType(), ts);
		SendMetric(checkable, prefixMetadata, "reachable", checkable->IsReachable(), ts);
		SendMetric(checkable, prefixMetadata, "downtime_depth", checkable->GetDowntimeDepth(), ts);
		SendMetric(checkable, prefixMetadata, "acknowledgement", checkable->GetAcknowledgement(), ts);
		SendMetric(checkable, prefixMetadata, "latency", cr->CalculateLatency(), ts);
		SendMetric(checkable, prefixMetadata, "execution_time", cr->CalculateExecutionTime(), ts);
	}

	SendPerfdata(checkable, prefixPerfdata, cr, ts);
}
2019-01-17 08:53:55 +01:00
/**
 * Sends every performance data entry of a check result as Graphite metrics.
 *
 * Each entry yields "<label>.value"; when GetEnableSendThresholds() is set,
 * the crit/warn/min/max thresholds that are actually present are sent as
 * well. Entries that cannot be parsed are logged and skipped.
 *
 * @param checkable The host or service the result belongs to (used for logging).
 * @param prefix Metric prefix to send the values under.
 * @param cr The check result providing the performance data.
 * @param ts Timestamp to attach to every metric.
 */
void GraphiteWriter::SendPerfdata(const Checkable::Ptr& checkable, const String& prefix, const CheckResult::Ptr& cr, double ts)
{
	Array::Ptr perfdata = cr->GetPerformanceData();

	if (!perfdata)
		return;

	CheckCommand::Ptr checkCommand = checkable->GetCheckCommand();

	ObjectLock olock(perfdata);
	for (const Value& val : perfdata) {
		PerfdataValue::Ptr pdv;

		if (val.IsObjectType<PerfdataValue>())
			pdv = val;
		else {
			try {
				pdv = PerfdataValue::Parse(val);
			} catch (const std::exception&) {
				Log(LogWarning, "GraphiteWriter")
					<< "Ignoring invalid perfdata for checkable '"
					<< checkable->GetName() << "' and command '"
					<< checkCommand->GetName() << "' with value: " << val;
				continue;
			}
		}

		String escapedKey = EscapeMetricLabel(pdv->GetLabel());

		SendMetric(checkable, prefix, escapedKey + ".value", pdv->GetValue(), ts);

		if (GetEnableSendThresholds()) {
			/* Test for presence rather than truthiness: a threshold of
			 * exactly 0 is a valid value and must still be sent. */
			if (!pdv->GetCrit().IsEmpty())
				SendMetric(checkable, prefix, escapedKey + ".crit", pdv->GetCrit(), ts);
			if (!pdv->GetWarn().IsEmpty())
				SendMetric(checkable, prefix, escapedKey + ".warn", pdv->GetWarn(), ts);
			if (!pdv->GetMin().IsEmpty())
				SendMetric(checkable, prefix, escapedKey + ".min", pdv->GetMin(), ts);
			if (!pdv->GetMax().IsEmpty())
				SendMetric(checkable, prefix, escapedKey + ".max", pdv->GetMax(), ts);
		}
	}
}
2019-01-17 08:53:55 +01:00
/**
 * Writes a single metric line "<prefix>.<name> <value> <timestamp>\n" to the
 * Graphite stream.
 *
 * The line is always written to the debug log (without the newline). It is
 * only sent when the writer is marked as connected; otherwise it is dropped.
 * The stream mutex is held while checking the connection and writing.
 *
 * @param checkable The host or service the metric belongs to (used for logging).
 * @param prefix Metric path prefix.
 * @param name Metric name appended to the prefix.
 * @param value Metric value.
 * @param ts Timestamp; truncated to a whole number for the wire format.
 *
 * @throws Whatever the stream's Write() throws. The exception is logged and
 *         re-thrown unchanged, so its dynamic type is preserved for the
 *         caller.
 */
void GraphiteWriter::SendMetric(const Checkable::Ptr& checkable, const String& prefix, const String& name, double value, double ts)
{
	std::ostringstream msgbuf;
	msgbuf << prefix << "." << name << " " << Convert::ToString(value) << " " << static_cast<long>(ts);

	Log(LogDebug, "GraphiteWriter")
		<< "Checkable '" << checkable->GetName() << "' adds to metric list: '" << msgbuf.str() << "'.";

	// do not send \n to debug log
	msgbuf << "\n";
	String metric = msgbuf.str();

	boost::mutex::scoped_lock lock(m_StreamMutex);

	if (!GetConnected())
		return;

	try {
		m_Stream->Write(metric.CStr(), metric.GetLength());
	} catch (const std::exception&) {
		Log(LogCritical, "GraphiteWriter")
			<< "Cannot write to TCP socket on host '" << GetHost() << "' port '" << GetPort() << "'.";

		/* Re-throw the original exception; `throw ex;` would copy it as a
		 * plain std::exception and drop the derived type and its details. */
		throw;
	}
}
/**
 * Escapes a string for use inside a metric prefix.
 *
 * Spaces, dots, backslashes and forward slashes are each replaced with an
 * underscore. Dots are not allowed here, unlike in perfdata labels.
 *
 * @param str The raw string.
 * @return The escaped copy.
 */
String GraphiteWriter::EscapeMetric(const String& str)
{
	String escaped = str;

	/* The replacements are independent of each other: each one maps a
	 * different character to '_'. */
	boost::replace_all(escaped, ".", "_");
	boost::replace_all(escaped, " ", "_");
	boost::replace_all(escaped, "/", "_");
	boost::replace_all(escaped, "\\", "_");

	return escaped;
}
/**
 * Escapes a perfdata label for use as a metric name.
 *
 * Spaces, backslashes and forward slashes are each replaced with an
 * underscore, and "::" becomes ".". Dots already present in the label are
 * passed through.
 *
 * @param str The raw perfdata label.
 * @return The escaped copy.
 */
String GraphiteWriter::EscapeMetricLabel(const String& str)
{
	String escaped = str;

	/* "::" turns into a path separator. */
	boost::replace_all(escaped, "::", ".");

	/* Remaining unsafe characters become underscores. */
	boost::replace_all(escaped, " ", "_");
	boost::replace_all(escaped, "/", "_");
	boost::replace_all(escaped, "\\", "_");

	return escaped;
}
/**
 * Escape callback used when resolving macros for metric prefixes.
 *
 * A scalar value is escaped with EscapeMetric(). For an array, every element
 * is escaped individually and the results are joined with '.'.
 *
 * @param value The resolved macro value.
 * @return The escaped value.
 */
Value GraphiteWriter::EscapeMacroMetric(const Value& value)
{
	if (!value.IsObjectType<Array>())
		return EscapeMetric(value);

	Array::Ptr parts = value;
	ArrayData escapedParts;

	ObjectLock olock(parts);
	for (const Value& part : parts)
		escapedParts.push_back(EscapeMetric(part));

	return Utility::Join(new Array(std::move(escapedParts)), '.');
}
void GraphiteWriter::ValidateHostNameTemplate(const Lazy<String>& lvalue, const ValidationUtils& utils)
{
ObjectImpl<GraphiteWriter>::ValidateHostNameTemplate(lvalue, utils);
2015-02-11 15:47:45 +01:00
if (!MacroProcessor::ValidateMacroString(lvalue()))
BOOST_THROW_EXCEPTION(ValidationError(this, { "host_name_template" }, "Closing $ not found in macro format string '" + lvalue() + "'."));
}
/**
 * Validates the "service_name_template" attribute.
 *
 * Runs the base class validation first and then checks the value with
 * MacroProcessor::ValidateMacroString().
 *
 * @param lvalue Lazily evaluated attribute value.
 * @param utils Validation helpers, passed on to the base class.
 * @throws ValidationError if the macro string check fails.
 */
void GraphiteWriter::ValidateServiceNameTemplate(const Lazy<String>& lvalue, const ValidationUtils& utils)
{
	ObjectImpl<GraphiteWriter>::ValidateServiceNameTemplate(lvalue, utils);

	if (MacroProcessor::ValidateMacroString(lvalue()))
		return;

	BOOST_THROW_EXCEPTION(ValidationError(this, { "service_name_template" }, "Closing $ not found in macro format string '" + lvalue() + "'."));
}