mirror of https://github.com/Icinga/icinga2.git
Fix performance issues for InfluxdbWriter
This commit is contained in:
parent
680939a289
commit
7f92016e38
|
@ -49,6 +49,24 @@
|
||||||
|
|
||||||
using namespace icinga;
|
using namespace icinga;
|
||||||
|
|
||||||
|
class InfluxdbInteger : public Object
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
DECLARE_PTR_TYPEDEFS(InfluxdbInteger);
|
||||||
|
|
||||||
|
InfluxdbInteger(int value)
|
||||||
|
: m_Value(value)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
int GetValue(void) const
|
||||||
|
{
|
||||||
|
return m_Value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int m_Value;
|
||||||
|
};
|
||||||
|
|
||||||
REGISTER_TYPE(InfluxdbWriter);
|
REGISTER_TYPE(InfluxdbWriter);
|
||||||
|
|
||||||
REGISTER_STATSFUNCTION(InfluxdbWriter, &InfluxdbWriter::StatsFunc);
|
REGISTER_STATSFUNCTION(InfluxdbWriter, &InfluxdbWriter::StatsFunc);
|
||||||
|
@ -106,7 +124,7 @@ void InfluxdbWriter::Start(bool runtimeCreated)
|
||||||
m_FlushTimer->Reschedule(0);
|
m_FlushTimer->Reschedule(0);
|
||||||
|
|
||||||
/* Register for new metrics. */
|
/* Register for new metrics. */
|
||||||
Service::OnNewCheckResult.connect(boost::bind(&InfluxdbWriter::CheckResultHandler, this, _1, _2));
|
Checkable::OnNewCheckResult.connect(boost::bind(&InfluxdbWriter::CheckResultHandler, this, _1, _2));
|
||||||
}
|
}
|
||||||
|
|
||||||
void InfluxdbWriter::Stop(bool runtimeRemoved)
|
void InfluxdbWriter::Stop(bool runtimeRemoved)
|
||||||
|
@ -134,7 +152,7 @@ void InfluxdbWriter::ExceptionHandler(boost::exception_ptr exp)
|
||||||
//TODO: Close the connection, if we keep it open.
|
//TODO: Close the connection, if we keep it open.
|
||||||
}
|
}
|
||||||
|
|
||||||
Stream::Ptr InfluxdbWriter::Connect()
|
Stream::Ptr InfluxdbWriter::Connect(void)
|
||||||
{
|
{
|
||||||
TcpSocket::Ptr socket = new TcpSocket();
|
TcpSocket::Ptr socket = new TcpSocket();
|
||||||
|
|
||||||
|
@ -176,10 +194,10 @@ Stream::Ptr InfluxdbWriter::Connect()
|
||||||
|
|
||||||
void InfluxdbWriter::CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
|
void InfluxdbWriter::CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
|
||||||
{
|
{
|
||||||
m_WorkQueue.Enqueue(boost::bind(&InfluxdbWriter::InternalCheckResultHandler, this, checkable, cr));
|
m_WorkQueue.Enqueue(boost::bind(&InfluxdbWriter::CheckResultHandlerWQ, this, checkable, cr), PriorityLow);
|
||||||
}
|
}
|
||||||
|
|
||||||
void InfluxdbWriter::InternalCheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
|
void InfluxdbWriter::CheckResultHandlerWQ(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr)
|
||||||
{
|
{
|
||||||
AssertOnWorkQueue();
|
AssertOnWorkQueue();
|
||||||
|
|
||||||
|
@ -211,28 +229,16 @@ void InfluxdbWriter::InternalCheckResultHandler(const Checkable::Ptr& checkable,
|
||||||
if (tags) {
|
if (tags) {
|
||||||
ObjectLock olock(tags);
|
ObjectLock olock(tags);
|
||||||
for (const Dictionary::Pair& pair : tags) {
|
for (const Dictionary::Pair& pair : tags) {
|
||||||
// Prevent missing macros from warning; will return an empty value
|
|
||||||
// which will be filtered out in SendMetric()
|
|
||||||
String missing_macro;
|
String missing_macro;
|
||||||
tags->Set(pair.first, MacroProcessor::ResolveMacros(pair.second, resolvers, cr, &missing_macro));
|
Value value = MacroProcessor::ResolveMacros(pair.second, resolvers, cr, &missing_macro);
|
||||||
|
|
||||||
|
if (!missing_macro.IsEmpty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
tags->Set(pair.first, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SendPerfdata(tmpl, checkable, cr, ts);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Converts val to its string form and appends an "i" suffix, which is the
 * shape EscapeField()'s integer pattern ("-?\\d+i") passes through unquoted. */
String InfluxdbWriter::FormatInteger(int val)
{
	const String digits = Convert::ToString(val);

	return digits + "i";
}
|
|
||||||
|
|
||||||
/* Maps a bool onto the literal text "true" or "false". */
String InfluxdbWriter::FormatBoolean(bool val)
{
	if (val)
		return "true";

	return "false";
}
|
|
||||||
|
|
||||||
void InfluxdbWriter::SendPerfdata(const Dictionary::Ptr& tmpl, const Checkable::Ptr& checkable, const CheckResult::Ptr& cr, double ts)
|
|
||||||
{
|
|
||||||
Array::Ptr perfdata = cr->GetPerformanceData();
|
Array::Ptr perfdata = cr->GetPerformanceData();
|
||||||
if (perfdata) {
|
if (perfdata) {
|
||||||
ObjectLock olock(perfdata);
|
ObjectLock olock(perfdata);
|
||||||
|
@ -277,16 +283,16 @@ void InfluxdbWriter::SendPerfdata(const Dictionary::Ptr& tmpl, const Checkable::
|
||||||
Dictionary::Ptr fields = new Dictionary();
|
Dictionary::Ptr fields = new Dictionary();
|
||||||
|
|
||||||
if (service)
|
if (service)
|
||||||
fields->Set("state", FormatInteger(service->GetState()));
|
fields->Set("state", new InfluxdbInteger(service->GetState()));
|
||||||
else
|
else
|
||||||
fields->Set("state", FormatInteger(host->GetState()));
|
fields->Set("state", new InfluxdbInteger(host->GetState()));
|
||||||
|
|
||||||
fields->Set("current_attempt", FormatInteger(checkable->GetCheckAttempt()));
|
fields->Set("current_attempt", new InfluxdbInteger(checkable->GetCheckAttempt()));
|
||||||
fields->Set("max_check_attempts", FormatInteger(checkable->GetMaxCheckAttempts()));
|
fields->Set("max_check_attempts", new InfluxdbInteger(checkable->GetMaxCheckAttempts()));
|
||||||
fields->Set("state_type", FormatInteger(checkable->GetStateType()));
|
fields->Set("state_type", new InfluxdbInteger(checkable->GetStateType()));
|
||||||
fields->Set("reachable", FormatBoolean(checkable->IsReachable()));
|
fields->Set("reachable", checkable->IsReachable());
|
||||||
fields->Set("downtime_depth", FormatInteger(checkable->GetDowntimeDepth()));
|
fields->Set("downtime_depth", new InfluxdbInteger(checkable->GetDowntimeDepth()));
|
||||||
fields->Set("acknowledgement", FormatInteger(checkable->GetAcknowledgement()));
|
fields->Set("acknowledgement", new InfluxdbInteger(checkable->GetAcknowledgement()));
|
||||||
fields->Set("latency", cr->CalculateLatency());
|
fields->Set("latency", cr->CalculateLatency());
|
||||||
fields->Set("execution_time", cr->CalculateExecutionTime());
|
fields->Set("execution_time", cr->CalculateExecutionTime());
|
||||||
|
|
||||||
|
@ -294,7 +300,7 @@ void InfluxdbWriter::SendPerfdata(const Dictionary::Ptr& tmpl, const Checkable::
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String InfluxdbWriter::EscapeKey(const String& str)
|
String InfluxdbWriter::EscapeKeyOrTagValue(const String& str)
|
||||||
{
|
{
|
||||||
// Iterate over the key name and escape commas and spaces with a backslash
|
// Iterate over the key name and escape commas and spaces with a backslash
|
||||||
String result = str;
|
String result = str;
|
||||||
|
@ -302,57 +308,28 @@ String InfluxdbWriter::EscapeKey(const String& str)
|
||||||
boost::algorithm::replace_all(result, "=", "\\=");
|
boost::algorithm::replace_all(result, "=", "\\=");
|
||||||
boost::algorithm::replace_all(result, ",", "\\,");
|
boost::algorithm::replace_all(result, ",", "\\,");
|
||||||
boost::algorithm::replace_all(result, " ", "\\ ");
|
boost::algorithm::replace_all(result, " ", "\\ ");
|
||||||
|
|
||||||
// InfluxDB 'feature': although backslashes are allowed in keys they also act
|
|
||||||
// as escape sequences when followed by ',' or ' '. When your tag is like
|
|
||||||
// 'metric=C:\' bad things happen. Backslashes themselves cannot be escaped
|
|
||||||
// and through experimentation they also escape '='. To be safe we replace
|
|
||||||
// trailing backslashes with an underscore.
|
|
||||||
size_t length = result.GetLength();
|
|
||||||
if (result[length - 1] == '\\')
|
|
||||||
result[length - 1] = '_';
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
String InfluxdbWriter::EscapeField(const String& str)
|
String InfluxdbWriter::EscapeValue(const Value& value)
|
||||||
{
|
{
|
||||||
//TODO: Evaluate whether boost::regex is really needed here.
|
if (value.IsObjectType<InfluxdbInteger>()) {
|
||||||
|
std::ostringstream os;
|
||||||
// Handle integers
|
os << static_cast<InfluxdbInteger::Ptr>(value)->GetValue()
|
||||||
boost::regex integer("-?\\d+i");
|
<< "i";
|
||||||
if (boost::regex_match(str.GetData(), integer)) {
|
return os.str();
|
||||||
return str;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle numerics
|
if (value.IsBoolean())
|
||||||
boost::regex numeric("-?\\d+(\\.\\d+)?((e|E)[+-]?\\d+)?");
|
return value ? "true" : "false";
|
||||||
if (boost::regex_match(str.GetData(), numeric)) {
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle booleans
|
return value;
|
||||||
boost::regex boolean_true("t|true", boost::regex::icase);
|
|
||||||
if (boost::regex_match(str.GetData(), boolean_true))
|
|
||||||
return "true";
|
|
||||||
boost::regex boolean_false("f|false", boost::regex::icase);
|
|
||||||
if (boost::regex_match(str.GetData(), boolean_false))
|
|
||||||
return "false";
|
|
||||||
|
|
||||||
// Handle NaNs
|
|
||||||
if (boost::math::isnan(str))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// Otherwise it's a string and needs escaping and quoting
|
|
||||||
String result = str;
|
|
||||||
boost::algorithm::replace_all(result, "\"", "\\\"");
|
|
||||||
return "\"" + result + "\"";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void InfluxdbWriter::SendMetric(const Dictionary::Ptr& tmpl, const String& label, const Dictionary::Ptr& fields, double ts)
|
void InfluxdbWriter::SendMetric(const Dictionary::Ptr& tmpl, const String& label, const Dictionary::Ptr& fields, double ts)
|
||||||
{
|
{
|
||||||
std::ostringstream msgbuf;
|
std::ostringstream msgbuf;
|
||||||
msgbuf << EscapeKey(tmpl->Get("measurement"));
|
msgbuf << EscapeKeyOrTagValue(tmpl->Get("measurement"));
|
||||||
|
|
||||||
Dictionary::Ptr tags = tmpl->Get("tags");
|
Dictionary::Ptr tags = tmpl->Get("tags");
|
||||||
if (tags) {
|
if (tags) {
|
||||||
|
@ -360,14 +337,14 @@ void InfluxdbWriter::SendMetric(const Dictionary::Ptr& tmpl, const String& label
|
||||||
for (const Dictionary::Pair& pair : tags) {
|
for (const Dictionary::Pair& pair : tags) {
|
||||||
// Empty macro expansion, no tag
|
// Empty macro expansion, no tag
|
||||||
if (!pair.second.IsEmpty()) {
|
if (!pair.second.IsEmpty()) {
|
||||||
msgbuf << "," << EscapeKey(pair.first) << "=" << EscapeKey(pair.second);
|
msgbuf << "," << EscapeKeyOrTagValue(pair.first) << "=" << EscapeKeyOrTagValue(pair.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Label is may be empty in the case of metadata
|
// Label may be empty in the case of metadata
|
||||||
if (!label.IsEmpty())
|
if (!label.IsEmpty())
|
||||||
msgbuf << ",metric=" << EscapeKey(label);
|
msgbuf << ",metric=" << EscapeKeyOrTagValue(label);
|
||||||
|
|
||||||
msgbuf << " ";
|
msgbuf << " ";
|
||||||
|
|
||||||
|
@ -381,45 +358,54 @@ void InfluxdbWriter::SendMetric(const Dictionary::Ptr& tmpl, const String& label
|
||||||
else
|
else
|
||||||
msgbuf << ",";
|
msgbuf << ",";
|
||||||
|
|
||||||
msgbuf << EscapeKey(pair.first) << "=" << EscapeField(pair.second);
|
msgbuf << EscapeKeyOrTagValue(pair.first) << "=" << EscapeValue(pair.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
msgbuf << " " << static_cast<unsigned long>(ts);
|
msgbuf << " " << static_cast<unsigned long>(ts);
|
||||||
|
|
||||||
|
#ifdef I2_DEBUG
|
||||||
Log(LogDebug, "InfluxdbWriter")
|
Log(LogDebug, "InfluxdbWriter")
|
||||||
<< "Add to metric list: '" << msgbuf.str() << "'.";
|
<< "Add to metric list: '" << msgbuf.str() << "'.";
|
||||||
|
#endif /* I2_DEBUG */
|
||||||
|
|
||||||
// Atomically buffer the data point
|
// Buffer the data point
|
||||||
boost::mutex::scoped_lock lock(m_DataBufferMutex);
|
m_DataBuffer.push_back(msgbuf.str());
|
||||||
m_DataBuffer.push_back(String(msgbuf.str()));
|
|
||||||
|
|
||||||
// Flush if we've buffered too much to prevent excessive memory use
|
// Flush if we've buffered too much to prevent excessive memory use
|
||||||
if (static_cast<int>(m_DataBuffer.size()) >= GetFlushThreshold()) {
|
if (static_cast<int>(m_DataBuffer.size()) >= GetFlushThreshold()) {
|
||||||
Log(LogDebug, "InfluxdbWriter")
|
Log(LogDebug, "InfluxdbWriter")
|
||||||
<< "Data buffer overflow writing " << m_DataBuffer.size() << " data points";
|
<< "Data buffer overflow writing " << m_DataBuffer.size() << " data points";
|
||||||
|
|
||||||
|
try {
|
||||||
Flush();
|
Flush();
|
||||||
|
} catch (...) {
|
||||||
|
/* Do nothing. */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void InfluxdbWriter::FlushTimeout(void)
|
void InfluxdbWriter::FlushTimeout(void)
|
||||||
{
|
{
|
||||||
// Prevent new data points from being added to the array, there is a
|
m_WorkQueue.Enqueue(boost::bind(&InfluxdbWriter::FlushTimeoutWQ, this), PriorityHigh);
|
||||||
// race condition where they could disappear
|
}
|
||||||
boost::mutex::scoped_lock lock(m_DataBufferMutex);
|
|
||||||
|
void InfluxdbWriter::FlushTimeoutWQ(void)
|
||||||
|
{
|
||||||
|
AssertOnWorkQueue();
|
||||||
|
|
||||||
// Flush if there are any data available
|
// Flush if there are any data available
|
||||||
if (m_DataBuffer.size() > 0) {
|
if (m_DataBuffer.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
Log(LogDebug, "InfluxdbWriter")
|
Log(LogDebug, "InfluxdbWriter")
|
||||||
<< "Timer expired writing " << m_DataBuffer.size() << " data points";
|
<< "Timer expired writing " << m_DataBuffer.size() << " data points";
|
||||||
|
|
||||||
Flush();
|
Flush();
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void InfluxdbWriter::Flush(void)
|
void InfluxdbWriter::Flush(void)
|
||||||
{
|
{
|
||||||
// Ensure you hold a lock against m_DataBuffer so that things
|
|
||||||
// don't go missing after creating the body and clearing the buffer
|
|
||||||
String body = boost::algorithm::join(m_DataBuffer, "\n");
|
String body = boost::algorithm::join(m_DataBuffer, "\n");
|
||||||
m_DataBuffer.clear();
|
m_DataBuffer.clear();
|
||||||
|
|
||||||
|
@ -457,25 +443,27 @@ void InfluxdbWriter::Flush(void)
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: Evaluate whether waiting for the result makes sense here. KeepAlive and close are options.
|
|
||||||
HttpResponse resp(stream, req);
|
HttpResponse resp(stream, req);
|
||||||
StreamReadContext context;
|
StreamReadContext context;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
resp.Parse(context, true);
|
while (resp.Parse(context, true) && !resp.Complete)
|
||||||
|
; /* Do nothing */
|
||||||
} catch (const std::exception& ex) {
|
} catch (const std::exception& ex) {
|
||||||
Log(LogWarning, "InfluxdbWriter")
|
Log(LogWarning, "InfluxdbWriter")
|
||||||
<< "Cannot read from TCP socket from host '" << GetHost() << "' port '" << GetPort() << "'.";
|
<< "Failed to parse HTTP response from host '" << GetHost() << "' port '" << GetPort() << "': " << DiagnosticInformation(ex);
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!resp.Complete) {
|
||||||
|
Log(LogWarning, "InfluxdbWriter")
|
||||||
|
<< "Failed to read a complete HTTP response from the InfluxDB server.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (resp.StatusCode != 204) {
|
if (resp.StatusCode != 204) {
|
||||||
Log(LogWarning, "InfluxdbWriter")
|
Log(LogWarning, "InfluxdbWriter")
|
||||||
<< "Unexpected response code " << resp.StatusCode;
|
<< "Unexpected response code: " << resp.StatusCode;
|
||||||
|
|
||||||
// Finish parsing the headers and body
|
|
||||||
while (!resp.Complete)
|
|
||||||
resp.Parse(context, true);
|
|
||||||
|
|
||||||
String contentType = resp.Headers->Get("content-type");
|
String contentType = resp.Headers->Get("content-type");
|
||||||
if (contentType != "application/json") {
|
if (contentType != "application/json") {
|
||||||
|
@ -502,6 +490,8 @@ void InfluxdbWriter::Flush(void)
|
||||||
|
|
||||||
Log(LogCritical, "InfluxdbWriter")
|
Log(LogCritical, "InfluxdbWriter")
|
||||||
<< "InfluxDB error message:\n" << error;
|
<< "InfluxDB error message:\n" << error;
|
||||||
|
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,20 +59,16 @@ private:
|
||||||
WorkQueue m_WorkQueue;
|
WorkQueue m_WorkQueue;
|
||||||
Timer::Ptr m_FlushTimer;
|
Timer::Ptr m_FlushTimer;
|
||||||
std::vector<String> m_DataBuffer;
|
std::vector<String> m_DataBuffer;
|
||||||
boost::mutex m_DataBufferMutex;
|
|
||||||
|
|
||||||
void CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr);
|
void CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr);
|
||||||
void InternalCheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr);
|
void CheckResultHandlerWQ(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr);
|
||||||
void SendPerfdata(const Dictionary::Ptr& tmpl, const Checkable::Ptr& checkable, const CheckResult::Ptr& cr, double ts);
|
|
||||||
void SendMetric(const Dictionary::Ptr& tmpl, const String& label, const Dictionary::Ptr& fields, double ts);
|
void SendMetric(const Dictionary::Ptr& tmpl, const String& label, const Dictionary::Ptr& fields, double ts);
|
||||||
void FlushTimeout(void);
|
void FlushTimeout(void);
|
||||||
|
void FlushTimeoutWQ(void);
|
||||||
void Flush(void);
|
void Flush(void);
|
||||||
|
|
||||||
static String FormatInteger(int val);
|
static String EscapeKeyOrTagValue(const String& str);
|
||||||
static String FormatBoolean(bool val);
|
static String EscapeValue(const Value& value);
|
||||||
|
|
||||||
static String EscapeKey(const String& str);
|
|
||||||
static String EscapeField(const String& str);
|
|
||||||
|
|
||||||
Stream::Ptr Connect();
|
Stream::Ptr Connect();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue