mirror of https://github.com/Icinga/icinga2.git
Merge pull request #6970 from Icinga/bugfix/perfdata-gaps
Improve reload handling for features (metric & queue flush, activation priority)
This commit is contained in:
commit
e2df11520e
|
@ -440,6 +440,7 @@ Constant | Description
|
|||
--------------------|-------------------
|
||||
Vars |**Read-write.** Contains a dictionary with global custom attributes. Not set by default.
|
||||
NodeName |**Read-write.** Contains the cluster node name. Set to the local hostname by default.
|
||||
ReloadTimeout |**Read-write.** Defines the reload timeout for child processes. Defaults to `300s`.
|
||||
Environment |**Read-write.** The name of the Icinga environment. Included in the SNI host name for outbound connections. Not set by default.
|
||||
RunAsUser |**Read-write.** Defines the user the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig.
|
||||
RunAsGroup |**Read-write.** Defines the group the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig.
|
||||
|
|
|
@ -60,6 +60,14 @@ void Application::OnConfigLoaded()
|
|||
|
||||
ASSERT(m_Instance == nullptr);
|
||||
m_Instance = this;
|
||||
|
||||
String reloadTimeout;
|
||||
|
||||
if (ScriptGlobal::Exists("ReloadTimeout"))
|
||||
reloadTimeout = ScriptGlobal::Get("ReloadTimeout");
|
||||
|
||||
if (!reloadTimeout.IsEmpty())
|
||||
Configuration::ReloadTimeout = Convert::ToDouble(reloadTimeout);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -384,8 +392,6 @@ static void ReloadProcessCallback(const ProcessResult& pr)
|
|||
|
||||
pid_t Application::StartReloadProcess()
|
||||
{
|
||||
Log(LogInformation, "Application", "Got reload command: Starting new instance.");
|
||||
|
||||
// prepare arguments
|
||||
ArrayData args;
|
||||
args.push_back(GetExePath(m_ArgV[0]));
|
||||
|
@ -405,9 +411,14 @@ pid_t Application::StartReloadProcess()
|
|||
#endif /* _WIN32 */
|
||||
|
||||
Process::Ptr process = new Process(Process::PrepareCommand(new Array(std::move(args))));
|
||||
process->SetTimeout(300);
|
||||
process->SetTimeout(Configuration::ReloadTimeout);
|
||||
process->Run(&ReloadProcessCallback);
|
||||
|
||||
Log(LogInformation, "Application")
|
||||
<< "Got reload command: Started new instance with PID '"
|
||||
<< (unsigned long)(process->GetPID()) << "' (timeout is "
|
||||
<< Configuration::ReloadTimeout << "s).";
|
||||
|
||||
return process->GetPID();
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include "base/workqueue.hpp"
|
||||
#include "base/context.hpp"
|
||||
#include "base/application.hpp"
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <boost/exception/errinfo_api_function.hpp>
|
||||
#include <boost/exception/errinfo_errno.hpp>
|
||||
|
@ -601,10 +600,12 @@ void ConfigObject::RestoreObjects(const String& filename, int attributeTypes)
|
|||
|
||||
void ConfigObject::StopObjects()
|
||||
{
|
||||
auto types = Type::GetAllTypes();
|
||||
std::vector<Type::Ptr> types = Type::GetAllTypes();
|
||||
|
||||
std::sort(types.begin(), types.end(), [](const Type::Ptr& a, const Type::Ptr& b) {
|
||||
return a->GetActivationPriority() > b->GetActivationPriority();
|
||||
if (a->GetActivationPriority() > b->GetActivationPriority())
|
||||
return true;
|
||||
return false;
|
||||
});
|
||||
|
||||
for (const Type::Ptr& type : types) {
|
||||
|
|
|
@ -25,6 +25,7 @@ String Configuration::PidPath;
|
|||
String Configuration::PkgDataDir;
|
||||
String Configuration::PrefixDir;
|
||||
String Configuration::ProgramData;
|
||||
double Configuration::ReloadTimeout{300};
|
||||
int Configuration::RLimitFiles;
|
||||
int Configuration::RLimitProcesses;
|
||||
int Configuration::RLimitStack;
|
||||
|
@ -223,6 +224,16 @@ void Configuration::SetProgramData(const String& val, bool suppress_events, cons
|
|||
HandleUserWrite("ProgramData", &Configuration::ProgramData, val, m_ReadOnly);
|
||||
}
|
||||
|
||||
double Configuration::GetReloadTimeout() const
|
||||
{
|
||||
return Configuration::ReloadTimeout;
|
||||
}
|
||||
|
||||
void Configuration::SetReloadTimeout(double val, bool suppress_events, const Value& cookie)
|
||||
{
|
||||
HandleUserWrite("ReloadTimeout", &Configuration::ReloadTimeout, val, m_ReadOnly);
|
||||
}
|
||||
|
||||
int Configuration::GetRLimitFiles() const
|
||||
{
|
||||
return Configuration::RLimitFiles;
|
||||
|
|
|
@ -70,6 +70,9 @@ public:
|
|||
String GetProgramData() const override;
|
||||
void SetProgramData(const String& value, bool suppress_events = false, const Value& cookie = Empty) override;
|
||||
|
||||
double GetReloadTimeout() const override;
|
||||
void SetReloadTimeout(double value, bool suppress_events = false, const Value& cookie = Empty) override;
|
||||
|
||||
int GetRLimitFiles() const override;
|
||||
void SetRLimitFiles(int value, bool suppress_events = false, const Value& cookie = Empty) override;
|
||||
|
||||
|
@ -130,6 +133,7 @@ public:
|
|||
static String PkgDataDir;
|
||||
static String PrefixDir;
|
||||
static String ProgramData;
|
||||
static double ReloadTimeout;
|
||||
static int RLimitFiles;
|
||||
static int RLimitProcesses;
|
||||
static int RLimitStack;
|
||||
|
|
|
@ -94,6 +94,11 @@ abstract class Configuration
|
|||
set;
|
||||
};
|
||||
|
||||
[config, no_storage, virtual] double ReloadTimeout {
|
||||
get;
|
||||
set;
|
||||
};
|
||||
|
||||
[config, no_storage, virtual] int RLimitFiles {
|
||||
get;
|
||||
set;
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
#include "icinga/cib.hpp"
|
||||
#include "remote/apilistener.hpp"
|
||||
#include "base/configtype.hpp"
|
||||
#include "base/defer.hpp"
|
||||
#include "base/objectlock.hpp"
|
||||
#include "base/utility.hpp"
|
||||
#include "base/perfdatavalue.hpp"
|
||||
|
@ -57,7 +56,6 @@ void CheckerComponent::Start(bool runtimeCreated)
|
|||
Log(LogInformation, "CheckerComponent")
|
||||
<< "'" << GetName() << "' started.";
|
||||
|
||||
m_RunningChecks.store(0);
|
||||
|
||||
m_Thread = std::thread(std::bind(&CheckerComponent::CheckThreadProc, this));
|
||||
|
||||
|
@ -75,13 +73,32 @@ void CheckerComponent::Stop(bool runtimeRemoved)
|
|||
m_CV.notify_all();
|
||||
}
|
||||
|
||||
double wait = 0.0;
|
||||
|
||||
while (GetPendingCheckables() > 0) {
|
||||
Log(LogDebug, "CheckerComponent")
|
||||
<< "Waiting for running checks (" << GetPendingCheckables()
|
||||
<< ") to finish. Waited for " << wait << " seconds now.";
|
||||
|
||||
Utility::Sleep(0.1);
|
||||
wait += 0.1;
|
||||
|
||||
/* Pick a timeout slightly shorther than the process reload timeout. */
|
||||
double waitMax = Configuration::ReloadTimeout - 30;
|
||||
if (waitMax <= 0)
|
||||
waitMax = 1;
|
||||
|
||||
if (wait > waitMax) {
|
||||
Log(LogWarning, "CheckerComponent")
|
||||
<< "Checks running too long for " << wait
|
||||
<< " seconds, hard shutdown before reload timeout: " << Configuration::ReloadTimeout << ".";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
m_ResultTimer->Stop();
|
||||
m_Thread.join();
|
||||
|
||||
while (m_RunningChecks.load()) {
|
||||
Utility::Sleep(1.0 / 60.0);
|
||||
}
|
||||
|
||||
Log(LogInformation, "CheckerComponent")
|
||||
<< "'" << GetName() << "' stopped.";
|
||||
|
||||
|
@ -196,8 +213,6 @@ void CheckerComponent::CheckThreadProc()
|
|||
|
||||
Checkable::IncreasePendingChecks();
|
||||
|
||||
m_RunningChecks.fetch_add(1);
|
||||
|
||||
Utility::QueueAsyncCallback(std::bind(&CheckerComponent::ExecuteCheckHelper, CheckerComponent::Ptr(this), checkable));
|
||||
|
||||
lock.lock();
|
||||
|
@ -206,10 +221,6 @@ void CheckerComponent::CheckThreadProc()
|
|||
|
||||
void CheckerComponent::ExecuteCheckHelper(const Checkable::Ptr& checkable)
|
||||
{
|
||||
Defer decrementRunningChecks ([this]{
|
||||
m_RunningChecks.fetch_sub(1);
|
||||
});
|
||||
|
||||
try {
|
||||
checkable->ExecuteCheck();
|
||||
} catch (const std::exception& ex) {
|
||||
|
|
|
@ -8,13 +8,11 @@
|
|||
#include "base/configobject.hpp"
|
||||
#include "base/timer.hpp"
|
||||
#include "base/utility.hpp"
|
||||
#include <atomic>
|
||||
#include <boost/thread/mutex.hpp>
|
||||
#include <boost/thread/condition_variable.hpp>
|
||||
#include <boost/multi_index_container.hpp>
|
||||
#include <boost/multi_index/ordered_index.hpp>
|
||||
#include <boost/multi_index/key_extractors.hpp>
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
|
||||
namespace icinga
|
||||
|
@ -75,7 +73,6 @@ private:
|
|||
boost::condition_variable m_CV;
|
||||
bool m_Stopped{false};
|
||||
std::thread m_Thread;
|
||||
std::atomic<uintmax_t> m_RunningChecks;
|
||||
|
||||
CheckableSet m_IdleCheckables;
|
||||
CheckableSet m_PendingCheckables;
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace icinga
|
|||
|
||||
class CheckerComponent : ConfigObject
|
||||
{
|
||||
activation_priority 100;
|
||||
activation_priority 300;
|
||||
|
||||
[config, no_storage] int concurrent_checks {
|
||||
get {{{
|
||||
|
|
|
@ -183,9 +183,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
|
|||
DbObject::OnMultipleQueries(queries);
|
||||
|
||||
/* service dependencies */
|
||||
Log(LogDebug, "ServiceDbObject")
|
||||
<< "service dependencies for '" << service->GetName() << "'";
|
||||
|
||||
queries.clear();
|
||||
|
||||
DbQuery query2;
|
||||
|
@ -233,9 +230,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
|
|||
DbObject::OnMultipleQueries(queries);
|
||||
|
||||
/* service contacts, contactgroups */
|
||||
Log(LogDebug, "ServiceDbObject")
|
||||
<< "service contacts: " << service->GetName();
|
||||
|
||||
queries.clear();
|
||||
|
||||
DbQuery query3;
|
||||
|
@ -248,9 +242,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
|
|||
queries.emplace_back(std::move(query3));
|
||||
|
||||
for (const User::Ptr& user : CompatUtility::GetCheckableNotificationUsers(service)) {
|
||||
Log(LogDebug, "ServiceDbObject")
|
||||
<< "service contacts: " << user->GetName();
|
||||
|
||||
DbQuery query_contact;
|
||||
query_contact.Table = GetType()->GetTable() + "_contacts";
|
||||
query_contact.Type = DbQueryInsert;
|
||||
|
@ -266,9 +257,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
|
|||
|
||||
DbObject::OnMultipleQueries(queries);
|
||||
|
||||
Log(LogDebug, "ServiceDbObject")
|
||||
<< "service contactgroups: " << service->GetName();
|
||||
|
||||
queries.clear();
|
||||
|
||||
DbQuery query4;
|
||||
|
@ -281,9 +269,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
|
|||
queries.emplace_back(std::move(query4));
|
||||
|
||||
for (const UserGroup::Ptr& usergroup : CompatUtility::GetCheckableNotificationUserGroups(service)) {
|
||||
Log(LogDebug, "ServiceDbObject")
|
||||
<< "service contactgroups: " << usergroup->GetName();
|
||||
|
||||
DbQuery query_contact;
|
||||
query_contact.Table = GetType()->GetTable() + "_contactgroups";
|
||||
query_contact.Type = DbQueryInsert;
|
||||
|
|
|
@ -90,9 +90,6 @@ void IdoMysqlConnection::Resume()
|
|||
|
||||
void IdoMysqlConnection::Pause()
|
||||
{
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
DbConnection::Pause();
|
||||
|
@ -102,8 +99,12 @@ void IdoMysqlConnection::Pause()
|
|||
<< "Rescheduling disconnect task.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityLow);
|
||||
m_QueryQueue.Join();
|
||||
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::ExceptionHandler(boost::exception_ptr exp)
|
||||
|
@ -175,7 +176,7 @@ void IdoMysqlConnection::ReconnectTimerHandler()
|
|||
<< "Scheduling reconnect task.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::Reconnect()
|
||||
|
@ -434,9 +435,9 @@ void IdoMysqlConnection::Reconnect()
|
|||
<< "Scheduling session table clear and finish connect task.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityHigh);
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::FinishConnect(double startTime)
|
||||
|
@ -709,7 +710,7 @@ void IdoMysqlConnection::ActivateObject(const DbObject::Ptr& dbobj)
|
|||
<< "Scheduling object activation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj)
|
||||
|
@ -754,7 +755,7 @@ void IdoMysqlConnection::DeactivateObject(const DbObject::Ptr& dbobj)
|
|||
<< "Scheduling object deactivation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj)
|
||||
|
|
|
@ -97,15 +97,16 @@ void IdoPgsqlConnection::Resume()
|
|||
|
||||
void IdoPgsqlConnection::Pause()
|
||||
{
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
DbConnection::Pause();
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Disconnect, this), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Disconnect, this), PriorityLow);
|
||||
m_QueryQueue.Join();
|
||||
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::ExceptionHandler(boost::exception_ptr exp)
|
||||
|
@ -165,7 +166,7 @@ void IdoPgsqlConnection::InternalNewTransaction()
|
|||
|
||||
void IdoPgsqlConnection::ReconnectTimerHandler()
|
||||
{
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::Reconnect()
|
||||
|
@ -408,9 +409,9 @@ void IdoPgsqlConnection::Reconnect()
|
|||
|
||||
UpdateAllObjects();
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityHigh);
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::FinishConnect(double startTime)
|
||||
|
@ -558,7 +559,7 @@ void IdoPgsqlConnection::ActivateObject(const DbObject::Ptr& dbobj)
|
|||
if (IsPaused())
|
||||
return;
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj)
|
||||
|
@ -595,7 +596,7 @@ void IdoPgsqlConnection::DeactivateObject(const DbObject::Ptr& dbobj)
|
|||
if (IsPaused())
|
||||
return;
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityLow);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj)
|
||||
|
|
|
@ -20,6 +20,8 @@ public:
|
|||
|
||||
class Downtime : ConfigObject < DowntimeNameComposer
|
||||
{
|
||||
activation_priority -10;
|
||||
|
||||
load_after Host;
|
||||
load_after Service;
|
||||
|
||||
|
|
|
@ -9,6 +9,8 @@ namespace icinga
|
|||
|
||||
class IcingaApplication : Application
|
||||
{
|
||||
activation_priority -50;
|
||||
|
||||
[config, no_storage, virtual] String environment {
|
||||
get;
|
||||
set;
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace icinga
|
|||
|
||||
class NotificationComponent : ConfigObject
|
||||
{
|
||||
activation_priority 100;
|
||||
activation_priority 200;
|
||||
|
||||
[config] bool enable_ha (EnableHA) {
|
||||
default {{{ return true; }}}
|
||||
|
|
|
@ -87,13 +87,16 @@ void ElasticsearchWriter::Resume()
|
|||
Checkable::OnNotificationSentToAllUsers.connect(std::bind(&ElasticsearchWriter::NotificationSentToAllUsersHandler, this, _1, _2, _3, _4, _5, _6, _7));
|
||||
}
|
||||
|
||||
/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */
|
||||
void ElasticsearchWriter::Pause()
|
||||
{
|
||||
Flush();
|
||||
m_WorkQueue.Join();
|
||||
Flush();
|
||||
|
||||
Log(LogInformation, "ElasticsearchWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
m_WorkQueue.Join();
|
||||
|
||||
ObjectImpl<ElasticsearchWriter>::Pause();
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ namespace icinga
|
|||
|
||||
class ElasticsearchWriter : ConfigObject
|
||||
{
|
||||
activation_priority 80;
|
||||
activation_priority 100;
|
||||
|
||||
[config, required] String host {
|
||||
default {{{ return "127.0.0.1"; }}}
|
||||
|
|
|
@ -90,12 +90,26 @@ void GelfWriter::Resume()
|
|||
Checkable::OnStateChange.connect(std::bind(&GelfWriter::StateChangeHandler, this, _1, _2, _3));
|
||||
}
|
||||
|
||||
/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */
|
||||
void GelfWriter::Pause()
|
||||
{
|
||||
Log(LogInformation, "GelfWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
try {
|
||||
ReconnectInternal();
|
||||
} catch (const std::exception&) {
|
||||
Log(LogInformation, "GelfWriter")
|
||||
<< "'" << GetName() << "' paused. Unable to connect, not flushing buffers. Data may be lost on reload.";
|
||||
|
||||
ObjectImpl<GelfWriter>::Pause();
|
||||
return;
|
||||
}
|
||||
|
||||
m_WorkQueue.Join();
|
||||
DisconnectInternal();
|
||||
|
||||
Log(LogInformation, "GraphiteWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
ObjectImpl<GelfWriter>::Pause();
|
||||
}
|
||||
|
@ -128,6 +142,11 @@ void GelfWriter::Reconnect()
|
|||
return;
|
||||
}
|
||||
|
||||
ReconnectInternal();
|
||||
}
|
||||
|
||||
void GelfWriter::ReconnectInternal()
|
||||
{
|
||||
double startTime = Utility::GetTime();
|
||||
|
||||
CONTEXT("Reconnecting to Graylog Gelf '" + GetName() + "'");
|
||||
|
@ -167,6 +186,11 @@ void GelfWriter::Disconnect()
|
|||
{
|
||||
AssertOnWorkQueue();
|
||||
|
||||
DisconnectInternal();
|
||||
}
|
||||
|
||||
void GelfWriter::DisconnectInternal()
|
||||
{
|
||||
if (!GetConnected())
|
||||
return;
|
||||
|
||||
|
|
|
@ -55,7 +55,9 @@ private:
|
|||
void ReconnectTimerHandler();
|
||||
|
||||
void Disconnect();
|
||||
void DisconnectInternal();
|
||||
void Reconnect();
|
||||
void ReconnectInternal();
|
||||
|
||||
void AssertOnWorkQueue();
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace icinga
|
|||
|
||||
class GelfWriter : ConfigObject
|
||||
{
|
||||
activation_priority 80;
|
||||
activation_priority 100;
|
||||
|
||||
[config] String host {
|
||||
default {{{ return "127.0.0.1"; }}}
|
||||
|
|
|
@ -85,12 +85,26 @@ void GraphiteWriter::Resume()
|
|||
Checkable::OnNewCheckResult.connect(std::bind(&GraphiteWriter::CheckResultHandler, this, _1, _2));
|
||||
}
|
||||
|
||||
/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */
|
||||
void GraphiteWriter::Pause()
|
||||
{
|
||||
Log(LogInformation, "GraphiteWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
try {
|
||||
ReconnectInternal();
|
||||
} catch (const std::exception&) {
|
||||
Log(LogInformation, "GraphiteWriter")
|
||||
<< "'" << GetName() << "' paused. Unable to connect, not flushing buffers. Data may be lost on reload.";
|
||||
|
||||
ObjectImpl<GraphiteWriter>::Pause();
|
||||
return;
|
||||
}
|
||||
|
||||
m_WorkQueue.Join();
|
||||
DisconnectInternal();
|
||||
|
||||
Log(LogInformation, "GraphiteWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
ObjectImpl<GraphiteWriter>::Pause();
|
||||
}
|
||||
|
@ -123,6 +137,11 @@ void GraphiteWriter::Reconnect()
|
|||
return;
|
||||
}
|
||||
|
||||
ReconnectInternal();
|
||||
}
|
||||
|
||||
void GraphiteWriter::ReconnectInternal()
|
||||
{
|
||||
double startTime = Utility::GetTime();
|
||||
|
||||
CONTEXT("Reconnecting to Graphite '" + GetName() + "'");
|
||||
|
@ -155,6 +174,9 @@ void GraphiteWriter::Reconnect()
|
|||
|
||||
void GraphiteWriter::ReconnectTimerHandler()
|
||||
{
|
||||
if (IsPaused())
|
||||
return;
|
||||
|
||||
m_WorkQueue.Enqueue(std::bind(&GraphiteWriter::Reconnect, this), PriorityNormal);
|
||||
}
|
||||
|
||||
|
@ -162,6 +184,11 @@ void GraphiteWriter::Disconnect()
|
|||
{
|
||||
AssertOnWorkQueue();
|
||||
|
||||
DisconnectInternal();
|
||||
}
|
||||
|
||||
void GraphiteWriter::DisconnectInternal()
|
||||
{
|
||||
if (!GetConnected())
|
||||
return;
|
||||
|
||||
|
@ -184,6 +211,10 @@ void GraphiteWriter::CheckResultHandlerInternal(const Checkable::Ptr& checkable,
|
|||
|
||||
CONTEXT("Processing check result for '" + checkable->GetName() + "'");
|
||||
|
||||
/* TODO: Deal with missing connection here. Needs refactoring
|
||||
* into parsing the actual performance data and then putting it
|
||||
* into a queue for re-inserting. */
|
||||
|
||||
if (!IcingaApplication::GetInstance()->GetEnablePerfdata() || !checkable->GetEnablePerfdata())
|
||||
return;
|
||||
|
||||
|
@ -276,7 +307,7 @@ void GraphiteWriter::SendMetric(const String& prefix, const String& name, double
|
|||
msgbuf << prefix << "." << name << " " << Convert::ToString(value) << " " << static_cast<long>(ts);
|
||||
|
||||
Log(LogDebug, "GraphiteWriter")
|
||||
<< "Add to metric list:'" << msgbuf.str() << "'.";
|
||||
<< "Add to metric list: '" << msgbuf.str() << "'.";
|
||||
|
||||
// do not send \n to debug log
|
||||
msgbuf << "\n";
|
||||
|
|
|
@ -54,7 +54,9 @@ private:
|
|||
void ReconnectTimerHandler();
|
||||
|
||||
void Disconnect();
|
||||
void DisconnectInternal();
|
||||
void Reconnect();
|
||||
void ReconnectInternal();
|
||||
|
||||
void AssertOnWorkQueue();
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace icinga
|
|||
|
||||
class GraphiteWriter : ConfigObject
|
||||
{
|
||||
activation_priority 80;
|
||||
activation_priority 100;
|
||||
|
||||
[config] String host {
|
||||
default {{{ return "127.0.0.1"; }}}
|
||||
|
|
|
@ -113,24 +113,33 @@ void InfluxdbWriter::Resume()
|
|||
Checkable::OnNewCheckResult.connect(std::bind(&InfluxdbWriter::CheckResultHandler, this, _1, _2));
|
||||
}
|
||||
|
||||
/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */
|
||||
void InfluxdbWriter::Pause()
|
||||
{
|
||||
/* Force a flush. */
|
||||
Log(LogDebug, "InfluxdbWriter")
|
||||
<< "Flushing pending data buffers.";
|
||||
|
||||
Flush();
|
||||
|
||||
/* Work on the missing tasks. TODO: Find a way to cache them on disk. */
|
||||
Log(LogDebug, "InfluxdbWriter")
|
||||
<< "Joining existing WQ tasks.";
|
||||
|
||||
m_WorkQueue.Join();
|
||||
|
||||
/* Flush again after the WQ tasks have filled the data buffer. */
|
||||
Log(LogDebug, "InfluxdbWriter")
|
||||
<< "Flushing data buffers from WQ tasks.";
|
||||
|
||||
Flush();
|
||||
|
||||
Log(LogInformation, "InfluxdbWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
m_WorkQueue.Join();
|
||||
|
||||
ObjectImpl<InfluxdbWriter>::Pause();
|
||||
}
|
||||
|
||||
void InfluxdbWriter::Stop(bool runtimeDeleted)
|
||||
{
|
||||
FlushTimeout();
|
||||
m_WorkQueue.Join();
|
||||
|
||||
ObjectImpl<InfluxdbWriter>::Stop(runtimeDeleted);
|
||||
}
|
||||
|
||||
void InfluxdbWriter::AssertOnWorkQueue()
|
||||
{
|
||||
ASSERT(m_WorkQueue.IsWorkerThread());
|
||||
|
@ -419,6 +428,9 @@ void InfluxdbWriter::FlushTimeoutWQ()
|
|||
|
||||
void InfluxdbWriter::Flush()
|
||||
{
|
||||
Log(LogDebug, "InfluxdbWriter")
|
||||
<< "Flushing data buffer to InfluxDB.";
|
||||
|
||||
String body = boost::algorithm::join(m_DataBuffer, "\n");
|
||||
m_DataBuffer.clear();
|
||||
|
||||
|
|
|
@ -34,7 +34,6 @@ protected:
|
|||
void OnConfigLoaded() override;
|
||||
void Resume() override;
|
||||
void Pause() override;
|
||||
void Stop(bool runtimeDeleted) override;
|
||||
|
||||
private:
|
||||
WorkQueue m_WorkQueue{10000000, 1};
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace icinga
|
|||
|
||||
class InfluxdbWriter : ConfigObject
|
||||
{
|
||||
activation_priority 80;
|
||||
activation_priority 100;
|
||||
|
||||
[config, required] String host {
|
||||
default {{{ return "127.0.0.1"; }}}
|
||||
|
|
|
@ -68,8 +68,11 @@ void OpenTsdbWriter::Resume()
|
|||
Service::OnNewCheckResult.connect(std::bind(&OpenTsdbWriter::CheckResultHandler, this, _1, _2));
|
||||
}
|
||||
|
||||
/* Pause is equivalent to Stop, but with HA capabilities to resume at runtime. */
|
||||
void OpenTsdbWriter::Pause()
|
||||
{
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
Log(LogInformation, "OpentsdbWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace icinga
|
|||
|
||||
class OpenTsdbWriter : ConfigObject
|
||||
{
|
||||
activation_priority 80;
|
||||
activation_priority 100;
|
||||
|
||||
[config] String host {
|
||||
default {{{ return "127.0.0.1"; }}}
|
||||
|
|
|
@ -66,6 +66,16 @@ void PerfdataWriter::Resume()
|
|||
|
||||
void PerfdataWriter::Pause()
|
||||
{
|
||||
m_RotationTimer.reset();
|
||||
|
||||
#ifdef I2_DEBUG
|
||||
//m_HostOutputFile << "\n# Pause the feature" << "\n\n";
|
||||
//m_ServiceOutputFile << "\n# Pause the feature" << "\n\n";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
/* Force a rotation closing the file stream. */
|
||||
RotateAllFiles();
|
||||
|
||||
Log(LogInformation, "PerfdataWriter")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
|
@ -108,7 +118,8 @@ void PerfdataWriter::CheckResultHandler(const Checkable::Ptr& checkable, const C
|
|||
String line = MacroProcessor::ResolveMacros(GetServiceFormatTemplate(), resolvers, cr, nullptr, &PerfdataWriter::EscapeMacroMetric);
|
||||
|
||||
{
|
||||
ObjectLock olock(this);
|
||||
boost::mutex::scoped_lock lock(m_StreamMutex);
|
||||
|
||||
if (!m_ServiceOutputFile.good())
|
||||
return;
|
||||
|
||||
|
@ -118,7 +129,8 @@ void PerfdataWriter::CheckResultHandler(const Checkable::Ptr& checkable, const C
|
|||
String line = MacroProcessor::ResolveMacros(GetHostFormatTemplate(), resolvers, cr, nullptr, &PerfdataWriter::EscapeMacroMetric);
|
||||
|
||||
{
|
||||
ObjectLock olock(this);
|
||||
boost::mutex::scoped_lock lock(m_StreamMutex);
|
||||
|
||||
if (!m_HostOutputFile.good())
|
||||
return;
|
||||
|
||||
|
@ -129,13 +141,20 @@ void PerfdataWriter::CheckResultHandler(const Checkable::Ptr& checkable, const C
|
|||
|
||||
void PerfdataWriter::RotateFile(std::ofstream& output, const String& temp_path, const String& perfdata_path)
|
||||
{
|
||||
ObjectLock olock(this);
|
||||
Log(LogDebug, "PerfdataWriter")
|
||||
<< "Rotating perfdata files.";
|
||||
|
||||
boost::mutex::scoped_lock lock(m_StreamMutex);
|
||||
|
||||
if (output.good()) {
|
||||
output.close();
|
||||
|
||||
if (Utility::PathExists(temp_path)) {
|
||||
String finalFile = perfdata_path + "." + Convert::ToString((long)Utility::GetTime());
|
||||
|
||||
Log(LogDebug, "PerfdataWriter")
|
||||
<< "Closed output file and renaming into '" << finalFile << "'.";
|
||||
|
||||
if (rename(temp_path.CStr(), finalFile.CStr()) < 0) {
|
||||
BOOST_THROW_EXCEPTION(posix_error()
|
||||
<< boost::errinfo_api_function("rename")
|
||||
|
@ -147,9 +166,10 @@ void PerfdataWriter::RotateFile(std::ofstream& output, const String& temp_path,
|
|||
|
||||
output.open(temp_path.CStr());
|
||||
|
||||
if (!output.good())
|
||||
if (!output.good()) {
|
||||
Log(LogWarning, "PerfdataWriter")
|
||||
<< "Could not open perfdata file '" << temp_path << "' for writing. Perfdata will be lost.";
|
||||
}
|
||||
}
|
||||
|
||||
void PerfdataWriter::RotationTimerHandler()
|
||||
|
@ -157,6 +177,11 @@ void PerfdataWriter::RotationTimerHandler()
|
|||
if (IsPaused())
|
||||
return;
|
||||
|
||||
RotateAllFiles();
|
||||
}
|
||||
|
||||
void PerfdataWriter::RotateAllFiles()
|
||||
{
|
||||
RotateFile(m_ServiceOutputFile, GetServiceTempPath(), GetServicePerfdataPath());
|
||||
RotateFile(m_HostOutputFile, GetHostTempPath(), GetHostPerfdataPath());
|
||||
}
|
||||
|
|
|
@ -34,14 +34,16 @@ protected:
|
|||
void Pause() override;
|
||||
|
||||
private:
|
||||
Timer::Ptr m_RotationTimer;
|
||||
std::ofstream m_ServiceOutputFile;
|
||||
std::ofstream m_HostOutputFile;
|
||||
boost::mutex m_StreamMutex;
|
||||
|
||||
void CheckResultHandler(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr);
|
||||
static Value EscapeMacroMetric(const Value& value);
|
||||
|
||||
Timer::Ptr m_RotationTimer;
|
||||
void RotationTimerHandler();
|
||||
|
||||
std::ofstream m_ServiceOutputFile;
|
||||
std::ofstream m_HostOutputFile;
|
||||
void RotateAllFiles();
|
||||
void RotateFile(std::ofstream& output, const String& temp_path, const String& perfdata_path);
|
||||
};
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ namespace icinga
|
|||
|
||||
class PerfdataWriter : ConfigObject
|
||||
{
|
||||
activation_priority 80;
|
||||
activation_priority 100;
|
||||
|
||||
[config] String host_perfdata_path {
|
||||
default {{{ return Configuration::SpoolDir + "/perfdata/host-perfdata"; }}}
|
||||
|
|
|
@ -218,7 +218,7 @@ void ConfigPackageUtility::AsyncTryActivateStage(const String& packageName, cons
|
|||
args->Add("ActiveStageOverride=" + packageName + ":" + stageName);
|
||||
|
||||
Process::Ptr process = new Process(Process::PrepareCommand(args));
|
||||
process->SetTimeout(300);
|
||||
process->SetTimeout(Configuration::ReloadTimeout);
|
||||
process->Run(std::bind(&TryActivateStageCallback, _1, packageName, stageName, reload));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue