mirror of
https://github.com/Icinga/icinga2.git
synced 2025-07-22 05:04:30 +02:00
Merge pull request #7062 from Icinga/bugfix/ido-reconnect-failover
Improve DB IDO HA failover behaviour
This commit is contained in:
commit
1e124fd76d
@ -875,7 +875,7 @@ Configuration Attributes:
|
||||
instance\_name | String | **Optional.** Unique identifier for the local Icinga 2 instance. Defaults to `default`.
|
||||
instance\_description | String | **Optional.** Description for the Icinga 2 instance.
|
||||
enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Defaults to `true`.
|
||||
failover\_timeout | Duration | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 60s. Defaults to `60s`.
|
||||
failover\_timeout | Duration | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 30s. Defaults to `30s`.
|
||||
cleanup | Dictionary | **Optional.** Dictionary with items for historical table cleanup.
|
||||
categories | Array | **Optional.** Array of information types that should be written to the database.
|
||||
|
||||
@ -924,6 +924,12 @@ by Icinga Web 2 in the table above.
|
||||
In addition to the category flags listed above the `DbCatEverything`
|
||||
flag may be used as a shortcut for listing all flags.
|
||||
|
||||
Runtime Attributes:
|
||||
|
||||
Name | Type | Description
|
||||
----------------------------|-----------------------|-----------------
|
||||
last\_failover | Timestamp | When the last failover happened for this connection (only available with `enable_ha = true`.
|
||||
|
||||
## IdoPgsqlConnection <a id="objecttype-idopgsqlconnection"></a>
|
||||
|
||||
IDO database adapter for PostgreSQL.
|
||||
@ -963,7 +969,7 @@ Configuration Attributes:
|
||||
instance\_name | String | **Optional.** Unique identifier for the local Icinga 2 instance. Defaults to `default`.
|
||||
instance\_description | String | **Optional.** Description for the Icinga 2 instance.
|
||||
enable\_ha | Boolean | **Optional.** Enable the high availability functionality. Only valid in a [cluster setup](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Defaults to `true`.
|
||||
failover\_timeout | Duration | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 60s. Defaults to `60s`.
|
||||
failover\_timeout | Duration | **Optional.** Set the failover timeout in a [HA cluster](06-distributed-monitoring.md#distributed-monitoring-high-availability-db-ido). Must not be lower than 30s. Defaults to `30s`.
|
||||
cleanup | Dictionary | **Optional.** Dictionary with items for historical table cleanup.
|
||||
categories | Array | **Optional.** Array of information types that should be written to the database.
|
||||
|
||||
@ -1012,6 +1018,12 @@ by Icinga Web 2 in the table above.
|
||||
In addition to the category flags listed above the `DbCatEverything`
|
||||
flag may be used as a shortcut for listing all flags.
|
||||
|
||||
Runtime Attributes:
|
||||
|
||||
Name | Type | Description
|
||||
----------------------------|-----------------------|-----------------
|
||||
last\_failover | Timestamp | When the last failover happened for this connection (only available with `enable_ha = true`.
|
||||
|
||||
## InfluxdbWriter <a id="objecttype-influxdbwriter"></a>
|
||||
|
||||
Writes check result metrics and performance data to a defined InfluxDB host.
|
||||
|
@ -28,6 +28,19 @@ This affects the following features:
|
||||
* [OpenTsdb](09-object-types.md#objecttype-opentsdbwriter)
|
||||
* [Perfdata](09-object-types.md#objecttype-perfdatawriter) (for PNP)
|
||||
|
||||
### HA Failover <a id="upgrading-to-2-11-ha-failover"></a>
|
||||
|
||||
The reconnect failover has been improved, and the default `failover_timeout`
|
||||
for the DB IDO features has been lowered from 60 to 30 seconds.
|
||||
Object authority updates (required for balancing in the cluster) happen
|
||||
more frequenty (was 30, is 10 seconds).
|
||||
Also the cold startup without object authority updates has been reduced
|
||||
from 60 to 30 seconds. This is to allow cluster reconnects (lowered from 60s to 10s in 2.10)
|
||||
before actually considering a failover/split brain scenario.
|
||||
|
||||
The [IdoMysqlConnection](09-object-types.md#objecttype-idomysqlconnection) and [IdoPgsqlConnection](09-object-types.md#objecttype-idopgsqlconnection)
|
||||
objects provide a new attribute named `last_failover` which shows the last failover timestamp.
|
||||
This value also is available in the [ido](10-icinga-template-library.md#itl-icinga-ido) CheckCommand output.
|
||||
|
||||
### Package Dependencies <a id="upgrading-to-2-11-package-dependencies"></a>
|
||||
|
||||
|
@ -19,9 +19,10 @@ namespace icinga
|
||||
|
||||
enum WorkQueuePriority
|
||||
{
|
||||
PriorityLow,
|
||||
PriorityNormal,
|
||||
PriorityHigh
|
||||
PriorityLow = 0,
|
||||
PriorityNormal = 1,
|
||||
PriorityHigh = 2,
|
||||
PriorityImmediate = 4
|
||||
};
|
||||
|
||||
using TaskFunction = std::function<void ()>;
|
||||
|
@ -435,8 +435,8 @@ void DbConnection::ValidateFailoverTimeout(const Lazy<double>& lvalue, const Val
|
||||
{
|
||||
ObjectImpl<DbConnection>::ValidateFailoverTimeout(lvalue, utils);
|
||||
|
||||
if (lvalue() < 60)
|
||||
BOOST_THROW_EXCEPTION(ValidationError(this, { "failover_timeout" }, "Failover timeout minimum is 60s."));
|
||||
if (lvalue() < 30)
|
||||
BOOST_THROW_EXCEPTION(ValidationError(this, { "failover_timeout" }, "Failover timeout minimum is 30s."));
|
||||
}
|
||||
|
||||
void DbConnection::ValidateCategories(const Lazy<Array::Ptr>& lvalue, const ValidationUtils& utils)
|
||||
|
@ -42,9 +42,11 @@ abstract class DbConnection : ConfigObject
|
||||
};
|
||||
|
||||
[config] double failover_timeout {
|
||||
default {{{ return 60; }}}
|
||||
default {{{ return 30; }}}
|
||||
};
|
||||
|
||||
[state, no_user_modify] double last_failover;
|
||||
|
||||
[no_user_modify] String schema_version;
|
||||
[no_user_modify] bool connected;
|
||||
[no_user_modify] bool should_connect {
|
||||
|
@ -139,6 +139,12 @@ void IdoCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckResult
|
||||
cr->SetState(ServiceOK);
|
||||
}
|
||||
|
||||
if (conn->GetEnableHa()) {
|
||||
double failoverTs = conn->GetLastFailover();
|
||||
|
||||
msgbuf << " Last failover: " << Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", failoverTs) << ".";
|
||||
}
|
||||
|
||||
/* Check whether the thresholds have been defined and match. */
|
||||
if (missingQueriesCritical.IsEmpty() && qps < queriesCritical) {
|
||||
msgbuf << " " << qps << " queries/s lower than critical threshold (" << queriesCritical << " queries/s).";
|
||||
|
@ -65,8 +65,6 @@ void IdoMysqlConnection::StatsFunc(const Dictionary::Ptr& status, const Array::P
|
||||
|
||||
void IdoMysqlConnection::Resume()
|
||||
{
|
||||
DbConnection::Resume();
|
||||
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
<< "'" << GetName() << "' resumed.";
|
||||
|
||||
@ -74,6 +72,9 @@ void IdoMysqlConnection::Resume()
|
||||
|
||||
m_QueryQueue.SetExceptionCallback(std::bind(&IdoMysqlConnection::ExceptionHandler, this, _1));
|
||||
|
||||
/* Immediately try to connect on Resume() without timer. */
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityImmediate);
|
||||
|
||||
m_TxTimer = new Timer();
|
||||
m_TxTimer->SetInterval(1);
|
||||
m_TxTimer->OnTimerExpired.connect(std::bind(&IdoMysqlConnection::TxTimerHandler, this));
|
||||
@ -83,23 +84,30 @@ void IdoMysqlConnection::Resume()
|
||||
m_ReconnectTimer->SetInterval(10);
|
||||
m_ReconnectTimer->OnTimerExpired.connect(std::bind(&IdoMysqlConnection::ReconnectTimerHandler, this));
|
||||
m_ReconnectTimer->Start();
|
||||
m_ReconnectTimer->Reschedule(0);
|
||||
|
||||
/* Start with queries after connect. */
|
||||
DbConnection::Resume();
|
||||
|
||||
ASSERT(m_Mysql->thread_safe());
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::Pause()
|
||||
{
|
||||
m_ReconnectTimer.reset();
|
||||
Log(LogDebug, "IdoMysqlConnection")
|
||||
<< "Attempting to pause '" << GetName() << "'.";
|
||||
|
||||
DbConnection::Pause();
|
||||
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
#ifdef I2_DEBUG /* I2_DEBUG */
|
||||
Log(LogDebug, "IdoMysqlConnection")
|
||||
<< "Rescheduling disconnect task.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityLow);
|
||||
|
||||
/* Work on remaining tasks but never delete the threads, for HA resuming later. */
|
||||
m_QueryQueue.Join();
|
||||
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
@ -137,6 +145,9 @@ void IdoMysqlConnection::Disconnect()
|
||||
m_Mysql->close(&m_Connection);
|
||||
|
||||
SetConnected(false);
|
||||
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
<< "Disconnected from '" << GetName() << "' database '" << GetDatabase() << "'.";
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::TxTimerHandler()
|
||||
@ -154,8 +165,8 @@ void IdoMysqlConnection::NewTransaction()
|
||||
<< "Scheduling new transaction and finishing async queries.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalNewTransaction, this), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishAsyncQueries, this), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalNewTransaction, this), PriorityNormal);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishAsyncQueries, this), PriorityNormal);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::InternalNewTransaction()
|
||||
@ -176,7 +187,8 @@ void IdoMysqlConnection::ReconnectTimerHandler()
|
||||
<< "Scheduling reconnect task.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityHigh);
|
||||
/* Only allow Reconnect events with high priority. */
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityImmediate);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::Reconnect()
|
||||
@ -194,6 +206,7 @@ void IdoMysqlConnection::Reconnect()
|
||||
|
||||
bool reconnect = false;
|
||||
|
||||
/* Ensure to close old connections first. */
|
||||
if (GetConnected()) {
|
||||
/* Check if we're really still connected */
|
||||
if (m_Mysql->ping(&m_Connection) == 0)
|
||||
@ -204,6 +217,9 @@ void IdoMysqlConnection::Reconnect()
|
||||
reconnect = true;
|
||||
}
|
||||
|
||||
Log(LogDebug, "IdoMysqlConnection")
|
||||
<< "Reconnect: Clearing ID cache.";
|
||||
|
||||
ClearIDCache();
|
||||
|
||||
String ihost, isocket_path, iuser, ipasswd, idb;
|
||||
@ -258,6 +274,9 @@ void IdoMysqlConnection::Reconnect()
|
||||
BOOST_THROW_EXCEPTION(std::runtime_error(m_Mysql->error(&m_Connection)));
|
||||
}
|
||||
|
||||
Log(LogNotice, "IdoMysqlConnection")
|
||||
<< "Reconnect: '" << GetName() << "' is now connected to database '" << GetDatabase() << "'.";
|
||||
|
||||
SetConnected(true);
|
||||
|
||||
IdoMysqlResult result = Query("SELECT @@global.max_allowed_packet AS max_allowed_packet");
|
||||
@ -343,12 +362,16 @@ void IdoMysqlConnection::Reconnect()
|
||||
else
|
||||
status_update_time = 0;
|
||||
|
||||
double status_update_age = Utility::GetTime() - status_update_time;
|
||||
double now = Utility::GetTime();
|
||||
|
||||
Log(LogNotice, "IdoMysqlConnection")
|
||||
<< "Last update by '" << endpoint_name << "' was " << status_update_age << "s ago.";
|
||||
double status_update_age = now - status_update_time;
|
||||
double failoverTimeout = GetFailoverTimeout();
|
||||
|
||||
if (status_update_age < failoverTimeout) {
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
<< "Last update by endpoint '" << endpoint_name << "' was "
|
||||
<< status_update_age << "s ago (< failover timeout of " << failoverTimeout << "s). Retrying.";
|
||||
|
||||
if (status_update_age < GetFailoverTimeout()) {
|
||||
m_Mysql->close(&m_Connection);
|
||||
SetConnected(false);
|
||||
SetShouldConnect(false);
|
||||
@ -366,9 +389,15 @@ void IdoMysqlConnection::Reconnect()
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
SetLastFailover(now);
|
||||
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
<< "Last update by endpoint '" << endpoint_name << "' was "
|
||||
<< status_update_age << "s ago. Taking over '" << GetName() << "' in HA zone '" << Zone::GetLocalZone()->GetName() << "'.";
|
||||
}
|
||||
|
||||
Log(LogNotice, "IdoMysqlConnection", "Enabling IDO connection.");
|
||||
Log(LogNotice, "IdoMysqlConnection", "Enabling IDO connection in HA zone.");
|
||||
}
|
||||
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
@ -435,9 +464,9 @@ void IdoMysqlConnection::Reconnect()
|
||||
<< "Scheduling session table clear and finish connect task.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityNormal);
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityNormal);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::FinishConnect(double startTime)
|
||||
@ -450,7 +479,8 @@ void IdoMysqlConnection::FinishConnect(double startTime)
|
||||
FinishAsyncQueries();
|
||||
|
||||
Log(LogInformation, "IdoMysqlConnection")
|
||||
<< "Finished reconnecting to MySQL IDO database in " << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
|
||||
<< "Finished reconnecting to '" << GetName() << "' database '" << GetDatabase() << "' in "
|
||||
<< std::setw(2) << Utility::GetTime() - startTime << " second(s).";
|
||||
|
||||
Query("COMMIT");
|
||||
Query("BEGIN");
|
||||
@ -710,7 +740,7 @@ void IdoMysqlConnection::ActivateObject(const DbObject::Ptr& dbobj)
|
||||
<< "Scheduling object activation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityNormal);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj)
|
||||
@ -755,7 +785,7 @@ void IdoMysqlConnection::DeactivateObject(const DbObject::Ptr& dbobj)
|
||||
<< "Scheduling object deactivation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityNormal);
|
||||
}
|
||||
|
||||
void IdoMysqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj)
|
||||
|
@ -72,8 +72,6 @@ void IdoPgsqlConnection::StatsFunc(const Dictionary::Ptr& status, const Array::P
|
||||
|
||||
void IdoPgsqlConnection::Resume()
|
||||
{
|
||||
DbConnection::Resume();
|
||||
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "'" << GetName() << "' resumed.";
|
||||
|
||||
@ -81,6 +79,9 @@ void IdoPgsqlConnection::Resume()
|
||||
|
||||
m_QueryQueue.SetExceptionCallback(std::bind(&IdoPgsqlConnection::ExceptionHandler, this, _1));
|
||||
|
||||
/* Immediately try to connect on Resume() without timer. */
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityImmediate);
|
||||
|
||||
m_TxTimer = new Timer();
|
||||
m_TxTimer->SetInterval(1);
|
||||
m_TxTimer->OnTimerExpired.connect(std::bind(&IdoPgsqlConnection::TxTimerHandler, this));
|
||||
@ -90,23 +91,26 @@ void IdoPgsqlConnection::Resume()
|
||||
m_ReconnectTimer->SetInterval(10);
|
||||
m_ReconnectTimer->OnTimerExpired.connect(std::bind(&IdoPgsqlConnection::ReconnectTimerHandler, this));
|
||||
m_ReconnectTimer->Start();
|
||||
m_ReconnectTimer->Reschedule(0);
|
||||
|
||||
/* Start with queries after connect. */
|
||||
DbConnection::Resume();
|
||||
|
||||
ASSERT(m_Pgsql->isthreadsafe());
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::Pause()
|
||||
{
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
DbConnection::Pause();
|
||||
|
||||
m_ReconnectTimer.reset();
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Disconnect, this), PriorityLow);
|
||||
|
||||
/* Work on remaining tasks but never delete the threads, for HA resuming later. */
|
||||
m_QueryQueue.Join();
|
||||
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "'" << GetName() << "' paused.";
|
||||
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::ExceptionHandler(boost::exception_ptr exp)
|
||||
@ -138,6 +142,9 @@ void IdoPgsqlConnection::Disconnect()
|
||||
|
||||
m_Pgsql->finish(m_Connection);
|
||||
SetConnected(false);
|
||||
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "Disconnected from '" << GetName() << "' database '" << GetDatabase() << "'.";
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::TxTimerHandler()
|
||||
@ -150,7 +157,7 @@ void IdoPgsqlConnection::NewTransaction()
|
||||
if (IsPaused())
|
||||
return;
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalNewTransaction, this), PriorityHigh, true);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalNewTransaction, this), PriorityNormal, true);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::InternalNewTransaction()
|
||||
@ -166,6 +173,7 @@ void IdoPgsqlConnection::InternalNewTransaction()
|
||||
|
||||
void IdoPgsqlConnection::ReconnectTimerHandler()
|
||||
{
|
||||
/* Only allow Reconnect events with high priority. */
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::Reconnect, this), PriorityHigh);
|
||||
}
|
||||
|
||||
@ -323,12 +331,16 @@ void IdoPgsqlConnection::Reconnect()
|
||||
else
|
||||
status_update_time = 0;
|
||||
|
||||
double status_update_age = Utility::GetTime() - status_update_time;
|
||||
double now = Utility::GetTime();
|
||||
|
||||
Log(LogNotice, "IdoPgsqlConnection")
|
||||
<< "Last update by '" << endpoint_name << "' was " << status_update_age << "s ago.";
|
||||
double status_update_age = now - status_update_time;
|
||||
double failoverTimeout = GetFailoverTimeout();
|
||||
|
||||
if (status_update_age < GetFailoverTimeout()) {
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "Last update by endpoint '" << endpoint_name << "' was "
|
||||
<< status_update_age << "s ago (< failover timeout of " << failoverTimeout << "s). Retrying.";
|
||||
|
||||
m_Pgsql->finish(m_Connection);
|
||||
SetConnected(false);
|
||||
SetShouldConnect(false);
|
||||
@ -346,6 +358,12 @@ void IdoPgsqlConnection::Reconnect()
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
SetLastFailover(now);
|
||||
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "Last update by endpoint '" << endpoint_name << "' was "
|
||||
<< status_update_age << "s ago. Taking over '" << GetName() << "' in HA zone '" << Zone::GetLocalZone()->GetName() << "'.";
|
||||
}
|
||||
|
||||
Log(LogNotice, "IdoPgsqlConnection", "Enabling IDO connection.");
|
||||
@ -409,9 +427,9 @@ void IdoPgsqlConnection::Reconnect()
|
||||
|
||||
UpdateAllObjects();
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::ClearTablesBySession, this), PriorityNormal);
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::FinishConnect, this, startTime), PriorityNormal);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::FinishConnect(double startTime)
|
||||
@ -422,7 +440,8 @@ void IdoPgsqlConnection::FinishConnect(double startTime)
|
||||
return;
|
||||
|
||||
Log(LogInformation, "IdoPgsqlConnection")
|
||||
<< "Finished reconnecting to PostgreSQL IDO database in " << std::setw(2) << Utility::GetTime() - startTime << " second(s).";
|
||||
<< "Finished reconnecting to '" << GetName() << "' database '" << GetDatabase() << "' in "
|
||||
<< std::setw(2) << Utility::GetTime() - startTime << " second(s).";
|
||||
|
||||
Query("COMMIT");
|
||||
Query("BEGIN");
|
||||
@ -559,7 +578,7 @@ void IdoPgsqlConnection::ActivateObject(const DbObject::Ptr& dbobj)
|
||||
if (IsPaused())
|
||||
return;
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalActivateObject, this, dbobj), PriorityNormal);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj)
|
||||
@ -596,7 +615,7 @@ void IdoPgsqlConnection::DeactivateObject(const DbObject::Ptr& dbobj)
|
||||
if (IsPaused())
|
||||
return;
|
||||
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh);
|
||||
m_QueryQueue.Enqueue(std::bind(&IdoPgsqlConnection::InternalDeactivateObject, this, dbobj), PriorityNormal);
|
||||
}
|
||||
|
||||
void IdoPgsqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj)
|
||||
|
@ -12,8 +12,8 @@ set(remote_SOURCES
|
||||
apiclient.cpp apiclient.hpp
|
||||
apifunction.cpp apifunction.hpp
|
||||
apilistener.cpp apilistener.hpp apilistener-ti.hpp apilistener-configsync.cpp apilistener-filesync.cpp
|
||||
apilistener-authority.cpp
|
||||
apiuser.cpp apiuser.hpp apiuser-ti.hpp
|
||||
authority.cpp
|
||||
configfileshandler.cpp configfileshandler.hpp
|
||||
configobjectutility.cpp configobjectutility.hpp
|
||||
configpackageshandler.cpp configpackageshandler.hpp
|
||||
|
@ -4,11 +4,20 @@
|
||||
#include "remote/apilistener.hpp"
|
||||
#include "base/configtype.hpp"
|
||||
#include "base/utility.hpp"
|
||||
#include "base/convert.hpp"
|
||||
|
||||
using namespace icinga;
|
||||
|
||||
void ApiListener::UpdateObjectAuthority()
|
||||
{
|
||||
ApiListener::Ptr instance = ApiListener::GetInstance();
|
||||
|
||||
if (!instance)
|
||||
return;
|
||||
|
||||
Log(LogNotice, "ApiListener")
|
||||
<< "Updating object authority for objects at endpoint '" << instance->GetIdentity() << "'.";
|
||||
|
||||
Zone::Ptr my_zone = Zone::GetLocalZone();
|
||||
|
||||
std::vector<Endpoint::Ptr> endpoints;
|
||||
@ -30,7 +39,8 @@ void ApiListener::UpdateObjectAuthority()
|
||||
|
||||
double mainTime = Application::GetMainTime();
|
||||
|
||||
if (num_total > 1 && endpoints.size() <= 1 && (mainTime == 0 || Utility::GetTime() - mainTime < 60))
|
||||
/* 30 seconds cold startup, don't update any authority to give the secondary endpoint time to reconnect. */
|
||||
if (num_total > 1 && endpoints.size() <= 1 && (mainTime == 0 || Utility::GetTime() - mainTime < 30))
|
||||
return;
|
||||
|
||||
std::sort(endpoints.begin(), endpoints.end(),
|
||||
@ -57,6 +67,12 @@ void ApiListener::UpdateObjectAuthority()
|
||||
else
|
||||
authority = endpoints[Utility::SDBM(object->GetName()) % endpoints.size()] == my_endpoint;
|
||||
|
||||
#ifdef I2_DEBUG
|
||||
// //Enable on demand, causes heavy logging on each run.
|
||||
// Log(LogDebug, "ApiListener")
|
||||
// << "Setting authority '" << Convert::ToString(authority) << "' for object '" << object->GetName() << "' of type '" << object->GetReflectionType()->GetName() << "'.";
|
||||
#endif /* I2_DEBUG */
|
||||
|
||||
object->SetAuthority(authority);
|
||||
}
|
||||
}
|
@ -252,9 +252,13 @@ void ApiListener::Start(bool runtimeCreated)
|
||||
m_ReconnectTimer->Start();
|
||||
m_ReconnectTimer->Reschedule(0);
|
||||
|
||||
/* Keep this in relative sync with the cold startup in UpdateObjectAuthority() and the reconnect interval above.
|
||||
* Previous: 60s reconnect, 30s OA, 60s cold startup.
|
||||
* Now: 10s reconnect, 10s OA, 30s cold startup.
|
||||
*/
|
||||
m_AuthorityTimer = new Timer();
|
||||
m_AuthorityTimer->OnTimerExpired.connect(std::bind(&ApiListener::UpdateObjectAuthority));
|
||||
m_AuthorityTimer->SetInterval(30);
|
||||
m_AuthorityTimer->SetInterval(10);
|
||||
m_AuthorityTimer->Start();
|
||||
|
||||
m_CleanupCertificateRequestsTimer = new Timer();
|
||||
|
Loading…
x
Reference in New Issue
Block a user