diff --git a/doc/09-object-types.md b/doc/09-object-types.md
index cb8f35ccf..9e2b5b3c6 100644
--- a/doc/09-object-types.md
+++ b/doc/09-object-types.md
@@ -256,7 +256,7 @@ Configuration Attributes:
Name | Type | Description
--------------------------|-----------------------|----------------------------------
- concurrent\_checks | Number | **Optional and Deprecated.** The maximum number of concurrent checks. Was replaced by global constant `MaxConcurrentChecks` which will be set if you still use `concurrent_checks`.
+ concurrent\_checks | Number | **Optional and deprecated.** The maximum number of concurrent checks. This attribute was replaced by the global constant `MaxConcurrentChecks`, which will be set if you still use `concurrent_checks`.
## CheckResultReader
diff --git a/lib/icinga/cib.cpp b/lib/icinga/cib.cpp
index 7da5e3c8d..e68df57ad 100644
--- a/lib/icinga/cib.cpp
+++ b/lib/icinga/cib.cpp
@@ -20,6 +20,7 @@
#include "icinga/cib.hpp"
#include "icinga/host.hpp"
#include "icinga/service.hpp"
+#include "icinga/clusterevents.hpp"
#include "base/objectlock.hpp"
#include "base/utility.hpp"
#include "base/perfdatavalue.hpp"
@@ -305,6 +306,8 @@ void CIB::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata) {
status->Set("active_service_checks_15min", GetActiveServiceChecksStatistics(60 * 15));
status->Set("passive_service_checks_15min", GetPassiveServiceChecksStatistics(60 * 15));
+ status->Set("remote_check_queue", ClusterEvents::GetCheckRequestQueueSize());
+
CheckableCheckStatistics scs = CalculateServiceCheckStats();
status->Set("min_latency", scs.min_latency);
diff --git a/lib/icinga/clusterevents-check.cpp b/lib/icinga/clusterevents-check.cpp
index 41e2be237..58d983eb8 100644
--- a/lib/icinga/clusterevents-check.cpp
+++ b/lib/icinga/clusterevents-check.cpp
@@ -21,6 +21,7 @@
#include "remote/apilistener.hpp"
#include "base/serializer.hpp"
#include "base/exception.hpp"
+#include
#include
using namespace icinga;
@@ -28,6 +29,9 @@ using namespace icinga;
boost::mutex ClusterEvents::m_Mutex;
std::deque> ClusterEvents::m_CheckRequestQueue;
bool ClusterEvents::m_CheckSchedulerRunning;
+int ClusterEvents::m_ChecksExecutedDuringInterval; // incremented in RemoteCheckThreadProc() for every request taken off the queue; reset by LogRemoteCheckQueueInformation()
+int ClusterEvents::m_ChecksDroppedDuringInterval; // incremented in EnqueueCheck() when the queue already holds 25000 requests; reset by LogRemoteCheckQueueInformation()
+Timer::Ptr ClusterEvents::m_LogTimer; // created once in EnqueueCheck() with SetInterval(10); its expiry handler is LogRemoteCheckQueueInformation()
void ClusterEvents::RemoteCheckThreadProc()
{
@@ -45,6 +49,7 @@ void ClusterEvents::RemoteCheckThreadProc()
auto callback = m_CheckRequestQueue.front();
m_CheckRequestQueue.pop_front();
+ m_ChecksExecutedDuringInterval++;
lock.unlock();
callback();
@@ -58,10 +63,19 @@ void ClusterEvents::RemoteCheckThreadProc()
void ClusterEvents::EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
{
+ static boost::once_flag once = BOOST_ONCE_INIT;
+
+ boost::call_once(once, []() {
+ m_LogTimer = new Timer();
+ m_LogTimer->SetInterval(10);
+ m_LogTimer->OnTimerExpired.connect(std::bind(ClusterEvents::LogRemoteCheckQueueInformation));
+ m_LogTimer->Start();
+ });
+
boost::mutex::scoped_lock lock(m_Mutex);
if (m_CheckRequestQueue.size() >= 25000) {
- Log(LogCritical, "ClusterEvents", "Remote check queue ran out of slots. Discarding remote check request.");
+ m_ChecksDroppedDuringInterval++;
return;
}
@@ -184,3 +198,28 @@ void ClusterEvents::ExecuteCheckFromQueue(const MessageOrigin::Ptr& origin, cons
}
}
+/**
+ * Returns the number of remote check requests currently waiting in the queue.
+ *
+ * The queue is modified by EnqueueCheck() and RemoteCheckThreadProc() while
+ * they hold m_Mutex, so the size has to be read under the same mutex to avoid
+ * a data race on the deque.
+ *
+ * @returns The current length of m_CheckRequestQueue.
+ */
+int ClusterEvents::GetCheckRequestQueueSize()
+{
+	boost::mutex::scoped_lock lock(m_Mutex);
+
+	/* The queue is capped at 25000 entries in EnqueueCheck(), so the value fits into an int. */
+	return static_cast<int>(m_CheckRequestQueue.size());
+}
+
+/**
+ * Timer handler which reports what happened to the remote check queue since
+ * the previous invocation.
+ *
+ * Logs a critical message if check requests were dropped because the queue was
+ * full, and an informational message with the queue length and throughput if
+ * at least one check was executed. Both interval counters are reset.
+ *
+ * The counters and the queue are shared with EnqueueCheck() and
+ * RemoteCheckThreadProc(), which modify them while holding m_Mutex. They are
+ * therefore snapshotted and reset in one critical section so that no increment
+ * can get lost between reading and resetting; logging happens after the lock
+ * has been released.
+ */
+void ClusterEvents::LogRemoteCheckQueueInformation() {
+	/* Must match the interval m_LogTimer is configured with in EnqueueCheck(). */
+	const int intervalSeconds = 10;
+	const int intervalsPerMinute = 60 / intervalSeconds;
+
+	boost::mutex::scoped_lock lock(m_Mutex);
+
+	const int dropped = m_ChecksDroppedDuringInterval;
+	const int executed = m_ChecksExecutedDuringInterval;
+	const auto queued = m_CheckRequestQueue.size();
+
+	m_ChecksDroppedDuringInterval = 0;
+	m_ChecksExecutedDuringInterval = 0;
+
+	lock.unlock();
+
+	if (dropped > 0) {
+		Log(LogCritical, "ClusterEvents")
+			<< "Remote check queue ran out of slots. "
+			<< dropped << " checks dropped.";
+	}
+
+	if (executed == 0)
+		return;
+
+	/* The per-second rate is an integer division and thus rounded down;
+	 * the per-minute figures are extrapolated from this single interval. */
+	Log(LogInformation, "RemoteCheckQueue")
+		<< "items: " << queued
+		<< ", rate: " << executed / intervalSeconds << "/s "
+		<< "(" << executed * intervalsPerMinute << "/min "
+		<< executed * intervalsPerMinute * 5 << "/5min "
+		<< executed * intervalsPerMinute * 15 << "/15min" << ");";
+}
\ No newline at end of file
diff --git a/lib/icinga/clusterevents.hpp b/lib/icinga/clusterevents.hpp
index d712b7fb8..5b8acf78a 100644
--- a/lib/icinga/clusterevents.hpp
+++ b/lib/icinga/clusterevents.hpp
@@ -75,10 +75,16 @@ public:
NotificationType notificationType, const CheckResult::Ptr& cr, const String& author, const String& commentText, const MessageOrigin::Ptr& origin);
static Value NotificationSentToAllUsersAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
+ static int GetCheckRequestQueueSize();
+ static void LogRemoteCheckQueueInformation();
+
private:
static boost::mutex m_Mutex;
static std::deque> m_CheckRequestQueue;
static bool m_CheckSchedulerRunning;
+ static int m_ChecksExecutedDuringInterval;
+ static int m_ChecksDroppedDuringInterval;
+ static Timer::Ptr m_LogTimer;
static void RemoteCheckThreadProc();
static void EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
diff --git a/lib/methods/icingachecktask.cpp b/lib/methods/icingachecktask.cpp
index 90e9390bd..16f58e71d 100644
--- a/lib/methods/icingachecktask.cpp
+++ b/lib/methods/icingachecktask.cpp
@@ -23,6 +23,7 @@
#include "icinga/checkcommand.hpp"
#include "icinga/macroprocessor.hpp"
#include "icinga/icingaapplication.hpp"
+#include "icinga/clusterevents.hpp"
#include "base/application.hpp"
#include "base/objectlock.hpp"
#include "base/utility.hpp"
@@ -84,6 +85,8 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
perfdata->Add(new PerfdataValue("active_service_checks_15min", CIB::GetActiveServiceChecksStatistics(60 * 15)));
perfdata->Add(new PerfdataValue("passive_service_checks_15min", CIB::GetPassiveServiceChecksStatistics(60 * 15)));
+ perfdata->Add(new PerfdataValue("remote_check_queue", ClusterEvents::GetCheckRequestQueueSize()));
+
CheckableCheckStatistics scs = CIB::CalculateServiceCheckStats();
perfdata->Add(new PerfdataValue("min_latency", scs.min_latency));