mirror of
https://github.com/Icinga/icinga2.git
synced 2025-07-25 06:34:42 +02:00
Add metrics about RemoteCheckQueue to Icinga check, API and logs
refs #4841
This commit is contained in:
parent
d8c31353e4
commit
06e381ceea
@ -256,7 +256,7 @@ Configuration Attributes:
|
|||||||
|
|
||||||
Name | Type | Description
|
Name | Type | Description
|
||||||
--------------------------|-----------------------|----------------------------------
|
--------------------------|-----------------------|----------------------------------
|
||||||
concurrent\_checks | Number | **Optional and Deprecated.** The maximum number of concurrent checks. Was replaced by global constant `MaxConcurrentChecks` which will be set if you still use `concurrent_checks`.
|
concurrent\_checks | Number | **Optional and deprecated.** The maximum number of concurrent checks. Was replaced by global constant `MaxConcurrentChecks` which will be set if you still use `concurrent_checks`.
|
||||||
|
|
||||||
## CheckResultReader <a id="objecttype-checkresultreader"></a>
|
## CheckResultReader <a id="objecttype-checkresultreader"></a>
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include "icinga/cib.hpp"
|
#include "icinga/cib.hpp"
|
||||||
#include "icinga/host.hpp"
|
#include "icinga/host.hpp"
|
||||||
#include "icinga/service.hpp"
|
#include "icinga/service.hpp"
|
||||||
|
#include "icinga/clusterevents.hpp"
|
||||||
#include "base/objectlock.hpp"
|
#include "base/objectlock.hpp"
|
||||||
#include "base/utility.hpp"
|
#include "base/utility.hpp"
|
||||||
#include "base/perfdatavalue.hpp"
|
#include "base/perfdatavalue.hpp"
|
||||||
@ -305,6 +306,8 @@ void CIB::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata) {
|
|||||||
status->Set("active_service_checks_15min", GetActiveServiceChecksStatistics(60 * 15));
|
status->Set("active_service_checks_15min", GetActiveServiceChecksStatistics(60 * 15));
|
||||||
status->Set("passive_service_checks_15min", GetPassiveServiceChecksStatistics(60 * 15));
|
status->Set("passive_service_checks_15min", GetPassiveServiceChecksStatistics(60 * 15));
|
||||||
|
|
||||||
|
status->Set("remote_check_queue", ClusterEvents::GetCheckRequestQueueSize());
|
||||||
|
|
||||||
CheckableCheckStatistics scs = CalculateServiceCheckStats();
|
CheckableCheckStatistics scs = CalculateServiceCheckStats();
|
||||||
|
|
||||||
status->Set("min_latency", scs.min_latency);
|
status->Set("min_latency", scs.min_latency);
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "remote/apilistener.hpp"
|
#include "remote/apilistener.hpp"
|
||||||
#include "base/serializer.hpp"
|
#include "base/serializer.hpp"
|
||||||
#include "base/exception.hpp"
|
#include "base/exception.hpp"
|
||||||
|
#include <boost/thread/once.hpp>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
using namespace icinga;
|
using namespace icinga;
|
||||||
@ -28,6 +29,9 @@ using namespace icinga;
|
|||||||
boost::mutex ClusterEvents::m_Mutex;
|
boost::mutex ClusterEvents::m_Mutex;
|
||||||
std::deque<std::function<void ()>> ClusterEvents::m_CheckRequestQueue;
|
std::deque<std::function<void ()>> ClusterEvents::m_CheckRequestQueue;
|
||||||
bool ClusterEvents::m_CheckSchedulerRunning;
|
bool ClusterEvents::m_CheckSchedulerRunning;
|
||||||
|
int ClusterEvents::m_ChecksExecutedDuringInterval;
|
||||||
|
int ClusterEvents::m_ChecksDroppedDuringInterval;
|
||||||
|
Timer::Ptr ClusterEvents::m_LogTimer;
|
||||||
|
|
||||||
void ClusterEvents::RemoteCheckThreadProc()
|
void ClusterEvents::RemoteCheckThreadProc()
|
||||||
{
|
{
|
||||||
@ -45,6 +49,7 @@ void ClusterEvents::RemoteCheckThreadProc()
|
|||||||
|
|
||||||
auto callback = m_CheckRequestQueue.front();
|
auto callback = m_CheckRequestQueue.front();
|
||||||
m_CheckRequestQueue.pop_front();
|
m_CheckRequestQueue.pop_front();
|
||||||
|
m_ChecksExecutedDuringInterval++;
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
callback();
|
callback();
|
||||||
@ -58,10 +63,19 @@ void ClusterEvents::RemoteCheckThreadProc()
|
|||||||
|
|
||||||
void ClusterEvents::EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
|
void ClusterEvents::EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
|
||||||
{
|
{
|
||||||
|
static boost::once_flag once = BOOST_ONCE_INIT;
|
||||||
|
|
||||||
|
boost::call_once(once, []() {
|
||||||
|
m_LogTimer = new Timer();
|
||||||
|
m_LogTimer->SetInterval(10);
|
||||||
|
m_LogTimer->OnTimerExpired.connect(std::bind(ClusterEvents::LogRemoteCheckQueueInformation));
|
||||||
|
m_LogTimer->Start();
|
||||||
|
});
|
||||||
|
|
||||||
boost::mutex::scoped_lock lock(m_Mutex);
|
boost::mutex::scoped_lock lock(m_Mutex);
|
||||||
|
|
||||||
if (m_CheckRequestQueue.size() >= 25000) {
|
if (m_CheckRequestQueue.size() >= 25000) {
|
||||||
Log(LogCritical, "ClusterEvents", "Remote check queue ran out of slots. Discarding remote check request.");
|
m_ChecksDroppedDuringInterval++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,3 +198,28 @@ void ClusterEvents::ExecuteCheckFromQueue(const MessageOrigin::Ptr& origin, cons
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ClusterEvents::GetCheckRequestQueueSize()
|
||||||
|
{
|
||||||
|
return m_CheckRequestQueue.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ClusterEvents::LogRemoteCheckQueueInformation() {
|
||||||
|
if (m_ChecksDroppedDuringInterval > 0) {
|
||||||
|
Log(LogCritical, "ClusterEvents")
|
||||||
|
<< "Remote check queue ran out of slots. "
|
||||||
|
<< m_ChecksDroppedDuringInterval << " checks dropped.";
|
||||||
|
m_ChecksDroppedDuringInterval = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_ChecksExecutedDuringInterval == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
Log(LogInformation, "RemoteCheckQueue")
|
||||||
|
<< "items: " << m_CheckRequestQueue.size()
|
||||||
|
<< ", rate: " << m_ChecksExecutedDuringInterval / 10 << "/s "
|
||||||
|
<< "(" << m_ChecksExecutedDuringInterval * 6 << "/min "
|
||||||
|
<< m_ChecksExecutedDuringInterval * 6 * 5 << "/5min "
|
||||||
|
<< m_ChecksExecutedDuringInterval * 6 * 15 << "/15min" << ");";
|
||||||
|
|
||||||
|
m_ChecksExecutedDuringInterval = 0;
|
||||||
|
}
|
@ -75,10 +75,16 @@ public:
|
|||||||
NotificationType notificationType, const CheckResult::Ptr& cr, const String& author, const String& commentText, const MessageOrigin::Ptr& origin);
|
NotificationType notificationType, const CheckResult::Ptr& cr, const String& author, const String& commentText, const MessageOrigin::Ptr& origin);
|
||||||
static Value NotificationSentToAllUsersAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
static Value NotificationSentToAllUsersAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
||||||
|
|
||||||
|
static int GetCheckRequestQueueSize();
|
||||||
|
static void LogRemoteCheckQueueInformation();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static boost::mutex m_Mutex;
|
static boost::mutex m_Mutex;
|
||||||
static std::deque<std::function<void ()>> m_CheckRequestQueue;
|
static std::deque<std::function<void ()>> m_CheckRequestQueue;
|
||||||
static bool m_CheckSchedulerRunning;
|
static bool m_CheckSchedulerRunning;
|
||||||
|
static int m_ChecksExecutedDuringInterval;
|
||||||
|
static int m_ChecksDroppedDuringInterval;
|
||||||
|
static Timer::Ptr m_LogTimer;
|
||||||
|
|
||||||
static void RemoteCheckThreadProc();
|
static void RemoteCheckThreadProc();
|
||||||
static void EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
static void EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include "icinga/checkcommand.hpp"
|
#include "icinga/checkcommand.hpp"
|
||||||
#include "icinga/macroprocessor.hpp"
|
#include "icinga/macroprocessor.hpp"
|
||||||
#include "icinga/icingaapplication.hpp"
|
#include "icinga/icingaapplication.hpp"
|
||||||
|
#include "icinga/clusterevents.hpp"
|
||||||
#include "base/application.hpp"
|
#include "base/application.hpp"
|
||||||
#include "base/objectlock.hpp"
|
#include "base/objectlock.hpp"
|
||||||
#include "base/utility.hpp"
|
#include "base/utility.hpp"
|
||||||
@ -84,6 +85,8 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
|
|||||||
perfdata->Add(new PerfdataValue("active_service_checks_15min", CIB::GetActiveServiceChecksStatistics(60 * 15)));
|
perfdata->Add(new PerfdataValue("active_service_checks_15min", CIB::GetActiveServiceChecksStatistics(60 * 15)));
|
||||||
perfdata->Add(new PerfdataValue("passive_service_checks_15min", CIB::GetPassiveServiceChecksStatistics(60 * 15)));
|
perfdata->Add(new PerfdataValue("passive_service_checks_15min", CIB::GetPassiveServiceChecksStatistics(60 * 15)));
|
||||||
|
|
||||||
|
perfdata->Add(new PerfdataValue("remote_check_queue", ClusterEvents::GetCheckRequestQueueSize()));
|
||||||
|
|
||||||
CheckableCheckStatistics scs = CIB::CalculateServiceCheckStats();
|
CheckableCheckStatistics scs = CIB::CalculateServiceCheckStats();
|
||||||
|
|
||||||
perfdata->Add(new PerfdataValue("min_latency", scs.min_latency));
|
perfdata->Add(new PerfdataValue("min_latency", scs.min_latency));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user