mirror of
https://github.com/Icinga/icinga2.git
synced 2025-07-23 05:34:48 +02:00
Add metrics about RemoteCheckQueue to Icinga check, API and logs
refs #4841
This commit is contained in:
parent
d8c31353e4
commit
06e381ceea
@ -256,7 +256,7 @@ Configuration Attributes:
|
||||
|
||||
Name | Type | Description
|
||||
--------------------------|-----------------------|----------------------------------
|
||||
concurrent\_checks | Number | **Optional and Deprecated.** The maximum number of concurrent checks. Was replaced by global constant `MaxConcurrentChecks` which will be set if you still use `concurrent_checks`.
|
||||
concurrent\_checks | Number | **Optional and deprecated.** The maximum number of concurrent checks. It was replaced by the global constant `MaxConcurrentChecks`, which will be set if you still use `concurrent_checks`.
|
||||
|
||||
## CheckResultReader <a id="objecttype-checkresultreader"></a>
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "icinga/cib.hpp"
|
||||
#include "icinga/host.hpp"
|
||||
#include "icinga/service.hpp"
|
||||
#include "icinga/clusterevents.hpp"
|
||||
#include "base/objectlock.hpp"
|
||||
#include "base/utility.hpp"
|
||||
#include "base/perfdatavalue.hpp"
|
||||
@ -305,6 +306,8 @@ void CIB::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata) {
|
||||
status->Set("active_service_checks_15min", GetActiveServiceChecksStatistics(60 * 15));
|
||||
status->Set("passive_service_checks_15min", GetPassiveServiceChecksStatistics(60 * 15));
|
||||
|
||||
status->Set("remote_check_queue", ClusterEvents::GetCheckRequestQueueSize());
|
||||
|
||||
CheckableCheckStatistics scs = CalculateServiceCheckStats();
|
||||
|
||||
status->Set("min_latency", scs.min_latency);
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "remote/apilistener.hpp"
|
||||
#include "base/serializer.hpp"
|
||||
#include "base/exception.hpp"
|
||||
#include <boost/thread/once.hpp>
|
||||
#include <thread>
|
||||
|
||||
using namespace icinga;
|
||||
@ -28,6 +29,9 @@ using namespace icinga;
|
||||
boost::mutex ClusterEvents::m_Mutex;
|
||||
std::deque<std::function<void ()>> ClusterEvents::m_CheckRequestQueue;
|
||||
bool ClusterEvents::m_CheckSchedulerRunning;
|
||||
int ClusterEvents::m_ChecksExecutedDuringInterval;
|
||||
int ClusterEvents::m_ChecksDroppedDuringInterval;
|
||||
Timer::Ptr ClusterEvents::m_LogTimer;
|
||||
|
||||
void ClusterEvents::RemoteCheckThreadProc()
|
||||
{
|
||||
@ -45,6 +49,7 @@ void ClusterEvents::RemoteCheckThreadProc()
|
||||
|
||||
auto callback = m_CheckRequestQueue.front();
|
||||
m_CheckRequestQueue.pop_front();
|
||||
m_ChecksExecutedDuringInterval++;
|
||||
lock.unlock();
|
||||
|
||||
callback();
|
||||
@ -58,10 +63,19 @@ void ClusterEvents::RemoteCheckThreadProc()
|
||||
|
||||
void ClusterEvents::EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
|
||||
{
|
||||
static boost::once_flag once = BOOST_ONCE_INIT;
|
||||
|
||||
boost::call_once(once, []() {
|
||||
m_LogTimer = new Timer();
|
||||
m_LogTimer->SetInterval(10);
|
||||
m_LogTimer->OnTimerExpired.connect(std::bind(ClusterEvents::LogRemoteCheckQueueInformation));
|
||||
m_LogTimer->Start();
|
||||
});
|
||||
|
||||
boost::mutex::scoped_lock lock(m_Mutex);
|
||||
|
||||
if (m_CheckRequestQueue.size() >= 25000) {
|
||||
Log(LogCritical, "ClusterEvents", "Remote check queue ran out of slots. Discarding remote check request.");
|
||||
m_ChecksDroppedDuringInterval++;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -184,3 +198,28 @@ void ClusterEvents::ExecuteCheckFromQueue(const MessageOrigin::Ptr& origin, cons
|
||||
}
|
||||
}
|
||||
|
||||
int ClusterEvents::GetCheckRequestQueueSize()
|
||||
{
|
||||
return m_CheckRequestQueue.size();
|
||||
}
|
||||
|
||||
void ClusterEvents::LogRemoteCheckQueueInformation() {
|
||||
if (m_ChecksDroppedDuringInterval > 0) {
|
||||
Log(LogCritical, "ClusterEvents")
|
||||
<< "Remote check queue ran out of slots. "
|
||||
<< m_ChecksDroppedDuringInterval << " checks dropped.";
|
||||
m_ChecksDroppedDuringInterval = 0;
|
||||
}
|
||||
|
||||
if (m_ChecksExecutedDuringInterval == 0)
|
||||
return;
|
||||
|
||||
Log(LogInformation, "RemoteCheckQueue")
|
||||
<< "items: " << m_CheckRequestQueue.size()
|
||||
<< ", rate: " << m_ChecksExecutedDuringInterval / 10 << "/s "
|
||||
<< "(" << m_ChecksExecutedDuringInterval * 6 << "/min "
|
||||
<< m_ChecksExecutedDuringInterval * 6 * 5 << "/5min "
|
||||
<< m_ChecksExecutedDuringInterval * 6 * 15 << "/15min" << ");";
|
||||
|
||||
m_ChecksExecutedDuringInterval = 0;
|
||||
}
|
@ -75,10 +75,16 @@ public:
|
||||
NotificationType notificationType, const CheckResult::Ptr& cr, const String& author, const String& commentText, const MessageOrigin::Ptr& origin);
|
||||
static Value NotificationSentToAllUsersAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
||||
|
||||
static int GetCheckRequestQueueSize();
|
||||
static void LogRemoteCheckQueueInformation();
|
||||
|
||||
private:
|
||||
static boost::mutex m_Mutex;
|
||||
static std::deque<std::function<void ()>> m_CheckRequestQueue;
|
||||
static bool m_CheckSchedulerRunning;
|
||||
static int m_ChecksExecutedDuringInterval;
|
||||
static int m_ChecksDroppedDuringInterval;
|
||||
static Timer::Ptr m_LogTimer;
|
||||
|
||||
static void RemoteCheckThreadProc();
|
||||
static void EnqueueCheck(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "icinga/checkcommand.hpp"
|
||||
#include "icinga/macroprocessor.hpp"
|
||||
#include "icinga/icingaapplication.hpp"
|
||||
#include "icinga/clusterevents.hpp"
|
||||
#include "base/application.hpp"
|
||||
#include "base/objectlock.hpp"
|
||||
#include "base/utility.hpp"
|
||||
@ -84,6 +85,8 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
|
||||
perfdata->Add(new PerfdataValue("active_service_checks_15min", CIB::GetActiveServiceChecksStatistics(60 * 15)));
|
||||
perfdata->Add(new PerfdataValue("passive_service_checks_15min", CIB::GetPassiveServiceChecksStatistics(60 * 15)));
|
||||
|
||||
perfdata->Add(new PerfdataValue("remote_check_queue", ClusterEvents::GetCheckRequestQueueSize()));
|
||||
|
||||
CheckableCheckStatistics scs = CIB::CalculateServiceCheckStats();
|
||||
|
||||
perfdata->Add(new PerfdataValue("min_latency", scs.min_latency));
|
||||
|
Loading…
x
Reference in New Issue
Block a user