2012-06-14 11:23:25 +02:00
|
|
|
/******************************************************************************
 * Icinga 2                                                                   *
 * Copyright (C) 2012 Icinga Development Team (http://www.icinga.org/)        *
 *                                                                            *
 * This program is free software; you can redistribute it and/or              *
 * modify it under the terms of the GNU General Public License                *
 * as published by the Free Software Foundation; either version 2             *
 * of the License, or (at your option) any later version.                     *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *
 * GNU General Public License for more details.                               *
 *                                                                            *
 * You should have received a copy of the GNU General Public License          *
 * along with this program; if not, write to the Free Software Foundation     *
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.             *
 ******************************************************************************/
|
|
|
|
|
|
|
|
#include "i2-checker.h"
|
|
|
|
|
|
|
|
using namespace icinga;
|
|
|
|
|
|
|
|
/**
 * Returns the name of this component.
 *
 * @returns The fixed string "checker".
 */
string CheckerComponent::GetName(void) const
{
	return string("checker");
}
|
|
|
|
|
|
|
|
void CheckerComponent::Start(void)
|
|
|
|
{
|
2012-06-15 19:32:41 +02:00
|
|
|
m_CheckerEndpoint = boost::make_shared<VirtualEndpoint>();
|
2012-06-14 11:23:25 +02:00
|
|
|
m_CheckerEndpoint->RegisterTopicHandler("checker::AssignService",
|
2012-06-16 03:42:54 +02:00
|
|
|
boost::bind(&CheckerComponent::AssignServiceRequestHandler, this, _2, _3));
|
2012-06-15 19:32:41 +02:00
|
|
|
m_CheckerEndpoint->RegisterTopicHandler("checker::ClearServices",
|
2012-06-16 03:42:54 +02:00
|
|
|
boost::bind(&CheckerComponent::ClearServicesRequestHandler, this, _2, _3));
|
2012-06-14 11:23:25 +02:00
|
|
|
m_CheckerEndpoint->RegisterPublication("checker::CheckResult");
|
2012-06-27 18:43:34 +02:00
|
|
|
EndpointManager::GetInstance()->RegisterEndpoint(m_CheckerEndpoint);
|
2012-06-14 11:23:25 +02:00
|
|
|
|
2012-06-15 19:32:41 +02:00
|
|
|
m_CheckTimer = boost::make_shared<Timer>();
|
2012-06-19 12:23:52 +02:00
|
|
|
m_CheckTimer->SetInterval(5);
|
2012-06-15 19:32:41 +02:00
|
|
|
m_CheckTimer->OnTimerExpired.connect(boost::bind(&CheckerComponent::CheckTimerHandler, this));
|
2012-06-14 11:23:25 +02:00
|
|
|
m_CheckTimer->Start();
|
|
|
|
|
2012-06-24 16:30:16 +02:00
|
|
|
NagiosCheckTask::Register();
|
2012-06-14 11:23:25 +02:00
|
|
|
|
2012-06-17 20:35:56 +02:00
|
|
|
m_ResultTimer = boost::make_shared<Timer>();
|
2012-06-17 23:10:03 +02:00
|
|
|
m_ResultTimer->SetInterval(5);
|
2012-06-17 20:35:56 +02:00
|
|
|
m_ResultTimer->OnTimerExpired.connect(boost::bind(&CheckerComponent::ResultTimerHandler, this));
|
|
|
|
m_ResultTimer->Start();
|
2012-06-14 11:23:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void CheckerComponent::Stop(void)
|
|
|
|
{
|
2012-06-27 18:43:34 +02:00
|
|
|
EndpointManager::Ptr mgr = EndpointManager::GetInstance();
|
2012-06-14 11:23:25 +02:00
|
|
|
|
2012-06-14 13:21:40 +02:00
|
|
|
if (mgr)
|
|
|
|
mgr->UnregisterEndpoint(m_CheckerEndpoint);
|
2012-06-14 11:23:25 +02:00
|
|
|
}
|
|
|
|
|
2012-06-15 19:32:41 +02:00
|
|
|
void CheckerComponent::CheckTimerHandler(void)
|
2012-06-14 11:23:25 +02:00
|
|
|
{
|
|
|
|
time_t now;
|
|
|
|
time(&now);
|
|
|
|
|
2012-06-18 17:23:48 +02:00
|
|
|
Application::Log(LogDebug, "checker", "CheckTimerHandler entered.");
|
|
|
|
|
|
|
|
long tasks = 0;
|
|
|
|
|
2012-06-19 09:38:20 +02:00
|
|
|
while (!m_Services.empty()) {
|
2012-06-14 11:23:25 +02:00
|
|
|
Service service = m_Services.top();
|
|
|
|
|
2012-06-19 09:38:20 +02:00
|
|
|
if (service.GetNextCheck() > now)
|
2012-06-14 11:23:25 +02:00
|
|
|
break;
|
|
|
|
|
2012-06-17 22:46:40 +02:00
|
|
|
m_Services.pop();
|
|
|
|
|
2012-06-20 10:46:18 +02:00
|
|
|
Application::Log(LogDebug, "checker", "Executing service check for '" + service.GetName() + "'");
|
2012-06-17 20:35:56 +02:00
|
|
|
|
2012-06-22 12:22:36 +02:00
|
|
|
m_PendingServices.insert(service.GetConfigObject());
|
2012-06-21 12:51:50 +02:00
|
|
|
|
2012-06-17 20:35:56 +02:00
|
|
|
CheckTask::Ptr task = CheckTask::CreateTask(service);
|
2012-06-18 17:23:48 +02:00
|
|
|
task->Enqueue();
|
|
|
|
|
|
|
|
tasks++;
|
2012-06-14 11:23:25 +02:00
|
|
|
}
|
|
|
|
|
2012-06-18 17:23:48 +02:00
|
|
|
Application::Log(LogDebug, "checker", "CheckTimerHandler: past loop.");
|
|
|
|
|
|
|
|
CheckTask::FlushQueue();
|
|
|
|
|
|
|
|
stringstream msgbuf;
|
|
|
|
msgbuf << "CheckTimerHandler: created " << tasks << " tasks";
|
2012-06-20 10:46:18 +02:00
|
|
|
Application::Log(LogInformation, "checker", msgbuf.str());
|
2012-06-14 11:23:25 +02:00
|
|
|
}
|
|
|
|
|
2012-06-17 20:35:56 +02:00
|
|
|
void CheckerComponent::ResultTimerHandler(void)
|
|
|
|
{
|
2012-06-18 17:23:48 +02:00
|
|
|
Application::Log(LogDebug, "checker", "ResultTimerHandler entered.");
|
|
|
|
|
2012-06-19 09:38:20 +02:00
|
|
|
time_t now;
|
|
|
|
time(&now);
|
|
|
|
|
2012-06-20 10:46:18 +02:00
|
|
|
long min_latency = -1, max_latency = 0, avg_latency = 0, results = 0, failed = 0;
|
2012-06-18 17:23:48 +02:00
|
|
|
|
2012-06-19 09:38:20 +02:00
|
|
|
vector<CheckTask::Ptr> finishedTasks = CheckTask::GetFinishedTasks();
|
2012-06-17 20:35:56 +02:00
|
|
|
|
2012-06-19 09:38:20 +02:00
|
|
|
for (vector<CheckTask::Ptr>::iterator it = finishedTasks.begin(); it != finishedTasks.end(); it++) {
|
|
|
|
CheckTask::Ptr task = *it;
|
2012-06-17 20:35:56 +02:00
|
|
|
|
2012-06-18 00:14:34 +02:00
|
|
|
Service service = task->GetService();
|
2012-06-17 22:46:40 +02:00
|
|
|
|
2012-06-21 12:51:50 +02:00
|
|
|
/* if the service isn't in the set of pending services
|
|
|
|
* it was removed and we need to ignore this check result. */
|
2012-06-22 12:22:36 +02:00
|
|
|
if (m_PendingServices.find(service.GetConfigObject()) == m_PendingServices.end())
|
2012-06-21 12:51:50 +02:00
|
|
|
continue;
|
|
|
|
|
2012-06-17 20:35:56 +02:00
|
|
|
CheckResult result = task->GetResult();
|
2012-06-20 10:46:18 +02:00
|
|
|
Application::Log(LogDebug, "checker", "Got result for service '" + service.GetName() + "'");
|
2012-06-18 17:23:48 +02:00
|
|
|
|
2012-06-27 23:38:50 +02:00
|
|
|
long execution_time = result.GetExecutionEnd() - result.GetExecutionStart();
|
|
|
|
long latency = (result.GetScheduleEnd() - result.GetScheduleStart()) - execution_time;
|
2012-06-19 19:05:24 +02:00
|
|
|
avg_latency += latency;
|
|
|
|
|
|
|
|
if (min_latency == -1 || latency < min_latency)
|
|
|
|
min_latency = latency;
|
|
|
|
|
|
|
|
if (latency > max_latency)
|
|
|
|
max_latency = latency;
|
|
|
|
|
2012-06-18 17:23:48 +02:00
|
|
|
results++;
|
2012-06-18 00:14:34 +02:00
|
|
|
|
2012-06-25 14:13:24 +02:00
|
|
|
if (result.GetState() != StateOK)
|
2012-06-20 10:46:18 +02:00
|
|
|
failed++;
|
|
|
|
|
2012-06-25 15:42:46 +02:00
|
|
|
/* update service state */
|
|
|
|
service.ApplyCheckResult(result);
|
|
|
|
|
2012-06-27 18:43:34 +02:00
|
|
|
/* figure out when the next check is for this service */
|
|
|
|
service.UpdateNextCheck();
|
|
|
|
|
|
|
|
/* remove the service from the list of pending services */
|
|
|
|
m_PendingServices.erase(service.GetConfigObject());
|
|
|
|
m_Services.push(service);
|
|
|
|
|
2012-06-25 14:13:24 +02:00
|
|
|
RequestMessage rm;
|
|
|
|
rm.SetMethod("checker::CheckResult");
|
|
|
|
|
|
|
|
MessagePart params;
|
|
|
|
params.SetProperty("service", service.GetName());
|
2012-06-25 15:42:46 +02:00
|
|
|
params.SetProperty("state", static_cast<long>(service.GetState()));
|
|
|
|
params.SetProperty("state_type", static_cast<long>(service.GetStateType()));
|
|
|
|
params.SetProperty("current_attempt", static_cast<long>(service.GetCurrentCheckAttempt()));
|
2012-06-27 18:43:34 +02:00
|
|
|
params.SetProperty("next_check", static_cast<long>(service.GetNextCheck()));
|
2012-06-25 14:13:24 +02:00
|
|
|
params.SetProperty("result", result.GetDictionary());
|
|
|
|
|
|
|
|
rm.SetParams(params);
|
|
|
|
|
2012-06-27 18:43:34 +02:00
|
|
|
EndpointManager::GetInstance()->SendMulticastMessage(m_CheckerEndpoint, rm);
|
2012-06-17 20:35:56 +02:00
|
|
|
}
|
|
|
|
|
2012-06-20 15:23:31 +02:00
|
|
|
if (min_latency > 5) {
|
|
|
|
stringstream latwarn;
|
|
|
|
latwarn << "We can't keep up with the checks: minimum latency is " << min_latency << " seconds";
|
|
|
|
Application::Log(LogWarning, "checker", latwarn.str());
|
|
|
|
}
|
|
|
|
|
2012-06-22 07:24:50 +02:00
|
|
|
{
|
|
|
|
stringstream msgbuf;
|
|
|
|
msgbuf << "ResultTimerHandler: " << results << " results (" << failed << " failed); latency: avg=" << avg_latency / (results ? results : 1) << ", min=" << min_latency << ", max: " << max_latency;
|
|
|
|
Application::Log(LogInformation, "checker", msgbuf.str());
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
stringstream msgbuf;
|
2012-06-22 12:05:25 +02:00
|
|
|
msgbuf << "Pending services: " << m_PendingServices.size() << "; Idle services: " << m_Services.size();
|
2012-06-22 07:24:50 +02:00
|
|
|
Application::Log(LogInformation, "checker", msgbuf.str());
|
|
|
|
}
|
2012-06-17 20:35:56 +02:00
|
|
|
}
|
|
|
|
|
2012-06-16 03:42:54 +02:00
|
|
|
/**
 * Handles "checker::AssignService" requests.
 *
 * Reads the "service" property from the request parameters, wraps its
 * dictionary in a ConfigObject, and adds the resulting service to
 * m_Services. Requests without parameters or without a "service" property
 * are silently ignored. If the request carries an ID, a response with an
 * empty result is sent back to the sender.
 *
 * @param sender The endpoint that sent the request.
 * @param request The request message.
 */
void CheckerComponent::AssignServiceRequestHandler(const Endpoint::Ptr& sender, const RequestMessage& request)
{
	MessagePart params;
	if (!request.GetParams(&params))
		return;

	MessagePart serviceMsg;
	if (!params.GetProperty("service", &serviceMsg))
		return;

	ConfigObject::Ptr object = boost::make_shared<ConfigObject>(serviceMsg.GetDictionary());
	Service service(object);
	m_Services.push(service);

	Application::Log(LogDebug, "checker", "Accepted delegation for service '" + service.GetName() + "'");

	/* only requests that carry an ID get a response */
	string id;
	if (request.GetID(&id)) {
		ResponseMessage rm;
		rm.SetID(id);

		MessagePart result;
		rm.SetResult(result);
		EndpointManager::GetInstance()->SendUnicastMessage(m_CheckerEndpoint, sender, rm);
	}
}
|
|
|
|
|
2012-06-16 03:42:54 +02:00
|
|
|
/**
 * Handles "checker::ClearServices" requests.
 *
 * Drops every service delegation by emptying the set of pending services
 * and replacing the queue of idle services with an empty one. Neither
 * argument is inspected.
 *
 * @param sender The endpoint that sent the request (unused).
 * @param request The request message (unused).
 */
void CheckerComponent::ClearServicesRequestHandler(const Endpoint::Ptr& sender, const RequestMessage& request)
{
	Application::Log(LogInformation, "checker", "Clearing service delegations.");

	/* forget about outstanding results and idle services alike */
	m_PendingServices.clear();
	m_Services = ServiceQueue();

	/* TODO: clear checks we've already sent to the thread pool */
}
|
|
|
|
|
|
|
|
/* NOTE(review): presumably exposes CheckerComponent to the component loader
 * under the name "checker" - confirm against the EXPORT_COMPONENT macro. */
EXPORT_COMPONENT(checker, CheckerComponent);
|