2012-06-14 13:21:40 +02:00
|
|
|
/******************************************************************************
|
|
|
|
* Icinga 2 *
|
|
|
|
* Copyright (C) 2012 Icinga Development Team (http://www.icinga.org/) *
|
|
|
|
* *
|
|
|
|
* This program is free software; you can redistribute it and/or *
|
|
|
|
* modify it under the terms of the GNU General Public License *
|
|
|
|
* as published by the Free Software Foundation; either version 2 *
|
|
|
|
* of the License, or (at your option) any later version. *
|
|
|
|
* *
|
|
|
|
* This program is distributed in the hope that it will be useful, *
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
|
|
* GNU General Public License for more details. *
|
|
|
|
* *
|
|
|
|
* You should have received a copy of the GNU General Public License *
|
|
|
|
* along with this program; if not, write to the Free Software Foundation *
|
|
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
|
|
|
|
******************************************************************************/
|
|
|
|
|
|
|
|
#include "i2-delegation.h"
|
2012-06-20 16:52:56 +02:00
|
|
|
#include <algorithm>
|
2012-06-14 13:21:40 +02:00
|
|
|
|
|
|
|
using namespace icinga;
|
|
|
|
|
2013-02-08 07:11:14 +01:00
|
|
|
EXPORT_COMPONENT(delegation, DelegationComponent);
|
|
|
|
|
2012-06-14 13:21:40 +02:00
|
|
|
void DelegationComponent::Start(void)
|
|
|
|
{
|
2012-06-15 19:32:41 +02:00
|
|
|
m_DelegationTimer = boost::make_shared<Timer>();
|
2013-02-13 09:55:39 +01:00
|
|
|
// TODO: implement a handler for config changes for the delegation_interval variable
|
|
|
|
m_DelegationTimer->SetInterval(GetDelegationInterval());
|
2012-06-15 19:32:41 +02:00
|
|
|
m_DelegationTimer->OnTimerExpired.connect(boost::bind(&DelegationComponent::DelegationTimerHandler, this));
|
2012-06-14 16:31:38 +02:00
|
|
|
m_DelegationTimer->Start();
|
2013-02-13 12:18:36 +01:00
|
|
|
m_DelegationTimer->Reschedule(Utility::GetTime() + 10);
|
2012-06-27 18:43:34 +02:00
|
|
|
}
|
|
|
|
|
2013-02-13 09:55:39 +01:00
|
|
|
double DelegationComponent::GetDelegationInterval(void) const
|
|
|
|
{
|
|
|
|
Value interval = GetConfig()->Get("delegation_interval");
|
|
|
|
if (interval.IsEmpty())
|
|
|
|
return 30;
|
|
|
|
else
|
|
|
|
return interval;
|
|
|
|
}
|
|
|
|
|
2012-06-27 18:43:34 +02:00
|
|
|
/**
 * Tells whether an endpoint has a subscription for the "checker" topic.
 *
 * @param endpoint The endpoint to inspect.
 * @returns true if the endpoint has a "checker" subscription, false otherwise.
 */
bool DelegationComponent::IsEndpointChecker(const Endpoint::Ptr& endpoint)
{
	bool hasCheckerSubscription = endpoint->HasSubscription("checker");

	return hasCheckerSubscription;
}
|
|
|
|
|
2012-07-27 16:05:02 +02:00
|
|
|
/**
 * Collects all endpoints that may currently act as checker for a service.
 *
 * An endpoint is a candidate if it passes every filter below: it is not a
 * local-only endpoint (unless this instance has no identity), it is either
 * connected or is this instance itself, it subscribes to "checker" and the
 * service allows it as a checker.
 *
 * @param service The service for which candidates are requested.
 * @returns The list of candidate endpoints (possibly empty).
 */
vector<Endpoint::Ptr> DelegationComponent::GetCheckerCandidates(const Service::Ptr& service) const
{
	vector<Endpoint::Ptr> candidates;

	/* the identity of this instance is the same for every endpoint we
	 * look at, so fetch it once instead of once per iteration */
	String myIdentity = EndpointManager::GetInstance()->GetIdentity();

	DynamicObject::Ptr object;
	BOOST_FOREACH(tie(tuples::ignore, object), DynamicType::GetByName("Endpoint")->GetObjects()) {
		Endpoint::Ptr endpoint = dynamic_pointer_cast<Endpoint>(object);

		/* the cast yields a null pointer for objects that aren't
		 * endpoints; skip them rather than dereferencing null */
		if (!endpoint)
			continue;

		/* ignore local-only endpoints (unless this is a local-only instance) */
		if (endpoint->IsLocal() && !myIdentity.IsEmpty())
			continue;

		/* ignore disconnected endpoints */
		if (!endpoint->IsConnected() && endpoint->GetName() != myIdentity)
			continue;

		/* ignore endpoints that aren't running the checker component */
		if (!IsEndpointChecker(endpoint))
			continue;

		/* ignore endpoints that aren't allowed to check this service */
		if (!service->IsAllowedChecker(endpoint->GetName()))
			continue;

		candidates.push_back(endpoint);
	}

	return candidates;
}
|
|
|
|
|
2012-06-15 19:32:41 +02:00
|
|
|
void DelegationComponent::DelegationTimerHandler(void)
|
2012-06-14 16:31:38 +02:00
|
|
|
{
|
2012-06-20 16:52:56 +02:00
|
|
|
map<Endpoint::Ptr, int> histogram;
|
|
|
|
|
2012-09-03 10:28:14 +02:00
|
|
|
DynamicObject::Ptr object;
|
2012-12-04 08:42:24 +01:00
|
|
|
BOOST_FOREACH(tie(tuples::ignore, object), DynamicType::GetByName("Endpoint")->GetObjects()) {
|
2012-09-03 10:28:14 +02:00
|
|
|
Endpoint::Ptr endpoint = dynamic_pointer_cast<Endpoint>(object);
|
|
|
|
|
|
|
|
histogram[endpoint] = 0;
|
|
|
|
}
|
2012-06-20 16:52:56 +02:00
|
|
|
|
2012-07-27 16:05:02 +02:00
|
|
|
vector<Service::Ptr> services;
|
2012-06-20 16:52:56 +02:00
|
|
|
|
|
|
|
/* build "checker -> service count" histogram */
|
2012-12-04 08:42:24 +01:00
|
|
|
BOOST_FOREACH(tie(tuples::ignore, object), DynamicType::GetByName("Service")->GetObjects()) {
|
2012-07-27 16:05:02 +02:00
|
|
|
Service::Ptr service = dynamic_pointer_cast<Service>(object);
|
|
|
|
|
|
|
|
if (!service)
|
|
|
|
continue;
|
2012-06-14 16:31:38 +02:00
|
|
|
|
2012-06-20 16:52:56 +02:00
|
|
|
services.push_back(service);
|
|
|
|
|
2012-08-02 09:38:08 +02:00
|
|
|
String checker = service->GetChecker();
|
|
|
|
if (checker.IsEmpty())
|
2012-06-20 16:52:56 +02:00
|
|
|
continue;
|
|
|
|
|
2012-09-03 10:28:14 +02:00
|
|
|
if (!Endpoint::Exists(checker))
|
2012-06-20 16:52:56 +02:00
|
|
|
continue;
|
|
|
|
|
2012-09-03 10:28:14 +02:00
|
|
|
Endpoint::Ptr endpoint = Endpoint::GetByName(checker);
|
|
|
|
|
2012-06-20 16:52:56 +02:00
|
|
|
histogram[endpoint]++;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::random_shuffle(services.begin(), services.end());
|
|
|
|
|
2012-06-21 00:10:10 +02:00
|
|
|
int delegated = 0;
|
2012-06-20 16:52:56 +02:00
|
|
|
|
|
|
|
/* re-assign services */
|
2012-07-27 16:05:02 +02:00
|
|
|
BOOST_FOREACH(const Service::Ptr& service, services) {
|
2012-08-02 09:38:08 +02:00
|
|
|
String checker = service->GetChecker();
|
2012-06-20 16:52:56 +02:00
|
|
|
|
|
|
|
Endpoint::Ptr oldEndpoint;
|
2012-09-03 10:28:14 +02:00
|
|
|
if (Endpoint::Exists(checker))
|
|
|
|
oldEndpoint = Endpoint::GetByName(checker);
|
2012-06-20 16:52:56 +02:00
|
|
|
|
|
|
|
vector<Endpoint::Ptr> candidates = GetCheckerCandidates(service);
|
|
|
|
|
2012-06-21 12:51:50 +02:00
|
|
|
int avg_services = 0, overflow_tolerance = 0;
|
2012-06-20 16:52:56 +02:00
|
|
|
vector<Endpoint::Ptr>::iterator cit;
|
|
|
|
|
2012-06-21 12:51:50 +02:00
|
|
|
if (candidates.size() > 0) {
|
|
|
|
std::random_shuffle(candidates.begin(), candidates.end());
|
|
|
|
|
|
|
|
stringstream msgbuf;
|
2012-07-27 16:05:02 +02:00
|
|
|
msgbuf << "Service: " << service->GetName() << ", candidates: " << candidates.size();
|
2012-07-10 12:21:19 +02:00
|
|
|
Logger::Write(LogDebug, "delegation", msgbuf.str());
|
2012-06-21 12:51:50 +02:00
|
|
|
|
2012-07-16 22:00:50 +02:00
|
|
|
BOOST_FOREACH(const Endpoint::Ptr& candidate, candidates) {
|
|
|
|
avg_services += histogram[candidate];
|
|
|
|
}
|
2012-06-21 12:51:50 +02:00
|
|
|
|
|
|
|
avg_services /= candidates.size();
|
|
|
|
overflow_tolerance = candidates.size() * 2;
|
|
|
|
}
|
2012-06-20 16:52:56 +02:00
|
|
|
|
|
|
|
/* don't re-assign service if the checker is still valid
|
|
|
|
* and doesn't have too many services */
|
2012-06-22 08:30:36 +02:00
|
|
|
if (oldEndpoint && oldEndpoint->IsConnected() &&
|
|
|
|
find(candidates.begin(), candidates.end(), oldEndpoint) != candidates.end() &&
|
2012-06-20 16:52:56 +02:00
|
|
|
histogram[oldEndpoint] <= avg_services + overflow_tolerance)
|
2012-06-14 16:31:38 +02:00
|
|
|
continue;
|
|
|
|
|
2012-06-20 16:52:56 +02:00
|
|
|
/* clear the service's current checker */
|
2012-08-02 09:38:08 +02:00
|
|
|
if (!checker.IsEmpty()) {
|
2012-07-27 16:05:02 +02:00
|
|
|
service->SetChecker("");
|
2012-06-20 16:52:56 +02:00
|
|
|
|
|
|
|
if (oldEndpoint)
|
|
|
|
histogram[oldEndpoint]--;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* find a new checker for the service */
|
2012-07-16 22:00:50 +02:00
|
|
|
BOOST_FOREACH(const Endpoint::Ptr& candidate, candidates) {
|
2012-06-20 16:52:56 +02:00
|
|
|
/* does this checker already have too many services */
|
2012-07-16 22:00:50 +02:00
|
|
|
if (histogram[candidate] > avg_services)
|
2012-06-20 16:52:56 +02:00
|
|
|
continue;
|
|
|
|
|
2012-09-03 10:28:14 +02:00
|
|
|
service->SetChecker(candidate->GetName());
|
2012-07-16 22:00:50 +02:00
|
|
|
histogram[candidate]++;
|
2012-06-20 16:52:56 +02:00
|
|
|
|
2013-01-25 13:09:23 +01:00
|
|
|
/* reschedule the service; this avoids "check floods"
|
|
|
|
* when a lot of services are re-assigned that haven't
|
|
|
|
* been checked recently. */
|
|
|
|
service->UpdateNextCheck();
|
|
|
|
|
2012-06-20 16:52:56 +02:00
|
|
|
delegated++;
|
2012-06-21 13:12:16 +02:00
|
|
|
|
|
|
|
break;
|
2012-06-20 16:52:56 +02:00
|
|
|
}
|
|
|
|
|
2013-01-24 10:40:31 +01:00
|
|
|
if (candidates.size() == 0) {
|
2013-01-24 23:10:07 +01:00
|
|
|
if (service->GetState() != StateUncheckable && service->GetEnableActiveChecks()) {
|
2013-01-24 10:40:31 +01:00
|
|
|
Dictionary::Ptr cr = boost::make_shared<Dictionary>();
|
|
|
|
|
|
|
|
double now = Utility::GetTime();
|
|
|
|
cr->Set("schedule_start", now);
|
|
|
|
cr->Set("schedule_end", now);
|
|
|
|
cr->Set("execution_start", now);
|
|
|
|
cr->Set("execution_end", now);
|
|
|
|
|
|
|
|
cr->Set("state", StateUncheckable);
|
|
|
|
cr->Set("output", "No checker is available for this service.");
|
|
|
|
|
|
|
|
service->ProcessCheckResult(cr);
|
|
|
|
|
|
|
|
Logger::Write(LogWarning, "delegation", "Can't delegate service: " + service->GetName());
|
|
|
|
}
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(!service->GetChecker().IsEmpty());
|
2012-06-14 16:31:38 +02:00
|
|
|
}
|
2012-06-19 12:23:52 +02:00
|
|
|
|
2012-07-16 22:00:50 +02:00
|
|
|
Endpoint::Ptr endpoint;
|
|
|
|
int count;
|
|
|
|
BOOST_FOREACH(tie(endpoint, count), histogram) {
|
2012-06-22 12:22:36 +02:00
|
|
|
stringstream msgbuf;
|
2012-09-03 10:28:14 +02:00
|
|
|
msgbuf << "histogram: " << endpoint->GetName() << " - " << count;
|
2012-07-10 12:21:19 +02:00
|
|
|
Logger::Write(LogInformation, "delegation", msgbuf.str());
|
2012-06-22 12:22:36 +02:00
|
|
|
}
|
|
|
|
|
2012-06-20 10:46:18 +02:00
|
|
|
stringstream msgbuf;
|
2012-06-21 17:39:16 +02:00
|
|
|
msgbuf << "Updated delegations for " << delegated << " services";
|
2012-07-10 12:21:19 +02:00
|
|
|
Logger::Write(LogInformation, "delegation", msgbuf.str());
|
2012-06-14 13:21:40 +02:00
|
|
|
}
|