Merge pull request from Icinga/bugfix/don-t-wait-for-checks-on-reload-7888

CheckerComponent#Stop(): don't wait for checks
This commit is contained in:
Noah Hilverling 2020-07-29 16:35:09 +02:00 committed by GitHub
commit 2648a82de0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 113 additions and 24 deletions

View File

@ -1397,6 +1397,40 @@ Message updates will be dropped when:
* Checkable does not exist.
* Origin endpoint's zone is not allowed to access this checkable.
#### event::SetLastCheckStarted <a id="technical-concepts-json-rpc-messages-event-setlastcheckstarted"></a>
> Location: `clusterevents.cpp`
##### Message Body
Key | Value
----------|---------
jsonrpc | 2.0
method | event::SetLastCheckStarted
params | Dictionary
##### Params
Key | Type | Description
---------------------|-----------|------------------
host | String | Host name
service | String | Service name
last\_check\_started | Timestamp | Last check's start time as UNIX timestamp.
##### Functions
Event Sender: `Checkable::OnLastCheckStartedChanged`
Event Receiver: `LastCheckStartedChangedAPIHandler`
##### Permissions
The receiver will not process messages from endpoints that are not configured.
Message updates will be dropped when:
* Checkable does not exist.
* Origin endpoint's zone is not allowed to access this checkable.
#### event::SuppressedNotifications <a id="technical-concepts-json-rpc-messages-event-setsupressednotifications"></a>
> Location: `clusterevents.cpp`

View File

@ -74,30 +74,6 @@ void CheckerComponent::Stop(bool runtimeRemoved)
m_CV.notify_all();
}
double wait = 0.0;
while (Checkable::GetPendingChecks() > 0) {
Log(LogDebug, "CheckerComponent")
<< "Waiting for running checks (" << Checkable::GetPendingChecks()
<< ") to finish. Waited for " << wait << " seconds now.";
Utility::Sleep(0.1);
wait += 0.1;
/* Pick a timeout slightly shorter than the process reload timeout. */
double reloadTimeout = Application::GetReloadTimeout();
double waitMax = reloadTimeout - 30;
if (waitMax <= 0)
waitMax = 1;
if (wait > waitMax) {
Log(LogWarning, "CheckerComponent")
<< "Checks running too long for " << wait
<< " seconds, hard shutdown before reload timeout: " << reloadTimeout << ".";
break;
}
}
m_ResultTimer->Stop();
m_Thread.join();

View File

@ -514,6 +514,8 @@ void Checkable::ExecuteCheck()
double scheduled_start = GetNextCheck();
double before_check = Utility::GetTime();
SetLastCheckStarted(Utility::GetTime());
/* This calls SetNextCheck() which updates the CheckerComponent's idle/pending
* queues and ensures that checks are not fired multiple times. ProcessCheckResult()
* is called too late. See #6421.

View File

@ -63,6 +63,14 @@ void Checkable::Start(bool runtimeCreated)
{
double now = Utility::GetTime();
{
auto cr (GetLastCheckResult());
if (GetLastCheckStarted() > (cr ? cr->GetExecutionEnd() : 0.0)) {
SetNextCheck(GetLastCheckStarted());
}
}
if (GetNextCheck() < now + 60) {
double delta = std::min(GetCheckInterval(), 60.0);
delta *= (double)std::rand() / RAND_MAX;

View File

@ -90,6 +90,8 @@ abstract class Checkable : CustomVarObject
[config] String icon_image_alt;
[state] Timestamp next_check;
[state, no_user_view, no_user_modify] Timestamp last_check_started;
[state] int check_attempt {
default {{{ return 1; }}}
};

View File

@ -24,6 +24,7 @@ INITIALIZE_ONCE(&ClusterEvents::StaticInitialize);
REGISTER_APIFUNCTION(CheckResult, event, &ClusterEvents::CheckResultAPIHandler);
REGISTER_APIFUNCTION(SetNextCheck, event, &ClusterEvents::NextCheckChangedAPIHandler);
REGISTER_APIFUNCTION(SetLastCheckStarted, event, &ClusterEvents::LastCheckStartedChangedAPIHandler);
REGISTER_APIFUNCTION(SetSuppressedNotifications, event, &ClusterEvents::SuppressedNotificationsChangedAPIHandler);
REGISTER_APIFUNCTION(SetNextNotification, event, &ClusterEvents::NextNotificationChangedAPIHandler);
REGISTER_APIFUNCTION(SetForceNextCheck, event, &ClusterEvents::ForceNextCheckChangedAPIHandler);
@ -39,6 +40,7 @@ void ClusterEvents::StaticInitialize()
{
Checkable::OnNewCheckResult.connect(&ClusterEvents::CheckResultHandler);
Checkable::OnNextCheckChanged.connect(&ClusterEvents::NextCheckChangedHandler);
Checkable::OnLastCheckStartedChanged.connect(&ClusterEvents::LastCheckStartedChangedHandler);
Checkable::OnSuppressedNotificationsChanged.connect(&ClusterEvents::SuppressedNotificationsChangedHandler);
Notification::OnNextNotificationChanged.connect(&ClusterEvents::NextNotificationChangedHandler);
Checkable::OnForceNextCheckChanged.connect(&ClusterEvents::ForceNextCheckChangedHandler);
@ -234,6 +236,68 @@ Value ClusterEvents::NextCheckChangedAPIHandler(const MessageOrigin::Ptr& origin
return Empty;
}
/**
 * Builds an "event::SetLastCheckStarted" JSON-RPC message for the given
 * checkable and hands it to the API listener for relaying.
 *
 * The message params carry the host name, the service short name (only when
 * the checkable resolves to a service) and the checkable's current
 * last_check_started value.
 *
 * @param checkable The checkable whose last_check_started value changed.
 * @param origin The origin of the change; passed through to RelayMessage().
 */
void ClusterEvents::LastCheckStartedChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin)
{
	ApiListener::Ptr apiListener = ApiListener::GetInstance();

	/* No API listener instance available: there is nothing to relay through. */
	if (!apiListener)
		return;

	/* Split the checkable into its host and (optional) service part. */
	Host::Ptr owningHost;
	Service::Ptr owningService;
	tie(owningHost, owningService) = GetHostService(checkable);

	Dictionary::Ptr eventParams = new Dictionary();
	eventParams->Set("host", owningHost->GetName());

	/* The "service" key is only present for service checkables. */
	if (owningService) {
		eventParams->Set("service", owningService->GetShortName());
	}

	eventParams->Set("last_check_started", checkable->GetLastCheckStarted());

	Dictionary::Ptr rpcMessage = new Dictionary();
	rpcMessage->Set("jsonrpc", "2.0");
	rpcMessage->Set("method", "event::SetLastCheckStarted");
	rpcMessage->Set("params", eventParams);

	apiListener->RelayMessage(origin, checkable, rpcMessage, true);
}
/**
 * Handles an incoming "event::SetLastCheckStarted" message.
 *
 * The message is dropped (returning Empty without touching any object) when:
 *  - the sending client has no endpoint,
 *  - the host named in params cannot be found,
 *  - params name a service that the host does not have,
 *  - the origin zone is set and may not access the checkable.
 *
 * Otherwise the checkable's last_check_started value is set from
 * params["last_check_started"], passing the origin along.
 *
 * @param origin The origin of the message (client and zone it came from).
 * @param params The message params: "host", optional "service", "last_check_started".
 * @return Always Empty.
 */
Value ClusterEvents::LastCheckStartedChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params)
{
	Endpoint::Ptr senderEndpoint = origin->FromClient->GetEndpoint();

	if (!senderEndpoint) {
		Log(LogNotice, "ClusterEvents")
			<< "Discarding 'last_check_started changed' message from '" << origin->FromClient->GetIdentity() << "': Invalid endpoint origin (client not allowed).";
		return Empty;
	}

	Host::Ptr targetHost = Host::GetByName(params->Get("host"));

	if (!targetHost)
		return Empty;

	/* Default to the host itself; narrow down to a service if one is named. */
	Checkable::Ptr target = targetHost;

	if (params->Contains("service")) {
		target = targetHost->GetServiceByShortName(params->Get("service"));
	}

	if (!target)
		return Empty;

	/* A missing origin zone skips the access check entirely. */
	const bool accessDenied = origin->FromZone && !origin->FromZone->CanAccessObject(target);

	if (accessDenied) {
		Log(LogNotice, "ClusterEvents")
			<< "Discarding 'last_check_started changed' message for checkable '" << target->GetName()
			<< "' from '" << origin->FromClient->GetIdentity() << "': Unauthorized access.";
		return Empty;
	}

	target->SetLastCheckStarted(params->Get("last_check_started"), false, origin);

	return Empty;
}
void ClusterEvents::SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin)
{
ApiListener::Ptr listener = ApiListener::GetInstance();

View File

@ -26,6 +26,9 @@ public:
static void NextCheckChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin);
static Value NextCheckChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
static void LastCheckStartedChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin);
static Value LastCheckStartedChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);
static void SuppressedNotificationsChangedHandler(const Checkable::Ptr& checkable, const MessageOrigin::Ptr& origin);
static Value SuppressedNotificationsChangedAPIHandler(const MessageOrigin::Ptr& origin, const Dictionary::Ptr& params);