From 1a9c1591c0c13603b1dee6cfb514e6ec7c309450 Mon Sep 17 00:00:00 2001 From: Jean Flach Date: Tue, 10 Apr 2018 15:50:45 +0200 Subject: [PATCH 1/2] Fix check behavior on restart This patch changes the way checkresults are handled during a restart. 1. Check results coming in during a shutdown are ignored. 2. Upon start, checks which should have run (next_check in the past) are re-scheduled within the first minute. This new behavior means there will be no more "Unknown - Terminated" checkresults during a restart and checks with high check_interval will be run earlier if they were already scheduled to run. The downside is that after Icinga2 was down for a while, there will be a lot of checks within the first minute. Our max concurrent checks limit should take care of this though. --- lib/icinga/checkable-check.cpp | 4 +--- lib/icinga/checkable.cpp | 7 +++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/icinga/checkable-check.cpp b/lib/icinga/checkable-check.cpp index 8bf1fed74..76b2cc37e 100644 --- a/lib/icinga/checkable-check.cpp +++ b/lib/icinga/checkable-check.cpp @@ -107,7 +107,7 @@ void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrig m_CheckRunning = false; } - if (!cr) + if (!cr || !IsActive()) return; double now = Utility::GetTime(); @@ -428,8 +428,6 @@ void Checkable::ExecuteCheck() double scheduled_start = GetNextCheck(); double before_check = Utility::GetTime(); - UpdateNextCheck(); - bool reachable = IsReachable(); { diff --git a/lib/icinga/checkable.cpp b/lib/icinga/checkable.cpp index 69af2c581..fe52e37ef 100644 --- a/lib/icinga/checkable.cpp +++ b/lib/icinga/checkable.cpp @@ -73,8 +73,11 @@ void Checkable::Start(bool runtimeCreated) { double now = Utility::GetTime(); - if (GetNextCheck() < now + 300) - UpdateNextCheck(); + if (GetNextCheck() < now + 60) { + double delta = std::min(GetCheckInterval(), 60.0); + delta *= (double)std::rand() / RAND_MAX; + SetNextCheck(now + delta); + } 
ObjectImpl::Start(runtimeCreated); } From b2092e449977486bb1704042801909339bdb280d Mon Sep 17 00:00:00 2001 From: Jean Flach Date: Tue, 10 Apr 2018 18:11:26 +0200 Subject: [PATCH 2/2] Set Hosts/Services in tests as active where needed --- test/icinga-checkable-flapping.cpp | 6 +++++- test/icinga-checkresult.cpp | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/test/icinga-checkable-flapping.cpp b/test/icinga-checkable-flapping.cpp index 41646a0b1..256c7b517 100644 --- a/test/icinga-checkable-flapping.cpp +++ b/test/icinga-checkable-flapping.cpp @@ -56,7 +56,7 @@ static void LogFlapping(const Checkable::Ptr& obj) static void LogHostStatus(const Host::Ptr &host) { std::cout << "Current status: state: " << host->GetState() << " state_type: " << host->GetStateType() - << " check attempt: " << host->GetCheckAttempt() << "/" << host->GetMaxCheckAttempts() << std::endl; + << " check attempt: " << host->GetCheckAttempt() << "/" << host->GetMaxCheckAttempts() << " Active: " << host->IsActive() << std::endl; } #endif /* I2_DEBUG */ @@ -73,6 +73,7 @@ BOOST_AUTO_TEST_CASE(host_not_flapping) host->SetName("test"); host->SetEnableFlapping(true); host->SetMaxCheckAttempts(5); + host->SetActive(true); // Host otherwise is soft down host->SetState(HostUp); @@ -116,6 +117,7 @@ BOOST_AUTO_TEST_CASE(host_flapping) host->SetName("test"); host->SetEnableFlapping(true); host->SetMaxCheckAttempts(5); + host->SetActive(true); Utility::SetTime(0); @@ -150,6 +152,7 @@ BOOST_AUTO_TEST_CASE(host_flapping_recover) host->SetName("test"); host->SetEnableFlapping(true); host->SetMaxCheckAttempts(5); + host->SetActive(true); // Host otherwise is soft down host->SetState(HostUp); @@ -209,6 +212,7 @@ BOOST_AUTO_TEST_CASE(host_flapping_docs_example) host->SetName("test"); host->SetEnableFlapping(true); host->SetMaxCheckAttempts(5); + host->SetActive(true); // Host otherwise is soft down host->SetState(HostUp); diff --git a/test/icinga-checkresult.cpp 
b/test/icinga-checkresult.cpp index 9ea962642..fb024ce31 100644 --- a/test/icinga-checkresult.cpp +++ b/test/icinga-checkresult.cpp @@ -63,6 +63,7 @@ BOOST_AUTO_TEST_CASE(host_1attempt) boost::signals2::connection c = Checkable::OnNotificationsRequested.connect(std::bind(&NotificationHandler, _1, _2)); Host::Ptr host = new Host(); + host->SetActive(true); host->SetMaxCheckAttempts(1); host->Activate(); host->SetAuthority(true); @@ -111,6 +112,7 @@ BOOST_AUTO_TEST_CASE(host_2attempts) boost::signals2::connection c = Checkable::OnNotificationsRequested.connect(std::bind(&NotificationHandler, _1, _2)); Host::Ptr host = new Host(); + host->SetActive(true); host->SetMaxCheckAttempts(2); host->Activate(); host->SetAuthority(true); @@ -166,6 +168,7 @@ BOOST_AUTO_TEST_CASE(host_3attempts) boost::signals2::connection c = Checkable::OnNotificationsRequested.connect(std::bind(&NotificationHandler, _1, _2)); Host::Ptr host = new Host(); + host->SetActive(true); host->SetMaxCheckAttempts(3); host->Activate(); host->SetAuthority(true); @@ -228,6 +231,7 @@ BOOST_AUTO_TEST_CASE(service_1attempt) boost::signals2::connection c = Checkable::OnNotificationsRequested.connect(std::bind(&NotificationHandler, _1, _2)); Service::Ptr service = new Service(); + service->SetActive(true); service->SetMaxCheckAttempts(1); service->Activate(); service->SetAuthority(true); @@ -276,6 +280,7 @@ BOOST_AUTO_TEST_CASE(service_2attempts) boost::signals2::connection c = Checkable::OnNotificationsRequested.connect(std::bind(&NotificationHandler, _1, _2)); Service::Ptr service = new Service(); + service->SetActive(true); service->SetMaxCheckAttempts(2); service->Activate(); service->SetAuthority(true); @@ -331,6 +336,7 @@ BOOST_AUTO_TEST_CASE(service_3attempts) boost::signals2::connection c = Checkable::OnNotificationsRequested.connect(std::bind(&NotificationHandler, _1, _2)); Service::Ptr service = new Service(); + service->SetActive(true); service->SetMaxCheckAttempts(3); service->Activate(); 
service->SetAuthority(true); @@ -398,6 +404,7 @@ BOOST_AUTO_TEST_CASE(host_flapping_notification) int timeStepInterval = 60; Host::Ptr host = new Host(); + host->SetActive(true); host->Activate(); host->SetAuthority(true); host->SetStateRaw(ServiceOK); @@ -451,6 +458,7 @@ BOOST_AUTO_TEST_CASE(service_flapping_notification) int timeStepInterval = 60; Service::Ptr service = new Service(); + service->SetActive(true); service->Activate(); service->SetAuthority(true); service->SetStateRaw(ServiceOK);