From 5f3b7d2337d68d9b0340902d172d863deb0775d1 Mon Sep 17 00:00:00 2001
From: Yonas Habteab
Date: Thu, 25 Sep 2025 09:53:47 +0200
Subject: [PATCH] test: add basic checker scheduling test cases

---
 test/CMakeLists.txt             |   9 +
 test/base-testloggerfixture.hpp |  27 +++
 test/checker-fixture.cpp        | 205 +++++++++++++++++++++
 test/checker-fixture.hpp        | 127 +++++++++++++
 test/checker.cpp                | 311 ++++++++++++++++++++++++++++++++
 5 files changed, 679 insertions(+)
 create mode 100644 test/checker-fixture.cpp
 create mode 100644 test/checker-fixture.hpp
 create mode 100644 test/checker.cpp

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index cb815da73..2d93afc56 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -129,6 +129,15 @@ set(base_test_SOURCES
   $<TARGET_OBJECTS:methods>
 )
 
+if (ICINGA2_WITH_CHECKER)
+  list(APPEND base_test_SOURCES
+    checker.cpp
+    checker-fixture.cpp
+    checker-fixture.hpp
+    $<TARGET_OBJECTS:checker>
+  )
+endif()
+
 if(ICINGA2_UNITY_BUILD)
   mkunity_target(base test base_test_SOURCES)
 endif()
diff --git a/test/base-testloggerfixture.hpp b/test/base-testloggerfixture.hpp
index c5a4f63e2..a3571f8f9 100644
--- a/test/base-testloggerfixture.hpp
+++ b/test/base-testloggerfixture.hpp
@@ -52,6 +52,28 @@ public:
 		return ret;
 	}
 
+	/**
+	 * Counts the number of log entries whose message matches the given regex pattern as a whole.
+	 *
+	 * This only counts existing log entries, it does not wait for new ones to arrive.
+	 *
+	 * @param pattern The regex pattern the complete log message needs to match
+	 *
+	 * @return The number of log entries that match the given pattern
+	 */
+	auto CountExpectedLogPattern(const std::string& pattern)
+	{
+		std::lock_guard lock(m_Mutex);
+		const boost::regex compiledPattern(pattern); // Compile once instead of once per log entry.
+		int count = 0;
+		for (const auto& logEntry : m_LogEntries) {
+			if (boost::regex_match(logEntry.Message.GetData(), compiledPattern)) {
+				++count;
+			}
+		}
+		return count;
+	}
+
 private:
 	void ProcessLogEntry(const LogEntry& entry) override
 	{
@@ -93,6 +115,11 @@ struct TestLoggerFixture
 	}
 
 	~TestLoggerFixture()
+	{
+		DeactivateLogger();
+	}
+
+	void DeactivateLogger() const
 	{
 		testLogger->SetActive(false);
 		testLogger->Deactivate(true);
diff --git a/test/checker-fixture.cpp b/test/checker-fixture.cpp
new file mode 100644
index 000000000..7e5de60f5
--- /dev/null
+++ b/test/checker-fixture.cpp
@@ -0,0 +1,205 @@
+/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
+
+#include "base/utility.hpp"
+#include "config/configcompiler.hpp"
+#include "remote/apilistener.hpp"
+#include "test/checker-fixture.hpp"
+#include "test/icingaapplication-fixture.hpp"
+
+using namespace icinga;
+
+CheckerFixture::CheckerFixture()
+{
+	checker = new CheckerComponent;
+	checker->SetResultTimerInterval(.4); // Speed up result timer for tests
+	checker->SetName("randomizer", true);
+	checker->Register();
+	checker->OnConfigLoaded();
+	checker->PreActivate();
+	checker->Activate();
+
+	// Manually building and registering the command won't work here as we need a real callable
+	// function that produces cr and calls ProcessCheckResult on the checkable. So we use a small
+	// config snippet that imports the random and sleep check commands registered by the "methods-itl.cpp"
+	// file in the methods lib.
+	ConfigItem::RunWithActivationContext(
+		new Function{
+			"checker-fixture",
+			[] {
+				std::unique_ptr<Expression> expression = ConfigCompiler::CompileText(
+					"",
+					R"CONFIG(
+object CheckCommand "random" {
+	import "random-check-command"
+}
+
+object CheckCommand "sleep" {
+	import "sleep-check-command"
+}
+)CONFIG"
+				);
+				BOOST_REQUIRE(expression);
+				ScriptFrame frame(true);
+				BOOST_CHECK_NO_THROW(expression->Evaluate(frame));
+			}
+		}
+	);
+
+	Checkable::OnNewCheckResult.connect(
+		[this](const Checkable::Ptr&, const CheckResult::Ptr& cr, const MessageOrigin::Ptr&) {
+			++resultCount;
+			lastResult.store(cr);
+		}
+	);
+	// Track the next check times of the checkables, so we can verify that they are set to the expected
+	// value. The map is populated with the expected checkable names by the RegisterCheckable() function.
+	// So, we can safely modify the map from within this signal handler without further locking, since there
+	// won't be any concurrent access to the same keys.
+	Checkable::OnNextCheckChanged.connect([this](const Checkable::Ptr& checkable, const Value&) {
+		assert(nextCheckTimes.find(checkable->GetName()) != nextCheckTimes.end());
+		nextCheckTimes[checkable->GetName()] = checkable->GetNextCheck();
+	});
+}
+
+CheckerFixture::~CheckerFixture()
+{
+	// The handlers connected in the constructor capture `this`. Not every test calls SleepFor(), so
+	// drop them here as well; otherwise a check that finishes later would call into a destroyed fixture.
+	Checkable::OnNextCheckChanged.disconnect_all_slots();
+	Checkable::OnNewCheckResult.disconnect_all_slots();
+	checker->Deactivate();
+}
+
+void CheckerFixture::RegisterCheckablesRandom(int count, bool disableChecks, bool unreachable)
+{
+	for (int i = 1; i <= count; ++i) {
+		RegisterCheckable("host-" + std::to_string(i), "random", "", "", disableChecks, unreachable);
+	}
+}
+
+void CheckerFixture::RegisterCheckablesSleep(int count, double sleepTime, bool disableChecks, bool unreachable)
+{
+	for (int i = 1; i <= count; ++i) {
+		auto h = RegisterCheckable("host-" + std::to_string(i), "sleep", "", "", disableChecks, unreachable);
+		h->SetVars(new Dictionary{{"sleep_time", sleepTime}});
+	}
+}
+
+void CheckerFixture::RegisterRemoteChecks(int count, bool isConnected, bool isSyncingReplayLogs)
+{
+	RegisterEndpoint("remote-checker", isConnected, isSyncingReplayLogs);
+	for (int i = 1; i <= count; ++i) {
+		auto h = RegisterCheckable("host-"+std::to_string(i), "random", "", "remote-checker");
+		Checkable::OnRescheduleCheck(h, Utility::GetTime()); // Force initial scheduling
+	}
+}
+
+Host::Ptr CheckerFixture::RegisterCheckable(
+	std::string name,
+	std::string cmd,
+	std::string period,
+	std::string endpoint,
+	bool disableChecks,
+	bool unreachable
+)
+{
+	Host::Ptr host = new Host;
+	host->SetName(std::move(name), true);
+	host->SetCheckCommandRaw(std::move(cmd), true);
+	host->SetCheckInterval(checkInterval, true);
+	host->SetRetryInterval(retryInterval, true);
+	host->SetHAMode(HARunEverywhere, true); // Disable HA for tests
+	host->SetEnableActiveChecks(!disableChecks, true);
+	host->SetCheckPeriodRaw(std::move(period), true);
+	host->SetZoneName(endpoint, true);
+	host->SetCommandEndpointRaw(std::move(endpoint), true);
+	host->SetCheckTimeout(0, true);
+	host->Register();
+	host->OnAllConfigLoaded();
+
+	nextCheckTimes[host->GetName()] = 0.0; // Initialize next check time tracking
+
+	if (unreachable) {
+		Host::Ptr parent = new Host;
+		parent->SetName(Utility::NewUniqueID(), true);
+		parent->SetStateRaw(ServiceCritical, true);
+		parent->SetStateType(StateTypeHard, true);
+		parent->SetLastCheckResult(new CheckResult, true);
+		parent->Register();
+
+		Dependency::Ptr dep = new Dependency;
+		dep->SetName(Utility::NewUniqueID(), true);
+		dep->SetStateFilter(StateFilterUp, true);
+		dep->SetDisableChecks(true, true);
+		dep->SetParent(parent);
+		dep->SetChild(host);
+		dep->Register();
+
+		host->AddDependency(dep);
+	}
+
+	host->PreActivate();
+	host->Activate();
+	return host;
+}
+
+void CheckerFixture::SleepFor(double seconds, bool deactivateLogger) const
+{
+	Utility::Sleep(seconds);
+	Checkable::OnNextCheckChanged.disconnect_all_slots();
+	Checkable::OnNewCheckResult.disconnect_all_slots();
+	if (deactivateLogger) {
+		DeactivateLogger();
+	}
+}
+
+Endpoint::Ptr CheckerFixture::RegisterEndpoint(std::string name, bool isConnected, bool isSyncingReplayLogs)
+{
+	auto makeEndpoint = [](const std::string& name, bool syncing) {
+		Endpoint::Ptr remote = new Endpoint;
+		remote->SetName(name, true);
+		if (syncing) {
+			remote->SetSyncing(true);
+		}
+		remote->Register();
+		remote->PreActivate();
+		return remote;
+	};
+
+	Endpoint::Ptr remote = makeEndpoint(name, isSyncingReplayLogs);
+	Endpoint::Ptr local = makeEndpoint("local-tester", false);
+
+	Zone::Ptr zone = new Zone;
+	zone->SetName(remote->GetName(), true);
+	zone->SetEndpointsRaw(new Array{{remote->GetName(), local->GetName()}}, true);
+	zone->Register();
+	zone->OnAllConfigLoaded();
+	zone->PreActivate();
+	zone->Activate();
+
+	ApiListener::Ptr listener = new ApiListener;
+	listener->SetIdentity(local->GetName(), true);
+	listener->SetName(local->GetName(), true);
+	listener->Register();
+	try {
+		listener->OnAllConfigLoaded(); // Initialize the m_LocalEndpoint of the listener!
+
+		// May throw due to various reasons, but we only care that the m_Instance singleton
+		// is set which it will be if no other ApiListener is registered yet.
+		listener->OnConfigLoaded();
+	} catch (const std::exception& ex) {
+		BOOST_TEST_MESSAGE("Exception during ApiListener::OnConfigLoaded: " << DiagnosticInformation(ex));
+	}
+
+	if (isConnected) {
+		JsonRpcConnection::Ptr client = new JsonRpcConnection(
+			new StoppableWaitGroup,
+			"anonymous",
+			false,
+			nullptr,
+			RoleClient
+		);
+		remote->AddClient(client);
+	}
+	return remote;
+}
diff --git a/test/checker-fixture.hpp b/test/checker-fixture.hpp
new file mode 100644
index 000000000..963bcde39
--- /dev/null
+++ b/test/checker-fixture.hpp
@@ -0,0 +1,127 @@
+/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
+
+#pragma once
+
+#include "base/atomic.hpp"
+#include "checker/checkercomponent.hpp"
+#include "icinga/checkcommand.hpp"
+#include "remote/endpoint.hpp"
+#include "test/base-testloggerfixture.hpp"
+
+namespace icinga {
+
+/**
+ * Test fixture for tests involving the @c CheckerComponent.
+ *
+ * This fixture sets up a CheckerComponent instance and provides utility functions to register
+ * checkable objects. It is derived from @c TestLoggerFixture to capture log output during tests,
+ * so tests can verify that expected log messages are produced. The fixture also connects to the
+ * @c Checkable::OnNewCheckResult signal to count the number of check results produced during tests.
+ */
+struct CheckerFixture : TestLoggerFixture
+{
+	CheckerFixture();
+
+	~CheckerFixture();
+
+	/**
+	 * Registers a fully configured set of checkable hosts that execute the "random" check command.
+	 *
+	 * Each host is configured with a random check command, check interval, and retry interval.
+	 * If @c unreachable is true, each host is made unreachable by adding a dependency on a parent
+	 * host that is in a critical state. This prevents the checker from executing checks for the
+	 * child hosts. The check and retry intervals are kept low to allow for quick test execution,
+	 * but they can be adjusted via the @c checkInterval and @c retryInterval member variables.
+	 *
+	 * @param count Number of checkable hosts to register.
+	 * @param disableChecks If true, disables active checks for each host.
+	 * @param unreachable If true, makes each host unreachable via a dependency.
+	 */
+	void RegisterCheckablesRandom(int count, bool disableChecks = false, bool unreachable = false);
+
+	/**
+	 * Registers a fully configured set of checkable hosts that execute the "sleep" command.
+	 *
+	 * Each host is configured with a sleep check command that sleeps for the specified duration.
+	 * The check and retry intervals can be adjusted via the @c checkInterval and @c retryInterval
+	 * member variables of the fixture. If @c unreachable is true, each host is made unreachable by
+	 * adding a dependency on a parent host that is in a critical state. This prevents the checker
+	 * from executing checks for the child hosts.
+	 *
+	 * @param count Number of checkable hosts to register.
+	 * @param sleepTime Duration (in seconds) that the sleep command should sleep. Defaults to 1.0 second.
+	 * @param disableChecks If true, disables active checks for each host.
+	 * @param unreachable If true, makes each host unreachable via a dependency.
+	 */
+	void RegisterCheckablesSleep(
+		int count,
+		double sleepTime = 1.0,
+		bool disableChecks = false,
+		bool unreachable = false
+	);
+
+	/**
+	 * Registers a remote endpoint and a set of checkable hosts assigned to that endpoint.
+	 *
+	 * The remote endpoint can be configured to appear connected or disconnected, and can also
+	 * be set to be syncing replay logs as needed for tests involving remote checks.
+	 *
+	 * @param count Number of checkable hosts to register.
+	 * @param isConnected If true, the remote endpoint is marked as connected.
+	 * @param isSyncingReplayLogs If true, the remote endpoint is marked as syncing replay logs.
+	 */
+	void RegisterRemoteChecks(int count, bool isConnected = false, bool isSyncingReplayLogs = false);
+
+	Host::Ptr RegisterCheckable(
+		std::string name,
+		std::string cmd,
+		std::string period = "",
+		std::string endpoint = "",
+		bool disableChecks = false,
+		bool unreachable = false
+	);
+
+	/**
+	 * Sleeps for the specified number of seconds, then immediately disconnects all signal handlers
+	 * connected to @c Checkable::OnNextCheckChanged and @c Checkable::OnNewCheckResult.
+	 *
+	 * This is useful in tests to allow some time for checks to be executed and results to be processed,
+	 * while ensuring that no further signal handlers are called after the sleep period. This helps to avoid
+	 * unexpected side effects in tests, since the checker continues to run till the fixture is destroyed.
+	 *
+	 * @param seconds Number of seconds to sleep.
+	 * @param deactivateLogger If true, deactivates the test logger after sleeping to prevent further log capture.
+	 */
+	void SleepFor(double seconds, bool deactivateLogger = false) const;
+
+	/**
+	 * Registers a remote endpoint with the specified name and connection/syncing state and returns it.
+	 *
+	 * @param name Name of the endpoint to register.
+	 * @param isConnected If true, the endpoint is marked as connected.
+	 * @param isSyncingReplayLogs If true, the endpoint is marked as syncing replay logs.
+	 *
+	 * @return The registered endpoint instance.
+	 */
+	static Endpoint::Ptr RegisterEndpoint(std::string name, bool isConnected = false, bool isSyncingReplayLogs = false);
+
+	/**
+	 * resultCount tracks the number of check results produced by the checker.
+	 *
+	 * This is used in tests to verify that checks are actually being executed and results processed.
+	 * It is incremented from within the OnNewCheckResult signal handler, thus must be atomic.
+	 */
+	Atomic<int> resultCount{0};
+	AtomicOrLocked<CheckResult::Ptr> lastResult; // Stores the last check result received, for inspection in tests.
+	/**
+	 * nextCheckTimes tracks the next scheduled check time for each registered checkable host.
+	 * This might not be used in all tests, but is available for tests that need to verify the exact
+	 * next check timestamp set by the checker.
+	 */
+	std::map<std::string, double> nextCheckTimes;
+	double checkInterval{.1}; // Interval in seconds between regular checks for each checkable.
+	double retryInterval{.1}; // Interval in seconds between retry checks for each checkable.
+	CheckerComponent::Ptr checker;
+};
+
+} // namespace icinga
diff --git a/test/checker.cpp b/test/checker.cpp
new file mode 100644
index 000000000..e0e0a0585
--- /dev/null
+++ b/test/checker.cpp
@@ -0,0 +1,311 @@
+/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
+
+#include "test/checker-fixture.hpp"
+#include "base/scriptglobal.hpp"
+#include "icinga/host.hpp"
+#include "icinga/dependency.hpp"
+#include "icinga/legacytimeperiod.hpp"
+#include <BoostTestTargetConfig.h>
+
+using namespace icinga;
+
+BOOST_FIXTURE_TEST_SUITE(checker, CheckerFixture, *boost::unit_test::label("checker"))
+
+BOOST_AUTO_TEST_CASE(single_check)
+{
+	// For a single checkable, there shouldn't be any concurrent checks that trigger this event,
+	// so we can safely use boost assertion macros within the event handler.
+	Checkable::OnNextCheckChanged.connect([this](const Checkable::Ptr& checkable, const Value&) {
+		BOOST_CHECK_EQUAL(checkable->GetName(), "host-1");
+		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + checkInterval + .5);
+	});
+
+	RegisterCheckablesRandom(1);
+	SleepFor(.4, true);
+
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Executing check for 'host-1'"));
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Check finished for object 'host-1'"));
+	BOOST_CHECK_EQUAL(4, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(multiple_checks)
+{
+	Checkable::OnNextCheckChanged.connect([this](const Checkable::Ptr& checkable, const Value&) {
+		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + checkInterval + .5);
+	});
+
+	RegisterCheckablesRandom(8);
+	SleepFor(.3, true);
+
+	BOOST_CHECK(ExpectLogPattern("Executing check for .*"));
+	BOOST_CHECK(ExpectLogPattern("Check finished for object .*"));
+	auto executedC = testLogger->CountExpectedLogPattern("Executing check for .*");
+	auto finishedC = testLogger->CountExpectedLogPattern("Check finished for object .*");
+	BOOST_CHECK_EQUAL(executedC, finishedC);
+	// With 8 checkables and a check interval of 0.1s, we expect that each checkable is checked at least
+	// twice, but some of them possibly up to 3 times, depending on the timing and OS scheduling behavior.
+	BOOST_CHECK_MESSAGE(22 <= resultCount && resultCount <= 25, "got=" << resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(disabled_checks)
+{
+	RegisterCheckablesRandom(4, true);
+	SleepFor(.3, true);
+
+	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
+	BOOST_CHECK_MESSAGE(10 <= failedC && failedC <= 13, "got=" << failedC);
+
+	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
+	BOOST_CHECK_MESSAGE(10 <= rescheduleC && rescheduleC <= 13, "got=" << rescheduleC);
+	BOOST_CHECK_EQUAL(failedC, rescheduleC);
+
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(globally_disabled_checks)
+{
+	IcingaApplication::GetInstance()->SetEnableHostChecks(false); // Disable active host checks globally
+
+	RegisterCheckablesRandom(4);
+	SleepFor(.3, true);
+
+	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
+	BOOST_CHECK_MESSAGE(10 <= failedC && failedC <= 13, "got=" << failedC);
+
+	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
+	BOOST_CHECK_MESSAGE(10 <= rescheduleC && rescheduleC <= 13, "got=" << rescheduleC);
+	BOOST_CHECK_EQUAL(failedC, rescheduleC);
+
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(unreachable_checkable)
+{
+	// Create a dependency that makes the host unreachable (i.e. no checks should be executed).
+	// This must be done before activating the actual child checkable, otherwise the checker will
+	// immediately schedule a check before the dependency is in place.
+	RegisterCheckablesRandom(4, false, true);
+	SleepFor(.3, true);
+
+	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed.");
+	BOOST_CHECK_MESSAGE(10 <= failedC && failedC <= 13, "got=" << failedC);
+
+	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
+	BOOST_CHECK_MESSAGE(10 <= rescheduleC && rescheduleC <= 13, "got=" << rescheduleC);
+	BOOST_CHECK_EQUAL(failedC, rescheduleC);
+
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(never_in_check_period)
+{
+	TimePeriod::Ptr period = new TimePeriod;
+	period->SetName("never");
+	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
+	period->Register();
+	period->PreActivate();
+	period->Activate();
+
+	// Register some checkables that are only checked during the "never" time period, which is never.
+	(void)RegisterCheckable("host-1", "random", "never");
+	(void)RegisterCheckable("host-2", "random", "never");
+	(void)RegisterCheckable("host-3", "sleep", "never");
+	(void)RegisterCheckable("host-4", "sleep", "never");
+
+	SleepFor(.3, true);
+
+	for (auto& [host, nextCheck] : nextCheckTimes) {
+		BOOST_TEST_MESSAGE("Host " << std::quoted(host) << " -> next_check: " << std::fixed << std::setprecision(0)
+			<< nextCheck << " expected: " << Convert::ToDouble(period->GetValidEnd()));
+		// The checker should ignore the regular check interval and instead set the next check time based on the tp.
+		BOOST_CHECK_EQUAL(nextCheck, Convert::ToDouble(period->GetValidEnd()));
+	}
+
+	// We expect that no checks are executed, and instead the checker reschedules the checks for the
+	// next valid end time of the "never" time period, which is always 24h from now. So, we should see
+	// 4 log messages about skipping the checks due to the time period, and nothing else.
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Skipping check for object .*, as not in check period 'never', until .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(in_check_period)
+{
+	TimePeriod::Ptr period = new TimePeriod;
+	period->SetName("always"); // Must match the check period name the checkables below refer to.
+	period->SetRanges(
+		new Dictionary{
+			{"monday", "00:00-24:00"},
+			{"tuesday", "00:00-24:00"},
+			{"wednesday", "00:00-24:00"},
+			{"thursday", "00:00-24:00"},
+			{"friday", "00:00-24:00"},
+			{"saturday", "00:00-24:00"},
+			{"sunday", "00:00-24:00"}
+		},
+		true
+	);
+	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
+	period->Register();
+	period->PreActivate();
+	period->Activate();
+
+	// Register some checkables that are only checked during the "always" time period, which is always.
+	(void)RegisterCheckable("host-1", "random", "always");
+	(void)RegisterCheckable("host-2", "random", "always");
+	(void)RegisterCheckable("host-3", "sleep", "always");
+	(void)RegisterCheckable("host-4", "sleep", "always");
+
+	SleepFor(.3, true);
+
+	// We expect that checks are executed normally, and the checker sets the next check time based
+	// on the regular check interval. So, we should see multiple checks executed for each checkable.
+	BOOST_CHECK(ExpectLogPattern("Executing check for .*"));
+	BOOST_CHECK(ExpectLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed."));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
+	BOOST_CHECK_MESSAGE(6 <= resultCount && resultCount <= 8, "got=" << resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(max_concurrent_checks)
+{
+	// Limit the number of concurrent checks to 4.
+	ScriptGlobal::Set("MaxConcurrentChecks", 4);
+
+	// Register 16 checkables that each sleep for 10 seconds when executing their check.
+	// With a max concurrent check limit of 4, we should see that only 4 checks are executed
+	// at the same time, and the remaining 12 checks are queued until one of the running checks
+	// finishes (which will not happen within the short sleep time of this test).
+	RegisterCheckablesSleep(16, 10);
+	Utility::Sleep(.5);
+
+	auto objects(ConfigType::GetObjectsByType<Host>());
+	BOOST_CHECK_EQUAL(16, objects.size());
+
+	for (auto& h : objects) {
+		// Force a reschedule of the checks to see whether the checker does absolutely nothing
+		// when the max concurrent check limit is reached. Normally, this would force the checker
+		// to immediately pick up the checkable and execute its check, but since all 4 slots are
+		// already taken, the checker should just update its queue idx and do nothing else.
+		Checkable::OnRescheduleCheck(h, Utility::GetTime());
+	}
+	Utility::Sleep(.5);
+
+	// We expect that only 4 checks are started initially, and the other 12 checks should have
+	// never been run, since the sleep time for each check (10 seconds) is much longer than the
+	// total sleep time of this test (1 second).
+	BOOST_CHECK(ExpectLogPattern("Pending checkables: 4; Idle checkables: 12; Checks/s: .*"));
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Scheduling info for checkable .*: Object .*"));
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(4, Checkable::GetPendingChecks());
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*")); // none finished yet
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(skipped_remote_checks)
+{
+	// The check execution for remote checks is skipped if the remote endpoint is not connected,
+	// not syncing, and we are within the cold startup window (5min after application start).
+	Application::GetInstance()->SetStartTime(Utility::GetTime());
+
+	// Set the check and retry intervals to 60 seconds, since it's sufficient to
+	// just verify that the checks are skipped only once, and not repeatedly.
+	checkInterval = 60;
+	retryInterval = 60;
+
+	RegisterRemoteChecks(8);
+	SleepFor(.3, true);
+
+	for (auto& [host, nextCheck] : nextCheckTimes) {
+		BOOST_TEST_MESSAGE("Host " << std::quoted(host) << " -> next_check: " << std::fixed << std::setprecision(0)
+			<< nextCheck << " roughly expected: " << Utility::GetTime() + checkInterval);
+		// Our algorithm for computing the next check time is not too precise, but it should roughly be within 5s of
+		// the expected next check time based on the check interval. See Checkable::UpdateNextCheck() for details.
+		BOOST_CHECK_GE(nextCheck, Utility::GetTime() + checkInterval-5);
+		BOOST_CHECK_LE(nextCheck, Utility::GetTime() + checkInterval); // but not more than the interval
+	}
+
+	BOOST_CHECK_EQUAL(nullptr, lastResult.load()); // No check results should be received
+	BOOST_CHECK_EQUAL(8, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(8, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed."));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(remote_checks_outside_cold_startup)
+{
+	Application::GetInstance()->SetStartTime(Utility::GetTime()-500); // Simulate being outside cold startup window
+
+	checkInterval = 60;
+	retryInterval = 60;
+
+	RegisterRemoteChecks(8);
+	SleepFor(.3, true);
+
+	for (auto& [host, nextCheck] : nextCheckTimes) {
+		BOOST_TEST_MESSAGE("Host " << std::quoted(host) << " -> next_check: " << std::fixed << std::setprecision(0)
+			<< nextCheck << " roughly expected: " << Utility::GetTime() + checkInterval);
+		// Our algorithm for computing the next check time is not too precise, but it should roughly be within 5s of
+		// the expected next check time based on the check interval. See Checkable::UpdateNextCheck() for details.
+		BOOST_CHECK_GE(nextCheck, Utility::GetTime() + checkInterval-5);
+		BOOST_CHECK_LE(nextCheck, Utility::GetTime() + checkInterval); // but not more than the interval
+	}
+
+	BOOST_CHECK_EQUAL(8, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(8, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(8, resultCount);
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed."));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
+
+	BOOST_REQUIRE(lastResult.load()); // We should now have a cr!
+
+	auto cr = lastResult.load();
+	BOOST_CHECK_EQUAL(ServiceUnknown, cr->GetState());
+	String expectedOutput = "Remote Icinga instance 'remote-checker' is not connected to '" + Endpoint::GetLocalEndpoint()->GetName() + "'";
+	BOOST_CHECK_EQUAL(expectedOutput, cr->GetOutput());
+}
+
+BOOST_AUTO_TEST_CASE(remote_checks_with_connected_endpoint)
+{
+	// Register a remote endpoint that is connected, so remote checks can be executed or actually simulate
+	// sending the check command to the remote endpoint. In this case, we shouldn't receive any cr either,
+	// but checker should reschedule the check at "now + check_timeout (which is 0) + 30s".
+	RegisterRemoteChecks(8, true);
+	SleepFor(.3, true);
+
+	for (auto& [host, nextCheck] : nextCheckTimes) {
+		// In this case, there shouldn't be any OnNextCheckChanged events triggered,
+		// thus nextCheck should still be 0.0 as initialized by RegisterCheckable().
+		BOOST_CHECK_EQUAL(0.0, nextCheck);
+	}
+
+	auto hosts(ConfigType::GetObjectsByType<Host>());
+	BOOST_CHECK_EQUAL(8, hosts.size());
+
+	for (auto& h : hosts) {
+		// Verify the next_check time is set to roughly now + 30s, since RegisterCheckable()
+		// initializes the check_timeout to 0.
+		BOOST_TEST_MESSAGE("Host " << std::quoted(h->GetName().GetData()) << " -> next_check: " << std::fixed
+			<< std::setprecision(0) << h->GetNextCheck() << " roughly expected: " << Utility::GetTime() + 30);
+		BOOST_CHECK_GE(h->GetNextCheck(), Utility::GetTime() + 25);
+		BOOST_CHECK_LE(h->GetNextCheck(), Utility::GetTime() + 30);
+	}
+
+	BOOST_CHECK_EQUAL(8, testLogger->CountExpectedLogPattern("Sending message 'event::ExecuteCommand' to 'remote-checker'"));
+	BOOST_CHECK_EQUAL(8, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(8, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed."));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
+	BOOST_CHECK_EQUAL(0, resultCount); // No check results should be received yet
+}
+
+BOOST_AUTO_TEST_SUITE_END()