diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index cb815da73..9ec1f25a9 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -129,6 +129,14 @@ set(base_test_SOURCES
   $
 )
 
+if (ICINGA2_WITH_CHECKER)
+  list(APPEND base_test_SOURCES
+    checker.cpp
+    checker-fixture.hpp
+    $
+  )
+endif()
+
 if(ICINGA2_UNITY_BUILD)
   mkunity_target(base test base_test_SOURCES)
 endif()
diff --git a/test/base-testloggerfixture.hpp b/test/base-testloggerfixture.hpp
index 4c71c4623..2bc96eb40 100644
--- a/test/base-testloggerfixture.hpp
+++ b/test/base-testloggerfixture.hpp
@@ -52,6 +52,30 @@ public:
 		return ret;
 	}
 
+	/**
+	 * Counts the number of log entries that match the given regex pattern.
+	 *
+	 * This only counts existing log entries, it does not wait for new ones to arrive.
+	 *
+	 * @param pattern The regex pattern the whole log message needs to match (boost::regex_match semantics)
+	 *
+	 * @return The number of log entries that match the given pattern
+	 */
+	auto CountExpectedLogPattern(const std::string& pattern)
+	{
+		// Compile the pattern once, and before taking the lock, instead of once per log entry.
+		const boost::regex compiledPattern(pattern);
+
+		std::unique_lock lock(m_Mutex);
+		int count = 0;
+		for (const auto& logEntry : m_LogEntries) {
+			if (boost::regex_match(logEntry.Message.GetData(), compiledPattern)) {
+				++count;
+			}
+		}
+		return count;
+	}
+
 private:
 	void ProcessLogEntry(const LogEntry& entry) override
 	{
diff --git a/test/checker-fixture.hpp b/test/checker-fixture.hpp
new file mode 100644
index 000000000..f2635ed9f
--- /dev/null
+++ b/test/checker-fixture.hpp
@@ -0,0 +1,194 @@
+/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
+
+#pragma once
+
+#include "base/atomic.hpp"
+#include "checker/checkercomponent.hpp"
+#include "config/configcompiler.hpp"
+#include "icinga/checkcommand.hpp"
+#include "test/base-testloggerfixture.hpp"
+#include "test/icingaapplication-fixture.hpp"
+
+namespace icinga {
+/**
+ * Test fixture for tests involving the @c CheckerComponent.
+ *
+ * This fixture sets up a CheckerComponent instance and provides utility functions to register
+ * checkable objects. It is derived from @c TestLoggerFixture to capture log output during tests,
+ * so tests can verify that expected log messages are produced. The fixture also connects to the
+ * @c Checkable::OnNewCheckResult signal to count the number of check results produced during tests.
+ */
+struct CheckerFixture : TestLoggerFixture
+{
+	CheckerFixture()
+	{
+		checker = new CheckerComponent;
+		checker->SetResultTimerInterval(.4); // Speed up result timer for tests
+		checker->SetName("randomizer", true);
+		checker->Register();
+		checker->OnConfigLoaded();
+		checker->PreActivate();
+		checker->Activate();
+
+		// Manually building and registering the command won't work here as we need a real callable
+		// function that produces cr and calls ProcessCheckResult on the checkable. So we use a small
+		// config snippet that imports the random and sleep check commands registered by the "methods-itl.cpp"
+		// file in the methods lib.
+		ConfigItem::RunWithActivationContext(
+			new Function{
+				"checker-fixture",
+				[] {
+					std::unique_ptr expression = ConfigCompiler::CompileText(
+						"",
+						R"CONFIG(
+object CheckCommand "random" {
+	import "random-check-command"
+}
+
+object CheckCommand "sleep" {
+	import "sleep-check-command"
+}
+)CONFIG"
+					);
+					BOOST_REQUIRE(expression);
+					ScriptFrame frame(true);
+					BOOST_CHECK_NO_THROW(expression->Evaluate(frame));
+				}
+			}
+		);
+
+		Checkable::OnNewCheckResult.connect(
+			[this](const Checkable::Ptr&, const CheckResult::Ptr& cr, const MessageOrigin::Ptr&) {
+				++resultCount;
+				BOOST_REQUIRE(cr);
+				BOOST_CHECK_EQUAL(0, cr->GetExitStatus());
+				BOOST_CHECK(!cr->GetOutput().IsEmpty());
+			}
+		);
+	}
+
+	~CheckerFixture()
+	{
+		Checkable::OnNextCheckChanged.disconnect_all_slots();
+		Checkable::OnNewCheckResult.disconnect_all_slots();
+		checker->Deactivate();
+	}
+
+	/**
+	 * Registers a fully configured set of checkable hosts that execute the "random" check command.
+	 *
+	 * Each host is configured with a random check command, check interval, and retry interval.
+	 * If @c unreachable is true, each host is made unreachable by adding a dependency on a parent
+	 * host that is in a critical state. This prevents the checker from executing checks for the
+	 * child hosts. The check and retry intervals are kept low to allow for quick test execution,
+	 * but they can be adjusted via the @c checkInterval and @c retryInterval member variables.
+	 *
+	 * @param count Number of checkable hosts to register.
+	 * @param disableChecks If true, disables active checks for each host.
+	 * @param unreachable If true, makes each host unreachable via a dependency.
+	 */
+	void RegisterCheckablesRandom(int count, bool disableChecks = false, bool unreachable = false) const
+	{
+		for (int i = 1; i <= count; ++i) {
+			RegisterCheckable("host-" + std::to_string(i), "random", "", disableChecks, unreachable);
+		}
+	}
+
+	/**
+	 * Registers a fully configured set of checkable hosts that execute the "sleep" command.
+	 *
+	 * Each host is configured with a sleep check command that sleeps for the specified duration.
+	 * The check and retry intervals can be adjusted via the @c checkInterval and @c retryInterval
+	 * member variables of the fixture. If @c unreachable is true, each host is made unreachable by
+	 * adding a dependency on a parent host that is in a critical state. This prevents the checker
+	 * from executing checks for the child hosts.
+	 *
+	 * @param count Number of checkable hosts to register.
+	 * @param sleepTime Duration (in seconds) that the sleep command should sleep. Defaults to 1.0 second.
+	 * @param disableChecks If true, disables active checks for each host.
+	 * @param unreachable If true, makes each host unreachable via a dependency.
+	 */
+	void RegisterCheckablesSleep(
+		int count,
+		double sleepTime = 1.0,
+		bool disableChecks = false,
+		bool unreachable = false
+	) const
+	{
+		for (int i = 1; i <= count; ++i) {
+			auto h = RegisterCheckable("host-" + std::to_string(i), "sleep", "", disableChecks, unreachable);
+			h->SetVars(new Dictionary{{"sleep_time", sleepTime}});
+		}
+	}
+
+	/**
+	 * Creates, registers and activates a single host checkable.
+	 *
+	 * The host uses the fixture's @c checkInterval and @c retryInterval. If @c unreachable is true,
+	 * a parent host in a hard critical state and a dependency on it (with disable-checks set) are
+	 * registered before the host is activated.
+	 *
+	 * @param name Name of the host to register.
+	 * @param cmd Name of the check command the host should execute.
+	 * @param period Name of the check period of the host (may be empty).
+	 * @param disableChecks If true, disables active checks for the host.
+	 * @param unreachable If true, makes the host unreachable via a dependency.
+	 *
+	 * @return The registered and activated host.
+	 */
+	Host::Ptr RegisterCheckable(
+		std::string name,
+		std::string cmd,
+		std::string period,
+		bool disableChecks = false,
+		bool unreachable = false
+	) const
+	{
+		Host::Ptr host = new Host;
+		host->SetName(std::move(name), true);
+		host->SetCheckCommandRaw(std::move(cmd), true);
+		host->SetCheckInterval(checkInterval, true);
+		host->SetRetryInterval(retryInterval, true);
+		host->SetHAMode(HARunEverywhere, true); // Disable HA for tests
+		host->SetEnableActiveChecks(!disableChecks, true);
+		host->SetCheckPeriodRaw(std::move(period), true);
+		host->Register();
+		host->OnAllConfigLoaded();
+
+		if (unreachable) {
+			Host::Ptr parent = new Host;
+			parent->SetName(Utility::NewUniqueID(), true);
+			parent->SetStateRaw(ServiceCritical, true);
+			parent->SetStateType(StateTypeHard, true);
+			parent->SetLastCheckResult(new CheckResult, true);
+			parent->Register();
+
+			Dependency::Ptr dep = new Dependency;
+			dep->SetName(Utility::NewUniqueID(), true);
+			dep->SetStateFilter(StateFilterUp, true);
+			dep->SetDisableChecks(true, true);
+			dep->SetParent(parent);
+			dep->SetChild(host);
+			dep->Register();
+
+			host->AddDependency(dep);
+		}
+
+		host->PreActivate();
+		host->Activate();
+		return host;
+	}
+
+	/**
+	 * resultCount tracks the number of check results produced by the checker.
+	 *
+	 * This is used in tests to verify that checks are actually being executed and results processed.
+	 * It is incremented from within the OnNewCheckResult signal handler, thus must be atomic.
+	 */
+	Atomic resultCount{0};
+	double checkInterval{.1}; // Interval in seconds between regular checks for each checkable.
+	double retryInterval{.1}; // Interval in seconds between retry checks for each checkable.
+	CheckerComponent::Ptr checker;
+};
+
+} // namespace icinga
diff --git a/test/checker.cpp b/test/checker.cpp
new file mode 100644
index 000000000..7f4c76757
--- /dev/null
+++ b/test/checker.cpp
@@ -0,0 +1,208 @@
+/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
+
+#include "test/checker-fixture.hpp"
+#include "base/scriptglobal.hpp"
+#include "base/utility.hpp"
+#include "icinga/host.hpp"
+#include "icinga/dependency.hpp"
+#include "icinga/legacytimeperiod.hpp"
+#include
+
+using namespace icinga;
+
+BOOST_FIXTURE_TEST_SUITE(checker, CheckerFixture, *boost::unit_test::label("checker"))
+
+BOOST_AUTO_TEST_CASE(single_checkable)
+{
+	Checkable::OnNextCheckChanged.connect([](const Checkable::Ptr& checkable, const Value&) {
+		BOOST_CHECK_EQUAL(checkable->GetName(), "host-1");
+		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + .15);
+	});
+
+	RegisterCheckablesRandom(1);
+	Utility::Sleep(.4);
+
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Executing check for 'host-1'"));
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Check finished for object 'host-1'"));
+	BOOST_CHECK_EQUAL(4, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(multiple_checkables)
+{
+	Checkable::OnNextCheckChanged.connect([](const Checkable::Ptr& checkable, const Value&) {
+		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + .15);
+	});
+
+	RegisterCheckablesRandom(16);
+	Utility::Sleep(.3);
+	BOOST_CHECK_MESSAGE(resultCount >= 47 && resultCount <= 50, "Expected between 47 and 50 results, got " << resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(disabled_checks)
+{
+	RegisterCheckablesRandom(4, true);
+	Utility::Sleep(.3);
+
+	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
+	BOOST_CHECK_MESSAGE(11 <= failedC && failedC <= 13, "Expected between 11 and 13 skipped check log messages, got " << failedC);
+
+	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
+	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
+	BOOST_CHECK_EQUAL(failedC, rescheduleC);
+
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(globally_disabled_checks)
+{
+	IcingaApplication::GetInstance()->SetEnableHostChecks(false); // Disable active host checks globally
+
+	RegisterCheckablesRandom(4);
+	Utility::Sleep(.3);
+
+	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
+	BOOST_CHECK_MESSAGE(11 <= failedC && failedC <= 13, "Expected between 11 and 13 skipped check log messages, got " << failedC);
+
+	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
+	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
+	BOOST_CHECK_EQUAL(failedC, rescheduleC);
+
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(unreachable_checkable)
+{
+	// Create a dependency that makes the host unreachable (i.e. no checks should be executed).
+	// This must be done before activating the actual child checkable, otherwise the checker will
+	// immediately schedule a check before the dependency is in place.
+	RegisterCheckablesRandom(4, false, true);
+	Utility::Sleep(.3);
+
+	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed.");
+	BOOST_CHECK_MESSAGE(11 <= failedC && failedC <= 13, "Expected between 11 and 13 dependency failure log messages, got " << failedC);
+
+	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
+	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
+	BOOST_CHECK_EQUAL(failedC, rescheduleC);
+
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(never_in_check_period)
+{
+	TimePeriod::Ptr period = new TimePeriod;
+	period->SetName("never");
+	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
+	period->Register();
+	period->PreActivate();
+	period->Activate();
+
+	Checkable::OnNextCheckChanged.connect([&period](const Checkable::Ptr& checkable, const Value&) {
+		// The checker should ignore the regular check interval and instead set the next check time based on the tp.
+		BOOST_CHECK_EQUAL(checkable->GetNextCheck(), period->GetValidEnd());
+	});
+
+	// Register some checkables that are only checked during the "never" time period, which is never.
+	(void)RegisterCheckable("host-1", "random", "never");
+	(void)RegisterCheckable("host-2", "random", "never");
+	(void)RegisterCheckable("host-3", "sleep", "never");
+	(void)RegisterCheckable("host-4", "sleep", "never");
+
+	Utility::Sleep(.3);
+
+	// We expect that no checks are executed, and instead the checker reschedules the checks for the
+	// next valid end time of the "never" time period, which is always 24h from now. So, we should see
+	// 4 log messages about skipping the checks due to the time period, and nothing else.
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Skipping check for object .*, as not in check period 'never', until .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(in_check_period)
+{
+	TimePeriod::Ptr period = new TimePeriod;
+	period->SetName("24x7");
+	period->SetRanges(
+		new Dictionary{
+			{"monday", "00:00-24:00"},
+			{"tuesday", "00:00-24:00"},
+			{"wednesday", "00:00-24:00"},
+			{"thursday", "00:00-24:00"},
+			{"friday", "00:00-24:00"},
+			{"saturday", "00:00-24:00"},
+			{"sunday", "00:00-24:00"}
+		},
+		true
+	);
+	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
+	period->Register();
+	period->PreActivate();
+	period->Activate();
+
+	Checkable::OnNextCheckChanged.connect([this](const Checkable::Ptr& checkable, const Value&) {
+		// We're using a 24x7 time period, so the checker should behave normally and set the
+		// next check time based on the regular check or retry interval.
+		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + checkInterval + .5);
+	});
+
+	// Register some checkables that reference the "24x7" time period registered above, which is always active.
+	(void)RegisterCheckable("host-1", "random", "24x7");
+	(void)RegisterCheckable("host-2", "random", "24x7");
+	(void)RegisterCheckable("host-3", "sleep", "24x7");
+	(void)RegisterCheckable("host-4", "sleep", "24x7");
+
+	Utility::Sleep(.3);
+
+	// We expect that checks are executed normally, and the checker sets the next check time based
+	// on the regular check interval. So, we should see multiple checks executed for each checkable.
+	BOOST_CHECK(ExpectLogPattern("Executing check for .*"));
+	BOOST_CHECK(ExpectLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed."));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
+	BOOST_CHECK_MESSAGE(6 <= resultCount && resultCount <= 8, "Expected between 6 and 8 results, got " << resultCount);
+}
+
+BOOST_AUTO_TEST_CASE(max_concurrent_checks)
+{
+	// Limit the number of concurrent checks to 4.
+	ScriptGlobal::Set("MaxConcurrentChecks", 4);
+
+	// Register 16 checkables that each sleep for 4 seconds when executing their check.
+	// With a max concurrent check limit of 4, we should see that only 4 checks are executed
+	// at the same time, and the remaining 12 checks are queued until one of the running checks
+	// finishes (which will not happen within the short sleep time of this test).
+	RegisterCheckablesSleep(16, 4);
+	Utility::Sleep(.5);
+
+	auto objects(ConfigType::GetObjectsByType());
+	BOOST_CHECK_EQUAL(16, objects.size());
+
+	for (auto& h : objects) {
+		// Force a reschedule of the checks to see whether the checker does absolutely nothing
+		// when the max concurrent check limit is reached. Normally, this would force the checker
+		// to immediately pick up the checkable and execute its check, but since all 4 slots are
+		// already taken, the checker should just update its queue idx and do nothing else.
+		Checkable::OnRescheduleCheck(h, Utility::GetTime());
+	}
+	Utility::Sleep(.5);
+
+	// We expect that only 4 checks are started initially, and the other 12 checks should have
+	// never been run, since the sleep time for each check (4 seconds) is much longer than the
+	// total sleep time of this test (1 second).
+	BOOST_CHECK(ExpectLogPattern("Pending checkables: 4; Idle checkables: 12; Checks/s: .*"));
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Scheduling info for checkable .*: Object .*"));
+	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Executing check for .*"));
+	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
+	BOOST_CHECK_EQUAL(4, Checkable::GetPendingChecks());
+	BOOST_CHECK_EQUAL(0, resultCount);
+}
+
+BOOST_AUTO_TEST_SUITE_END()