test: add basic checker scheduling test cases

This commit is contained in:
Yonas Habteab 2025-09-25 09:53:47 +02:00
parent ab8621f6be
commit 0bcee67751
4 changed files with 416 additions and 0 deletions

View File

@ -129,6 +129,14 @@ set(base_test_SOURCES
$<TARGET_OBJECTS:methods> $<TARGET_OBJECTS:methods>
) )
# The checker tests, their fixture header and the checker object files are only
# appended to the test sources when ICINGA2_WITH_CHECKER is set.
if (ICINGA2_WITH_CHECKER)
list(APPEND base_test_SOURCES
checker.cpp
checker-fixture.hpp
$<TARGET_OBJECTS:checker>
)
endif()
if(ICINGA2_UNITY_BUILD) if(ICINGA2_UNITY_BUILD)
mkunity_target(base test base_test_SOURCES) mkunity_target(base test base_test_SOURCES)
endif() endif()

View File

@ -52,6 +52,27 @@ public:
return ret; return ret;
} }
/**
* Counts the number of log entries that match the given regex pattern.
*
* This only counts existing log entries, it does not wait for new ones to arrive.
*
* @param pattern The regex pattern the log message needs to match
*
* @return The number of log entries that match the given pattern
*/
auto CountExpectedLogPattern(const std::string& pattern)
{
std::unique_lock lock(m_Mutex);
int count = 0;
for (const auto& logEntry : m_LogEntries) {
if (boost::regex_match(logEntry.Message.GetData(), boost::regex(pattern))) {
++count;
}
}
return count;
}
private: private:
void ProcessLogEntry(const LogEntry& entry) override void ProcessLogEntry(const LogEntry& entry) override
{ {

179
test/checker-fixture.hpp Normal file
View File

@ -0,0 +1,179 @@
/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
#pragma once
#include "base/atomic.hpp"
#include "checker/checkercomponent.hpp"
#include "config/configcompiler.hpp"
#include "icinga/checkcommand.hpp"
#include "test/base-testloggerfixture.hpp"
#include "test/icingaapplication-fixture.hpp"
namespace icinga {
/**
 * Test fixture for tests involving the @c CheckerComponent.
 *
 * This fixture sets up a CheckerComponent instance and provides utility functions to register
 * checkable objects. It is derived from @c TestLoggerFixture to capture log output during tests,
 * so tests can verify that expected log messages are produced. The fixture also connects to the
 * @c Checkable::OnNewCheckResult signal to count the number of check results produced during tests.
 */
struct CheckerFixture : TestLoggerFixture
{
	/**
	 * Creates and activates the checker component, compiles the "random" and "sleep"
	 * check commands and starts counting/validating incoming check results.
	 */
	CheckerFixture()
	{
		checker = new CheckerComponent;
		checker->SetResultTimerInterval(.4); // Speed up result timer for tests
		checker->SetName("randomizer", true);
		checker->Register();
		checker->OnConfigLoaded();
		checker->PreActivate();
		checker->Activate();

		// Manually building and registering the command won't work here as we need a real callable
		// function that produces cr and calls ProcessCheckResult on the checkable. So we use a small
		// config snippet that imports the random and sleep check commands registered by the "methods-itl.cpp"
		// file in the methods lib.
		ConfigItem::RunWithActivationContext(
			new Function{
				"checker-fixture",
				[] {
					std::unique_ptr<Expression> expression = ConfigCompiler::CompileText(
						"<checker-fixture>",
						R"CONFIG(
object CheckCommand "random" {
import "random-check-command"
}
object CheckCommand "sleep" {
import "sleep-check-command"
}
)CONFIG"
					);
					BOOST_REQUIRE(expression);
					ScriptFrame frame(true);
					BOOST_CHECK_NO_THROW(expression->Evaluate(frame));
				}
			}
		);

		// Count every check result and make sure it looks like a successful check execution.
		Checkable::OnNewCheckResult.connect(
			[this](const Checkable::Ptr&, const CheckResult::Ptr& cr, const MessageOrigin::Ptr&) {
				++resultCount;
				BOOST_REQUIRE(cr);
				BOOST_CHECK_EQUAL(0, cr->GetExitStatus());
				BOOST_CHECK(!cr->GetOutput().IsEmpty());
			}
		);
	}

	/**
	 * Disconnects all slots from the signals used by the fixture and the test cases
	 * (including the ones connected by the tests themselves) and deactivates the checker.
	 */
	~CheckerFixture()
	{
		Checkable::OnNextCheckChanged.disconnect_all_slots();
		Checkable::OnNewCheckResult.disconnect_all_slots();
		checker->Deactivate();
	}

	/**
	 * Registers a fully configured set of checkable hosts that execute the "random" check command.
	 *
	 * Each host is configured with a random check command, check interval, and retry interval.
	 * If @c unreachable is true, each host is made unreachable by adding a dependency on a parent
	 * host that is in a critical state. This prevents the checker from executing checks for the
	 * child hosts. The check and retry intervals are kept low to allow for quick test execution,
	 * but they can be adjusted via the @c checkInterval and @c retryInterval member variables of
	 * the fixture.
	 *
	 * @param count Number of checkable hosts to register.
	 * @param disableChecks If true, disables active checks for each host.
	 * @param unreachable If true, makes each host unreachable via a dependency.
	 */
	void RegisterCheckablesRandom(int count, bool disableChecks = false, bool unreachable = false) const
	{
		for (int i = 1; i <= count; ++i) {
			(void)RegisterCheckable("host-" + std::to_string(i), "random", "", disableChecks, unreachable);
		}
	}

	/**
	 * Registers a fully configured set of checkable hosts that execute the "sleep" command.
	 *
	 * Each host is configured with a sleep check command that sleeps for the specified duration.
	 * The check and retry intervals can be adjusted via the @c checkInterval and @c retryInterval
	 * member variables of the fixture. If @c unreachable is true, each host is made unreachable by
	 * adding a dependency on a parent host that is in a critical state. This prevents the checker
	 * from executing checks for the child hosts.
	 *
	 * @param count Number of checkable hosts to register.
	 * @param sleepTime Duration (in seconds) that the sleep command should sleep. Defaults to 1.0 second.
	 * @param disableChecks If true, disables active checks for each host.
	 * @param unreachable If true, makes each host unreachable via a dependency.
	 */
	void RegisterCheckablesSleep(
		int count,
		double sleepTime = 1.0,
		bool disableChecks = false,
		bool unreachable = false
	) const
	{
		for (int i = 1; i <= count; ++i) {
			// The custom vars are handed over to RegisterCheckable so that they are in place before
			// the host gets activated. Setting them afterwards would race with the already running
			// checker, which could execute the first check without the requested sleep time.
			(void)RegisterCheckable(
				"host-" + std::to_string(i),
				"sleep",
				"",
				disableChecks,
				unreachable,
				new Dictionary{{"sleep_time", sleepTime}}
			);
		}
	}

	/**
	 * Creates, registers and activates a single host that is picked up by the checker.
	 *
	 * The host uses the fixture's @c checkInterval and @c retryInterval and runs in
	 * @c HARunEverywhere mode. All attributes, including the optional custom vars, are set
	 * before the host is registered and activated.
	 *
	 * @param name Name of the host to register.
	 * @param cmd Name of the check command the host should execute.
	 * @param period Name of the check period of the host, or an empty string for none.
	 * @param disableChecks If true, disables active checks for the host.
	 * @param unreachable If true, adds a dependency on a parent host in a hard critical state.
	 * @param vars Optional custom vars to set on the host before it is activated.
	 *
	 * @return The activated host.
	 */
	Host::Ptr RegisterCheckable(
		std::string name,
		std::string cmd,
		std::string period,
		bool disableChecks = false,
		bool unreachable = false,
		const Dictionary::Ptr& vars = nullptr
	) const
	{
		Host::Ptr host = new Host;
		host->SetName(std::move(name), true);
		host->SetCheckCommandRaw(std::move(cmd), true);
		host->SetCheckInterval(checkInterval, true);
		host->SetRetryInterval(retryInterval, true);
		host->SetHAMode(HARunEverywhere, true); // Disable HA for tests
		host->SetEnableActiveChecks(!disableChecks, true);
		host->SetCheckPeriodRaw(std::move(period), true);
		if (vars) {
			host->SetVars(vars, true);
		}
		host->Register();
		host->OnAllConfigLoaded();

		if (unreachable) {
			// The dependency has to be in place before the child host is activated below,
			// as the checker starts scheduling the host as soon as it becomes active.
			Host::Ptr parent = new Host;
			parent->SetName(Utility::NewUniqueID(), true);
			parent->SetStateRaw(ServiceCritical, true);
			parent->SetStateType(StateTypeHard, true);
			parent->SetLastCheckResult(new CheckResult, true);
			parent->Register();

			Dependency::Ptr dep = new Dependency;
			dep->SetName(Utility::NewUniqueID(), true);
			dep->SetStateFilter(StateFilterUp, true);
			dep->SetDisableChecks(true, true);
			dep->SetParent(parent);
			dep->SetChild(host);
			dep->Register();
			host->AddDependency(dep);
		}

		host->PreActivate();
		host->Activate();
		return host;
	}

	/**
	 * resultCount tracks the number of check results produced by the checker.
	 *
	 * This is used in tests to verify that checks are actually being executed and results processed.
	 * It is incremented from within the OnNewCheckResult signal handler, thus must be atomic.
	 */
	Atomic<int> resultCount{0};
	double checkInterval{.1}; // Interval in seconds between regular checks for each checkable.
	double retryInterval{.1}; // Interval in seconds between retry checks for each checkable.
	CheckerComponent::Ptr checker; // The checker component under test, activated by the constructor.
};
} // namespace icinga

208
test/checker.cpp Normal file
View File

@ -0,0 +1,208 @@
/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
#include "test/checker-fixture.hpp"
#include "base/scriptglobal.hpp"
#include "base/utility.hpp"
#include "icinga/host.hpp"
#include "icinga/dependency.hpp"
#include "icinga/legacytimeperiod.hpp"
#include <boost/test/unit_test.hpp>
using namespace icinga;
BOOST_FIXTURE_TEST_SUITE(checker, CheckerFixture, *boost::unit_test::label("checker"))
BOOST_AUTO_TEST_CASE(single_checkable)
{
	// Every reschedule has to concern the one and only host and must stay close to the last check.
	auto verifyNextCheck = [](const Checkable::Ptr& checkable, const Value&) {
		BOOST_CHECK_EQUAL(checkable->GetName(), "host-1");
		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + .15);
	};
	Checkable::OnNextCheckChanged.connect(verifyNextCheck);

	RegisterCheckablesRandom(1);
	Utility::Sleep(.4);

	// Within the sleep time the host is expected to be checked exactly four times.
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Executing check for 'host-1'"));
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Check finished for object 'host-1'"));
	BOOST_CHECK_EQUAL(4, resultCount);
}
BOOST_AUTO_TEST_CASE(multiple_checkables)
{
	// The next check of any host must never be scheduled much later than its last check.
	auto verifyNextCheck = [](const Checkable::Ptr& checkable, const Value&) {
		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + .15);
	};
	Checkable::OnNextCheckChanged.connect(verifyNextCheck);

	RegisterCheckablesRandom(16);
	Utility::Sleep(.3);

	// 16 hosts are checked concurrently, so the total number of results has to be in this range.
	BOOST_CHECK_MESSAGE(resultCount >= 47 && resultCount <= 50, "Expected between 47 and 50 results, got " << resultCount);
}
BOOST_AUTO_TEST_CASE(disabled_checks)
{
	// Active checks are disabled on each host, so the checker must only skip and reschedule them.
	RegisterCheckablesRandom(4, true);
	Utility::Sleep(.3);

	auto skippedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
	BOOST_CHECK_MESSAGE(11 <= skippedC && skippedC <= 13, "Expected between 11 and 13 disabled checks log messages, got " << skippedC);

	// Every skipped check is rescheduled, so the same bounds apply to the reschedule messages.
	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
	BOOST_CHECK_EQUAL(skippedC, rescheduleC);

	// No check must have been started or finished, and no check result must have been produced.
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
BOOST_AUTO_TEST_CASE(globally_disabled_checks)
{
	// Disable active host checks globally and restore the previous setting when the test body
	// is left, so that the changed global flag does not leak into other test cases.
	struct EnableHostChecksGuard
	{
		IcingaApplication::Ptr App;
		bool Previous;

		~EnableHostChecksGuard()
		{
			App->SetEnableHostChecks(Previous);
		}
	};
	auto app = IcingaApplication::GetInstance();
	EnableHostChecksGuard restoreHostChecks{app, app->GetEnableHostChecks()};
	app->SetEnableHostChecks(false);

	RegisterCheckablesRandom(4);
	Utility::Sleep(.3);

	auto skippedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
	BOOST_CHECK_MESSAGE(11 <= skippedC && skippedC <= 13, "Expected between 11 and 13 disabled checks log messages, got " << skippedC);

	// Every skipped check is rescheduled, so the same bounds apply to the reschedule messages.
	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
	BOOST_CHECK_EQUAL(skippedC, rescheduleC);

	// No check must have been started or finished, and no check result must have been produced.
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
BOOST_AUTO_TEST_CASE(unreachable_checkable)
{
	// Create a dependency that makes the host unreachable (i.e. no checks should be executed).
	// This must be done before activating the actual child checkable, otherwise the checker will
	// immediately schedule a check before the dependency is in place.
	RegisterCheckablesRandom(4, false, true);
	Utility::Sleep(.3);

	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed.");
	BOOST_CHECK_MESSAGE(11 <= failedC && failedC <= 13, "Expected between 11 and 13 dependency failure log messages, got " << failedC);

	// Every skipped check is rescheduled, so the same bounds apply to the reschedule messages.
	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
	BOOST_CHECK_EQUAL(failedC, rescheduleC);

	// No check must have been started or finished, and no check result must have been produced.
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
BOOST_AUTO_TEST_CASE(never_in_check_period)
{
	// A time period without any ranges, i.e. no point in time is ever inside of it.
	TimePeriod::Ptr period = new TimePeriod;
	period->SetName("never");
	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
	period->Register();
	period->PreActivate();
	period->Activate();

	// The slot stays connected until the fixture is torn down, which happens after the locals of
	// this test body are gone. Hence, the period pointer is captured by value, not by reference.
	Checkable::OnNextCheckChanged.connect([period](const Checkable::Ptr& checkable, const Value&) {
		// The checker should ignore the regular check interval and instead set the next check time based on the tp.
		BOOST_CHECK_EQUAL(checkable->GetNextCheck(), period->GetValidEnd());
	});

	// Register some checkables that are only checked during the "never" time period, which is never.
	(void)RegisterCheckable("host-1", "random", "never");
	(void)RegisterCheckable("host-2", "random", "never");
	(void)RegisterCheckable("host-3", "sleep", "never");
	(void)RegisterCheckable("host-4", "sleep", "never");
	Utility::Sleep(.3);

	// We expect that no checks are executed, and instead the checker reschedules the checks for the
	// next valid end time of the "never" time period, which is always 24h from now. So, we should see
	// 4 log messages about skipping the checks due to the time period, and nothing else.
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Skipping check for object .*, as not in check period 'never', until .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
BOOST_AUTO_TEST_CASE(in_check_period)
{
	// A time period that covers every day of the week around the clock.
	TimePeriod::Ptr period = new TimePeriod;
	period->SetName("24x7");
	period->SetRanges(
		new Dictionary{
			{"monday", "00:00-24:00"},
			{"tuesday", "00:00-24:00"},
			{"wednesday", "00:00-24:00"},
			{"thursday", "00:00-24:00"},
			{"friday", "00:00-24:00"},
			{"saturday", "00:00-24:00"},
			{"sunday", "00:00-24:00"}
		},
		true
	);
	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
	period->Register();
	period->PreActivate();
	period->Activate();

	Checkable::OnNextCheckChanged.connect([this](const Checkable::Ptr& checkable, const Value&) {
		// We're using a 24x7 time period, so the checker should behave normally and set the
		// next check time based on the regular check or retry interval.
		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + checkInterval + .5);
	});

	// Register some checkables that are only checked during the "24x7" time period, which is always.
	// The name has to match the time period registered above, otherwise the hosts would end up
	// without any check period and the time period handling wouldn't be exercised at all.
	(void)RegisterCheckable("host-1", "random", "24x7");
	(void)RegisterCheckable("host-2", "random", "24x7");
	(void)RegisterCheckable("host-3", "sleep", "24x7");
	(void)RegisterCheckable("host-4", "sleep", "24x7");
	Utility::Sleep(.3);

	// We expect that checks are executed normally, and the checker sets the next check time based
	// on the regular check interval. So, we should see multiple checks executed for each checkable.
	BOOST_CHECK(ExpectLogPattern("Executing check for .*"));
	BOOST_CHECK(ExpectLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*, as not in check period .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed."));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
	BOOST_CHECK_MESSAGE(6 <= resultCount && resultCount <= 8, "Expected between 6 and 8 results, got " << resultCount);
}
BOOST_AUTO_TEST_CASE(max_concurrent_checks)
{
	// Allow at most 4 checks to be running at the same time.
	ScriptGlobal::Set("MaxConcurrentChecks", 4);

	// Each of the 16 checkables sleeps for 4 seconds while being checked. Due to the limit above
	// only 4 of them can be running simultaneously, the other 12 have to wait for a free slot,
	// which won't become available within the short runtime of this test.
	RegisterCheckablesSleep(16, 4);
	Utility::Sleep(.5);

	const auto hosts = ConfigType::GetObjectsByType<Host>();
	BOOST_CHECK_EQUAL(16, hosts.size());

	// Request an immediate reschedule for every host. Usually the checker would pick them up and
	// execute their checks right away, but as all 4 slots are occupied, it is expected to merely
	// update its queue index and do nothing else.
	for (const auto& host : hosts) {
		Checkable::OnRescheduleCheck(host, Utility::GetTime());
	}
	Utility::Sleep(.5);

	// Only the 4 initially started checks are expected to be running. None of them can have
	// finished yet, and none of the remaining 12 checks can have been started, as each check
	// sleeps for 4 seconds while this test only waits for 1 second in total.
	BOOST_CHECK(ExpectLogPattern("Pending checkables: 4; Idle checkables: 12; Checks/s: .*"));
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Scheduling info for checkable .*: Object .*"));
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(4, Checkable::GetPendingChecks());
	BOOST_CHECK_EQUAL(0, resultCount);
}
BOOST_AUTO_TEST_SUITE_END()