mirror of
https://github.com/Icinga/icinga2.git
synced 2025-09-26 02:58:43 +02:00
test: add basic checker scheduling test cases
This commit is contained in:
parent
ab8621f6be
commit
0bcee67751
@ -129,6 +129,14 @@ set(base_test_SOURCES
|
||||
$<TARGET_OBJECTS:methods>
|
||||
)
|
||||
|
||||
# The checker test sources and the checker object library are only added to
# base_test_SOURCES when ICINGA2_WITH_CHECKER is set.
if (ICINGA2_WITH_CHECKER)
|
||||
list(APPEND base_test_SOURCES
|
||||
checker.cpp
|
||||
checker-fixture.hpp
|
||||
$<TARGET_OBJECTS:checker>
|
||||
)
|
||||
endif()
|
||||
|
||||
if(ICINGA2_UNITY_BUILD)
|
||||
mkunity_target(base test base_test_SOURCES)
|
||||
endif()
|
||||
|
@ -52,6 +52,27 @@ public:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts the number of log entries that match the given regex pattern.
|
||||
*
|
||||
* This only counts existing log entries, it does not wait for new ones to arrive.
|
||||
*
|
||||
* @param pattern The regex pattern the log message needs to match
|
||||
*
|
||||
* @return The number of log entries that match the given pattern
|
||||
*/
|
||||
auto CountExpectedLogPattern(const std::string& pattern)
|
||||
{
|
||||
std::unique_lock lock(m_Mutex);
|
||||
int count = 0;
|
||||
for (const auto& logEntry : m_LogEntries) {
|
||||
if (boost::regex_match(logEntry.Message.GetData(), boost::regex(pattern))) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private:
|
||||
void ProcessLogEntry(const LogEntry& entry) override
|
||||
{
|
||||
|
179
test/checker-fixture.hpp
Normal file
179
test/checker-fixture.hpp
Normal file
@ -0,0 +1,179 @@
|
||||
/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "base/atomic.hpp"
|
||||
#include "checker/checkercomponent.hpp"
|
||||
#include "config/configcompiler.hpp"
|
||||
#include "icinga/checkcommand.hpp"
|
||||
#include "test/base-testloggerfixture.hpp"
|
||||
#include "test/icingaapplication-fixture.hpp"
|
||||
|
||||
namespace icinga {
|
||||
/**
|
||||
* Test fixture for tests involving the @c CheckerComponent.
|
||||
*
|
||||
* This fixture sets up a CheckerComponent instance and provides utility functions to register
|
||||
* checkable objects. It is derived from @c TestLoggerFixture to capture log output during tests,
|
||||
* so tests can verify that expected log messages are produced. The fixture also connects to the
|
||||
* @c Checkable::OnNewCheckResult signal to count the number of check results produced during tests.
|
||||
*/
|
||||
struct CheckerFixture : TestLoggerFixture
|
||||
{
|
||||
CheckerFixture()
|
||||
{
|
||||
checker = new CheckerComponent;
|
||||
checker->SetResultTimerInterval(.4); // Speed up result timer for tests
|
||||
checker->SetName("randomizer", true);
|
||||
checker->Register();
|
||||
checker->OnConfigLoaded();
|
||||
checker->PreActivate();
|
||||
checker->Activate();
|
||||
|
||||
// Manually building and registering the command won't work here as we need a real callable
|
||||
// function that produces cr and calls ProcessCheckResult on the checkable. So we use a small
|
||||
// config snippet that imports the random and sleep check commands registered by the "methods-itl.cpp"
|
||||
// file in the methods lib.
|
||||
ConfigItem::RunWithActivationContext(
|
||||
new Function{
|
||||
"checker-fixture",
|
||||
[] {
|
||||
std::unique_ptr<Expression> expression = ConfigCompiler::CompileText(
|
||||
"<checker-fixture>",
|
||||
R"CONFIG(
|
||||
object CheckCommand "random" {
|
||||
import "random-check-command"
|
||||
}
|
||||
|
||||
object CheckCommand "sleep" {
|
||||
import "sleep-check-command"
|
||||
}
|
||||
)CONFIG"
|
||||
);
|
||||
BOOST_REQUIRE(expression);
|
||||
ScriptFrame frame(true);
|
||||
BOOST_CHECK_NO_THROW(expression->Evaluate(frame));
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
Checkable::OnNewCheckResult.connect(
|
||||
[this](const Checkable::Ptr&, const CheckResult::Ptr& cr, const MessageOrigin::Ptr&) {
|
||||
++resultCount;
|
||||
BOOST_REQUIRE(cr);
|
||||
BOOST_CHECK_EQUAL(0, cr->GetExitStatus());
|
||||
BOOST_CHECK(!cr->GetOutput().IsEmpty());
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
~CheckerFixture()
|
||||
{
|
||||
Checkable::OnNextCheckChanged.disconnect_all_slots();
|
||||
Checkable::OnNewCheckResult.disconnect_all_slots();
|
||||
checker->Deactivate();
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a fully configured set of checkable hosts that execute the "random" check command.
|
||||
*
|
||||
* Each host is configured with a random check command, check interval, and retry interval.
|
||||
* If @c unreachable is true, each host is made unreachable by adding a dependency on a parent
|
||||
* host that is in a critical state. This prevents the checker from executing checks for the
|
||||
* child hosts. The check and retry intervals are kept low to allow for quick test execution,
|
||||
* but they can be adjusted via the @c interval and @c retry parameters.
|
||||
*
|
||||
* @param count Number of checkable hosts to register.
|
||||
* @param disableChecks If true, disables active checks for each host.
|
||||
* @param unreachable If true, makes each host unreachable via a dependency.
|
||||
*/
|
||||
void RegisterCheckablesRandom(int count, bool disableChecks = false, bool unreachable = false) const
|
||||
{
|
||||
for (int i = 1; i <= count; ++i) {
|
||||
RegisterCheckable("host-" + std::to_string(i), "random", "", disableChecks, unreachable);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a fully configured set of checkable hosts that execute the "sleep" command.
|
||||
*
|
||||
* Each host is configured with a sleep check command that sleeps for the specified duration.
|
||||
* The check and retry intervals can be adjusted via the @c checkInterval and @c retryInterval
|
||||
* member variables of the fixture. If @c unreachable is true, each host is made unreachable by
|
||||
* adding a dependency on a parent host that is in a critical state. This prevents the checker
|
||||
* from executing checks for the child hosts.
|
||||
*
|
||||
* @param count Number of checkable hosts to register.
|
||||
* @param sleepTime Duration (in seconds) that the sleep command should sleep. Defaults to 1.0 second.
|
||||
* @param disableChecks If true, disables active checks for each host.
|
||||
* @param unreachable If true, makes each host unreachable via a dependency.
|
||||
*/
|
||||
void RegisterCheckablesSleep(
|
||||
int count,
|
||||
double sleepTime = 1.0,
|
||||
bool disableChecks = false,
|
||||
bool unreachable = false
|
||||
) const
|
||||
{
|
||||
for (int i = 1; i <= count; ++i) {
|
||||
auto h = RegisterCheckable("host-" + std::to_string(i), "sleep", "", disableChecks, unreachable);
|
||||
h->SetVars(new Dictionary{{"sleep_time", sleepTime}});
|
||||
}
|
||||
}
|
||||
|
||||
Host::Ptr RegisterCheckable(
|
||||
std::string name,
|
||||
std::string cmd,
|
||||
std::string period,
|
||||
bool disableChecks = false,
|
||||
bool unreachable = false
|
||||
) const
|
||||
{
|
||||
Host::Ptr host = new Host;
|
||||
host->SetName(std::move(name), true);
|
||||
host->SetCheckCommandRaw(std::move(cmd), true);
|
||||
host->SetCheckInterval(checkInterval, true);
|
||||
host->SetRetryInterval(retryInterval, true);
|
||||
host->SetHAMode(HARunEverywhere, true); // Disable HA for tests
|
||||
host->SetEnableActiveChecks(!disableChecks, true);
|
||||
host->SetCheckPeriodRaw(std::move(period), true);
|
||||
host->Register();
|
||||
host->OnAllConfigLoaded();
|
||||
|
||||
if (unreachable) {
|
||||
Host::Ptr parent = new Host;
|
||||
parent->SetName(Utility::NewUniqueID(), true);
|
||||
parent->SetStateRaw(ServiceCritical, true);
|
||||
parent->SetStateType(StateTypeHard, true);
|
||||
parent->SetLastCheckResult(new CheckResult, true);
|
||||
parent->Register();
|
||||
|
||||
Dependency::Ptr dep = new Dependency;
|
||||
dep->SetName(Utility::NewUniqueID(), true);
|
||||
dep->SetStateFilter(StateFilterUp, true);
|
||||
dep->SetDisableChecks(true, true);
|
||||
dep->SetParent(parent);
|
||||
dep->SetChild(host);
|
||||
dep->Register();
|
||||
|
||||
host->AddDependency(dep);
|
||||
}
|
||||
|
||||
host->PreActivate();
|
||||
host->Activate();
|
||||
return host;
|
||||
}
|
||||
|
||||
/**
|
||||
* resultCount tracks the number of check results produced by the checker.
|
||||
*
|
||||
* This is used in tests to verify that checks are actually being executed and results processed.
|
||||
* It is incremented from within the OnNewCheckResult signal handler, thus must be atomic.
|
||||
*/
|
||||
Atomic<int> resultCount{0};
|
||||
double checkInterval{.1}; // Interval in seconds between regular checks for each checkable.
|
||||
double retryInterval{.1}; // Interval in seconds between retry checks for each checkable.
|
||||
CheckerComponent::Ptr checker;
|
||||
};
|
||||
|
||||
} // namespace icinga
|
208
test/checker.cpp
Normal file
208
test/checker.cpp
Normal file
@ -0,0 +1,208 @@
|
||||
/* Icinga 2 | (c) 2025 Icinga GmbH | GPLv2+ */
|
||||
|
||||
#include "test/checker-fixture.hpp"
|
||||
#include "base/scriptglobal.hpp"
|
||||
#include "base/utility.hpp"
|
||||
#include "icinga/host.hpp"
|
||||
#include "icinga/dependency.hpp"
|
||||
#include "icinga/legacytimeperiod.hpp"
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
using namespace icinga;
|
||||
|
||||
BOOST_FIXTURE_TEST_SUITE(checker, CheckerFixture, *boost::unit_test::label("checker"))
|
||||
|
||||
// A single host with the "random" check command: after sleeping for 0.4 seconds exactly four
// checks are expected to have been executed and finished.
BOOST_AUTO_TEST_CASE(single_checkable)
{
	// Every rescheduling has to concern host-1 and must stay close to the check interval.
	const auto verifyNextCheck = [](const Checkable::Ptr& checkable, const Value&) {
		BOOST_CHECK_EQUAL(checkable->GetName(), "host-1");

		const auto latestAllowed = checkable->GetLastCheck() + .15;
		BOOST_CHECK_LE(checkable->GetNextCheck(), latestAllowed);
	};
	Checkable::OnNextCheckChanged.connect(verifyNextCheck);

	RegisterCheckablesRandom(1);
	Utility::Sleep(.4);

	const int expectedChecks = 4;
	BOOST_CHECK_EQUAL(expectedChecks, testLogger->CountExpectedLogPattern("Executing check for 'host-1'"));
	BOOST_CHECK_EQUAL(expectedChecks, testLogger->CountExpectedLogPattern("Check finished for object 'host-1'"));
	BOOST_CHECK_EQUAL(expectedChecks, resultCount);
}
|
||||
|
||||
// Sixteen hosts with the "random" check command: after sleeping for 0.3 seconds between 47 and
// 50 check results are expected in total.
BOOST_AUTO_TEST_CASE(multiple_checkables)
{
	// Every rescheduling must stay close to the check interval.
	const auto verifyNextCheck = [](const Checkable::Ptr& checkable, const Value&) {
		const auto latestAllowed = checkable->GetLastCheck() + .15;
		BOOST_CHECK_LE(checkable->GetNextCheck(), latestAllowed);
	};
	Checkable::OnNextCheckChanged.connect(verifyNextCheck);

	RegisterCheckablesRandom(16);
	Utility::Sleep(.3);

	BOOST_CHECK_MESSAGE(resultCount >= 47 && resultCount <= 50, "Expected between 47 and 50 results, got " << resultCount);
}
|
||||
|
||||
// Hosts with active checks disabled on the object itself: the checker has to skip and
// reschedule every due check without ever executing one.
BOOST_AUTO_TEST_CASE(disabled_checks)
{
	RegisterCheckablesRandom(4, true);
	Utility::Sleep(.3);

	auto skippedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
	BOOST_CHECK_MESSAGE(11 <= skippedC && skippedC <= 13, "Expected between 11 and 13 disabled check log messages, got " << skippedC);

	// Each skipped check is rescheduled, so both counts share the same bounds and have to be equal.
	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
	BOOST_CHECK_EQUAL(skippedC, rescheduleC);

	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
|
||||
|
||||
// Hosts with active checks enabled on the object, but host checks disabled on the
// IcingaApplication: the checker has to skip and reschedule every due check without ever
// executing one.
BOOST_AUTO_TEST_CASE(globally_disabled_checks)
{
	IcingaApplication::GetInstance()->SetEnableHostChecks(false); // Disable active host checks globally

	RegisterCheckablesRandom(4);
	Utility::Sleep(.3);

	auto skippedC = testLogger->CountExpectedLogPattern("Skipping check for host .*: active host checks are disabled");
	BOOST_CHECK_MESSAGE(11 <= skippedC && skippedC <= 13, "Expected between 11 and 13 disabled check log messages, got " << skippedC);

	// Each skipped check is rescheduled, so both counts share the same bounds and have to be equal.
	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
	BOOST_CHECK_EQUAL(skippedC, rescheduleC);

	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
|
||||
|
||||
// Hosts that are unreachable due to a failed dependency: the checker has to skip and
// reschedule every due check without ever executing one.
BOOST_AUTO_TEST_CASE(unreachable_checkable)
{
	// Create a dependency that makes the host unreachable (i.e. no checks should be executed).
	// This must be done before activating the actual child checkable, otherwise the checker will
	// immediately schedule a check before the dependency is in place.
	RegisterCheckablesRandom(4, false, true);
	Utility::Sleep(.3);

	auto failedC = testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed.");
	BOOST_CHECK_MESSAGE(11 <= failedC && failedC <= 13, "Expected between 11 and 13 dependency failure log messages, got " << failedC);

	// Each skipped check is rescheduled, so both counts share the same bounds and have to be equal.
	auto rescheduleC = testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check.");
	BOOST_CHECK_MESSAGE(11 <= rescheduleC && rescheduleC <= 13, "Expected between 11 and 13 reschedule log messages, got " << rescheduleC);
	BOOST_CHECK_EQUAL(failedC, rescheduleC);

	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
|
||||
|
||||
// Hosts bound to a time period without any ranges: no check may be executed, the checks are
// moved to the end of the time period's valid range instead.
BOOST_AUTO_TEST_CASE(never_in_check_period)
{
	TimePeriod::Ptr period = new TimePeriod;
	period->SetName("never");
	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
	period->Register();
	period->PreActivate();
	period->Activate();

	// The slot stays connected until the fixture is destroyed, which happens after the locals of
	// this test case are gone. Hence, the slot needs its own reference to the time period.
	Checkable::OnNextCheckChanged.connect([period](const Checkable::Ptr& checkable, const Value&) {
		// The checker should ignore the regular check interval and instead set the next check time based on the tp.
		BOOST_CHECK_EQUAL(checkable->GetNextCheck(), period->GetValidEnd());
	});

	// Register some checkables that are only checked during the "never" time period, which is never.
	(void)RegisterCheckable("host-1", "random", "never");
	(void)RegisterCheckable("host-2", "random", "never");
	(void)RegisterCheckable("host-3", "sleep", "never");
	(void)RegisterCheckable("host-4", "sleep", "never");

	Utility::Sleep(.3);

	// We expect that no checks are executed, and instead the checker reschedules the checks for the
	// next valid end time of the "never" time period, which is always 24h from now. So, we should see
	// 4 log messages about skipping the checks due to the time period, and nothing else.
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Skipping check for object .*, as not in check period 'never', until .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, resultCount);
}
|
||||
|
||||
// Hosts bound to a time period that covers every day from 00:00 to 24:00: the checker has to
// behave as if there was no check period at all.
BOOST_AUTO_TEST_CASE(in_check_period)
{
	TimePeriod::Ptr period = new TimePeriod;
	period->SetName("24x7");
	period->SetRanges(
		new Dictionary{
			{"monday", "00:00-24:00"},
			{"tuesday", "00:00-24:00"},
			{"wednesday", "00:00-24:00"},
			{"thursday", "00:00-24:00"},
			{"friday", "00:00-24:00"},
			{"saturday", "00:00-24:00"},
			{"sunday", "00:00-24:00"}
		},
		true
	);
	period->SetUpdate(new Function("LegacyTimePeriod", LegacyTimePeriod::ScriptFunc, {"tp", "begin", "end"}), true);
	period->Register();
	period->PreActivate();
	period->Activate();

	Checkable::OnNextCheckChanged.connect([this](const Checkable::Ptr& checkable, const Value&) {
		// We're using a 24x7 time period, so the checker should behave normally and set the
		// next check time based on the regular check or retry interval.
		BOOST_CHECK_LE(checkable->GetNextCheck(), checkable->GetLastCheck() + checkInterval + .5);
	});

	// Register some checkables that are only checked during the "24x7" time period, which is always.
	// The name has to be the one of the time period registered above, otherwise the hosts would
	// reference a period that doesn't exist and the period would not be exercised at all.
	(void)RegisterCheckable("host-1", "random", "24x7");
	(void)RegisterCheckable("host-2", "random", "24x7");
	(void)RegisterCheckable("host-3", "sleep", "24x7");
	(void)RegisterCheckable("host-4", "sleep", "24x7");

	Utility::Sleep(.3);

	// We expect that checks are executed normally, and the checker sets the next check time based
	// on the regular check interval. So, we should see multiple checks executed for each checkable.
	BOOST_CHECK(ExpectLogPattern("Executing check for .*"));
	BOOST_CHECK(ExpectLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Skipping check for object .*: Dependency failed."));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Checks for checkable .* are disabled. Rescheduling check."));
	BOOST_CHECK_MESSAGE(6 <= resultCount && resultCount <= 8, "Expected between 6 and 8 results, got " << resultCount);
}
|
||||
|
||||
// With "MaxConcurrentChecks" set to 4 and sixteen long running checks, only four checks may be
// started; forcing a reschedule of all hosts must not start any additional check.
BOOST_AUTO_TEST_CASE(max_concurrent_checks)
{
	// Limit the number of concurrent checks to 4.
	ScriptGlobal::Set("MaxConcurrentChecks", 4);

	// Sixteen checkables, each of them sleeping for 4 seconds once its check is executed. As only
	// 4 checks may run at the same time, the other 12 have to wait until a running check finishes,
	// which won't happen within the short sleep time of this test.
	RegisterCheckablesSleep(16, 4);
	Utility::Sleep(.5);

	auto hosts(ConfigType::GetObjectsByType<Host>());
	BOOST_CHECK_EQUAL(16, hosts.size());

	// Force a reschedule of all checks. Normally, the checker would immediately pick up each
	// checkable and execute its check, but since all 4 slots are already taken, it should only
	// update its queue idx and otherwise do absolutely nothing.
	for (auto& host : hosts) {
		Checkable::OnRescheduleCheck(host, Utility::GetTime());
	}
	Utility::Sleep(.5);

	// Only the initial 4 checks must have been started. The remaining 12 checks never ran, as each
	// check sleeps for 4 seconds, which is much longer than the total sleep time of this test (1 second).
	BOOST_CHECK(ExpectLogPattern("Pending checkables: 4; Idle checkables: 12; Checks/s: .*"));
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Scheduling info for checkable .*: Object .*"));
	BOOST_CHECK_EQUAL(4, testLogger->CountExpectedLogPattern("Executing check for .*"));
	BOOST_CHECK_EQUAL(0, testLogger->CountExpectedLogPattern("Check finished for object .*"));
	BOOST_CHECK_EQUAL(4, Checkable::GetPendingChecks());
	BOOST_CHECK_EQUAL(0, resultCount);
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
Loading…
x
Reference in New Issue
Block a user