From d6bc5a1a185c5a8f84c25f41f80ec04acd7c5505 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Fri, 12 Jul 2019 13:45:54 +0200 Subject: [PATCH 01/14] Remove old signal handlers refs #5230 --- lib/base/application.cpp | 66 --------------------------------------- lib/base/application.hpp | 5 +-- lib/cli/daemoncommand.cpp | 14 --------- 3 files changed, 1 insertion(+), 84 deletions(-) diff --git a/lib/base/application.cpp b/lib/base/application.cpp index 880af34a2..3e1408542 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -680,29 +680,6 @@ void Application::AttachDebugger(const String& filename, bool interactive) #endif /* _WIN32 */ } -#ifndef _WIN32 -/** - * Signal handler for SIGINT and SIGTERM. Prepares the application for cleanly - * shutting down during the next execution of the event loop. - * - * @param - The signal number. - */ -void Application::SigIntTermHandler(int signum) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = SIG_DFL; - sigaction(signum, &sa, nullptr); - - Application::Ptr instance = Application::GetInstance(); - - if (!instance) - return; - - instance->RequestShutdown(); -} -#endif /* _WIN32 */ - /** * Signal handler for SIGUSR1. This signal causes Icinga to re-open * its log files and is mainly for use by logrotate. @@ -717,42 +694,6 @@ void Application::SigUsr1Handler(int) RequestReopenLogs(); } -/** - * Signal handler for SIGUSR2. Hands over PID to child and commits suicide - * - * @param - The signal number. - */ -void Application::SigUsr2Handler(int) -{ - Log(LogInformation, "Application", "Reload requested, letting new process take over."); -#ifdef HAVE_SYSTEMD - sd_notifyf(0, "MAINPID=%lu", (unsigned long) m_ReloadProcess); -#endif /* HAVE_SYSTEMD */ - - /* Write the PID of the new process to the pidfile before this - * process exits to keep systemd happy. - */ - Application::Ptr instance = GetInstance(); - try { - instance->UpdatePidFile(Configuration::PidPath, m_ReloadProcess); - } catch (const std::exception&) { - /* abort restart */ - Log(LogCritical, "Application", "Cannot update PID file. Aborting restart operation."); - return; - } - - instance->ClosePidFile(false); - - /* Ensure to dump the program state on reload. */ - ConfigObject::StopObjects(); - instance->OnShutdown(); - - Log(LogInformation, "Application") - << "Reload done, parent process shutting down. Child process with PID '" << m_ReloadProcess << "' is taking over."; - - Exit(0); -} - /** * Signal handler for SIGABRT. Helps with debugging ASSERT()s. * @@ -999,15 +940,8 @@ int Application::Run() #ifndef _WIN32 struct sigaction sa; memset(&sa, 0, sizeof(sa)); - sa.sa_handler = &Application::SigIntTermHandler; - sigaction(SIGINT, &sa, nullptr); - sigaction(SIGTERM, &sa, nullptr); - sa.sa_handler = &Application::SigUsr1Handler; sigaction(SIGUSR1, &sa, nullptr); - - sa.sa_handler = &Application::SigUsr2Handler; - sigaction(SIGUSR2, &sa, nullptr); #else /* _WIN32 */ SetConsoleCtrlHandler(&Application::CtrlHandler, TRUE); #endif /* _WIN32 */ diff --git a/lib/base/application.hpp b/lib/base/application.hpp index 92f627e75..ad7fd9606 100644 --- a/lib/base/application.hpp +++ b/lib/base/application.hpp @@ -132,9 +132,7 @@ private: static bool m_ScriptDebuggerEnabled; static double m_LastReloadFailed; -#ifndef _WIN32 - static void SigIntTermHandler(int signum); -#else /* _WIN32 */ +#ifdef _WIN32 static BOOL WINAPI CtrlHandler(DWORD type); static LONG WINAPI SEHUnhandledExceptionFilter(PEXCEPTION_POINTERS exi); #endif /* _WIN32 */ @@ -143,7 +141,6 @@ private: static void SigAbrtHandler(int signum); static void SigUsr1Handler(int signum); - static void SigUsr2Handler(int signum); static void ExceptionHandler(); static String GetCrashReportFilename(); diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 9fbe486ba..38938bf19 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -27,13 +27,6 @@ static po::variables_map g_AppParams; REGISTER_CLICOMMAND("daemon", DaemonCommand); -#ifndef _WIN32 -static void SigHupHandler(int) -{ - Application::RequestRestart(); -} -#endif /* _WIN32 */ - /* * Daemonize(). On error, this function logs by itself and exits (i.e. does not return). * @@ -299,13 +292,6 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vectorRun(); From c303d08c249e9609a76e5424fe19479fd0ffa5a4 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Fri, 12 Jul 2019 18:14:02 +0200 Subject: [PATCH 02/14] Do the actual work in a separate process on *nix refs #5230 --- lib/cli/daemoncommand.cpp | 405 ++++++++++++++++++++++++++++---------- 1 file changed, 306 insertions(+), 99 deletions(-) diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 38938bf19..9f1133bf7 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -16,10 +16,20 @@ #include "base/scriptglobal.hpp" #include "base/context.hpp" #include "config.h" +#include +#include +#include #include #include #include +#ifndef _WIN32 +#include +#include +#include +#include +#endif /* _WIN32 */ + using namespace icinga; namespace po = boost::program_options; @@ -156,11 +166,6 @@ void DaemonCommand::InitParameters(boost::program_options::options_description& ("close-stdio", "do not log to stdout (or stderr) after startup") #endif /* _WIN32 */ ; - -#ifndef _WIN32 - hiddenDesc.add_options() - ("reload-internal", po::value(), "used internally to implement config reload: do not call manually, send SIGHUP instead"); -#endif /* _WIN32 */ } std::vector DaemonCommand::GetArgumentSuggestions(const String& argument, const String& word) const @@ -171,92 +176,35 @@ std::vector DaemonCommand::GetArgumentSuggestions(const String& argument return CLICommand::GetArgumentSuggestions(argument, word); } -/** - * The entry point for the "daemon" CLI command. - * - * @returns An exit status. - */ -int DaemonCommand::Run(const po::variables_map& vm, const std::vector& ap) const -{ - Logger::EnableTimestamp(); - - Log(LogInformation, "cli") - << "Icinga application loader (version: " << Application::GetAppVersion() -#ifdef I2_DEBUG - << "; debug" -#endif /* I2_DEBUG */ - << ")"; - - if (!vm.count("validate") && !vm.count("reload-internal")) { - pid_t runningpid = Application::ReadPidFile(Configuration::PidPath); - if (runningpid > 0) { - Log(LogCritical, "cli") - << "Another instance of Icinga already running with PID " << runningpid; - return EXIT_FAILURE; - } - } - - std::vector configs; - if (vm.count("config") > 0) - configs = vm["config"].as >(); - else if (!vm.count("no-config")) { - /* The implicit string assignment is needed for Windows builds. */ - String configDir = Configuration::ConfigDir; - configs.push_back(configDir + "/icinga2.conf"); - } - - Log(LogInformation, "cli", "Loading configuration file(s)."); - - std::vector newItems; - - if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath)) - return EXIT_FAILURE; - - if (vm.count("validate")) { - Log(LogInformation, "cli", "Finished validating the configuration file(s)."); - return EXIT_SUCCESS; - } - #ifndef _WIN32 - if (vm.count("reload-internal")) { - /* We went through validation and now ask the old process kindly to die */ - Log(LogInformation, "cli", "Requesting to take over."); - int rc = kill(vm["reload-internal"].as(), SIGUSR2); - if (rc) { - Log(LogCritical, "cli") - << "Failed to send signal to \"" << vm["reload-internal"].as() << "\" with " << strerror(errno); - return EXIT_FAILURE; - } - - double start = Utility::GetTime(); - while (kill(vm["reload-internal"].as(), SIGCHLD) == 0) - Utility::Sleep(0.2); - - Log(LogNotice, "cli") - << "Waited for " << Utility::FormatDuration(Utility::GetTime() - start) << " on old process to exit."; - } +pid_t l_UmbrellaPid = 0; #endif /* _WIN32 */ - if (vm.count("daemonize")) { - if (!vm.count("reload-internal")) { - // no additional fork neccessary on reload - - // this subroutine either succeeds, or logs an error - // and terminates the process (does not return). - Daemonize(); - } - } - - /* restore the previous program state */ - try { - ConfigObject::RestoreObjects(Configuration::StatePath); - } catch (const std::exception& ex) { - Log(LogCritical, "cli") - << "Failed to restore state file: " << DiagnosticInformation(ex); - return EXIT_FAILURE; - } +static inline +int RunWorker(const std::vector& configs) +{ + Log(LogInformation, "cli", "Loading configuration file(s)."); { + std::vector newItems; + + if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath)) + return EXIT_FAILURE; + +#ifndef _WIN32 + // Notify umbrella process about the config loading success. + (void)kill(l_UmbrellaPid, SIGUSR2); +#endif /* _WIN32 */ + + /* restore the previous program state */ + try { + ConfigObject::RestoreObjects(Configuration::StatePath); + } catch (const std::exception& ex) { + Log(LogCritical, "cli") + << "Failed to restore state file: " << DiagnosticInformation(ex); + return EXIT_FAILURE; + } + WorkQueue upq(25000, Configuration::Concurrency); upq.SetName("DaemonCommand::Run"); @@ -267,19 +215,6 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector(); - - CloseStdIO(errorLog); - Logger::DisableConsoleLog(); - } - /* Create the internal API object storage. Do this here too with setups without API. */ ConfigObjectUtility::CreateStorage(); @@ -296,3 +231,275 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vectorRun(); } + +#ifndef _WIN32 +enum class UnixWorkerState : uint_fast8_t +{ + Pending, + LoadedConfig, + Failed +}; + +static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t { + sigset_t s; + + (void)sigemptyset(&s); + (void)sigaddset(&s, SIGCHLD); + (void)sigaddset(&s, SIGUSR2); + (void)sigaddset(&s, SIGINT); + (void)sigaddset(&s, SIGTERM); + + return s; +})(); + +static std::atomic l_CurrentlyStartingUnixWorkerPid (-1); +static std::atomic l_CurrentlyStartingUnixWorkerState (UnixWorkerState::Pending); +static std::atomic l_TermSignal (-1); + +static void UmbrellaSignalHandler(int num, siginfo_t *info, void*) +{ + switch (num) { + case SIGUSR2: + if (l_CurrentlyStartingUnixWorkerState.load() == UnixWorkerState::Pending + && info->si_pid == l_CurrentlyStartingUnixWorkerPid.load()) { + l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::LoadedConfig); + } + break; + case SIGCHLD: + if (l_CurrentlyStartingUnixWorkerState.load() == UnixWorkerState::Pending + && info->si_pid == l_CurrentlyStartingUnixWorkerPid.load()) { + l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::Failed); + } + break; + case SIGINT: + case SIGTERM: + { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + + sa.sa_handler = SIG_DFL; + + (void)sigaction(num, &sa, nullptr); + } + + l_TermSignal.store(num); + } +} + +static void WorkerSignalHandler(int num, siginfo_t *info, void*) +{ + switch (num) { + case SIGINT: + case SIGTERM: + if (info->si_pid == l_UmbrellaPid) { + Application::RequestShutdown(); + } + } +} + +static pid_t StartUnixWorker(const std::vector& configs) +{ + try { + Application::UninitializeBase(); + } catch (const std::exception& ex) { + Log(LogCritical, "cli") + << "Failed to stop thread pool before forking, unexpected error: " << DiagnosticInformation(ex); + exit(EXIT_FAILURE); + } + + (void)sigprocmask(SIG_BLOCK, &l_UnixWorkerSignals, nullptr); + + pid_t pid = fork(); + + switch (pid) { + case -1: + Log(LogCritical, "cli") + << "fork() failed with error code " << errno << ", \"" << Utility::FormatErrorNumber(errno) << "\""; + exit(EXIT_FAILURE); + + case 0: + try { + { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + + sa.sa_handler = SIG_DFL; + + (void)sigaction(SIGCHLD, &sa, nullptr); + (void)sigaction(SIGUSR2, &sa, nullptr); + } + + { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + + sa.sa_sigaction = &WorkerSignalHandler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + + (void)sigaction(SIGINT, &sa, nullptr); + (void)sigaction(SIGTERM, &sa, nullptr); + } + + (void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr); + + try { + Application::InitializeBase(); + } catch (const std::exception& ex) { + Log(LogCritical, "cli") + << "Failed to re-initialize thread pool after forking (child): " << DiagnosticInformation(ex); + _exit(EXIT_FAILURE); + } + + _exit(RunWorker(configs)); + } catch (...) { + _exit(EXIT_FAILURE); + } + + default: + l_CurrentlyStartingUnixWorkerPid.store(pid); + (void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr); + + for (;;) { + switch (l_CurrentlyStartingUnixWorkerState.load()) { + case UnixWorkerState::LoadedConfig: + break; + case UnixWorkerState::Failed: + while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR) { + } + pid = -1; + break; + default: + Utility::Sleep(0.2); + continue; + } + + break; + } + + l_CurrentlyStartingUnixWorkerPid.store(-1); + l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::Pending); + + try { + Application::InitializeBase(); + } catch (const std::exception& ex) { + Log(LogCritical, "cli") + << "Failed to re-initialize thread pool after forking (parent): " << DiagnosticInformation(ex); + exit(EXIT_FAILURE); + } + } + + return pid; +} +#endif /* _WIN32 */ + +/** + * The entry point for the "daemon" CLI command. + * + * @returns An exit status. + */ +int DaemonCommand::Run(const po::variables_map& vm, const std::vector& ap) const +{ + Logger::EnableTimestamp(); + + Log(LogInformation, "cli") + << "Icinga application loader (version: " << Application::GetAppVersion() +#ifdef I2_DEBUG + << "; debug" +#endif /* I2_DEBUG */ + << ")"; + + std::vector configs; + if (vm.count("config") > 0) + configs = vm["config"].as >(); + else if (!vm.count("no-config")) { + /* The implicit string assignment is needed for Windows builds. */ + String configDir = Configuration::ConfigDir; + configs.push_back(configDir + "/icinga2.conf"); + } + + if (vm.count("validate")) { + Log(LogInformation, "cli", "Loading configuration file(s)."); + + std::vector newItems; + + if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath)) + return EXIT_FAILURE; + + Log(LogInformation, "cli", "Finished validating the configuration file(s)."); + return EXIT_SUCCESS; + } + + { + pid_t runningpid = Application::ReadPidFile(Configuration::PidPath); + if (runningpid > 0) { + Log(LogCritical, "cli") + << "Another instance of Icinga already running with PID " << runningpid; + return EXIT_FAILURE; + } + } + + if (vm.count("daemonize")) { + // this subroutine either succeeds, or logs an error + // and terminates the process (does not return). + Daemonize(); + } + + if (vm.count("daemonize") || vm.count("close-stdio")) { + // After disabling the console log, any further errors will go to the configured log only. + // Let's try to make this clear and say good bye. + Log(LogInformation, "cli", "Closing console log."); + + String errorLog; + if (vm.count("errorlog")) + errorLog = vm["errorlog"].as(); + + CloseStdIO(errorLog); + Logger::DisableConsoleLog(); + } + +#ifdef _WIN32 + return RunWorker(configs); +#else /* _WIN32 */ + l_UmbrellaPid = getpid(); + + { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + + sa.sa_sigaction = &UmbrellaSignalHandler; + sa.sa_flags = SA_NOCLDSTOP | SA_RESTART | SA_SIGINFO; + + (void)sigaction(SIGCHLD, &sa, nullptr); + (void)sigaction(SIGUSR2, &sa, nullptr); + (void)sigaction(SIGINT, &sa, nullptr); + (void)sigaction(SIGTERM, &sa, nullptr); + } + + pid_t currentWorker = StartUnixWorker(configs); + + if (currentWorker == -1) { + return EXIT_FAILURE; + } + + bool requestedTermination = false; + + for (;;) { + if (!requestedTermination) { + int termSig = l_TermSignal.load(); + if (termSig != -1) { + (void)kill(currentWorker, termSig); + requestedTermination = true; + } + } + + { + int status; + if (waitpid(currentWorker, &status, WNOHANG) > 0) { + return WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status); + } + } + + Utility::Sleep(0.2); + } +#endif /* _WIN32 */ +} From 249408209d900ae5a8116092e2bd665b5069e7fe Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 15 Jul 2019 12:11:19 +0200 Subject: [PATCH 03/14] Reload on SIGHUP refs #5230 --- lib/cli/daemoncommand.cpp | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 9f1133bf7..dbc683649 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -178,6 +178,7 @@ std::vector DaemonCommand::GetArgumentSuggestions(const String& argument #ifndef _WIN32 pid_t l_UmbrellaPid = 0; +static std::atomic l_AllowedToWork (false); #endif /* _WIN32 */ static inline @@ -194,6 +195,10 @@ int RunWorker(const std::vector& configs) #ifndef _WIN32 // Notify umbrella process about the config loading success. (void)kill(l_UmbrellaPid, SIGUSR2); + + while (!l_AllowedToWork.load()) { + Utility::Sleep(0.2); + } #endif /* _WIN32 */ /* restore the previous program state */ @@ -248,6 +253,7 @@ static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t { (void)sigaddset(&s, SIGUSR2); (void)sigaddset(&s, SIGINT); (void)sigaddset(&s, SIGTERM); + (void)sigaddset(&s, SIGHUP); return s; })(); @@ -255,6 +261,7 @@ static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t { static std::atomic l_CurrentlyStartingUnixWorkerPid (-1); static std::atomic l_CurrentlyStartingUnixWorkerState (UnixWorkerState::Pending); static std::atomic l_TermSignal (-1); +static std::atomic l_RequestedReload (false); static void UmbrellaSignalHandler(int num, siginfo_t *info, void*) { @@ -283,12 +290,20 @@ static void UmbrellaSignalHandler(int num, siginfo_t *info, void*) } l_TermSignal.store(num); + break; + case SIGHUP: + l_RequestedReload.store(true); } } static void WorkerSignalHandler(int num, siginfo_t *info, void*) { switch (num) { + case SIGUSR2: + if (info->si_pid == l_UmbrellaPid) { + l_AllowedToWork.store(true); + } + break; case SIGINT: case SIGTERM: if (info->si_pid == l_UmbrellaPid) { @@ -326,7 +341,7 @@ static pid_t StartUnixWorker(const std::vector& configs) sa.sa_handler = SIG_DFL; (void)sigaction(SIGCHLD, &sa, nullptr); - (void)sigaction(SIGUSR2, &sa, nullptr); + (void)sigaction(SIGHUP, &sa, nullptr); } { @@ -336,6 +351,7 @@ static pid_t StartUnixWorker(const std::vector& configs) sa.sa_sigaction = &WorkerSignalHandler; sa.sa_flags = SA_RESTART | SA_SIGINFO; + (void)sigaction(SIGUSR2, &sa, nullptr); (void)sigaction(SIGINT, &sa, nullptr); (void)sigaction(SIGTERM, &sa, nullptr); } @@ -473,6 +489,7 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector 0) { From 37a3e7e4d5adb0b63b95368f311b71f223f67a3c Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 15 Jul 2019 13:59:17 +0200 Subject: [PATCH 04/14] Application::RunEventLoop(): forward restart requests to umbrella process refs #5230 --- lib/base/application.cpp | 18 ++++++++++++++++++ lib/base/application.hpp | 8 ++++++++ lib/cli/daemoncommand.cpp | 1 + 3 files changed, 27 insertions(+) diff --git a/lib/base/application.cpp b/lib/base/application.cpp index 3e1408542..4d10ad027 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -27,6 +27,8 @@ #endif /* __linux__ */ #ifdef _WIN32 #include +#else /* _WIN32 */ +#include #endif /* _WIN32 */ #ifdef HAVE_SYSTEMD #include @@ -42,6 +44,11 @@ bool Application::m_ShuttingDown = false; bool Application::m_RequestRestart = false; bool Application::m_RequestReopenLogs = false; pid_t Application::m_ReloadProcess = 0; + +#ifndef _WIN32 +pid_t Application::m_UmbrellaProcess = 0; +#endif /* _WIN32 */ + static bool l_Restarting = false; static bool l_InExceptionHandler = false; int Application::m_ArgC; @@ -300,11 +307,15 @@ void Application::RunEventLoop() sd_notify(0, "RELOADING=1"); #endif /* HAVE_SYSTEMD */ +#ifdef _WIN32 // are we already restarting? ignore request if we already are if (!l_Restarting) { l_Restarting = true; m_ReloadProcess = StartReloadProcess(); } +#else /* _WIN32 */ + (void)kill(m_UmbrellaProcess, SIGHUP); +#endif /* _WIN32 */ } else { /* Watches for changes to the system time. Adjusts timers if necessary. */ Utility::Sleep(2.5); @@ -446,6 +457,13 @@ void Application::RequestReopenLogs() m_RequestReopenLogs = true; } +#ifndef _WIN32 +void Application::SetUmbrellaProcess(pid_t pid) +{ + m_UmbrellaProcess = pid; +} +#endif /* _WIN32 */ + /** * Retrieves the full path of the executable. * diff --git a/lib/base/application.hpp b/lib/base/application.hpp index ad7fd9606..de046daed 100644 --- a/lib/base/application.hpp +++ b/lib/base/application.hpp @@ -57,6 +57,10 @@ public: static void RequestRestart(); static void RequestReopenLogs(); +#ifndef _WIN32 + static void SetUmbrellaProcess(pid_t pid); +#endif /* _WIN32 */ + static bool IsShuttingDown(); static bool IsRestarting(); @@ -122,6 +126,10 @@ private: static pid_t m_ReloadProcess; /**< The PID of a subprocess doing a reload, only valid when l_Restarting==true */ static bool m_RequestReopenLogs; /**< Whether we should re-open log files. */ +#ifndef _WIN32 + static pid_t m_UmbrellaProcess; +#endif /* _WIN32 */ + static int m_ArgC; /**< The number of command-line arguments. */ static char **m_ArgV; /**< Command-line arguments. */ FILE *m_PidFile; /**< The PID file */ diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index dbc683649..7a95e2e7e 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -477,6 +477,7 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector Date: Mon, 15 Jul 2019 15:09:35 +0200 Subject: [PATCH 05/14] Catch programming errors refs #5230 --- lib/cli/daemoncommand.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 7a95e2e7e..c2e55975b 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -293,6 +293,9 @@ static void UmbrellaSignalHandler(int num, siginfo_t *info, void*) break; case SIGHUP: l_RequestedReload.store(true); + break; + default: + VERIFY(!"Caught unexpected signal"); } } @@ -309,6 +312,9 @@ static void WorkerSignalHandler(int num, siginfo_t *info, void*) if (info->si_pid == l_UmbrellaPid) { Application::RequestShutdown(); } + break; + default: + VERIFY(!"Caught unexpected signal"); } } From 06b504f2911f56e6323489453fd95ba219c7dcb2 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 15 Jul 2019 16:08:08 +0200 Subject: [PATCH 06/14] Adjust PID file management refs #5230 --- lib/base/application.cpp | 4 ++++ lib/base/application.hpp | 2 +- lib/cli/daemoncommand.cpp | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/base/application.cpp b/lib/base/application.cpp index 4d10ad027..9aea52c7b 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -80,7 +80,9 @@ void Application::Stop(bool runtimeRemoved) WSACleanup(); #endif /* _WIN32 */ +#ifdef _WIN32 ClosePidFile(true); +#endif /* _WIN32 */ ObjectImpl::Stop(runtimeRemoved); } @@ -964,6 +966,7 @@ int Application::Run() SetConsoleCtrlHandler(&Application::CtrlHandler, TRUE); #endif /* _WIN32 */ +#ifdef _WIN32 try { UpdatePidFile(Configuration::PidPath); } catch (const std::exception&) { @@ -971,6 +974,7 @@ int Application::Run() << "Cannot update PID file '" << Configuration::PidPath << "'. Aborting."; return EXIT_FAILURE; } +#endif /* _WIN32 */ SetMainTime(Utility::GetTime()); diff --git a/lib/base/application.hpp b/lib/base/application.hpp index de046daed..906f17f9a 100644 --- a/lib/base/application.hpp +++ b/lib/base/application.hpp @@ -132,7 +132,7 @@ private: static int m_ArgC; /**< The number of command-line arguments. */ static char **m_ArgV; /**< Command-line arguments. */ - FILE *m_PidFile; /**< The PID file */ + FILE *m_PidFile = nullptr; /**< The PID file */ static bool m_Debugging; /**< Whether debugging is enabled. */ static LogSeverity m_DebuggingSeverity; /**< Whether debugging severity is set. */ static double m_StartTime; diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index c2e55975b..0f5ad15eb 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -7,6 +7,7 @@ #include "config/configcompiler.hpp" #include "config/configcompilercontext.hpp" #include "config/configitembuilder.hpp" +#include "base/defer.hpp" #include "base/logger.hpp" #include "base/application.hpp" #include "base/timer.hpp" @@ -412,6 +413,15 @@ static pid_t StartUnixWorker(const std::vector& configs) return pid; } + +class PidFileManagementApp : public Application +{ +public: + inline int Main() override + { + return EXIT_FAILURE; + } +}; #endif /* _WIN32 */ /** @@ -466,6 +476,22 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector Date: Mon, 15 Jul 2019 16:58:34 +0200 Subject: [PATCH 07/14] Adjust sd_notify() refs #5230 --- lib/base/application.cpp | 19 ------------- lib/cli/daemoncommand.cpp | 60 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/lib/base/application.cpp b/lib/base/application.cpp index 9aea52c7b..873c399c9 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -30,9 +30,6 @@ #else /* _WIN32 */ #include #endif /* _WIN32 */ -#ifdef HAVE_SYSTEMD -#include -#endif /* HAVE_SYSTEMD */ using namespace icinga; @@ -295,20 +292,12 @@ void Application::SetArgV(char **argv) */ void Application::RunEventLoop() { -#ifdef HAVE_SYSTEMD - sd_notify(0, "READY=1"); -#endif /* HAVE_SYSTEMD */ - double lastLoop = Utility::GetTime(); while (!m_ShuttingDown) { if (m_RequestRestart) { m_RequestRestart = false; // we are now handling the request, once is enough -#ifdef HAVE_SYSTEMD - sd_notify(0, "RELOADING=1"); -#endif /* HAVE_SYSTEMD */ - #ifdef _WIN32 // are we already restarting? ignore request if we already are if (!l_Restarting) { @@ -331,10 +320,6 @@ void Application::RunEventLoop() double now = Utility::GetTime(); double timeDiff = lastLoop - now; -#ifdef HAVE_SYSTEMD - sd_notify(0, "WATCHDOG=1"); -#endif /* HAVE_SYSTEMD */ - if (std::fabs(timeDiff) > 15) { /* We made a significant jump in time. */ Log(LogInformation, "Application") @@ -349,10 +334,6 @@ void Application::RunEventLoop() } } -#ifdef HAVE_SYSTEMD - sd_notify(0, "STOPPING=1"); -#endif /* HAVE_SYSTEMD */ - Log(LogInformation, "Application", "Shutting down..."); ConfigObject::StopObjects(); diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 0f5ad15eb..9eaeb8ea0 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -31,6 +31,10 @@ #include #endif /* _WIN32 */ +#ifdef HAVE_SYSTEMD +#include +#endif /* HAVE_SYSTEMD */ + using namespace icinga; namespace po = boost::program_options; @@ -319,6 +323,20 @@ static void WorkerSignalHandler(int num, siginfo_t *info, void*) } } +#ifdef HAVE_SYSTEMD +static std::atomic l_LastNotifiedWatchdog (0); + +static void NotifyWatchdog() +{ + double now = Utility::GetTime(); + + if (now - l_LastNotifiedWatchdog.load() >= 2.5) { + sd_notify(0, "WATCHDOG=1"); + l_LastNotifiedWatchdog.store(now); + } +} +#endif /* HAVE_SYSTEMD */ + static pid_t StartUnixWorker(const std::vector& configs) { try { @@ -383,11 +401,18 @@ static pid_t StartUnixWorker(const std::vector& configs) (void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr); for (;;) { +#ifdef HAVE_SYSTEMD + NotifyWatchdog(); +#endif /* HAVE_SYSTEMD */ + switch (l_CurrentlyStartingUnixWorkerState.load()) { case UnixWorkerState::LoadedConfig: break; case UnixWorkerState::Failed: while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR) { +#ifdef HAVE_SYSTEMD + NotifyWatchdog(); +#endif /* HAVE_SYSTEMD */ } pid = -1; break; @@ -533,34 +558,69 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector 0) { +#ifdef HAVE_SYSTEMD + if (!notifiedTermination) { + notifiedTermination = true; + sd_notify(0, "STOPPING=1"); + } +#endif /* HAVE_SYSTEMD */ + return WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status); } } From 3584ad97d85f8c09b7d3b75f85be96d0b451a96a Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 15 Jul 2019 18:29:15 +0200 Subject: [PATCH 08/14] Fix missing log messages refs #5230 --- lib/base/application.cpp | 2 ++ lib/cli/daemoncommand.cpp | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/lib/base/application.cpp b/lib/base/application.cpp index 873c399c9..676ea43d3 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -305,6 +305,8 @@ void Application::RunEventLoop() m_ReloadProcess = StartReloadProcess(); } #else /* _WIN32 */ + Log(LogNotice, "Application") << "Got reload command, forwarding to umbrella process (PID " << m_UmbrellaProcess << ")"; + (void)kill(m_UmbrellaProcess, SIGHUP); #endif /* _WIN32 */ } else { diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 9eaeb8ea0..39fd804be 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -198,12 +198,17 @@ int RunWorker(const std::vector& configs) return EXIT_FAILURE; #ifndef _WIN32 - // Notify umbrella process about the config loading success. + Log(LogNotice, "cli") << "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success"; + (void)kill(l_UmbrellaPid, SIGUSR2); + Log(LogNotice, "cli") << "Waiting for the umbrella process to let us doing the actual work"; + while (!l_AllowedToWork.load()) { Utility::Sleep(0.2); } + + Log(LogNotice, "cli") << "The umbrella process let us continuing"; #endif /* _WIN32 */ /* restore the previous program state */ @@ -339,6 +344,8 @@ static void NotifyWatchdog() static pid_t StartUnixWorker(const std::vector& configs) { + Log(LogNotice, "cli") << "Spawning seemless worker process doing the actual work"; + try { Application::UninitializeBase(); } catch (const std::exception& ex) { @@ -400,6 +407,8 @@ static pid_t StartUnixWorker(const std::vector& configs) l_CurrentlyStartingUnixWorkerPid.store(pid); (void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr); + Log(LogNotice, "cli") << "Spawned worker process (PID " << pid << "), waiting for it to load its config"; + for (;;) { #ifdef HAVE_SYSTEMD NotifyWatchdog(); @@ -407,8 +416,11 @@ static pid_t StartUnixWorker(const std::vector& configs) switch (l_CurrentlyStartingUnixWorkerState.load()) { case UnixWorkerState::LoadedConfig: + Log(LogNotice, "cli") << "Worker process successfully loaded its config"; break; case UnixWorkerState::Failed: + Log(LogNotice, "cli") << "Worker process couldn't load its config"; + while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR) { #ifdef HAVE_SYSTEMD NotifyWatchdog(); @@ -573,6 +585,8 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector 0) { + Log(LogNotice, "cli") << "Seemless worker (PID " << currentWorker << ") stopped, stopping as well"; + #ifdef HAVE_SYSTEMD if (!notifiedTermination) { notifiedTermination = true; From 372ecd8a7204809fa2f00e74c573f635574dfc16 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 15 Jul 2019 18:36:34 +0200 Subject: [PATCH 09/14] Forward SIGUSR1 refs #5230 --- lib/cli/daemoncommand.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 39fd804be..3dd8a8177 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -260,6 +260,7 @@ static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t { (void)sigemptyset(&s); (void)sigaddset(&s, SIGCHLD); + (void)sigaddset(&s, SIGUSR1); (void)sigaddset(&s, SIGUSR2); (void)sigaddset(&s, SIGINT); (void)sigaddset(&s, SIGTERM); @@ -272,10 +273,14 @@ static std::atomic l_CurrentlyStartingUnixWorkerPid (-1); static std::atomic l_CurrentlyStartingUnixWorkerState (UnixWorkerState::Pending); static std::atomic l_TermSignal (-1); static std::atomic l_RequestedReload (false); +static std::atomic l_RequestedReopenLogs (false); static void UmbrellaSignalHandler(int num, siginfo_t *info, void*) { switch (num) { + case SIGUSR1: + l_RequestedReopenLogs.store(true); + break; case SIGUSR2: if (l_CurrentlyStartingUnixWorkerState.load() == UnixWorkerState::Pending && info->si_pid == l_CurrentlyStartingUnixWorkerPid.load()) { @@ -373,6 +378,7 @@ static pid_t StartUnixWorker(const std::vector& configs) sa.sa_handler = SIG_DFL; (void)sigaction(SIGCHLD, &sa, nullptr); + (void)sigaction(SIGUSR1, &sa, nullptr); (void)sigaction(SIGHUP, &sa, nullptr); } @@ -556,6 +562,7 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector 0) { From 4ee9ac16b43731218203128fe2a9e791e6aba775 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 16 Jul 2019 11:11:10 +0200 Subject: [PATCH 10/14] Fix missing comments refs #5230 --- lib/base/application.cpp | 5 +++ lib/base/application.hpp | 2 +- lib/cli/daemoncommand.cpp | 69 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) diff --git a/lib/base/application.cpp b/lib/base/application.cpp index 676ea43d3..7011ddc07 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -443,6 +443,11 @@ void Application::RequestReopenLogs() } #ifndef _WIN32 +/** + * Sets the PID of the Icinga umbrella process. + * + * @param pid The PID of the Icinga umbrella process. + */ void Application::SetUmbrellaProcess(pid_t pid) { m_UmbrellaProcess = pid; diff --git a/lib/base/application.hpp b/lib/base/application.hpp index 906f17f9a..a6b78ff09 100644 --- a/lib/base/application.hpp +++ b/lib/base/application.hpp @@ -127,7 +127,7 @@ private: static bool m_RequestReopenLogs; /**< Whether we should re-open log files. */ #ifndef _WIN32 - static pid_t m_UmbrellaProcess; + static pid_t m_UmbrellaProcess; /**< The PID of the Icinga umbrella process */ #endif /* _WIN32 */ static int m_ArgC; /**< The number of command-line arguments. */ diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 3dd8a8177..33a6343f1 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -182,10 +182,20 @@ std::vector DaemonCommand::GetArgumentSuggestions(const String& argument } #ifndef _WIN32 +// The PID of the Icinga umbrella process pid_t l_UmbrellaPid = 0; + +// Whether the umbrella process allowed us to continue working beyond config validation static std::atomic l_AllowedToWork (false); #endif /* _WIN32 */ +/** + * Do the actual work (config loading, ...) + * + * @param configs Files to read config from + * + * @return Exit code + */ static inline int RunWorker(const std::vector& configs) { @@ -248,6 +258,9 @@ int RunWorker(const std::vector& configs) } #ifndef _WIN32 +/** + * The possible states of a seemless worker being started by StartUnixWorker(). + */ enum class UnixWorkerState : uint_fast8_t { Pending, @@ -255,6 +268,7 @@ enum class UnixWorkerState : uint_fast8_t Failed }; +// The signals to block temporarily in StartUnixWorker(). static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t { sigset_t s; @@ -269,32 +283,49 @@ static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t { return s; })(); +// The PID of the seemless worker currently being started by StartUnixWorker() static std::atomic l_CurrentlyStartingUnixWorkerPid (-1); + +// The state of the seemless worker currently being started by StartUnixWorker() static std::atomic l_CurrentlyStartingUnixWorkerState (UnixWorkerState::Pending); + +// The last temination signal we received static std::atomic l_TermSignal (-1); + +// Whether someone requested to re-load config (and we didn't handle that request, yet) static std::atomic l_RequestedReload (false); + +// Whether someone requested to re-open logs (and we didn't handle that request, yet) static std::atomic l_RequestedReopenLogs (false); +/** + * Umbrella process' signal handlers + */ static void UmbrellaSignalHandler(int num, siginfo_t *info, void*) { switch (num) { case SIGUSR1: + // Someone requested to re-open logs l_RequestedReopenLogs.store(true); break; case SIGUSR2: if (l_CurrentlyStartingUnixWorkerState.load() == UnixWorkerState::Pending && info->si_pid == l_CurrentlyStartingUnixWorkerPid.load()) { + // The seemless worker currently being started by StartUnixWorker() successfully loaded its config l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::LoadedConfig); } break; case SIGCHLD: if (l_CurrentlyStartingUnixWorkerState.load() == UnixWorkerState::Pending && info->si_pid == l_CurrentlyStartingUnixWorkerPid.load()) { + // The seemless worker currently being started by StartUnixWorker() failed l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::Failed); } break; case SIGINT: case SIGTERM: + // Someone requested our termination + { struct sigaction sa; memset(&sa, 0, sizeof(sa)); @@ -307,35 +338,47 @@ static void UmbrellaSignalHandler(int num, siginfo_t *info, void*) l_TermSignal.store(num); break; case SIGHUP: + // Someone requested to re-load config l_RequestedReload.store(true); break; default: + // Programming error (or someone has broken the userspace) VERIFY(!"Caught unexpected signal"); } } +/** + * Seemless worker's signal handlers + */ static void WorkerSignalHandler(int num, siginfo_t *info, void*) { switch (num) { case SIGUSR2: if (info->si_pid == l_UmbrellaPid) { + // The umbrella process allowed us to continue working beyond config validation l_AllowedToWork.store(true); } break; case SIGINT: case SIGTERM: if (info->si_pid == l_UmbrellaPid) { + // The umbrella process requested our termination Application::RequestShutdown(); } break; default: + // Programming error (or someone has broken the userspace) VERIFY(!"Caught unexpected signal"); } } #ifdef HAVE_SYSTEMD +// When we last notified the watchdog. static std::atomic l_LastNotifiedWatchdog (0); +/** + * Notify the watchdog if not notified during the last 2.5s. + */ static void NotifyWatchdog() { double now = Utility::GetTime(); @@ -347,6 +390,13 @@ static void NotifyWatchdog() } #endif /* HAVE_SYSTEMD */ +/** + * Starts seemless worker process doing the actual work (config loading, ...) + * + * @param configs Files to read config from + * + * @return The worker's PID on success, -1 on failure (if the worker couldn't load its config) + */ static pid_t StartUnixWorker(const std::vector& configs) { Log(LogNotice, "cli") << "Spawning seemless worker process doing the actual work"; @@ -359,6 +409,9 @@ static pid_t StartUnixWorker(const std::vector& configs) exit(EXIT_FAILURE); } + /* Block the signal handlers we'd like to change in the child process until we changed them. + * Block SIGUSR2 and SIGCHLD handlers until we've set l_CurrentlyStartingUnixWorkerPid. + */ (void)sigprocmask(SIG_BLOCK, &l_UnixWorkerSignals, nullptr); pid_t pid = fork(); @@ -415,6 +468,7 @@ static pid_t StartUnixWorker(const std::vector& configs) Log(LogNotice, "cli") << "Spawned worker process (PID " << pid << "), waiting for it to load its config"; + // Wait for the newly spawned process to either load its config or fail. for (;;) { #ifdef HAVE_SYSTEMD NotifyWatchdog(); @@ -442,6 +496,7 @@ static pid_t StartUnixWorker(const std::vector& configs) break; } + // Reset flags for the next time l_CurrentlyStartingUnixWorkerPid.store(-1); l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::Pending); @@ -457,6 +512,9 @@ static pid_t StartUnixWorker(const std::vector& configs) return pid; } +/** + * Workaround to instantiate Application (which is abstract) in DaemonCommand#Run() + */ class PidFileManagementApp : public Application { public: @@ -520,6 +578,10 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector Date: Tue, 16 Jul 2019 11:28:20 +0200 Subject: [PATCH 11/14] DaemonCommand: make the atomics a bit more atomic Just to be sure. refs #5230 --- lib/base/CMakeLists.txt | 1 + lib/base/atomic.hpp | 43 +++++++++++++++++++++++++++++++++++++++ lib/cli/daemoncommand.cpp | 16 +++++++-------- 3 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 lib/base/atomic.hpp diff --git a/lib/base/CMakeLists.txt b/lib/base/CMakeLists.txt index fb3de3029..100ca27a7 100644 --- a/lib/base/CMakeLists.txt +++ b/lib/base/CMakeLists.txt @@ -15,6 +15,7 @@ set(base_SOURCES i2-base.hpp application.cpp application.hpp application-ti.hpp application-version.cpp application-environment.cpp array.cpp array.hpp array-script.cpp + atomic.hpp base64.cpp base64.hpp boolean.cpp boolean.hpp boolean-script.cpp configobject.cpp configobject.hpp configobject-ti.hpp configobject-script.cpp diff --git a/lib/base/atomic.hpp b/lib/base/atomic.hpp new file mode 100644 index 000000000..0ebcddefb --- /dev/null +++ b/lib/base/atomic.hpp @@ -0,0 +1,43 @@ +/* Icinga 2 | (c) 2019 Icinga GmbH | GPLv2+ */ + +#ifndef ATOMIC_H +#define ATOMIC_H + +#include + +namespace icinga +{ + +/** + * Extends std::atomic with an atomic constructor. + * + * @ingroup base + */ +template +class Atomic : public std::atomic { +public: + /** + * Like std::atomic#atomic, but operates atomically + * + * @param desired Initial value + */ + inline Atomic(T desired) + { + this->store(desired); + } + + /** + * Like std::atomic#atomic, but operates atomically + * + * @param desired Initial value + * @param order Initial store operation's memory order + */ + inline Atomic(T desired, std::memory_order order) + { + this->store(desired, order); + } +}; + +} + +#endif /* ATOMIC_H */ diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index 33a6343f1..cdc105dc6 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -7,6 +7,7 @@ #include "config/configcompiler.hpp" #include "config/configcompilercontext.hpp" #include "config/configitembuilder.hpp" +#include "base/atomic.hpp" #include "base/defer.hpp" #include "base/logger.hpp" #include "base/application.hpp" @@ -17,7 +18,6 @@ #include "base/scriptglobal.hpp" #include "base/context.hpp" #include "config.h" -#include #include #include #include @@ -186,7 +186,7 @@ std::vector DaemonCommand::GetArgumentSuggestions(const String& argument pid_t l_UmbrellaPid = 0; // Whether the umbrella process allowed us to continue working beyond config validation -static std::atomic l_AllowedToWork (false); +static Atomic l_AllowedToWork (false); #endif /* _WIN32 */ /** @@ -284,19 +284,19 @@ static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t { })(); // The PID of the seemless worker currently being started by StartUnixWorker() -static std::atomic l_CurrentlyStartingUnixWorkerPid (-1); +static Atomic l_CurrentlyStartingUnixWorkerPid (-1); // The state of the seemless worker currently being started by StartUnixWorker() -static std::atomic l_CurrentlyStartingUnixWorkerState (UnixWorkerState::Pending); +static Atomic l_CurrentlyStartingUnixWorkerState (UnixWorkerState::Pending); // The last temination signal we received -static std::atomic l_TermSignal (-1); +static Atomic l_TermSignal (-1); // Whether someone requested to re-load config (and we didn't handle that request, yet) -static std::atomic l_RequestedReload (false); +static Atomic l_RequestedReload (false); // Whether someone requested to re-open logs (and we didn't handle that request, yet) -static std::atomic l_RequestedReopenLogs (false); +static Atomic l_RequestedReopenLogs (false); /** * Umbrella process' signal handlers @@ -374,7 +374,7 @@ static void WorkerSignalHandler(int num, siginfo_t *info, void*) #ifdef HAVE_SYSTEMD // When we last notified the watchdog. -static std::atomic l_LastNotifiedWatchdog (0); +static Atomic l_LastNotifiedWatchdog (0); /** * Notify the watchdog if not notified during the last 2.5s. From 31e5394fe9b70ee2be1a5c962661d31eed1c98d0 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 16 Jul 2019 11:43:47 +0200 Subject: [PATCH 12/14] Fix style refs #5230 --- lib/base/application.cpp | 3 ++- lib/cli/daemoncommand.cpp | 39 ++++++++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/lib/base/application.cpp b/lib/base/application.cpp index 7011ddc07..4707f0d23 100644 --- a/lib/base/application.cpp +++ b/lib/base/application.cpp @@ -305,7 +305,8 @@ void Application::RunEventLoop() m_ReloadProcess = StartReloadProcess(); } #else /* _WIN32 */ - Log(LogNotice, "Application") << "Got reload command, forwarding to umbrella process (PID " << m_UmbrellaProcess << ")"; + Log(LogNotice, "Application") + << "Got reload command, forwarding to umbrella process (PID " << m_UmbrellaProcess << ")"; (void)kill(m_UmbrellaProcess, SIGHUP); #endif /* _WIN32 */ diff --git a/lib/cli/daemoncommand.cpp b/lib/cli/daemoncommand.cpp index cdc105dc6..8091c403e 100644 --- a/lib/cli/daemoncommand.cpp +++ b/lib/cli/daemoncommand.cpp @@ -208,17 +208,20 @@ int RunWorker(const std::vector& configs) return EXIT_FAILURE; #ifndef _WIN32 - Log(LogNotice, "cli") << "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success"; + Log(LogNotice, "cli") + << "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success"; (void)kill(l_UmbrellaPid, SIGUSR2); - Log(LogNotice, "cli") << "Waiting for the umbrella process to let us doing the actual work"; + Log(LogNotice, "cli") + << "Waiting for the umbrella process to let us doing the actual work"; while (!l_AllowedToWork.load()) { Utility::Sleep(0.2); } - Log(LogNotice, "cli") << "The umbrella process let us continuing"; + Log(LogNotice, "cli") + << "The umbrella process let us continuing"; #endif /* _WIN32 */ /* restore the previous program state */ @@ -399,7 +402,8 @@ static void NotifyWatchdog() */ static pid_t StartUnixWorker(const std::vector& configs) { - Log(LogNotice, "cli") << "Spawning seemless worker process doing the actual work"; + Log(LogNotice, "cli") + << "Spawning seemless worker process doing the actual work"; try { Application::UninitializeBase(); @@ -466,7 +470,8 @@ static pid_t StartUnixWorker(const std::vector& configs) l_CurrentlyStartingUnixWorkerPid.store(pid); (void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr); - Log(LogNotice, "cli") << "Spawned worker process (PID " << pid << "), waiting for it to load its config"; + Log(LogNotice, "cli") + << "Spawned worker process (PID " << pid << "), waiting for it to load its config"; // Wait for the newly spawned process to either load its config or fail. for (;;) { @@ -476,10 +481,12 @@ static pid_t StartUnixWorker(const std::vector& configs) switch (l_CurrentlyStartingUnixWorkerState.load()) { case UnixWorkerState::LoadedConfig: - Log(LogNotice, "cli") << "Worker process successfully loaded its config"; + Log(LogNotice, "cli") + << "Worker process successfully loaded its config"; break; case UnixWorkerState::Failed: - Log(LogNotice, "cli") << "Worker process couldn't load its config"; + Log(LogNotice, "cli") + << "Worker process couldn't load its config"; while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR) { #ifdef HAVE_SYSTEMD @@ -659,7 +666,8 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector 0) { - Log(LogNotice, "cli") << "Seemless worker (PID " << currentWorker << ") stopped, stopping as well"; + Log(LogNotice, "cli") + << "Seemless worker (PID " << currentWorker << ") stopped, stopping as well"; #ifdef HAVE_SYSTEMD if (!notifiedTermination) { From 01c16f856d1e40c503d3fd59fedfb98e1c7da955 Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Thu, 18 Jul 2019 15:33:32 +0200 Subject: [PATCH 13/14] Docs: Core reload for technical concepts --- doc/19-technical-concepts.md | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/doc/19-technical-concepts.md b/doc/19-technical-concepts.md index 187d3a5c4..dbf975e3f 100644 --- a/doc/19-technical-concepts.md +++ b/doc/19-technical-concepts.md @@ -176,6 +176,49 @@ The following signals are triggered in the stages: * [Flex](https://github.com/westes/flex) * [GNU Bison](https://www.gnu.org/software/bison/) +## Core + +#:## Core: Reload Handling + +The initial design of the reload state machine looks like this: + +* receive reload signal SIGHUP +* fork a child process, start configuration validation in parallel work queues +* parent process continues with old configuration objects and the event scheduling +(doing checks, replicating cluster events, triggering alert notifications, etc.) +* validation NOT ok: child process terminates, parent process continues with old configuration state +* validation ok: child process signals parent process to terminate and save its current state (all events until now) into the icinga2 state file +* parent process shuts down writing icinga2.state file +* child process waits for parent process gone, reads the icinga2 state file and synchronizes all historical and status data +* child becomes the new session leader + +Since Icinga 2.6, there are two processes when checked with `ps aux | grep icinga2` or `pidof icinga2`. +This was to ensure that feature file descriptors don't leak into the plugin process (e.g. DB IDO MySQL sockets). + +Icinga 2.9 changed the reload handling a bit with SIGUSR2 signals +and systemd notifies. + +With systemd, it could occur that the tree was broken thus resulting +in killing all remaining processes on stop, instead of a clean exit. +You can read the full story [here](https://github.com/Icinga/icinga2/issues/7309). + +With 2.11 you'll now see 3 processes: + +- The umbrella process which takes care about signal handling and process spawning/stopping +- The main process with the check scheduler, notifications, etc. +- The execution helper process + +During reload, the umbrella process spawns a new reload process which validates the configuration. +Once successful, the new reload process signals the umbrella process that it is finished. +The umbrella process forwards the signal and tells the old main process to shutdown. +The old main process writes the icinga2.state file. The umbrella process signals +the reload process that the main process terminated. + +The reload process was in idle wait before, and now continues to read the written +state file and run the event loop (checks, notifications, "events", ...). The reload +process itself also spawns the execution helper process again. + + ## Features Features are implemented in specific libraries and can be enabled From cb878fdd1bacafc0a6c9086e8a34c7054683fefb Mon Sep 17 00:00:00 2001 From: Michael Friedrich Date: Thu, 18 Jul 2019 15:33:46 +0200 Subject: [PATCH 14/14] Docs: Upgrading docs for improved reload handling --- doc/16-upgrading-icinga-2.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/16-upgrading-icinga-2.md b/doc/16-upgrading-icinga-2.md index 3f5ba1017..ada066a4b 100644 --- a/doc/16-upgrading-icinga-2.md +++ b/doc/16-upgrading-icinga-2.md @@ -55,6 +55,17 @@ and [benchmarks](https://github.com/miloyip/nativejson-benchmark#parsing-time). ### Core +#### Reload Handling + +2.11 provides fixes for unwanted notifications during restarts. +The updated systemd service file now uses the `KillMode=mixed` setting. + +The reload handling was improved with an umbrella process, which means +that normal runtime operations include **3 processes**. You may need to +adjust the local instance monitoring of the [procs](08-advanced-topics.md#monitoring-icinga) check. + +More details can be found in the [technical concepts](19-technical-concepts.md#technical-concepts-core-reload) chapter. + #### Downtime Notifications Imagine that a host/service changes to a HARD NOT-OK state,