mirror of
https://github.com/Icinga/icinga2.git
synced 2025-07-29 16:44:29 +02:00
Merge pull request #7320 from Icinga/feature/foreground-5230
Rework reload handling on *nix
This commit is contained in:
commit
844e821cd1
@ -55,6 +55,17 @@ and [benchmarks](https://github.com/miloyip/nativejson-benchmark#parsing-time).
|
|||||||
|
|
||||||
### Core <a id="upgrading-to-2-11-core"></a>
|
### Core <a id="upgrading-to-2-11-core"></a>
|
||||||
|
|
||||||
|
#### Reload Handling <a id="upgrading-to-2-11-core-reload-handling"></a>
|
||||||
|
|
||||||
|
2.11 provides fixes for unwanted notifications during restarts.
|
||||||
|
The updated systemd service file now uses the `KillMode=mixed` setting.
|
||||||
|
|
||||||
|
The reload handling was improved with an umbrella process, which means
|
||||||
|
that normal runtime operations include **3 processes**. You may need to
|
||||||
|
adjust the local instance monitoring of the [procs](08-advanced-topics.md#monitoring-icinga) check.
|
||||||
|
|
||||||
|
More details can be found in the [technical concepts](19-technical-concepts.md#technical-concepts-core-reload) chapter.
|
||||||
|
|
||||||
#### Downtime Notifications <a id="upgrading-to-2-11-core-downtime-notifications"></a>
|
#### Downtime Notifications <a id="upgrading-to-2-11-core-downtime-notifications"></a>
|
||||||
|
|
||||||
Imagine that a host/service changes to a HARD NOT-OK state,
|
Imagine that a host/service changes to a HARD NOT-OK state,
|
||||||
|
@ -176,6 +176,49 @@ The following signals are triggered in the stages:
|
|||||||
* [Flex](https://github.com/westes/flex)
|
* [Flex](https://github.com/westes/flex)
|
||||||
* [GNU Bison](https://www.gnu.org/software/bison/)
|
* [GNU Bison](https://www.gnu.org/software/bison/)
|
||||||
|
|
||||||
|
## Core <a id="technical-concepts-core"></a>
|
||||||
|
|
||||||
|
### Core: Reload Handling <a id="technical-concepts-core-reload"></a>
|
||||||
|
|
||||||
|
The initial design of the reload state machine looks like this:
|
||||||
|
|
||||||
|
* receive reload signal SIGHUP
|
||||||
|
* fork a child process, start configuration validation in parallel work queues
|
||||||
|
* parent process continues with old configuration objects and the event scheduling
|
||||||
|
(doing checks, replicating cluster events, triggering alert notifications, etc.)
|
||||||
|
* validation NOT ok: child process terminates, parent process continues with old configuration state
|
||||||
|
* validation ok: child process signals parent process to terminate and save its current state (all events until now) into the icinga2 state file
|
||||||
|
* parent process shuts down writing icinga2.state file
|
||||||
|
* child process waits until the parent process is gone, reads the icinga2 state file and synchronizes all historical and status data
|
||||||
|
* child becomes the new session leader
|
||||||
|
|
||||||
|
Since Icinga 2.6, there are two processes when checked with `ps aux | grep icinga2` or `pidof icinga2`.
|
||||||
|
This was to ensure that feature file descriptors don't leak into the plugin process (e.g. DB IDO MySQL sockets).
|
||||||
|
|
||||||
|
Icinga 2.9 changed the reload handling a bit with SIGUSR2 signals
|
||||||
|
and systemd notifies.
|
||||||
|
|
||||||
|
With systemd, it could occur that the tree was broken thus resulting
|
||||||
|
in killing all remaining processes on stop, instead of a clean exit.
|
||||||
|
You can read the full story [here](https://github.com/Icinga/icinga2/issues/7309).
|
||||||
|
|
||||||
|
With 2.11 you'll now see 3 processes:
|
||||||
|
|
||||||
|
- The umbrella process which takes care of signal handling and process spawning/stopping
|
||||||
|
- The main process with the check scheduler, notifications, etc.
|
||||||
|
- The execution helper process
|
||||||
|
|
||||||
|
During reload, the umbrella process spawns a new reload process which validates the configuration.
|
||||||
|
Once successful, the new reload process signals the umbrella process that it is finished.
|
||||||
|
The umbrella process forwards the signal and tells the old main process to shut down.
|
||||||
|
The old main process writes the icinga2.state file. The umbrella process signals
|
||||||
|
the reload process that the main process terminated.
|
||||||
|
|
||||||
|
The reload process was in idle wait before, and now continues to read the written
|
||||||
|
state file and run the event loop (checks, notifications, "events", ...). The reload
|
||||||
|
process itself also spawns the execution helper process again.
|
||||||
|
|
||||||
|
|
||||||
## Features <a id="technical-concepts-features"></a>
|
## Features <a id="technical-concepts-features"></a>
|
||||||
|
|
||||||
Features are implemented in specific libraries and can be enabled
|
Features are implemented in specific libraries and can be enabled
|
||||||
|
@ -15,6 +15,7 @@ set(base_SOURCES
|
|||||||
i2-base.hpp
|
i2-base.hpp
|
||||||
application.cpp application.hpp application-ti.hpp application-version.cpp application-environment.cpp
|
application.cpp application.hpp application-ti.hpp application-version.cpp application-environment.cpp
|
||||||
array.cpp array.hpp array-script.cpp
|
array.cpp array.hpp array-script.cpp
|
||||||
|
atomic.hpp
|
||||||
base64.cpp base64.hpp
|
base64.cpp base64.hpp
|
||||||
boolean.cpp boolean.hpp boolean-script.cpp
|
boolean.cpp boolean.hpp boolean-script.cpp
|
||||||
configobject.cpp configobject.hpp configobject-ti.hpp configobject-script.cpp
|
configobject.cpp configobject.hpp configobject-ti.hpp configobject-script.cpp
|
||||||
|
@ -27,10 +27,9 @@
|
|||||||
#endif /* __linux__ */
|
#endif /* __linux__ */
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
#else /* _WIN32 */
|
||||||
|
#include <signal.h>
|
||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
#ifdef HAVE_SYSTEMD
|
|
||||||
#include <systemd/sd-daemon.h>
|
|
||||||
#endif /* HAVE_SYSTEMD */
|
|
||||||
|
|
||||||
using namespace icinga;
|
using namespace icinga;
|
||||||
|
|
||||||
@ -42,6 +41,11 @@ bool Application::m_ShuttingDown = false;
|
|||||||
bool Application::m_RequestRestart = false;
|
bool Application::m_RequestRestart = false;
|
||||||
bool Application::m_RequestReopenLogs = false;
|
bool Application::m_RequestReopenLogs = false;
|
||||||
pid_t Application::m_ReloadProcess = 0;
|
pid_t Application::m_ReloadProcess = 0;
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
pid_t Application::m_UmbrellaProcess = 0;
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
static bool l_Restarting = false;
|
static bool l_Restarting = false;
|
||||||
static bool l_InExceptionHandler = false;
|
static bool l_InExceptionHandler = false;
|
||||||
int Application::m_ArgC;
|
int Application::m_ArgC;
|
||||||
@ -73,7 +77,9 @@ void Application::Stop(bool runtimeRemoved)
|
|||||||
WSACleanup();
|
WSACleanup();
|
||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
ClosePidFile(true);
|
ClosePidFile(true);
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
ObjectImpl<Application>::Stop(runtimeRemoved);
|
ObjectImpl<Application>::Stop(runtimeRemoved);
|
||||||
}
|
}
|
||||||
@ -286,25 +292,24 @@ void Application::SetArgV(char **argv)
|
|||||||
*/
|
*/
|
||||||
void Application::RunEventLoop()
|
void Application::RunEventLoop()
|
||||||
{
|
{
|
||||||
#ifdef HAVE_SYSTEMD
|
|
||||||
sd_notify(0, "READY=1");
|
|
||||||
#endif /* HAVE_SYSTEMD */
|
|
||||||
|
|
||||||
double lastLoop = Utility::GetTime();
|
double lastLoop = Utility::GetTime();
|
||||||
|
|
||||||
while (!m_ShuttingDown) {
|
while (!m_ShuttingDown) {
|
||||||
if (m_RequestRestart) {
|
if (m_RequestRestart) {
|
||||||
m_RequestRestart = false; // we are now handling the request, once is enough
|
m_RequestRestart = false; // we are now handling the request, once is enough
|
||||||
|
|
||||||
#ifdef HAVE_SYSTEMD
|
#ifdef _WIN32
|
||||||
sd_notify(0, "RELOADING=1");
|
|
||||||
#endif /* HAVE_SYSTEMD */
|
|
||||||
|
|
||||||
// are we already restarting? ignore request if we already are
|
// are we already restarting? ignore request if we already are
|
||||||
if (!l_Restarting) {
|
if (!l_Restarting) {
|
||||||
l_Restarting = true;
|
l_Restarting = true;
|
||||||
m_ReloadProcess = StartReloadProcess();
|
m_ReloadProcess = StartReloadProcess();
|
||||||
}
|
}
|
||||||
|
#else /* _WIN32 */
|
||||||
|
Log(LogNotice, "Application")
|
||||||
|
<< "Got reload command, forwarding to umbrella process (PID " << m_UmbrellaProcess << ")";
|
||||||
|
|
||||||
|
(void)kill(m_UmbrellaProcess, SIGHUP);
|
||||||
|
#endif /* _WIN32 */
|
||||||
} else {
|
} else {
|
||||||
/* Watches for changes to the system time. Adjusts timers if necessary. */
|
/* Watches for changes to the system time. Adjusts timers if necessary. */
|
||||||
Utility::Sleep(2.5);
|
Utility::Sleep(2.5);
|
||||||
@ -318,10 +323,6 @@ void Application::RunEventLoop()
|
|||||||
double now = Utility::GetTime();
|
double now = Utility::GetTime();
|
||||||
double timeDiff = lastLoop - now;
|
double timeDiff = lastLoop - now;
|
||||||
|
|
||||||
#ifdef HAVE_SYSTEMD
|
|
||||||
sd_notify(0, "WATCHDOG=1");
|
|
||||||
#endif /* HAVE_SYSTEMD */
|
|
||||||
|
|
||||||
if (std::fabs(timeDiff) > 15) {
|
if (std::fabs(timeDiff) > 15) {
|
||||||
/* We made a significant jump in time. */
|
/* We made a significant jump in time. */
|
||||||
Log(LogInformation, "Application")
|
Log(LogInformation, "Application")
|
||||||
@ -336,10 +337,6 @@ void Application::RunEventLoop()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_SYSTEMD
|
|
||||||
sd_notify(0, "STOPPING=1");
|
|
||||||
#endif /* HAVE_SYSTEMD */
|
|
||||||
|
|
||||||
Log(LogInformation, "Application", "Shutting down...");
|
Log(LogInformation, "Application", "Shutting down...");
|
||||||
|
|
||||||
ConfigObject::StopObjects();
|
ConfigObject::StopObjects();
|
||||||
@ -446,6 +443,18 @@ void Application::RequestReopenLogs()
|
|||||||
m_RequestReopenLogs = true;
|
m_RequestReopenLogs = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
/**
|
||||||
|
* Sets the PID of the Icinga umbrella process.
|
||||||
|
*
|
||||||
|
* @param pid The PID of the Icinga umbrella process.
|
||||||
|
*/
|
||||||
|
void Application::SetUmbrellaProcess(pid_t pid)
|
||||||
|
{
|
||||||
|
m_UmbrellaProcess = pid;
|
||||||
|
}
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the full path of the executable.
|
* Retrieves the full path of the executable.
|
||||||
*
|
*
|
||||||
@ -680,29 +689,6 @@ void Application::AttachDebugger(const String& filename, bool interactive)
|
|||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
/**
|
|
||||||
* Signal handler for SIGINT and SIGTERM. Prepares the application for cleanly
|
|
||||||
* shutting down during the next execution of the event loop.
|
|
||||||
*
|
|
||||||
* @param - The signal number.
|
|
||||||
*/
|
|
||||||
void Application::SigIntTermHandler(int signum)
|
|
||||||
{
|
|
||||||
struct sigaction sa;
|
|
||||||
memset(&sa, 0, sizeof(sa));
|
|
||||||
sa.sa_handler = SIG_DFL;
|
|
||||||
sigaction(signum, &sa, nullptr);
|
|
||||||
|
|
||||||
Application::Ptr instance = Application::GetInstance();
|
|
||||||
|
|
||||||
if (!instance)
|
|
||||||
return;
|
|
||||||
|
|
||||||
instance->RequestShutdown();
|
|
||||||
}
|
|
||||||
#endif /* _WIN32 */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Signal handler for SIGUSR1. This signal causes Icinga to re-open
|
* Signal handler for SIGUSR1. This signal causes Icinga to re-open
|
||||||
* its log files and is mainly for use by logrotate.
|
* its log files and is mainly for use by logrotate.
|
||||||
@ -717,42 +703,6 @@ void Application::SigUsr1Handler(int)
|
|||||||
RequestReopenLogs();
|
RequestReopenLogs();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Signal handler for SIGUSR2. Hands over PID to child and commits suicide
|
|
||||||
*
|
|
||||||
* @param - The signal number.
|
|
||||||
*/
|
|
||||||
void Application::SigUsr2Handler(int)
|
|
||||||
{
|
|
||||||
Log(LogInformation, "Application", "Reload requested, letting new process take over.");
|
|
||||||
#ifdef HAVE_SYSTEMD
|
|
||||||
sd_notifyf(0, "MAINPID=%lu", (unsigned long) m_ReloadProcess);
|
|
||||||
#endif /* HAVE_SYSTEMD */
|
|
||||||
|
|
||||||
/* Write the PID of the new process to the pidfile before this
|
|
||||||
* process exits to keep systemd happy.
|
|
||||||
*/
|
|
||||||
Application::Ptr instance = GetInstance();
|
|
||||||
try {
|
|
||||||
instance->UpdatePidFile(Configuration::PidPath, m_ReloadProcess);
|
|
||||||
} catch (const std::exception&) {
|
|
||||||
/* abort restart */
|
|
||||||
Log(LogCritical, "Application", "Cannot update PID file. Aborting restart operation.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
instance->ClosePidFile(false);
|
|
||||||
|
|
||||||
/* Ensure to dump the program state on reload. */
|
|
||||||
ConfigObject::StopObjects();
|
|
||||||
instance->OnShutdown();
|
|
||||||
|
|
||||||
Log(LogInformation, "Application")
|
|
||||||
<< "Reload done, parent process shutting down. Child process with PID '" << m_ReloadProcess << "' is taking over.";
|
|
||||||
|
|
||||||
Exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Signal handler for SIGABRT. Helps with debugging ASSERT()s.
|
* Signal handler for SIGABRT. Helps with debugging ASSERT()s.
|
||||||
*
|
*
|
||||||
@ -999,19 +949,13 @@ int Application::Run()
|
|||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
struct sigaction sa;
|
struct sigaction sa;
|
||||||
memset(&sa, 0, sizeof(sa));
|
memset(&sa, 0, sizeof(sa));
|
||||||
sa.sa_handler = &Application::SigIntTermHandler;
|
|
||||||
sigaction(SIGINT, &sa, nullptr);
|
|
||||||
sigaction(SIGTERM, &sa, nullptr);
|
|
||||||
|
|
||||||
sa.sa_handler = &Application::SigUsr1Handler;
|
sa.sa_handler = &Application::SigUsr1Handler;
|
||||||
sigaction(SIGUSR1, &sa, nullptr);
|
sigaction(SIGUSR1, &sa, nullptr);
|
||||||
|
|
||||||
sa.sa_handler = &Application::SigUsr2Handler;
|
|
||||||
sigaction(SIGUSR2, &sa, nullptr);
|
|
||||||
#else /* _WIN32 */
|
#else /* _WIN32 */
|
||||||
SetConsoleCtrlHandler(&Application::CtrlHandler, TRUE);
|
SetConsoleCtrlHandler(&Application::CtrlHandler, TRUE);
|
||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
try {
|
try {
|
||||||
UpdatePidFile(Configuration::PidPath);
|
UpdatePidFile(Configuration::PidPath);
|
||||||
} catch (const std::exception&) {
|
} catch (const std::exception&) {
|
||||||
@ -1019,6 +963,7 @@ int Application::Run()
|
|||||||
<< "Cannot update PID file '" << Configuration::PidPath << "'. Aborting.";
|
<< "Cannot update PID file '" << Configuration::PidPath << "'. Aborting.";
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
SetMainTime(Utility::GetTime());
|
SetMainTime(Utility::GetTime());
|
||||||
|
|
||||||
|
@ -57,6 +57,10 @@ public:
|
|||||||
static void RequestRestart();
|
static void RequestRestart();
|
||||||
static void RequestReopenLogs();
|
static void RequestReopenLogs();
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
static void SetUmbrellaProcess(pid_t pid);
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
static bool IsShuttingDown();
|
static bool IsShuttingDown();
|
||||||
static bool IsRestarting();
|
static bool IsRestarting();
|
||||||
|
|
||||||
@ -122,9 +126,13 @@ private:
|
|||||||
static pid_t m_ReloadProcess; /**< The PID of a subprocess doing a reload, only valid when l_Restarting==true */
|
static pid_t m_ReloadProcess; /**< The PID of a subprocess doing a reload, only valid when l_Restarting==true */
|
||||||
static bool m_RequestReopenLogs; /**< Whether we should re-open log files. */
|
static bool m_RequestReopenLogs; /**< Whether we should re-open log files. */
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
static pid_t m_UmbrellaProcess; /**< The PID of the Icinga umbrella process */
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
static int m_ArgC; /**< The number of command-line arguments. */
|
static int m_ArgC; /**< The number of command-line arguments. */
|
||||||
static char **m_ArgV; /**< Command-line arguments. */
|
static char **m_ArgV; /**< Command-line arguments. */
|
||||||
FILE *m_PidFile; /**< The PID file */
|
FILE *m_PidFile = nullptr; /**< The PID file */
|
||||||
static bool m_Debugging; /**< Whether debugging is enabled. */
|
static bool m_Debugging; /**< Whether debugging is enabled. */
|
||||||
static LogSeverity m_DebuggingSeverity; /**< Whether debugging severity is set. */
|
static LogSeverity m_DebuggingSeverity; /**< Whether debugging severity is set. */
|
||||||
static double m_StartTime;
|
static double m_StartTime;
|
||||||
@ -132,9 +140,7 @@ private:
|
|||||||
static bool m_ScriptDebuggerEnabled;
|
static bool m_ScriptDebuggerEnabled;
|
||||||
static double m_LastReloadFailed;
|
static double m_LastReloadFailed;
|
||||||
|
|
||||||
#ifndef _WIN32
|
#ifdef _WIN32
|
||||||
static void SigIntTermHandler(int signum);
|
|
||||||
#else /* _WIN32 */
|
|
||||||
static BOOL WINAPI CtrlHandler(DWORD type);
|
static BOOL WINAPI CtrlHandler(DWORD type);
|
||||||
static LONG WINAPI SEHUnhandledExceptionFilter(PEXCEPTION_POINTERS exi);
|
static LONG WINAPI SEHUnhandledExceptionFilter(PEXCEPTION_POINTERS exi);
|
||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
@ -143,7 +149,6 @@ private:
|
|||||||
|
|
||||||
static void SigAbrtHandler(int signum);
|
static void SigAbrtHandler(int signum);
|
||||||
static void SigUsr1Handler(int signum);
|
static void SigUsr1Handler(int signum);
|
||||||
static void SigUsr2Handler(int signum);
|
|
||||||
static void ExceptionHandler();
|
static void ExceptionHandler();
|
||||||
|
|
||||||
static String GetCrashReportFilename();
|
static String GetCrashReportFilename();
|
||||||
|
43
lib/base/atomic.hpp
Normal file
43
lib/base/atomic.hpp
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/* Icinga 2 | (c) 2019 Icinga GmbH | GPLv2+ */
|
||||||
|
|
||||||
|
#ifndef ATOMIC_H
|
||||||
|
#define ATOMIC_H
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
|
namespace icinga
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extends std::atomic with an atomic constructor.
|
||||||
|
*
|
||||||
|
* @ingroup base
|
||||||
|
*/
|
||||||
|
template<class T>
|
||||||
|
class Atomic : public std::atomic<T> {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Like std::atomic#atomic, but operates atomically
|
||||||
|
*
|
||||||
|
* @param desired Initial value
|
||||||
|
*/
|
||||||
|
inline Atomic(T desired)
|
||||||
|
{
|
||||||
|
this->store(desired);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like std::atomic#atomic, but operates atomically
|
||||||
|
*
|
||||||
|
* @param desired Initial value
|
||||||
|
* @param order Initial store operation's memory order
|
||||||
|
*/
|
||||||
|
inline Atomic(T desired, std::memory_order order)
|
||||||
|
{
|
||||||
|
this->store(desired, order);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* ATOMIC_H */
|
@ -7,6 +7,8 @@
|
|||||||
#include "config/configcompiler.hpp"
|
#include "config/configcompiler.hpp"
|
||||||
#include "config/configcompilercontext.hpp"
|
#include "config/configcompilercontext.hpp"
|
||||||
#include "config/configitembuilder.hpp"
|
#include "config/configitembuilder.hpp"
|
||||||
|
#include "base/atomic.hpp"
|
||||||
|
#include "base/defer.hpp"
|
||||||
#include "base/logger.hpp"
|
#include "base/logger.hpp"
|
||||||
#include "base/application.hpp"
|
#include "base/application.hpp"
|
||||||
#include "base/timer.hpp"
|
#include "base/timer.hpp"
|
||||||
@ -16,10 +18,23 @@
|
|||||||
#include "base/scriptglobal.hpp"
|
#include "base/scriptglobal.hpp"
|
||||||
#include "base/context.hpp"
|
#include "base/context.hpp"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include <signal.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
#include <systemd/sd-daemon.h>
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
using namespace icinga;
|
using namespace icinga;
|
||||||
namespace po = boost::program_options;
|
namespace po = boost::program_options;
|
||||||
|
|
||||||
@ -27,13 +42,6 @@ static po::variables_map g_AppParams;
|
|||||||
|
|
||||||
REGISTER_CLICOMMAND("daemon", DaemonCommand);
|
REGISTER_CLICOMMAND("daemon", DaemonCommand);
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
static void SigHupHandler(int)
|
|
||||||
{
|
|
||||||
Application::RequestRestart();
|
|
||||||
}
|
|
||||||
#endif /* _WIN32 */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Daemonize(). On error, this function logs by itself and exits (i.e. does not return).
|
* Daemonize(). On error, this function logs by itself and exits (i.e. does not return).
|
||||||
*
|
*
|
||||||
@ -163,11 +171,6 @@ void DaemonCommand::InitParameters(boost::program_options::options_description&
|
|||||||
("close-stdio", "do not log to stdout (or stderr) after startup")
|
("close-stdio", "do not log to stdout (or stderr) after startup")
|
||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
;
|
;
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
hiddenDesc.add_options()
|
|
||||||
("reload-internal", po::value<int>(), "used internally to implement config reload: do not call manually, send SIGHUP instead");
|
|
||||||
#endif /* _WIN32 */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<String> DaemonCommand::GetArgumentSuggestions(const String& argument, const String& word) const
|
std::vector<String> DaemonCommand::GetArgumentSuggestions(const String& argument, const String& word) const
|
||||||
@ -178,6 +181,357 @@ std::vector<String> DaemonCommand::GetArgumentSuggestions(const String& argument
|
|||||||
return CLICommand::GetArgumentSuggestions(argument, word);
|
return CLICommand::GetArgumentSuggestions(argument, word);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
// The PID of the Icinga umbrella process
|
||||||
|
pid_t l_UmbrellaPid = 0;
|
||||||
|
|
||||||
|
// Whether the umbrella process allowed us to continue working beyond config validation
|
||||||
|
static Atomic<bool> l_AllowedToWork (false);
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do the actual work (config loading, ...)
|
||||||
|
*
|
||||||
|
* @param configs Files to read config from
|
||||||
|
*
|
||||||
|
* @return Exit code
|
||||||
|
*/
|
||||||
|
static inline
|
||||||
|
int RunWorker(const std::vector<std::string>& configs)
|
||||||
|
{
|
||||||
|
Log(LogInformation, "cli", "Loading configuration file(s).");
|
||||||
|
|
||||||
|
{
|
||||||
|
std::vector<ConfigItem::Ptr> newItems;
|
||||||
|
|
||||||
|
if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath))
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success";
|
||||||
|
|
||||||
|
(void)kill(l_UmbrellaPid, SIGUSR2);
|
||||||
|
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Waiting for the umbrella process to let us doing the actual work";
|
||||||
|
|
||||||
|
while (!l_AllowedToWork.load()) {
|
||||||
|
Utility::Sleep(0.2);
|
||||||
|
}
|
||||||
|
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "The umbrella process let us continuing";
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
|
/* restore the previous program state */
|
||||||
|
try {
|
||||||
|
ConfigObject::RestoreObjects(Configuration::StatePath);
|
||||||
|
} catch (const std::exception& ex) {
|
||||||
|
Log(LogCritical, "cli")
|
||||||
|
<< "Failed to restore state file: " << DiagnosticInformation(ex);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkQueue upq(25000, Configuration::Concurrency);
|
||||||
|
upq.SetName("DaemonCommand::Run");
|
||||||
|
|
||||||
|
// activate config only after daemonization: it starts threads and that is not compatible with fork()
|
||||||
|
if (!ConfigItem::ActivateItems(upq, newItems, false, false, true)) {
|
||||||
|
Log(LogCritical, "cli", "Error activating configuration.");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create the internal API object storage. Do this here too with setups without API. */
|
||||||
|
ConfigObjectUtility::CreateStorage();
|
||||||
|
|
||||||
|
/* Remove ignored Downtime/Comment objects. */
|
||||||
|
try {
|
||||||
|
String configDir = ConfigObjectUtility::GetConfigDir();
|
||||||
|
ConfigItem::RemoveIgnoredItems(configDir);
|
||||||
|
} catch (const std::exception& ex) {
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Cannot clean ignored downtimes/comments: " << ex.what();
|
||||||
|
}
|
||||||
|
|
||||||
|
ApiListener::UpdateObjectAuthority();
|
||||||
|
|
||||||
|
return Application::GetInstance()->Run();
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
/**
|
||||||
|
* The possible states of a seamless worker being started by StartUnixWorker().
|
||||||
|
*/
|
||||||
|
enum class UnixWorkerState : uint_fast8_t
|
||||||
|
{
|
||||||
|
Pending,
|
||||||
|
LoadedConfig,
|
||||||
|
Failed
|
||||||
|
};
|
||||||
|
|
||||||
|
// The signals to block temporarily in StartUnixWorker().
|
||||||
|
static const sigset_t l_UnixWorkerSignals = ([]() -> sigset_t {
|
||||||
|
sigset_t s;
|
||||||
|
|
||||||
|
(void)sigemptyset(&s);
|
||||||
|
(void)sigaddset(&s, SIGCHLD);
|
||||||
|
(void)sigaddset(&s, SIGUSR1);
|
||||||
|
(void)sigaddset(&s, SIGUSR2);
|
||||||
|
(void)sigaddset(&s, SIGINT);
|
||||||
|
(void)sigaddset(&s, SIGTERM);
|
||||||
|
(void)sigaddset(&s, SIGHUP);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
})();
|
||||||
|
|
||||||
|
// The PID of the seamless worker currently being started by StartUnixWorker()
|
||||||
|
static Atomic<pid_t> l_CurrentlyStartingUnixWorkerPid (-1);
|
||||||
|
|
||||||
|
// The state of the seamless worker currently being started by StartUnixWorker()
|
||||||
|
static Atomic<UnixWorkerState> l_CurrentlyStartingUnixWorkerState (UnixWorkerState::Pending);
|
||||||
|
|
||||||
|
// The last termination signal we received
|
||||||
|
static Atomic<int> l_TermSignal (-1);
|
||||||
|
|
||||||
|
// Whether someone requested to re-load config (and we didn't handle that request, yet)
|
||||||
|
static Atomic<bool> l_RequestedReload (false);
|
||||||
|
|
||||||
|
// Whether someone requested to re-open logs (and we didn't handle that request, yet)
|
||||||
|
static Atomic<bool> l_RequestedReopenLogs (false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Umbrella process' signal handlers
|
||||||
|
*/
|
||||||
|
static void UmbrellaSignalHandler(int num, siginfo_t *info, void*)
|
||||||
|
{
|
||||||
|
switch (num) {
|
||||||
|
case SIGUSR1:
|
||||||
|
// Someone requested to re-open logs
|
||||||
|
l_RequestedReopenLogs.store(true);
|
||||||
|
break;
|
||||||
|
case SIGUSR2:
|
||||||
|
if (l_CurrentlyStartingUnixWorkerState.load() == UnixWorkerState::Pending
|
||||||
|
&& info->si_pid == l_CurrentlyStartingUnixWorkerPid.load()) {
|
||||||
|
// The seamless worker currently being started by StartUnixWorker() successfully loaded its config
|
||||||
|
l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::LoadedConfig);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SIGCHLD:
|
||||||
|
if (l_CurrentlyStartingUnixWorkerState.load() == UnixWorkerState::Pending
|
||||||
|
&& info->si_pid == l_CurrentlyStartingUnixWorkerPid.load()) {
|
||||||
|
// The seamless worker currently being started by StartUnixWorker() failed
|
||||||
|
l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::Failed);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SIGINT:
|
||||||
|
case SIGTERM:
|
||||||
|
// Someone requested our termination
|
||||||
|
|
||||||
|
{
|
||||||
|
struct sigaction sa;
|
||||||
|
memset(&sa, 0, sizeof(sa));
|
||||||
|
|
||||||
|
sa.sa_handler = SIG_DFL;
|
||||||
|
|
||||||
|
(void)sigaction(num, &sa, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
l_TermSignal.store(num);
|
||||||
|
break;
|
||||||
|
case SIGHUP:
|
||||||
|
// Someone requested to re-load config
|
||||||
|
l_RequestedReload.store(true);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Programming error (or someone has broken the userspace)
|
||||||
|
VERIFY(!"Caught unexpected signal");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Seamless worker's signal handlers
|
||||||
|
*/
|
||||||
|
static void WorkerSignalHandler(int num, siginfo_t *info, void*)
|
||||||
|
{
|
||||||
|
switch (num) {
|
||||||
|
case SIGUSR2:
|
||||||
|
if (info->si_pid == l_UmbrellaPid) {
|
||||||
|
// The umbrella process allowed us to continue working beyond config validation
|
||||||
|
l_AllowedToWork.store(true);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SIGINT:
|
||||||
|
case SIGTERM:
|
||||||
|
if (info->si_pid == l_UmbrellaPid) {
|
||||||
|
// The umbrella process requested our termination
|
||||||
|
Application::RequestShutdown();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Programming error (or someone has broken the userspace)
|
||||||
|
VERIFY(!"Caught unexpected signal");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
// When we last notified the watchdog.
|
||||||
|
static Atomic<double> l_LastNotifiedWatchdog (0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Notify the watchdog if not notified during the last 2.5s.
|
||||||
|
*/
|
||||||
|
static void NotifyWatchdog()
|
||||||
|
{
|
||||||
|
double now = Utility::GetTime();
|
||||||
|
|
||||||
|
if (now - l_LastNotifiedWatchdog.load() >= 2.5) {
|
||||||
|
sd_notify(0, "WATCHDOG=1");
|
||||||
|
l_LastNotifiedWatchdog.store(now);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts seamless worker process doing the actual work (config loading, ...)
|
||||||
|
*
|
||||||
|
* @param configs Files to read config from
|
||||||
|
*
|
||||||
|
* @return The worker's PID on success, -1 on failure (if the worker couldn't load its config)
|
||||||
|
*/
|
||||||
|
static pid_t StartUnixWorker(const std::vector<std::string>& configs)
|
||||||
|
{
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Spawning seemless worker process doing the actual work";
|
||||||
|
|
||||||
|
try {
|
||||||
|
Application::UninitializeBase();
|
||||||
|
} catch (const std::exception& ex) {
|
||||||
|
Log(LogCritical, "cli")
|
||||||
|
<< "Failed to stop thread pool before forking, unexpected error: " << DiagnosticInformation(ex);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Block the signal handlers we'd like to change in the child process until we changed them.
|
||||||
|
* Block SIGUSR2 and SIGCHLD handlers until we've set l_CurrentlyStartingUnixWorkerPid.
|
||||||
|
*/
|
||||||
|
(void)sigprocmask(SIG_BLOCK, &l_UnixWorkerSignals, nullptr);
|
||||||
|
|
||||||
|
pid_t pid = fork();
|
||||||
|
|
||||||
|
switch (pid) {
|
||||||
|
case -1:
|
||||||
|
Log(LogCritical, "cli")
|
||||||
|
<< "fork() failed with error code " << errno << ", \"" << Utility::FormatErrorNumber(errno) << "\"";
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
|
||||||
|
case 0:
|
||||||
|
try {
|
||||||
|
{
|
||||||
|
struct sigaction sa;
|
||||||
|
memset(&sa, 0, sizeof(sa));
|
||||||
|
|
||||||
|
sa.sa_handler = SIG_DFL;
|
||||||
|
|
||||||
|
(void)sigaction(SIGCHLD, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGUSR1, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGHUP, &sa, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
struct sigaction sa;
|
||||||
|
memset(&sa, 0, sizeof(sa));
|
||||||
|
|
||||||
|
sa.sa_sigaction = &WorkerSignalHandler;
|
||||||
|
sa.sa_flags = SA_RESTART | SA_SIGINFO;
|
||||||
|
|
||||||
|
(void)sigaction(SIGUSR2, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGINT, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGTERM, &sa, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr);
|
||||||
|
|
||||||
|
try {
|
||||||
|
Application::InitializeBase();
|
||||||
|
} catch (const std::exception& ex) {
|
||||||
|
Log(LogCritical, "cli")
|
||||||
|
<< "Failed to re-initialize thread pool after forking (child): " << DiagnosticInformation(ex);
|
||||||
|
_exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
_exit(RunWorker(configs));
|
||||||
|
} catch (...) {
|
||||||
|
_exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
l_CurrentlyStartingUnixWorkerPid.store(pid);
|
||||||
|
(void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr);
|
||||||
|
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Spawned worker process (PID " << pid << "), waiting for it to load its config";
|
||||||
|
|
||||||
|
// Wait for the newly spawned process to either load its config or fail.
|
||||||
|
for (;;) {
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
NotifyWatchdog();
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
|
switch (l_CurrentlyStartingUnixWorkerState.load()) {
|
||||||
|
case UnixWorkerState::LoadedConfig:
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Worker process successfully loaded its config";
|
||||||
|
break;
|
||||||
|
case UnixWorkerState::Failed:
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Worker process couldn't load its config";
|
||||||
|
|
||||||
|
while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR) {
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
NotifyWatchdog();
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
}
|
||||||
|
pid = -1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
Utility::Sleep(0.2);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset flags for the next time
|
||||||
|
l_CurrentlyStartingUnixWorkerPid.store(-1);
|
||||||
|
l_CurrentlyStartingUnixWorkerState.store(UnixWorkerState::Pending);
|
||||||
|
|
||||||
|
try {
|
||||||
|
Application::InitializeBase();
|
||||||
|
} catch (const std::exception& ex) {
|
||||||
|
Log(LogCritical, "cli")
|
||||||
|
<< "Failed to re-initialize thread pool after forking (parent): " << DiagnosticInformation(ex);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Workaround to instantiate Application (which is abstract) in DaemonCommand#Run()
|
||||||
|
*/
|
||||||
|
class PidFileManagementApp : public Application
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
inline int Main() override
|
||||||
|
{
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The entry point for the "daemon" CLI command.
|
* The entry point for the "daemon" CLI command.
|
||||||
*
|
*
|
||||||
@ -194,15 +548,6 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin
|
|||||||
#endif /* I2_DEBUG */
|
#endif /* I2_DEBUG */
|
||||||
<< ")";
|
<< ")";
|
||||||
|
|
||||||
if (!vm.count("validate") && !vm.count("reload-internal")) {
|
|
||||||
pid_t runningpid = Application::ReadPidFile(Configuration::PidPath);
|
|
||||||
if (runningpid > 0) {
|
|
||||||
Log(LogCritical, "cli")
|
|
||||||
<< "Another instance of Icinga already running with PID " << runningpid;
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> configs;
|
std::vector<std::string> configs;
|
||||||
if (vm.count("config") > 0)
|
if (vm.count("config") > 0)
|
||||||
configs = vm["config"].as<std::vector<std::string> >();
|
configs = vm["config"].as<std::vector<std::string> >();
|
||||||
@ -212,67 +557,52 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin
|
|||||||
configs.push_back(configDir + "/icinga2.conf");
|
configs.push_back(configDir + "/icinga2.conf");
|
||||||
}
|
}
|
||||||
|
|
||||||
Log(LogInformation, "cli", "Loading configuration file(s).");
|
|
||||||
|
|
||||||
std::vector<ConfigItem::Ptr> newItems;
|
|
||||||
|
|
||||||
if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath))
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
|
|
||||||
if (vm.count("validate")) {
|
if (vm.count("validate")) {
|
||||||
|
Log(LogInformation, "cli", "Loading configuration file(s).");
|
||||||
|
|
||||||
|
std::vector<ConfigItem::Ptr> newItems;
|
||||||
|
|
||||||
|
if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath))
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
|
||||||
Log(LogInformation, "cli", "Finished validating the configuration file(s).");
|
Log(LogInformation, "cli", "Finished validating the configuration file(s).");
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef _WIN32
|
{
|
||||||
if (vm.count("reload-internal")) {
|
pid_t runningpid = Application::ReadPidFile(Configuration::PidPath);
|
||||||
/* We went through validation and now ask the old process kindly to die */
|
if (runningpid > 0) {
|
||||||
Log(LogInformation, "cli", "Requesting to take over.");
|
|
||||||
int rc = kill(vm["reload-internal"].as<int>(), SIGUSR2);
|
|
||||||
if (rc) {
|
|
||||||
Log(LogCritical, "cli")
|
Log(LogCritical, "cli")
|
||||||
<< "Failed to send signal to \"" << vm["reload-internal"].as<int>() << "\" with " << strerror(errno);
|
<< "Another instance of Icinga already running with PID " << runningpid;
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
double start = Utility::GetTime();
|
|
||||||
while (kill(vm["reload-internal"].as<int>(), SIGCHLD) == 0)
|
|
||||||
Utility::Sleep(0.2);
|
|
||||||
|
|
||||||
Log(LogNotice, "cli")
|
|
||||||
<< "Waited for " << Utility::FormatDuration(Utility::GetTime() - start) << " on old process to exit.";
|
|
||||||
}
|
}
|
||||||
#endif /* _WIN32 */
|
|
||||||
|
|
||||||
if (vm.count("daemonize")) {
|
if (vm.count("daemonize")) {
|
||||||
if (!vm.count("reload-internal")) {
|
// this subroutine either succeeds, or logs an error
|
||||||
// no additional fork neccessary on reload
|
// and terminates the process (does not return).
|
||||||
|
Daemonize();
|
||||||
// this subroutine either succeeds, or logs an error
|
|
||||||
// and terminates the process (does not return).
|
|
||||||
Daemonize();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* restore the previous program state */
|
#ifndef _WIN32
|
||||||
|
/* The Application manages the PID file,
|
||||||
|
* but on *nix this process doesn't load any config
|
||||||
|
* so there's no central Application instance.
|
||||||
|
*/
|
||||||
|
PidFileManagementApp app;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ConfigObject::RestoreObjects(Configuration::StatePath);
|
app.UpdatePidFile(Configuration::PidPath);
|
||||||
} catch (const std::exception& ex) {
|
} catch (const std::exception&) {
|
||||||
Log(LogCritical, "cli")
|
Log(LogCritical, "Application")
|
||||||
<< "Failed to restore state file: " << DiagnosticInformation(ex);
|
<< "Cannot update PID file '" << Configuration::PidPath << "'. Aborting.";
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
Defer closePidFile ([&app]() {
|
||||||
WorkQueue upq(25000, Configuration::Concurrency);
|
app.ClosePidFile(true);
|
||||||
upq.SetName("DaemonCommand::Run");
|
});
|
||||||
|
#endif /* _WIN32 */
|
||||||
// activate config only after daemonization: it starts threads and that is not compatible with fork()
|
|
||||||
if (!ConfigItem::ActivateItems(upq, newItems, false, false, true)) {
|
|
||||||
Log(LogCritical, "cli", "Error activating configuration.");
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vm.count("daemonize") || vm.count("close-stdio")) {
|
if (vm.count("daemonize") || vm.count("close-stdio")) {
|
||||||
// After disabling the console log, any further errors will go to the configured log only.
|
// After disabling the console log, any further errors will go to the configured log only.
|
||||||
@ -287,26 +617,139 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin
|
|||||||
Logger::DisableConsoleLog();
|
Logger::DisableConsoleLog();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create the internal API object storage. Do this here too with setups without API. */
|
#ifdef _WIN32
|
||||||
ConfigObjectUtility::CreateStorage();
|
return RunWorker(configs);
|
||||||
|
#else /* _WIN32 */
|
||||||
|
l_UmbrellaPid = getpid();
|
||||||
|
Application::SetUmbrellaProcess(l_UmbrellaPid);
|
||||||
|
|
||||||
/* Remove ignored Downtime/Comment objects. */
|
{
|
||||||
try {
|
struct sigaction sa;
|
||||||
String configDir = ConfigObjectUtility::GetConfigDir();
|
memset(&sa, 0, sizeof(sa));
|
||||||
ConfigItem::RemoveIgnoredItems(configDir);
|
|
||||||
} catch (const std::exception& ex) {
|
sa.sa_sigaction = &UmbrellaSignalHandler;
|
||||||
Log(LogNotice, "cli")
|
sa.sa_flags = SA_NOCLDSTOP | SA_RESTART | SA_SIGINFO;
|
||||||
<< "Cannot clean ignored downtimes/comments: " << ex.what();
|
|
||||||
|
(void)sigaction(SIGCHLD, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGUSR1, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGUSR2, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGINT, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGTERM, &sa, nullptr);
|
||||||
|
(void)sigaction(SIGHUP, &sa, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef _WIN32
|
// The PID of the current seamless worker
|
||||||
struct sigaction sa;
|
pid_t currentWorker = StartUnixWorker(configs);
|
||||||
memset(&sa, 0, sizeof(sa));
|
|
||||||
sa.sa_handler = &SigHupHandler;
|
if (currentWorker == -1) {
|
||||||
sigaction(SIGHUP, &sa, nullptr);
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Immediately allow the first (non-reload) worker to continue working beyond config validation
|
||||||
|
(void)kill(currentWorker, SIGUSR2);
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
sd_notify(0, "READY=1");
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
|
// Whether we already forwarded a termination signal to the seamless worker
|
||||||
|
bool requestedTermination = false;
|
||||||
|
|
||||||
|
// Whether we already notified systemd about our termination
|
||||||
|
bool notifiedTermination = false;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
NotifyWatchdog();
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
|
if (!requestedTermination) {
|
||||||
|
int termSig = l_TermSignal.load();
|
||||||
|
if (termSig != -1) {
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Got signal " << termSig << ", forwarding to seemless worker (PID " << currentWorker << ")";
|
||||||
|
|
||||||
|
(void)kill(currentWorker, termSig);
|
||||||
|
requestedTermination = true;
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
if (!notifiedTermination) {
|
||||||
|
notifiedTermination = true;
|
||||||
|
sd_notify(0, "STOPPING=1");
|
||||||
|
}
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (l_RequestedReload.exchange(false)) {
|
||||||
|
Log(LogInformation, "Application")
|
||||||
|
<< "Got reload command: Starting new instance.";
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
sd_notify(0, "RELOADING=1");
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
|
pid_t nextWorker = StartUnixWorker(configs);
|
||||||
|
|
||||||
|
if (nextWorker == -1) {
|
||||||
|
Log(LogCritical, "Application", "Found error in config: reloading aborted");
|
||||||
|
} else {
|
||||||
|
Log(LogInformation, "Application")
|
||||||
|
<< "Reload done, old process shutting down. Child process with PID '" << nextWorker << "' is taking over.";
|
||||||
|
|
||||||
|
(void)kill(currentWorker, SIGTERM);
|
||||||
|
|
||||||
|
{
|
||||||
|
double start = Utility::GetTime();
|
||||||
|
|
||||||
|
while (waitpid(currentWorker, nullptr, 0) == -1 && errno == EINTR) {
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
NotifyWatchdog();
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
}
|
||||||
|
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Waited for " << Utility::FormatDuration(Utility::GetTime() - start) << " on old process to exit.";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Old instance shut down, allow the new one to continue working beyond config validation
|
||||||
|
(void)kill(nextWorker, SIGUSR2);
|
||||||
|
|
||||||
|
currentWorker = nextWorker;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
sd_notify(0, "READY=1");
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (l_RequestedReopenLogs.exchange(false)) {
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Got signal " << SIGUSR1 << ", forwarding to seemless worker (PID " << currentWorker << ")";
|
||||||
|
|
||||||
|
(void)kill(currentWorker, SIGUSR1);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
if (waitpid(currentWorker, &status, WNOHANG) > 0) {
|
||||||
|
Log(LogNotice, "cli")
|
||||||
|
<< "Seemless worker (PID " << currentWorker << ") stopped, stopping as well";
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSTEMD
|
||||||
|
if (!notifiedTermination) {
|
||||||
|
notifiedTermination = true;
|
||||||
|
sd_notify(0, "STOPPING=1");
|
||||||
|
}
|
||||||
|
#endif /* HAVE_SYSTEMD */
|
||||||
|
|
||||||
|
// If killed by signal, forward it via the exit code (to be as seamless as possible)
|
||||||
|
return WIFSIGNALED(status) ? 128 + WTERMSIG(status) : WEXITSTATUS(status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Utility::Sleep(0.2);
|
||||||
|
}
|
||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
ApiListener::UpdateObjectAuthority();
|
|
||||||
|
|
||||||
return Application::GetInstance()->Run();
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user