#!/bin/bash
# Copyright (c) 2005-2023 Pandora FMS
# Author: Sancho Lerena <slerena@artica.es> 2009
# Licence: GPL2
#
# daemon_watchdog
#
# Generic watchdog to detect whether a daemon is running. If the daemon cannot
# be restarted, the watchdog executes a user-defined command to notify that the
# daemon is down and then continues in standby (without notifying again) until
# the daemon is alive again.

# Default configuration is for the Pandora FMS Server daemon

# =====================================================================
# Configuration begins here. Please use "" if data contain blank spaces

# DAEMON_WATCHDOG: Name of this script. Used to check whether it is already
# running.
export DAEMON_WATCHDOG="pandora_watchdog.sh"

# DAEMON_CHECK: Daemon monitored. Please use the full path and parameters
# exactly as they are shown by "ps aux" or "ps -Alf".
export DAEMON_CHECK="/usr/bin/pandora_server /etc/pandora/pandora_server.conf"

# DAEMON_RESTART: Command used to try to restart the daemon.
export DAEMON_RESTART="/etc/init.d/pandora_server restart"

# DAEMON_DEADWAIT: Seconds to wait after the restart attempt before checking
# the daemon again and considering it really down.
export DAEMON_DEADWAIT=90

# DAEMON_ALERT: Command/script executed if, after detecting that the daemon is
# down and waiting DAEMON_DEADWAIT, the daemon is still down.
export DAEMON_ALERT="/usr/bin/pandora_alert"

# DAEMON_LOOP: Interval (seconds) at which daemon_watchdog checks whether the
# daemon is alive.
# DO NOT use values under 3-5 seconds or it could be CPU consuming.
# NEVER NEVER NEVER use a 0 value or it will use 100% CPU!
export DAEMON_LOOP=7

# Configuration stops here
# =====================================================================

# Check whether another instance of this script is already running.
#
# Counts the "ps aux" lines that mention DAEMON_WATCHDOG (ignoring the grep
# helpers themselves) and aborts when more than 2 are found.
# NOTE(review): the threshold is presumably 2 rather than 1 because the shell
# processes forked for this very pipeline can also show up under the script's
# name - confirm before changing the pipeline shape or the threshold.
RUNNING_CHECK=$(ps aux | grep -F -- "$DAEMON_WATCHDOG" | grep -v grep | wc -l)
if [ "$RUNNING_CHECK" -gt 2 ]; then
  echo "Aborting, seems that there are more '$DAEMON_WATCHDOG' running in this system"
  logger "$DAEMON_WATCHDOG aborted execution because another watchdog seems to be running"
  # 255 is the status bash reports for the former "exit -1".
  exit 255
fi

# Standby flag used by the monitoring loop: restart attempts are only made
# while it is 0; it is set to 1 after the alert command has been run and goes
# back to 0 once the daemon is seen alive again.
# This value must always be 0 at start. Do not alter.
export DAEMON_STANDBY=0

#######################################
# Replacement for pidof, which does not work the same way on every Linux
# distro. Prints the PID of the monitored daemon.
# Globals:
#   DAEMON_CHECK (read) - text that must appear literally in the daemon's
#                         "ps aux" line (full path plus parameters).
# Outputs:
#   One line on stdout: the PID (second ps column) of the last matching
#   process, or an empty line when nothing matches or DAEMON_CHECK is empty.
#######################################
function pidof_daemon () (
  daemon_pid=""

  # An empty pattern would match every process, so treat it as "not running".
  if [ -n "$DAEMON_CHECK" ]; then
    # "ww" asks ps for unlimited output width, so long command lines are not
    # truncated (a plain COLUMNS=300 assignment is not exported to ps).
    # The pattern is handed to awk through its environment: it is matched as
    # a literal string (no regex surprises with "." or "/") and it never
    # shows up in awk's own command line, so no "grep -v grep" is needed.
    daemon_pid=$(ps auxww | DAEMON_CHECK="$DAEMON_CHECK" awk '
      index($0, ENVIRON["DAEMON_CHECK"]) { pid = $2 }
      END { print pid }')
  fi

  echo "$daemon_pid"
)

# Main script

# Abort early when the monitored executable is not installed. The executable
# is the first whitespace-separated word of DAEMON_CHECK; the rest of the
# string (its parameters) is discarded here.
read -r DAEMON_BINARY _ <<< "$DAEMON_CHECK"
if [ ! -f "$DAEMON_BINARY" ]; then
  echo "Daemon you want to check is not present in the system. Aborting watchdog"
  # Report the failure to the caller instead of exiting with echo's status 0.
  exit 1
fi

# Monitoring loop: runs forever, checking the daemon every DAEMON_LOOP seconds.
#
# - Daemon alive: clear the standby flag.
# - Daemon down and not in standby: log, run DAEMON_RESTART, wait
#   DAEMON_DEADWAIT seconds and check again. If it is still down, run
#   DAEMON_ALERT once and enter standby.
# - Daemon down and in standby: do nothing until it is seen alive again.
while true; do

  DAEMON_PID=$(pidof_daemon)

  if [ -n "$DAEMON_PID" ]; then

    echo "Checkpoint #1B $DAEMON_PID "

    # Daemon is alive: leave standby so the next outage is handled again.
    DAEMON_STANDBY=0

  else

    echo "Checkpoint #1 $DAEMON_PID "

    if [ "$DAEMON_STANDBY" = 0 ]; then

      # Daemon down, first detection. Restart it!
      logger "$DAEMON_WATCHDOG restarting $DAEMON_CHECK"
      # Intentionally unquoted: DAEMON_RESTART holds a command plus arguments.
      # shellcheck disable=SC2086
      $DAEMON_RESTART > /dev/null 2>&1

      # Just WAIT another DAEMON_DEADWAIT before considering it DEAD
      echo "Going to DAEMON_DEADEWAIT"
      sleep "$DAEMON_DEADWAIT"
      DAEMON_PID=$(pidof_daemon)

      if [ -z "$DAEMON_PID" ]; then

        # It is dead and cannot be restarted properly. Execute the alert.
        echo "I cannot startup again the process"
        logger "$DAEMON_WATCHDOG $DAEMON_CHECK is dead, alerting !"
        # Intentionally unquoted: DAEMON_ALERT may hold a command plus arguments.
        # shellcheck disable=SC2086
        $DAEMON_ALERT > /dev/null 2>&1

        # Watchdog stays in STANDBY mode until the process is alive again
        logger "$DAEMON_WATCHDOG Entering in Stabdby mode"
        DAEMON_STANDBY=1
      fi
    fi
  fi

  sleep "$DAEMON_LOOP"
done