Commit 9932ff92 authored by nextime's avatar nextime

Restore proper watchdog continuous monitoring behavior

- Watchdogs now run in infinite loop monitoring daemons every 30 seconds
- Check START=yes configuration before starting monitoring
- Restart daemons if they die, with max 20 restarts per minute limit
- Double-check processes are actually running and correct
- Fixed both wssshd and wssshc watchdogs to run continuously
- Removed one-time start logic, restored continuous monitoring
parent 4282f289
......@@ -14,6 +14,7 @@ DAEMON_PATH="/usr/bin/wssshd"
# PID file of the monitored daemon -- presumably read by is_daemon_running; TODO confirm
PID_FILE="/var/run/wssshd.pid"
# PID file of the watchdog itself: written by main(), removed by cleanup()
WATCHDOG_PID_FILE="/var/run/wssshd-watchdog.pid"
# Watchdog log destination -- presumably what log_message appends to; TODO confirm
LOG_FILE="/var/log/wssshd/watchdog.log"
# Seconds main() sleeps between two daemon checks
CHECK_INTERVAL=30
# Restart budget, logged by main() as "$MAX_RESTARTS per $RESTART_WINDOW seconds";
# enforcement presumably lives in check_restart_limits -- TODO confirm
MAX_RESTARTS=20
RESTART_WINDOW=60 # 1 minute
......@@ -184,47 +185,57 @@ check_restart_limits() {
return 0
}
# Cleanup is now handled directly in main() function
# Function to cleanup on exit
cleanup() {
  # Shutdown hook: log the shutdown, drop the watchdog PID file when it
  # exists, and terminate the shell with status 0.
  log_message "Watchdog shutting down..."
  # Guard form: nothing to remove when the PID file is absent.
  [ ! -f "$WATCHDOG_PID_FILE" ] || rm -f "$WATCHDOG_PID_FILE"
  exit 0
}
# Trap signals: run cleanup() when the shell receives SIGTERM or SIGINT.
# NOTE(review): this installs the handler in the top-level shell. The watchdog
# loop is launched as `main &`, and caught traps are reset in the subshell
# created for a background command -- confirm the backgrounded watchdog
# still runs cleanup when it is signalled.
trap cleanup SIGTERM SIGINT
# Main watchdog function (one-time start, not continuous monitoring)
# Main watchdog function (continuous monitoring)
# main: watchdog entry point. As listed here it
#   1. lowercases $START and exits 0 unless the result is yes / y / 1 / true;
#   2. writes a PID to $WATCHDOG_PID_FILE and logs the monitoring settings;
#   3. loops: when is_daemon_running fails it consults check_restart_limits
#      and then calls start_daemon, sleeping $CHECK_INTERVAL between passes;
#   4. calls cleanup once the loop is left.
# NOTE(review): this listing looks like a commit diff whose +/- markers were
# stripped, so lines removed by the commit appear interleaved with their
# replacements (two start-up log lines, two restart-limit checks, two start
# attempts). Confirm against the real script before relying on the flow below.
main() {
# Check if START is enabled (accept various forms: yes, YES, Y, 1, true, TRUE)
# The value is lowercased first so the comparison is case-insensitive.
START_LOWER=$(echo "$START" | tr '[:upper:]' '[:lower:]')
if [ "$START_LOWER" != "yes" ] && [ "$START_LOWER" != "y" ] && [ "$START_LOWER" != "1" ] && [ "$START_LOWER" != "true" ]; then
log_message "START is not set to a valid enabled value in /etc/default/wssshd. Exiting."
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 0
fi
# Store watchdog PID
# NOTE(review): main is started as `main &`; inside that background subshell
# `$$` still expands to the PID of the launching script, not of the subshell
# running this loop -- confirm the recorded PID is the one `stop` should signal.
echo $$ > "$WATCHDOG_PID_FILE"
log_message "Watchdog starting $DAEMON_NAME"
log_message "Watchdog started for $DAEMON_NAME"
log_message "Check interval: $CHECK_INTERVAL seconds"
log_message "Max restarts: $MAX_RESTARTS per $RESTART_WINDOW seconds"
# Check if daemon is already running
# NOTE(review): presumably part of the removed one-time start logic -- exiting
# here would end the watchdog before the monitoring loop is reached; confirm.
if is_daemon_running; then
log_message "$DAEMON_NAME is already running"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 0
fi
# Monitoring loop: one pass per $CHECK_INTERVAL seconds.
while true; do
if ! is_daemon_running; then
log_message "$DAEMON_NAME is not running"
# Check restart limits before attempting to start
# NOTE(review): this exit-on-limit variant looks like the pre-commit version.
if ! check_restart_limits; then
log_message "Restart limits exceeded, not starting $DAEMON_NAME"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 1
fi
# Check restart limits before attempting to start
# On failure the loop is left via break, so control reaches cleanup below.
if ! check_restart_limits; then
log_message "Restart limits exceeded, watchdog will exit"
break
fi
# Attempt to start daemon
# NOTE(review): this variant exits after a single start attempt and looks like
# the pre-commit one-time start logic; confirm it is not in the real script.
if start_daemon; then
log_message "$DAEMON_NAME started successfully"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 0
else
log_message "Failed to start $DAEMON_NAME"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 1
fi
# Attempt to start daemon
# A failed restart is only logged; the loop keeps running and retries on the
# next pass.
if start_daemon; then
log_message "$DAEMON_NAME restarted successfully"
else
log_message "Failed to restart $DAEMON_NAME"
fi
fi
sleep "$CHECK_INTERVAL"
done
# Reached only via the break above; cleanup removes the PID file and exits 0.
log_message "Watchdog exiting"
cleanup
}
# Handle command line arguments
......@@ -235,9 +246,18 @@ case "$1" in
exit 1
fi
main &
# Wait briefly for main() to complete its work
sleep 2
echo "Watchdog start process completed"
# Wait for the backgrounded main() to create the PID file
# (10 polls of 0.5 seconds each, i.e. max 5 seconds).
# `local` is only valid inside a function; this code runs at the top level of
# the `case` statement, so the counter is a plain variable. With `local` the
# assignment failed, $count stayed unset and the wait loop was skipped.
count=0
while [ "$count" -lt 10 ] && [ ! -f "$WATCHDOG_PID_FILE" ]; do
sleep 0.5
count=$((count + 1))
done
# Report the outcome based on whether the PID file showed up in time.
if [ -f "$WATCHDOG_PID_FILE" ]; then
echo "Watchdog started"
else
echo "Watchdog failed to create PID file"
exit 1
fi
;;
stop)
if [ -f "$WATCHDOG_PID_FILE" ]; then
......
......@@ -14,6 +14,7 @@ DAEMON_PATH="/usr/bin/wssshc"
# PID file of the monitored daemon -- presumably read by is_daemon_running; TODO confirm
PID_FILE="/var/run/wssshc.pid"
# PID file of the watchdog itself: written by main(), removed by cleanup()
WATCHDOG_PID_FILE="/var/run/wssshc-watchdog.pid"
# Watchdog log destination -- presumably what log_message appends to; TODO confirm
LOG_FILE="/var/log/wssshc/watchdog.log"
# Seconds main() sleeps between two daemon checks
CHECK_INTERVAL=30
# Restart budget, logged by main() as "$MAX_RESTARTS per $RESTART_WINDOW seconds";
# enforcement presumably lives in check_restart_limits -- TODO confirm
MAX_RESTARTS=20
RESTART_WINDOW=60 # 1 minute
......@@ -184,47 +185,57 @@ check_restart_limits() {
return 0
}
# Cleanup is now handled directly in main() function
# Function to cleanup on exit
cleanup() {
  # Shutdown hook: log the shutdown, drop the watchdog PID file when it
  # exists, and terminate the shell with status 0.
  log_message "Watchdog shutting down..."
  # Guard form: nothing to remove when the PID file is absent.
  [ ! -f "$WATCHDOG_PID_FILE" ] || rm -f "$WATCHDOG_PID_FILE"
  exit 0
}
# Trap signals: run cleanup() when the shell receives SIGTERM or SIGINT.
# NOTE(review): this installs the handler in the top-level shell. The watchdog
# loop is launched as `main &`, and caught traps are reset in the subshell
# created for a background command -- confirm the backgrounded watchdog
# still runs cleanup when it is signalled.
trap cleanup SIGTERM SIGINT
# Main watchdog function (one-time start, not continuous monitoring)
# Main watchdog function (continuous monitoring)
# main: watchdog entry point. As listed here it
#   1. lowercases $START and exits 0 unless the result is yes / y / 1 / true;
#   2. writes a PID to $WATCHDOG_PID_FILE and logs the monitoring settings;
#   3. loops: when is_daemon_running fails it consults check_restart_limits
#      and then calls start_daemon, sleeping $CHECK_INTERVAL between passes;
#   4. calls cleanup once the loop is left.
# NOTE(review): this listing looks like a commit diff whose +/- markers were
# stripped, so lines removed by the commit appear interleaved with their
# replacements (two start-up log lines, two restart-limit checks, two start
# attempts). Confirm against the real script before relying on the flow below.
main() {
# Check if START is enabled (accept various forms: yes, YES, Y, 1, true, TRUE)
# The value is lowercased first so the comparison is case-insensitive.
START_LOWER=$(echo "$START" | tr '[:upper:]' '[:lower:]')
if [ "$START_LOWER" != "yes" ] && [ "$START_LOWER" != "y" ] && [ "$START_LOWER" != "1" ] && [ "$START_LOWER" != "true" ]; then
log_message "START is not set to a valid enabled value in /etc/default/wssshc. Exiting."
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 0
fi
# Store watchdog PID
# NOTE(review): main is started as `main &`; inside that background subshell
# `$$` still expands to the PID of the launching script, not of the subshell
# running this loop -- confirm the recorded PID is the one `stop` should signal.
echo $$ > "$WATCHDOG_PID_FILE"
log_message "Watchdog starting $DAEMON_NAME"
log_message "Watchdog started for $DAEMON_NAME"
log_message "Check interval: $CHECK_INTERVAL seconds"
log_message "Max restarts: $MAX_RESTARTS per $RESTART_WINDOW seconds"
# Check if daemon is already running
# NOTE(review): presumably part of the removed one-time start logic -- exiting
# here would end the watchdog before the monitoring loop is reached; confirm.
if is_daemon_running; then
log_message "$DAEMON_NAME is already running"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 0
fi
# Monitoring loop: one pass per $CHECK_INTERVAL seconds.
while true; do
if ! is_daemon_running; then
log_message "$DAEMON_NAME is not running"
# Check restart limits before attempting to start
# NOTE(review): this exit-on-limit variant looks like the pre-commit version.
if ! check_restart_limits; then
log_message "Restart limits exceeded, not starting $DAEMON_NAME"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 1
fi
# Check restart limits before attempting to start
# On failure the loop is left via break, so control reaches cleanup below.
if ! check_restart_limits; then
log_message "Restart limits exceeded, watchdog will exit"
break
fi
# Attempt to start daemon
# NOTE(review): this variant exits after a single start attempt and looks like
# the pre-commit one-time start logic; confirm it is not in the real script.
if start_daemon; then
log_message "$DAEMON_NAME started successfully"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 0
else
log_message "Failed to start $DAEMON_NAME"
rm -f "$WATCHDOG_PID_FILE" 2>/dev/null
exit 1
fi
# Attempt to start daemon
# A failed restart is only logged; the loop keeps running and retries on the
# next pass.
if start_daemon; then
log_message "$DAEMON_NAME restarted successfully"
else
log_message "Failed to restart $DAEMON_NAME"
fi
fi
sleep "$CHECK_INTERVAL"
done
# Reached only via the break above; cleanup removes the PID file and exits 0.
log_message "Watchdog exiting"
cleanup
}
# Handle command line arguments
......@@ -235,9 +246,18 @@ case "$1" in
exit 1
fi
main &
# Wait briefly for main() to complete its work
sleep 2
echo "Watchdog start process completed"
# Wait for the backgrounded main() to create the PID file
# (10 polls of 0.5 seconds each, i.e. max 5 seconds).
# `local` is only valid inside a function; this code runs at the top level of
# the `case` statement, so the counter is a plain variable. With `local` the
# assignment failed, $count stayed unset and the wait loop was skipped.
count=0
while [ "$count" -lt 10 ] && [ ! -f "$WATCHDOG_PID_FILE" ]; do
sleep 0.5
count=$((count + 1))
done
# Report the outcome based on whether the PID file showed up in time.
if [ -f "$WATCHDOG_PID_FILE" ]; then
echo "Watchdog started"
else
echo "Watchdog failed to create PID file"
exit 1
fi
;;
stop)
if [ -f "$WATCHDOG_PID_FILE" ]; then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment