@@ -12,6 +12,7 @@ STARTUP_GRACE_SEC="${STARTUP_GRACE_SEC:-3}"
|
||||
dashboard_port="${DASHBOARD_PORT:-8080}"
|
||||
|
||||
if [ -z "${APP_CMD:-}" ]; then
|
||||
USE_DEFAULT_APP_CMD="true"
|
||||
if [ -x ".venv/bin/python" ]; then
|
||||
PYTHON_BIN=".venv/bin/python"
|
||||
elif command -v python3 >/dev/null 2>&1; then
|
||||
@@ -23,7 +24,9 @@ if [ -z "${APP_CMD:-}" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
APP_CMD="DASHBOARD_PORT=$dashboard_port $PYTHON_BIN -m src.main --mode=live --dashboard"
|
||||
APP_CMD="$PYTHON_BIN -m src.main --mode=live --dashboard"
|
||||
else
|
||||
USE_DEFAULT_APP_CMD="false"
|
||||
fi
|
||||
|
||||
mkdir -p "$LOG_DIR"
|
||||
@@ -66,9 +69,14 @@ if [[ "$APP_CMD" == *"--dashboard"* ]] && is_port_in_use "$dashboard_port"; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# `APP_CMD` is treated as a shell command string.
|
||||
# If executable paths include spaces, they must be quoted inside APP_CMD.
|
||||
nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 &
|
||||
if [ "$USE_DEFAULT_APP_CMD" = "true" ]; then
|
||||
# Default path avoids shell word-splitting on executable paths.
|
||||
nohup env DASHBOARD_PORT="$dashboard_port" "$PYTHON_BIN" -m src.main --mode=live --dashboard >>"$RUN_LOG" 2>&1 &
|
||||
else
|
||||
# Custom APP_CMD is treated as a shell command string.
|
||||
# If executable paths include spaces, they must be quoted inside APP_CMD.
|
||||
nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 &
|
||||
fi
|
||||
app_pid=$!
|
||||
echo "$app_pid" > "$PID_FILE"
|
||||
|
||||
@@ -82,11 +90,13 @@ echo "$watchdog_pid" > "$WATCHDOG_PID_FILE"
|
||||
sleep "$STARTUP_GRACE_SEC"
|
||||
if ! kill -0 "$app_pid" 2>/dev/null; then
|
||||
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: app process exited early (pid=$app_pid)" | tee -a "$RUN_LOG"
|
||||
[ -n "${watchdog_pid:-}" ] && kill "$watchdog_pid" 2>/dev/null || true
|
||||
tail -n 20 "$RUN_LOG" || true
|
||||
exit 1
|
||||
fi
|
||||
if ! kill -0 "$watchdog_pid" 2>/dev/null; then
|
||||
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: watchdog exited early (pid=$watchdog_pid)" | tee -a "$WATCHDOG_LOG"
|
||||
kill "$app_pid" 2>/dev/null || true
|
||||
tail -n 20 "$WATCHDOG_LOG" || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -52,6 +52,24 @@ check_forbidden() {
|
||||
return 0
|
||||
}
|
||||
|
||||
is_port_listening() {
|
||||
local port="$1"
|
||||
|
||||
if command -v ss >/dev/null 2>&1; then
|
||||
ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]"
|
||||
return $?
|
||||
fi
|
||||
if command -v lsof >/dev/null 2>&1; then
|
||||
lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1
|
||||
return $?
|
||||
fi
|
||||
if command -v netstat >/dev/null 2>&1; then
|
||||
netstat -ltn 2>/dev/null | grep -Eq "[:.]${port}[[:space:]]"
|
||||
return $?
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
log "[INFO] runtime verify monitor started interval=${INTERVAL_SEC}s max_hours=${MAX_HOURS} policy_tz=${POLICY_TZ}"
|
||||
|
||||
while true; do
|
||||
@@ -80,7 +98,7 @@ while true; do
|
||||
if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then
|
||||
app_alive=1
|
||||
fi
|
||||
ss -ltnp 2>/dev/null | rg -q ":${DASHBOARD_PORT}\\b" && port_alive=1
|
||||
is_port_listening "$DASHBOARD_PORT" && port_alive=1
|
||||
log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}"
|
||||
|
||||
defer_log_checks=0
|
||||
|
||||
@@ -6,6 +6,8 @@ import socket
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
RUN_OVERNIGHT = REPO_ROOT / "scripts" / "run_overnight.sh"
|
||||
RUNTIME_MONITOR = REPO_ROOT / "scripts" / "runtime_verify_monitor.sh"
|
||||
@@ -149,3 +151,9 @@ def test_run_overnight_fails_when_process_exits_before_grace_period(tmp_path: Pa
|
||||
assert completed.returncode != 0
|
||||
output = f"{completed.stdout}\n{completed.stderr}"
|
||||
assert "startup failed:" in output
|
||||
|
||||
watchdog_pid_file = log_dir / "watchdog.pid"
|
||||
if watchdog_pid_file.exists():
|
||||
watchdog_pid = int(watchdog_pid_file.read_text(encoding="utf-8").strip())
|
||||
with pytest.raises(ProcessLookupError):
|
||||
os.kill(watchdog_pid, 0)
|
||||
|
||||
Reference in New Issue
Block a user