fix: address second-round review findings on PR #404
This commit is contained in:
@@ -12,6 +12,7 @@ STARTUP_GRACE_SEC="${STARTUP_GRACE_SEC:-3}"
|
|||||||
dashboard_port="${DASHBOARD_PORT:-8080}"
|
dashboard_port="${DASHBOARD_PORT:-8080}"
|
||||||
|
|
||||||
if [ -z "${APP_CMD:-}" ]; then
|
if [ -z "${APP_CMD:-}" ]; then
|
||||||
|
USE_DEFAULT_APP_CMD="true"
|
||||||
if [ -x ".venv/bin/python" ]; then
|
if [ -x ".venv/bin/python" ]; then
|
||||||
PYTHON_BIN=".venv/bin/python"
|
PYTHON_BIN=".venv/bin/python"
|
||||||
elif command -v python3 >/dev/null 2>&1; then
|
elif command -v python3 >/dev/null 2>&1; then
|
||||||
@@ -23,7 +24,9 @@ if [ -z "${APP_CMD:-}" ]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
APP_CMD="DASHBOARD_PORT=$dashboard_port $PYTHON_BIN -m src.main --mode=live --dashboard"
|
APP_CMD="$PYTHON_BIN -m src.main --mode=live --dashboard"
|
||||||
|
else
|
||||||
|
USE_DEFAULT_APP_CMD="false"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
mkdir -p "$LOG_DIR"
|
mkdir -p "$LOG_DIR"
|
||||||
@@ -66,9 +69,14 @@ if [[ "$APP_CMD" == *"--dashboard"* ]] && is_port_in_use "$dashboard_port"; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# `APP_CMD` is treated as a shell command string.
|
if [ "$USE_DEFAULT_APP_CMD" = "true" ]; then
|
||||||
|
# Default path avoids shell word-splitting on executable paths.
|
||||||
|
nohup env DASHBOARD_PORT="$dashboard_port" "$PYTHON_BIN" -m src.main --mode=live --dashboard >>"$RUN_LOG" 2>&1 &
|
||||||
|
else
|
||||||
|
# Custom APP_CMD is treated as a shell command string.
|
||||||
# If executable paths include spaces, they must be quoted inside APP_CMD.
|
# If executable paths include spaces, they must be quoted inside APP_CMD.
|
||||||
nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 &
|
nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 &
|
||||||
|
fi
|
||||||
app_pid=$!
|
app_pid=$!
|
||||||
echo "$app_pid" > "$PID_FILE"
|
echo "$app_pid" > "$PID_FILE"
|
||||||
|
|
||||||
@@ -82,11 +90,13 @@ echo "$watchdog_pid" > "$WATCHDOG_PID_FILE"
|
|||||||
sleep "$STARTUP_GRACE_SEC"
|
sleep "$STARTUP_GRACE_SEC"
|
||||||
if ! kill -0 "$app_pid" 2>/dev/null; then
|
if ! kill -0 "$app_pid" 2>/dev/null; then
|
||||||
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: app process exited early (pid=$app_pid)" | tee -a "$RUN_LOG"
|
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: app process exited early (pid=$app_pid)" | tee -a "$RUN_LOG"
|
||||||
|
[ -n "${watchdog_pid:-}" ] && kill "$watchdog_pid" 2>/dev/null || true
|
||||||
tail -n 20 "$RUN_LOG" || true
|
tail -n 20 "$RUN_LOG" || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if ! kill -0 "$watchdog_pid" 2>/dev/null; then
|
if ! kill -0 "$watchdog_pid" 2>/dev/null; then
|
||||||
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: watchdog exited early (pid=$watchdog_pid)" | tee -a "$WATCHDOG_LOG"
|
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: watchdog exited early (pid=$watchdog_pid)" | tee -a "$WATCHDOG_LOG"
|
||||||
|
kill "$app_pid" 2>/dev/null || true
|
||||||
tail -n 20 "$WATCHDOG_LOG" || true
|
tail -n 20 "$WATCHDOG_LOG" || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -52,6 +52,24 @@ check_forbidden() {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
is_port_listening() {
|
||||||
|
local port="$1"
|
||||||
|
|
||||||
|
if command -v ss >/dev/null 2>&1; then
|
||||||
|
ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]"
|
||||||
|
return $?
|
||||||
|
fi
|
||||||
|
if command -v lsof >/dev/null 2>&1; then
|
||||||
|
lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1
|
||||||
|
return $?
|
||||||
|
fi
|
||||||
|
if command -v netstat >/dev/null 2>&1; then
|
||||||
|
netstat -ltn 2>/dev/null | grep -Eq "[:.]${port}[[:space:]]"
|
||||||
|
return $?
|
||||||
|
fi
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
log "[INFO] runtime verify monitor started interval=${INTERVAL_SEC}s max_hours=${MAX_HOURS} policy_tz=${POLICY_TZ}"
|
log "[INFO] runtime verify monitor started interval=${INTERVAL_SEC}s max_hours=${MAX_HOURS} policy_tz=${POLICY_TZ}"
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
@@ -80,7 +98,7 @@ while true; do
|
|||||||
if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then
|
if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then
|
||||||
app_alive=1
|
app_alive=1
|
||||||
fi
|
fi
|
||||||
ss -ltnp 2>/dev/null | rg -q ":${DASHBOARD_PORT}\\b" && port_alive=1
|
is_port_listening "$DASHBOARD_PORT" && port_alive=1
|
||||||
log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}"
|
log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}"
|
||||||
|
|
||||||
defer_log_checks=0
|
defer_log_checks=0
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ import socket
|
|||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||||
RUN_OVERNIGHT = REPO_ROOT / "scripts" / "run_overnight.sh"
|
RUN_OVERNIGHT = REPO_ROOT / "scripts" / "run_overnight.sh"
|
||||||
RUNTIME_MONITOR = REPO_ROOT / "scripts" / "runtime_verify_monitor.sh"
|
RUNTIME_MONITOR = REPO_ROOT / "scripts" / "runtime_verify_monitor.sh"
|
||||||
@@ -149,3 +151,9 @@ def test_run_overnight_fails_when_process_exits_before_grace_period(tmp_path: Pa
|
|||||||
assert completed.returncode != 0
|
assert completed.returncode != 0
|
||||||
output = f"{completed.stdout}\n{completed.stderr}"
|
output = f"{completed.stdout}\n{completed.stderr}"
|
||||||
assert "startup failed:" in output
|
assert "startup failed:" in output
|
||||||
|
|
||||||
|
watchdog_pid_file = log_dir / "watchdog.pid"
|
||||||
|
if watchdog_pid_file.exists():
|
||||||
|
watchdog_pid = int(watchdog_pid_file.read_text(encoding="utf-8").strip())
|
||||||
|
with pytest.raises(ProcessLookupError):
|
||||||
|
os.kill(watchdog_pid, 0)
|
||||||
|
|||||||
Reference in New Issue
Block a user