fix: address second-round review findings on PR #404
All checks were successful
Gitea CI / test (push) Successful in 36s
Gitea CI / test (pull_request) Successful in 36s

This commit is contained in:
agentson
2026-03-04 02:29:54 +09:00
parent 3cde8779fa
commit c412412f7b
3 changed files with 41 additions and 5 deletions

View File

@@ -12,6 +12,7 @@ STARTUP_GRACE_SEC="${STARTUP_GRACE_SEC:-3}"
dashboard_port="${DASHBOARD_PORT:-8080}" dashboard_port="${DASHBOARD_PORT:-8080}"
if [ -z "${APP_CMD:-}" ]; then if [ -z "${APP_CMD:-}" ]; then
USE_DEFAULT_APP_CMD="true"
if [ -x ".venv/bin/python" ]; then if [ -x ".venv/bin/python" ]; then
PYTHON_BIN=".venv/bin/python" PYTHON_BIN=".venv/bin/python"
elif command -v python3 >/dev/null 2>&1; then elif command -v python3 >/dev/null 2>&1; then
@@ -23,7 +24,9 @@ if [ -z "${APP_CMD:-}" ]; then
exit 1 exit 1
fi fi
APP_CMD="DASHBOARD_PORT=$dashboard_port $PYTHON_BIN -m src.main --mode=live --dashboard" APP_CMD="$PYTHON_BIN -m src.main --mode=live --dashboard"
else
USE_DEFAULT_APP_CMD="false"
fi fi
mkdir -p "$LOG_DIR" mkdir -p "$LOG_DIR"
@@ -66,9 +69,14 @@ if [[ "$APP_CMD" == *"--dashboard"* ]] && is_port_in_use "$dashboard_port"; then
exit 1 exit 1
fi fi
# `APP_CMD` is treated as a shell command string. if [ "$USE_DEFAULT_APP_CMD" = "true" ]; then
# Default path avoids shell word-splitting on executable paths.
nohup env DASHBOARD_PORT="$dashboard_port" "$PYTHON_BIN" -m src.main --mode=live --dashboard >>"$RUN_LOG" 2>&1 &
else
# Custom APP_CMD is treated as a shell command string.
# If executable paths include spaces, they must be quoted inside APP_CMD. # If executable paths include spaces, they must be quoted inside APP_CMD.
nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 & nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 &
fi
app_pid=$! app_pid=$!
echo "$app_pid" > "$PID_FILE" echo "$app_pid" > "$PID_FILE"
@@ -82,11 +90,13 @@ echo "$watchdog_pid" > "$WATCHDOG_PID_FILE"
sleep "$STARTUP_GRACE_SEC" sleep "$STARTUP_GRACE_SEC"
if ! kill -0 "$app_pid" 2>/dev/null; then if ! kill -0 "$app_pid" 2>/dev/null; then
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: app process exited early (pid=$app_pid)" | tee -a "$RUN_LOG" echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: app process exited early (pid=$app_pid)" | tee -a "$RUN_LOG"
[ -n "${watchdog_pid:-}" ] && kill "$watchdog_pid" 2>/dev/null || true
tail -n 20 "$RUN_LOG" || true tail -n 20 "$RUN_LOG" || true
exit 1 exit 1
fi fi
if ! kill -0 "$watchdog_pid" 2>/dev/null; then if ! kill -0 "$watchdog_pid" 2>/dev/null; then
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: watchdog exited early (pid=$watchdog_pid)" | tee -a "$WATCHDOG_LOG" echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] startup failed: watchdog exited early (pid=$watchdog_pid)" | tee -a "$WATCHDOG_LOG"
kill "$app_pid" 2>/dev/null || true
tail -n 20 "$WATCHDOG_LOG" || true tail -n 20 "$WATCHDOG_LOG" || true
exit 1 exit 1
fi fi

View File

@@ -52,6 +52,24 @@ check_forbidden() {
return 0 return 0
} }
is_port_listening() {
local port="$1"
if command -v ss >/dev/null 2>&1; then
ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]"
return $?
fi
if command -v lsof >/dev/null 2>&1; then
lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1
return $?
fi
if command -v netstat >/dev/null 2>&1; then
netstat -ltn 2>/dev/null | grep -Eq "[:.]${port}[[:space:]]"
return $?
fi
return 1
}
log "[INFO] runtime verify monitor started interval=${INTERVAL_SEC}s max_hours=${MAX_HOURS} policy_tz=${POLICY_TZ}" log "[INFO] runtime verify monitor started interval=${INTERVAL_SEC}s max_hours=${MAX_HOURS} policy_tz=${POLICY_TZ}"
while true; do while true; do
@@ -80,7 +98,7 @@ while true; do
if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then
app_alive=1 app_alive=1
fi fi
ss -ltnp 2>/dev/null | rg -q ":${DASHBOARD_PORT}\\b" && port_alive=1 is_port_listening "$DASHBOARD_PORT" && port_alive=1
log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}" log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}"
defer_log_checks=0 defer_log_checks=0

View File

@@ -6,6 +6,8 @@ import socket
import subprocess import subprocess
from pathlib import Path from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).resolve().parent.parent REPO_ROOT = Path(__file__).resolve().parent.parent
RUN_OVERNIGHT = REPO_ROOT / "scripts" / "run_overnight.sh" RUN_OVERNIGHT = REPO_ROOT / "scripts" / "run_overnight.sh"
RUNTIME_MONITOR = REPO_ROOT / "scripts" / "runtime_verify_monitor.sh" RUNTIME_MONITOR = REPO_ROOT / "scripts" / "runtime_verify_monitor.sh"
@@ -149,3 +151,9 @@ def test_run_overnight_fails_when_process_exits_before_grace_period(tmp_path: Pa
assert completed.returncode != 0 assert completed.returncode != 0
output = f"{completed.stdout}\n{completed.stderr}" output = f"{completed.stdout}\n{completed.stderr}"
assert "startup failed:" in output assert "startup failed:" in output
watchdog_pid_file = log_dir / "watchdog.pid"
if watchdog_pid_file.exists():
watchdog_pid = int(watchdog_pid_file.read_text(encoding="utf-8").strip())
with pytest.raises(ProcessLookupError):
os.kill(watchdog_pid, 0)