diff --git a/scripts/run_overnight.sh b/scripts/run_overnight.sh index b5d6902..e71a7b0 100755 --- a/scripts/run_overnight.sh +++ b/scripts/run_overnight.sh @@ -36,7 +36,20 @@ WATCHDOG_PID_FILE="$LOG_DIR/watchdog.pid" is_port_in_use() { local port="$1" - ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]" + if command -v ss >/dev/null 2>&1; then + ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]" + return $? + fi + if command -v lsof >/dev/null 2>&1; then + lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1 + return $? + fi + if command -v netstat >/dev/null 2>&1; then + netstat -ltn 2>/dev/null | grep -Eq "[:.]${port}[[:space:]]" + return $? + fi + # No supported socket inspection command found. + return 1 } if [ -f "$PID_FILE" ]; then @@ -53,7 +66,8 @@ if [[ "$APP_CMD" == *"--dashboard"* ]] && is_port_in_use "$dashboard_port"; then exit 1 fi -# `env` keeps inline VAR=value prefixes in APP_CMD working with `exec`. +# `APP_CMD` is treated as a shell command string. +# If executable paths include spaces, they must be quoted inside APP_CMD. nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 & app_pid=$! echo "$app_pid" > "$PID_FILE" diff --git a/scripts/runtime_verify_monitor.sh b/scripts/runtime_verify_monitor.sh index e5a78a1..5d7aad2 100755 --- a/scripts/runtime_verify_monitor.sh +++ b/scripts/runtime_verify_monitor.sh @@ -9,6 +9,7 @@ INTERVAL_SEC="${INTERVAL_SEC:-60}" MAX_HOURS="${MAX_HOURS:-24}" MAX_LOOPS="${MAX_LOOPS:-0}" POLICY_TZ="${POLICY_TZ:-Asia/Seoul}" +DASHBOARD_PORT="${DASHBOARD_PORT:-8080}" cd "$ROOT_DIR" @@ -79,10 +80,16 @@ while true; do if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then app_alive=1 fi - ss -ltnp 2>/dev/null | rg -q ':8080' && port_alive=1 - log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port8080=$port_alive live_pids=${live_pids:-none}" + ss -ltnp 2>/dev/null | rg -q ":${DASHBOARD_PORT}\\b" && port_alive=1 + log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}" - if [ -z "$latest_run" ]; then + defer_log_checks=0 + if [ -z "$latest_run" ] && [ "$app_alive" -eq 1 ]; then + defer_log_checks=1 + log "[INFO] run log not yet available; defer log-based coverage checks" + fi + + if [ -z "$latest_run" ] && [ "$defer_log_checks" -eq 0 ]; then log "[ANOMALY] no run log found" fi @@ -98,7 +105,11 @@ while true; do not_observed=$((not_observed+1)) fi fi - if [ -n "$latest_run" ]; then + if [ "$defer_log_checks" -eq 1 ]; then + for deferred in KR_LOOP NXT_PATH US_PRE_PATH US_DAY_PATH US_AFTER_PATH ORDER_POLICY_SESSION; do + log "[COVERAGE] ${deferred}=DEFERRED reason=no_run_log_process_alive" + done + elif [ -n "$latest_run" ]; then check_signal "KR_LOOP" "Processing market: Korea Exchange" "$latest_run" || not_observed=$((not_observed+1)) check_signal "NXT_PATH" "NXT_PRE|NXT_AFTER|session=NXT_" "$latest_run" || not_observed=$((not_observed+1)) check_signal "US_PRE_PATH" "US_PRE|session=US_PRE" "$latest_run" || not_observed=$((not_observed+1)) @@ -126,7 +137,9 @@ while true; do is_weekend=1 fi - if [ "$is_weekend" -eq 1 ]; then + if [ "$defer_log_checks" -eq 1 ]; then + log "[FORBIDDEN] WEEKEND_KR_SESSION_ACTIVE=SKIP reason=no_run_log_process_alive" + elif [ "$is_weekend" -eq 1 ]; then # Weekend policy: KR regular session loop must never appear. if [ -n "$latest_run" ]; then check_forbidden "WEEKEND_KR_SESSION_ACTIVE" \ diff --git a/tests/test_runtime_overnight_scripts.py b/tests/test_runtime_overnight_scripts.py index 81b3195..f0ec038 100644 --- a/tests/test_runtime_overnight_scripts.py +++ b/tests/test_runtime_overnight_scripts.py @@ -50,6 +50,7 @@ def test_runtime_verify_monitor_detects_live_process_without_pid_files(tmp_path: log_text = _latest_runtime_log(log_dir) assert "app_alive=1" in log_text assert "[COVERAGE] LIVE_MODE=PASS source=process_liveness" in log_text + assert "[ANOMALY]" not in log_text finally: fake_live.terminate() fake_live.wait(timeout=5) @@ -122,3 +123,29 @@ def test_run_overnight_writes_live_pid_and_watchdog_pid(tmp_path: Path) -> None: os.kill(pid, signal.SIGTERM) except ProcessLookupError: pass + + +def test_run_overnight_fails_when_process_exits_before_grace_period(tmp_path: Path) -> None: + log_dir = tmp_path / "overnight" + log_dir.mkdir(parents=True, exist_ok=True) + + env = os.environ.copy() + env.update( + { + "LOG_DIR": str(log_dir), + "TMUX_AUTO": "false", + "STARTUP_GRACE_SEC": "1", + "APP_CMD": "false", + } + ) + completed = subprocess.run( + ["bash", str(RUN_OVERNIGHT)], + cwd=REPO_ROOT, + env=env, + capture_output=True, + text=True, + check=False, + ) + assert completed.returncode != 0 + output = f"{completed.stdout}\n{completed.stderr}" + assert "startup failed:" in output