2026-03-04 02:46:38 +09:00
3 changed files with 61 additions and 7 deletions
--- a/scripts/run_overnight.sh
+++ b/scripts/run_overnight.sh
@@ -36,7 +36,20 @@ WATCHDOG_PID_FILE="$LOG_DIR/watchdog.pid"

 is_port_in_use() {
    local port="$1"
-    ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]"
+    if command -v ss >/dev/null 2>&1; then
+        ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]"
+        return $?
+    fi
+    if command -v lsof >/dev/null 2>&1; then
+        lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1
+        return $?
+    fi
+    if command -v netstat >/dev/null 2>&1; then
+        netstat -ltn 2>/dev/null | grep -Eq "[:.]${port}[[:space:]]"
+        return $?
+    fi
+    # No supported socket inspection command found; report the port as not in use.
+    return 1
 }

 if [ -f "$PID_FILE" ]; then
@@ -53,7 +66,8 @@ if [[ "$APP_CMD" == *"--dashboard"* ]] && is_port_in_use "$dashboard_port"; then
    exit 1
 fi

-# `env` keeps inline VAR=value prefixes in APP_CMD working with `exec`.
+# `APP_CMD` is a shell command string parsed by `bash -lc`; `env` keeps inline VAR=value prefixes working with `exec`.
+# If executable paths include spaces, they must be quoted inside APP_CMD.
 nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 &
 app_pid=$!
 echo "$app_pid" > "$PID_FILE"
--- a/scripts/runtime_verify_monitor.sh
+++ b/scripts/runtime_verify_monitor.sh
@@ -9,6 +9,7 @@ INTERVAL_SEC="${INTERVAL_SEC:-60}"
 MAX_HOURS="${MAX_HOURS:-24}"
 MAX_LOOPS="${MAX_LOOPS:-0}"
 POLICY_TZ="${POLICY_TZ:-Asia/Seoul}"
+DASHBOARD_PORT="${DASHBOARD_PORT:-8080}"

 cd "$ROOT_DIR"

@@ -79,10 +80,16 @@ while true; do
  if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then
    app_alive=1
  fi
-  ss -ltnp 2>/dev/null | rg -q ':8080' && port_alive=1
-  log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port8080=$port_alive live_pids=${live_pids:-none}"
+  ss -ltnp 2>/dev/null | rg -q ":${DASHBOARD_PORT}\\b" && port_alive=1
+  log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}"

-  if [ -z "$latest_run" ]; then
+  defer_log_checks=0
+  if [ -z "$latest_run" ] && [ "$app_alive" -eq 1 ]; then
+    defer_log_checks=1
+    log "[INFO] run log not yet available; defer log-based coverage checks"
+  fi
+
+  if [ -z "$latest_run" ] && [ "$defer_log_checks" -eq 0 ]; then
    log "[ANOMALY] no run log found"
  fi

@@ -98,7 +105,11 @@ while true; do
      not_observed=$((not_observed+1))
    fi
  fi
-  if [ -n "$latest_run" ]; then
+  if [ "$defer_log_checks" -eq 1 ]; then
+    for deferred in KR_LOOP NXT_PATH US_PRE_PATH US_DAY_PATH US_AFTER_PATH ORDER_POLICY_SESSION; do
+      log "[COVERAGE] ${deferred}=DEFERRED reason=no_run_log_process_alive"
+    done
+  elif [ -n "$latest_run" ]; then
    check_signal "KR_LOOP" "Processing market: Korea Exchange" "$latest_run" || not_observed=$((not_observed+1))
    check_signal "NXT_PATH" "NXT_PRE|NXT_AFTER|session=NXT_" "$latest_run" || not_observed=$((not_observed+1))
    check_signal "US_PRE_PATH" "US_PRE|session=US_PRE" "$latest_run" || not_observed=$((not_observed+1))
@@ -126,7 +137,9 @@ while true; do
    is_weekend=1
  fi

-  if [ "$is_weekend" -eq 1 ]; then
+  if [ "$defer_log_checks" -eq 1 ]; then
+    log "[FORBIDDEN] WEEKEND_KR_SESSION_ACTIVE=SKIP reason=no_run_log_process_alive"
+  elif [ "$is_weekend" -eq 1 ]; then
    # Weekend policy: KR regular session loop must never appear.
    if [ -n "$latest_run" ]; then
      check_forbidden "WEEKEND_KR_SESSION_ACTIVE" \
--- a/tests/test_runtime_overnight_scripts.py
+++ b/tests/test_runtime_overnight_scripts.py
@@ -50,6 +50,7 @@ def test_runtime_verify_monitor_detects_live_process_without_pid_files(tmp_path:
        log_text = _latest_runtime_log(log_dir)
        assert "app_alive=1" in log_text
        assert "[COVERAGE] LIVE_MODE=PASS source=process_liveness" in log_text
+        assert "[ANOMALY]" not in log_text
    finally:
        fake_live.terminate()
        fake_live.wait(timeout=5)
@@ -122,3 +123,29 @@ def test_run_overnight_writes_live_pid_and_watchdog_pid(tmp_path: Path) -> None:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            pass
+
+
+def test_run_overnight_fails_when_process_exits_before_grace_period(tmp_path: Path) -> None:
+    log_dir = tmp_path / "overnight"
+    log_dir.mkdir(parents=True, exist_ok=True)
+
+    env = os.environ.copy()
+    env.update(
+        {
+            "LOG_DIR": str(log_dir),
+            "TMUX_AUTO": "false",
+            "STARTUP_GRACE_SEC": "1",
+            "APP_CMD": "false",
+        }
+    )
+    completed = subprocess.run(
+        ["bash", str(RUN_OVERNIGHT)],
+        cwd=REPO_ROOT,
+        env=env,
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    assert completed.returncode != 0
+    output = f"{completed.stdout}\n{completed.stderr}"
+    assert "startup failed:" in output