@@ -36,7 +36,20 @@ WATCHDOG_PID_FILE="$LOG_DIR/watchdog.pid"
|
||||
|
||||
# Report whether a TCP listener is bound to the given port.
#
# Arguments:
#   $1 - TCP port number to look for.
# Returns:
#   0 if the first available inspection tool (ss, then lsof, then netstat)
#     reports a listening socket on that port;
#   non-zero if that tool reports none, or if none of the tools is installed.
is_port_in_use() {
  local port="$1"

  # Probe only after confirming the tool exists. The first tool found decides
  # the result; later tools are never consulted.
  if command -v ss >/dev/null 2>&1; then
    ss -ltn 2>/dev/null | grep -Eq ":${port}[[:space:]]"
    return $?
  fi

  if command -v lsof >/dev/null 2>&1; then
    lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1
    return $?
  fi

  if command -v netstat >/dev/null 2>&1; then
    # `[:.]` accepts both "addr:port" and "addr.port" local-address forms.
    netstat -ltn 2>/dev/null | grep -Eq "[:.]${port}[[:space:]]"
    return $?
  fi

  # No supported socket inspection command found.
  return 1
}
|
||||
|
||||
if [ -f "$PID_FILE" ]; then
|
||||
@@ -53,7 +66,8 @@ if [[ "$APP_CMD" == *"--dashboard"* ]] && is_port_in_use "$dashboard_port"; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# `env` keeps inline VAR=value prefixes in APP_CMD working with `exec`.
|
||||
# `APP_CMD` is treated as a shell command string.
|
||||
# If executable paths include spaces, they must be quoted inside APP_CMD.
|
||||
nohup bash -lc "exec env $APP_CMD" >>"$RUN_LOG" 2>&1 &
|
||||
app_pid=$!
|
||||
echo "$app_pid" > "$PID_FILE"
|
||||
|
||||
@@ -9,6 +9,7 @@ INTERVAL_SEC="${INTERVAL_SEC:-60}"
|
||||
MAX_HOURS="${MAX_HOURS:-24}"
|
||||
MAX_LOOPS="${MAX_LOOPS:-0}"
|
||||
POLICY_TZ="${POLICY_TZ:-Asia/Seoul}"
|
||||
DASHBOARD_PORT="${DASHBOARD_PORT:-8080}"
|
||||
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
@@ -79,10 +80,16 @@ while true; do
|
||||
if [ "$app_alive" -eq 0 ] && [ -n "$live_pids" ]; then
|
||||
app_alive=1
|
||||
fi
|
||||
ss -ltnp 2>/dev/null | rg -q ':8080' && port_alive=1
|
||||
log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port8080=$port_alive live_pids=${live_pids:-none}"
|
||||
ss -ltnp 2>/dev/null | rg -q ":${DASHBOARD_PORT}\\b" && port_alive=1
|
||||
log "[HEARTBEAT] run_log=${latest_run:-none} app_alive=$app_alive watchdog_alive=$wd_alive port=${DASHBOARD_PORT} alive=$port_alive live_pids=${live_pids:-none}"
|
||||
|
||||
if [ -z "$latest_run" ]; then
|
||||
defer_log_checks=0
|
||||
if [ -z "$latest_run" ] && [ "$app_alive" -eq 1 ]; then
|
||||
defer_log_checks=1
|
||||
log "[INFO] run log not yet available; defer log-based coverage checks"
|
||||
fi
|
||||
|
||||
if [ -z "$latest_run" ] && [ "$defer_log_checks" -eq 0 ]; then
|
||||
log "[ANOMALY] no run log found"
|
||||
fi
|
||||
|
||||
@@ -98,7 +105,11 @@ while true; do
|
||||
not_observed=$((not_observed+1))
|
||||
fi
|
||||
fi
|
||||
if [ -n "$latest_run" ]; then
|
||||
if [ "$defer_log_checks" -eq 1 ]; then
|
||||
for deferred in KR_LOOP NXT_PATH US_PRE_PATH US_DAY_PATH US_AFTER_PATH ORDER_POLICY_SESSION; do
|
||||
log "[COVERAGE] ${deferred}=DEFERRED reason=no_run_log_process_alive"
|
||||
done
|
||||
elif [ -n "$latest_run" ]; then
|
||||
check_signal "KR_LOOP" "Processing market: Korea Exchange" "$latest_run" || not_observed=$((not_observed+1))
|
||||
check_signal "NXT_PATH" "NXT_PRE|NXT_AFTER|session=NXT_" "$latest_run" || not_observed=$((not_observed+1))
|
||||
check_signal "US_PRE_PATH" "US_PRE|session=US_PRE" "$latest_run" || not_observed=$((not_observed+1))
|
||||
@@ -126,7 +137,9 @@ while true; do
|
||||
is_weekend=1
|
||||
fi
|
||||
|
||||
if [ "$is_weekend" -eq 1 ]; then
|
||||
if [ "$defer_log_checks" -eq 1 ]; then
|
||||
log "[FORBIDDEN] WEEKEND_KR_SESSION_ACTIVE=SKIP reason=no_run_log_process_alive"
|
||||
elif [ "$is_weekend" -eq 1 ]; then
|
||||
# Weekend policy: KR regular session loop must never appear.
|
||||
if [ -n "$latest_run" ]; then
|
||||
check_forbidden "WEEKEND_KR_SESSION_ACTIVE" \
|
||||
|
||||
@@ -50,6 +50,7 @@ def test_runtime_verify_monitor_detects_live_process_without_pid_files(tmp_path:
|
||||
log_text = _latest_runtime_log(log_dir)
|
||||
assert "app_alive=1" in log_text
|
||||
assert "[COVERAGE] LIVE_MODE=PASS source=process_liveness" in log_text
|
||||
assert "[ANOMALY]" not in log_text
|
||||
finally:
|
||||
fake_live.terminate()
|
||||
fake_live.wait(timeout=5)
|
||||
@@ -122,3 +123,29 @@ def test_run_overnight_writes_live_pid_and_watchdog_pid(tmp_path: Path) -> None:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
|
||||
|
||||
def test_run_overnight_fails_when_process_exits_before_grace_period(tmp_path: Path) -> None:
    """Check that the overnight launcher reports a startup failure.

    The launcher script is run with ``APP_CMD`` set to ``false`` (a command
    that exits right away with a non-zero status) and a one-second
    ``STARTUP_GRACE_SEC``. The script is expected to exit non-zero and to
    print a ``startup failed:`` message on stdout or stderr.
    """
    overnight_dir = tmp_path / "overnight"
    overnight_dir.mkdir(parents=True, exist_ok=True)

    # Inherit the current environment and override only the launcher knobs.
    run_env = {
        **os.environ,
        "LOG_DIR": str(overnight_dir),
        "TMUX_AUTO": "false",
        "STARTUP_GRACE_SEC": "1",
        "APP_CMD": "false",
    }

    result = subprocess.run(
        ["bash", str(RUN_OVERNIGHT)],
        cwd=REPO_ROOT,
        env=run_env,
        capture_output=True,
        text=True,
        check=False,
    )

    assert result.returncode != 0
    # The message may land on either stream, so search both.
    combined = "\n".join((result.stdout, result.stderr))
    assert "startup failed:" in combined
|
||||
|
||||
Reference in New Issue
Block a user