Fix overnight runner Python selection and tmux window targeting #138

Merged
jihoson merged 2 commits from agentson/fix/137-run-overnight-python-tmux AGit into main 2026-02-17 23:25:11 +09:00
4 changed files with 259 additions and 0 deletions

54
scripts/morning_report.sh Executable file
View File

@@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Morning summary for overnight run logs.
#
# Finds the most recently modified run_*.log (and watchdog_*.log, when one
# exists) in LOG_DIR, then prints per-level line counts and the tail of each.
#
# Environment:
#   LOG_DIR  directory holding the overnight logs (default: data/overnight)
#
# Exit status: 1 when LOG_DIR or a run log is missing, 0 otherwise.
set -euo pipefail

LOG_DIR="${LOG_DIR:-data/overnight}"

# Print the most recently modified existing path among the arguments, or
# nothing when none of them exists. Working on a glob expansion avoids parsing
# `ls` output.
newest_file() {
  local newest=""
  local candidate
  for candidate in "$@"; do
    # An unmatched glob is passed through literally; skip it.
    [ -e "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest="$candidate"
    fi
  done
  printf '%s' "$newest"
}

# Print how many lines of file $2 match the basic regular expression $1.
# Uses grep rather than ripgrep: with an optional tool behind `|| true`, a
# missing binary would silently turn every count into 0.
count_lines() {
  # grep -c exits 1 when the count is 0; that is not an error for a report.
  grep -c -- "$1" "$2" || true
}

if [ ! -d "$LOG_DIR" ]; then
  echo "로그 디렉터리가 없습니다: $LOG_DIR"
  exit 1
fi

latest_run="$(newest_file "$LOG_DIR"/run_*.log)"
latest_watchdog="$(newest_file "$LOG_DIR"/watchdog_*.log)"

if [ -z "$latest_run" ]; then
  echo "run 로그가 없습니다: $LOG_DIR/run_*.log"
  exit 1
fi

echo "Overnight report"
echo "- run log: $latest_run"
if [ -n "$latest_watchdog" ]; then
  echo "- watchdog log: $latest_watchdog"
fi

# First and last line of the run log serve as start/end markers.
start_line="$(head -n 1 "$latest_run" || true)"
end_line="$(tail -n 1 "$latest_run" || true)"

info_count="$(count_lines '"level": "INFO"' "$latest_run")"
warn_count="$(count_lines '"level": "WARNING"' "$latest_run")"
error_count="$(count_lines '"level": "ERROR"' "$latest_run")"
critical_count="$(count_lines '"level": "CRITICAL"' "$latest_run")"
traceback_count="$(count_lines 'Traceback' "$latest_run")"

echo "- start: ${start_line:-N/A}"
echo "- end: ${end_line:-N/A}"
echo "- INFO: ${info_count:-0}"
echo "- WARNING: ${warn_count:-0}"
echo "- ERROR: ${error_count:-0}"
echo "- CRITICAL: ${critical_count:-0}"
echo "- Traceback: ${traceback_count:-0}"

if [ -n "$latest_watchdog" ]; then
  watchdog_errors="$(count_lines '\[ERROR\]' "$latest_watchdog")"
  echo "- watchdog ERROR: ${watchdog_errors:-0}"
  echo ""
  echo "최근 watchdog 로그:"
  tail -n 5 "$latest_watchdog" || true
fi

echo ""
echo "최근 앱 로그:"
tail -n 20 "$latest_run" || true

87
scripts/run_overnight.sh Executable file
View File

@@ -0,0 +1,87 @@
#!/usr/bin/env bash
# Start The Ouroboros overnight with logs and watchdog.
#
# Launches the app in the background, records its PID, starts
# scripts/watchdog.sh against that PID, and optionally opens a tmux session
# that tails both log files.
#
# Environment (all optional):
#   APP_CMD              shell command line to launch; when unset a default
#                        "<python> -m src.main --mode=paper --dashboard" is built
#   DASHBOARD_PORT       port placed in the default command (default: 8080)
#   LOG_DIR              directory for logs and PID files (default: data/overnight)
#   CHECK_INTERVAL       passed through to the watchdog (default: 30)
#   TMUX_AUTO            "true" creates a tmux session tailing the logs (default: true)
#   TMUX_ATTACH          "true" attaches to that session afterwards (default: true)
#   TMUX_SESSION_PREFIX  session name prefix (default: ouroboros_overnight)
#
# Exit status: 1 when no Python interpreter is found or the app is already
# running; otherwise the status of the last command executed.
set -euo pipefail

LOG_DIR="${LOG_DIR:-data/overnight}"
CHECK_INTERVAL="${CHECK_INTERVAL:-30}"
TMUX_AUTO="${TMUX_AUTO:-true}"
TMUX_ATTACH="${TMUX_ATTACH:-true}"
TMUX_SESSION_PREFIX="${TMUX_SESSION_PREFIX:-ouroboros_overnight}"

# Build the default command only when the caller did not supply APP_CMD.
if [ -z "${APP_CMD:-}" ]; then
  # Interpreter preference: project virtualenv, then python3, then python.
  if [ -x ".venv/bin/python" ]; then
    PYTHON_BIN=".venv/bin/python"
  elif command -v python3 >/dev/null 2>&1; then
    PYTHON_BIN="python3"
  elif command -v python >/dev/null 2>&1; then
    PYTHON_BIN="python"
  else
    echo ".venv/bin/python 또는 python3/python 실행 파일을 찾을 수 없습니다."
    exit 1
  fi
  dashboard_port="${DASHBOARD_PORT:-8080}"
  APP_CMD="DASHBOARD_PORT=$dashboard_port $PYTHON_BIN -m src.main --mode=paper --dashboard"
fi

mkdir -p "$LOG_DIR"
timestamp="$(date +"%Y%m%d_%H%M%S")"
RUN_LOG="$LOG_DIR/run_${timestamp}.log"
WATCHDOG_LOG="$LOG_DIR/watchdog_${timestamp}.log"
PID_FILE="$LOG_DIR/app.pid"
WATCHDOG_PID_FILE="$LOG_DIR/watchdog.pid"

# Refuse to start a second instance while the recorded PID is still alive.
if [ -f "$PID_FILE" ]; then
  old_pid="$(cat "$PID_FILE" || true)"
  if [ -n "$old_pid" ] && kill -0 "$old_pid" 2>/dev/null; then
    echo "앱이 이미 실행 중입니다. pid=$old_pid"
    exit 1
  fi
fi

echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] starting: $APP_CMD" | tee -a "$RUN_LOG"
# APP_CMD is a shell command line (it may carry VAR=value prefixes), so it is
# handed to a login bash rather than executed directly.
nohup bash -lc "$APP_CMD" >>"$RUN_LOG" 2>&1 &
app_pid=$!
echo "$app_pid" > "$PID_FILE"
echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] app pid=$app_pid" | tee -a "$RUN_LOG"

# Create the watchdog log up front: the tmux pane below runs `tail -f` on it
# and would exit at once if the watchdog had not written its first line yet.
: >>"$WATCHDOG_LOG"

# The watchdog writes its own log file, so its stdout/stderr are discarded.
nohup env PID_FILE="$PID_FILE" LOG_FILE="$WATCHDOG_LOG" CHECK_INTERVAL="$CHECK_INTERVAL" \
  bash scripts/watchdog.sh >/dev/null 2>&1 &
watchdog_pid=$!
echo "$watchdog_pid" > "$WATCHDOG_PID_FILE"

cat <<EOF
시작 완료
- app pid: $app_pid
- watchdog pid: $watchdog_pid
- app log: $RUN_LOG
- watchdog log: $WATCHDOG_LOG
실시간 확인:
tail -f "$RUN_LOG"
tail -f "$WATCHDOG_LOG"
EOF

if [ "$TMUX_AUTO" = "true" ]; then
  if ! command -v tmux >/dev/null 2>&1; then
    echo "tmux를 찾지 못해 자동 세션 생성은 건너뜁니다."
    exit 0
  fi

  session_name="${TMUX_SESSION_PREFIX}_${timestamp}"
  window_name="overnight"

  # Name the window explicitly and target "session:window" so the split and
  # layout commands do not depend on the user's tmux window numbering.
  tmux new-session -d -s "$session_name" -n "$window_name" "tail -f '$RUN_LOG'"
  tmux split-window -t "${session_name}:${window_name}" -v "tail -f '$WATCHDOG_LOG'"
  tmux select-layout -t "${session_name}:${window_name}" even-vertical

  echo "tmux session 생성: $session_name"
  echo "수동 접속: tmux attach -t $session_name"

  # Do not nest: attach only when not already inside tmux.
  if [ -z "${TMUX:-}" ] && [ "$TMUX_ATTACH" = "true" ]; then
    # Attaching needs a terminal. Without one (cron, CI, piped output) tmux
    # fails, and `set -e` would report a failed start although the app and
    # watchdog are already running.
    if [ -t 0 ] && [ -t 1 ]; then
      tmux attach -t "$session_name"
    else
      echo "터미널이 아니어서 tmux 자동 접속은 건너뜁니다."
    fi
  fi
fi

76
scripts/stop_overnight.sh Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env bash
# Stop The Ouroboros overnight app/watchdog/tmux session.
set -euo pipefail

# Settings: a non-empty value from the environment wins over the default.
: "${LOG_DIR:=data/overnight}"
: "${TMUX_SESSION_PREFIX:=ouroboros_overnight}"
: "${KILL_TIMEOUT:=5}"

# PID files live inside LOG_DIR.
PID_FILE="${LOG_DIR}/app.pid"
WATCHDOG_PID_FILE="${LOG_DIR}/watchdog.pid"
#######################################
# Stop one process: send SIGTERM, wait, then fall back to SIGKILL.
# Globals:   KILL_TIMEOUT (read) - seconds to wait after SIGTERM; 5 when
#            unset or not a non-negative integer
# Arguments: $1 - label used in the messages
#            $2 - PID to stop
# Outputs:   progress messages on stdout
# Returns:   0 when the process is gone (or was not running),
#            1 when the PID is empty/invalid or the process survived SIGKILL
#######################################
stop_pid() {
  local name="$1"
  local pid="$2"
  local timeout="${KILL_TIMEOUT:-5}"
  local i

  if [ -z "$pid" ]; then
    echo "$name PID가 비어 있습니다."
    return 1
  fi

  # Only a positive integer names a single process. 0 and negative values make
  # kill address whole process groups (or every process the user owns), so the
  # content of a corrupted PID file must never reach kill.
  if ! [[ "$pid" =~ ^[1-9][0-9]*$ ]]; then
    echo "$name PID가 올바르지 않습니다: $pid"
    return 1
  fi

  [[ "$timeout" =~ ^[0-9]+$ ]] || timeout=5

  if ! kill -0 "$pid" 2>/dev/null; then
    echo "$name 프로세스가 이미 종료됨 (pid=$pid)"
    return 0
  fi

  # Polite request first; the process may exit between the probe and the signal.
  kill "$pid" 2>/dev/null || true
  for ((i = 0; i < 10#$timeout; i++)); do
    if ! kill -0 "$pid" 2>/dev/null; then
      echo "$name 종료됨 (pid=$pid)"
      return 0
    fi
    sleep 1
  done

  kill -9 "$pid" 2>/dev/null || true
  # The process does not vanish the instant SIGKILL is sent; poll for a short
  # while instead of probing once, which could report a false failure.
  for ((i = 0; i < 3; i++)); do
    if ! kill -0 "$pid" 2>/dev/null; then
      echo "$name 강제 종료됨 (pid=$pid)"
      return 0
    fi
    sleep 1
  done

  echo "$name 종료 실패 (pid=$pid)"
  return 1
}
# Main flow: stop the watchdog, then the app, then close matching tmux
# sessions. `status` becomes 1 when any stop_pid call fails and is used as the
# script's exit status, so one failure does not prevent the remaining cleanup.
status=0

if [ -f "$WATCHDOG_PID_FILE" ]; then
  watchdog_pid="$(cat "$WATCHDOG_PID_FILE" || true)"
  stop_pid "watchdog" "$watchdog_pid" || status=1
  rm -f "$WATCHDOG_PID_FILE"
else
  echo "watchdog pid 파일 없음: $WATCHDOG_PID_FILE"
fi

if [ -f "$PID_FILE" ]; then
  app_pid="$(cat "$PID_FILE" || true)"
  stop_pid "app" "$app_pid" || status=1
  rm -f "$PID_FILE"
else
  echo "app pid 파일 없음: $PID_FILE"
fi

if command -v tmux >/dev/null 2>&1; then
  # tmux exits non-zero when no server is running. That simply means "no
  # sessions" and must not abort the script under `set -e`, hence `|| true`.
  # -F prints bare session names, so no column parsing is needed.
  sessions="$(tmux list-sessions -F '#{session_name}' 2>/dev/null || true)"
  killed_any=0
  while IFS= read -r s; do
    [ -n "$s" ] || continue
    # Literal prefix comparison: characters in the prefix are not treated as
    # regular-expression syntax.
    [[ "$s" == "${TMUX_SESSION_PREFIX}_"* ]] || continue
    killed_any=1
    tmux kill-session -t "$s" 2>/dev/null || true
    echo "tmux 세션 종료: $s"
  done <<< "$sessions"
  if [ "$killed_any" -eq 0 ]; then
    echo "종료할 tmux 세션 없음 (prefix=${TMUX_SESSION_PREFIX}_)"
  fi
fi

exit "$status"

42
scripts/watchdog.sh Executable file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Simple watchdog for The Ouroboros process.
set -euo pipefail

# Tunables: a non-empty value from the environment wins over the default.
: "${PID_FILE:=data/overnight/app.pid}"
: "${LOG_FILE:=data/overnight/watchdog.log}"
: "${CHECK_INTERVAL:=30}"
: "${STATUS_EVERY:=10}"

# The log file's directory has to exist before the first line is appended.
mkdir -p "$(dirname "$LOG_FILE")"
# Emit one line "<UTC timestamp> <message>" on stdout and append the same line
# to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
log() {
  local entry
  printf -v entry '%s %s' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" "$1"
  # The here-string supplies the trailing newline.
  tee -a "$LOG_FILE" <<< "$entry"
}
# Main flow: read the PID once from PID_FILE, then probe it every
# CHECK_INTERVAL seconds. Exits 1 as soon as the process is gone or when the
# PID file is missing, empty or invalid.
if [ ! -f "$PID_FILE" ]; then
  log "[ERROR] pid file not found: $PID_FILE"
  exit 1
fi

PID="$(cat "$PID_FILE")"
if [ -z "$PID" ]; then
  log "[ERROR] pid file is empty: $PID_FILE"
  exit 1
fi

# Only a positive integer identifies one process. With 0 or a negative value
# `kill -0` probes a process group instead and would report "alive" forever.
if ! [[ "$PID" =~ ^[1-9][0-9]*$ ]]; then
  log "[ERROR] invalid pid in $PID_FILE: $PID"
  exit 1
fi

# Normalise STATUS_EVERY before it is used as a modulus: non-numeric input
# falls back to 10, leading zeros are read as decimal rather than octal, and 0
# switches the periodic "alive" line off instead of dividing by zero.
if [[ "$STATUS_EVERY" =~ ^[0-9]+$ ]]; then
  status_every=$((10#$STATUS_EVERY))
else
  status_every=10
fi

log "[INFO] watchdog started (pid=$PID, interval=${CHECK_INTERVAL}s)"

count=0
while true; do
  if kill -0 "$PID" 2>/dev/null; then
    count=$((count + 1))
    # Report liveness only on every status_every-th successful probe.
    if [ "$status_every" -gt 0 ] && [ $((count % status_every)) -eq 0 ]; then
      log "[INFO] process alive (pid=$PID)"
    fi
  else
    log "[ERROR] process stopped (pid=$PID)"
    exit 1
  fi
  sleep "$CHECK_INTERVAL"
done