Merge pull request 'Fix overnight runner Python selection and tmux window targeting' (#138) from agentson/fix/137-run-overnight-python-tmux into main
Some checks failed
CI / test (push) Has been cancelled
Some checks failed
CI / test (push) Has been cancelled
Reviewed-on: #138
This commit was merged in pull request #138.
This commit is contained in:
54
scripts/morning_report.sh
Executable file
54
scripts/morning_report.sh
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env bash
# Morning summary for overnight run logs.
#
# Picks the most recently modified run_*.log (and, when one exists, the most
# recently modified watchdog_*.log) in LOG_DIR, then prints per-level line
# counts followed by the tail of each log.
#
# Environment:
#   LOG_DIR  directory holding the overnight logs (default: data/overnight)
#
# Exit status: 1 when LOG_DIR or a run log is missing, 0 otherwise.

set -euo pipefail

LOG_DIR="${LOG_DIR:-data/overnight}"

# Print how many lines of file $2 contain the literal string $1.
count_matches() {
  # grep -c still prints "0" but exits 1 when nothing matches; for a report
  # that is not an error, so the status is deliberately ignored.
  grep -c -F -e "$1" -- "$2" || true
}

if [ ! -d "$LOG_DIR" ]; then
  echo "로그 디렉터리가 없습니다: $LOG_DIR"
  exit 1
fi

# Newest file by modification time. `ls` fails when the glob matches nothing;
# `|| true` keeps that from aborting under `set -e -o pipefail` so the
# explicit emptiness checks below can report it.
latest_run="$(ls -1t "$LOG_DIR"/run_*.log 2>/dev/null | head -n 1 || true)"
latest_watchdog="$(ls -1t "$LOG_DIR"/watchdog_*.log 2>/dev/null | head -n 1 || true)"

if [ -z "$latest_run" ]; then
  echo "run 로그가 없습니다: $LOG_DIR/run_*.log"
  exit 1
fi

echo "Overnight report"
echo "- run log: $latest_run"
if [ -n "$latest_watchdog" ]; then
  echo "- watchdog log: $latest_watchdog"
fi

start_line="$(head -n 1 "$latest_run" || true)"
end_line="$(tail -n 1 "$latest_run" || true)"

info_count="$(count_matches '"level": "INFO"' "$latest_run")"
warn_count="$(count_matches '"level": "WARNING"' "$latest_run")"
error_count="$(count_matches '"level": "ERROR"' "$latest_run")"
critical_count="$(count_matches '"level": "CRITICAL"' "$latest_run")"
traceback_count="$(count_matches 'Traceback' "$latest_run")"

echo "- start: ${start_line:-N/A}"
echo "- end: ${end_line:-N/A}"
echo "- INFO: ${info_count:-0}"
echo "- WARNING: ${warn_count:-0}"
echo "- ERROR: ${error_count:-0}"
echo "- CRITICAL: ${critical_count:-0}"
echo "- Traceback: ${traceback_count:-0}"

if [ -n "$latest_watchdog" ]; then
  watchdog_errors="$(count_matches '[ERROR]' "$latest_watchdog")"
  echo "- watchdog ERROR: ${watchdog_errors:-0}"
  echo ""
  echo "최근 watchdog 로그:"
  tail -n 5 "$latest_watchdog" || true
fi

echo ""
echo "최근 앱 로그:"
tail -n 20 "$latest_run" || true
87
scripts/run_overnight.sh
Executable file
87
scripts/run_overnight.sh
Executable file
@@ -0,0 +1,87 @@
|
|||||||
|
#!/usr/bin/env bash
# Start The Ouroboros overnight with logs and watchdog.
#
# Launches the app in the background, records its PID, starts
# scripts/watchdog.sh against that PID, and optionally opens a tmux session
# that tails both logs. Paths are relative, so run it from the repository root.
#
# Environment (all optional):
#   APP_CMD              command line to launch; when unset, one is built from
#                        the first Python found (.venv/bin/python, python3,
#                        python) running `-m src.main --mode=paper --dashboard`
#   DASHBOARD_PORT       port exported to the default command (default: 8080)
#   LOG_DIR              directory for logs and pid files (default: data/overnight)
#   CHECK_INTERVAL       passed through to the watchdog (default: 30)
#   TMUX_AUTO            "true" creates a tmux session tailing both logs
#   TMUX_ATTACH          "true" attaches to it unless already inside tmux
#   TMUX_SESSION_PREFIX  session name prefix (default: ouroboros_overnight)

set -euo pipefail

LOG_DIR="${LOG_DIR:-data/overnight}"
CHECK_INTERVAL="${CHECK_INTERVAL:-30}"
TMUX_AUTO="${TMUX_AUTO:-true}"
TMUX_ATTACH="${TMUX_ATTACH:-true}"
TMUX_SESSION_PREFIX="${TMUX_SESSION_PREFIX:-ouroboros_overnight}"

if [ -z "${APP_CMD:-}" ]; then
  # Prefer the project virtualenv, then whatever Python is on PATH.
  if [ -x ".venv/bin/python" ]; then
    PYTHON_BIN=".venv/bin/python"
  elif command -v python3 >/dev/null 2>&1; then
    PYTHON_BIN="python3"
  elif command -v python >/dev/null 2>&1; then
    PYTHON_BIN="python"
  else
    echo ".venv/bin/python 또는 python3/python 실행 파일을 찾을 수 없습니다."
    exit 1
  fi

  dashboard_port="${DASHBOARD_PORT:-8080}"

  APP_CMD="DASHBOARD_PORT=$dashboard_port $PYTHON_BIN -m src.main --mode=paper --dashboard"
fi

mkdir -p "$LOG_DIR"

# One timestamp names the run log, the watchdog log and the tmux session.
timestamp="$(date +"%Y%m%d_%H%M%S")"
RUN_LOG="$LOG_DIR/run_${timestamp}.log"
WATCHDOG_LOG="$LOG_DIR/watchdog_${timestamp}.log"
PID_FILE="$LOG_DIR/app.pid"
WATCHDOG_PID_FILE="$LOG_DIR/watchdog.pid"

# Refuse to start a second instance while the recorded PID is still alive.
# A pid file pointing at a dead process is treated as stale and overwritten.
if [ -f "$PID_FILE" ]; then
  old_pid="$(cat "$PID_FILE" || true)"
  if [ -n "$old_pid" ] && kill -0 "$old_pid" 2>/dev/null; then
    echo "앱이 이미 실행 중입니다. pid=$old_pid"
    exit 1
  fi
fi

echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] starting: $APP_CMD" | tee -a "$RUN_LOG"
nohup bash -lc "$APP_CMD" >>"$RUN_LOG" 2>&1 &
app_pid=$!
echo "$app_pid" > "$PID_FILE"

echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] app pid=$app_pid" | tee -a "$RUN_LOG"

# Create the watchdog log up front: the tmux pane below runs `tail -f` on it
# and would exit at once if it started before the watchdog's first write.
touch "$WATCHDOG_LOG"

# The watchdog writes its own log file, so its stdout/stderr are discarded.
nohup env PID_FILE="$PID_FILE" LOG_FILE="$WATCHDOG_LOG" CHECK_INTERVAL="$CHECK_INTERVAL" \
  bash scripts/watchdog.sh >/dev/null 2>&1 &
watchdog_pid=$!
echo "$watchdog_pid" > "$WATCHDOG_PID_FILE"

cat <<EOF
시작 완료
- app pid: $app_pid
- watchdog pid: $watchdog_pid
- app log: $RUN_LOG
- watchdog log: $WATCHDOG_LOG

실시간 확인:
tail -f "$RUN_LOG"
tail -f "$WATCHDOG_LOG"
EOF

if [ "$TMUX_AUTO" = "true" ]; then
  if ! command -v tmux >/dev/null 2>&1; then
    # The app and watchdog are already running; only the viewer is skipped.
    echo "tmux를 찾지 못해 자동 세션 생성은 건너뜁니다."
    exit 0
  fi

  session_name="${TMUX_SESSION_PREFIX}_${timestamp}"
  window_name="overnight"

  # Target the window by name rather than by index.
  tmux new-session -d -s "$session_name" -n "$window_name" "tail -f '$RUN_LOG'"
  tmux split-window -t "${session_name}:${window_name}" -v "tail -f '$WATCHDOG_LOG'"
  tmux select-layout -t "${session_name}:${window_name}" even-vertical

  echo "tmux session 생성: $session_name"
  echo "수동 접속: tmux attach -t $session_name"

  # TMUX is set inside a tmux client; attaching from there would nest sessions.
  if [ -z "${TMUX:-}" ] && [ "$TMUX_ATTACH" = "true" ]; then
    tmux attach -t "$session_name"
  fi
fi
76
scripts/stop_overnight.sh
Executable file
76
scripts/stop_overnight.sh
Executable file
@@ -0,0 +1,76 @@
|
|||||||
|
#!/usr/bin/env bash
# Stop The Ouroboros overnight app/watchdog/tmux session.
#
# Environment (all optional):
#   LOG_DIR              directory holding app.pid / watchdog.pid
#                        (default: data/overnight)
#   TMUX_SESSION_PREFIX  tmux sessions named "<prefix>_..." are killed
#                        (default: ouroboros_overnight)
#   KILL_TIMEOUT         seconds to wait after SIGTERM before SIGKILL
#                        (default: 5)

set -euo pipefail

LOG_DIR="${LOG_DIR:-data/overnight}"
PID_FILE="$LOG_DIR/app.pid"
WATCHDOG_PID_FILE="$LOG_DIR/watchdog.pid"
TMUX_SESSION_PREFIX="${TMUX_SESSION_PREFIX:-ouroboros_overnight}"
KILL_TIMEOUT="${KILL_TIMEOUT:-5}"

# Poll until process $1 no longer exists, sleeping one second between checks.
# Gives up after $2 sleeps. Returns 0 once the process is gone, 1 otherwise.
_pid_gone_within() {
  local pid="$1"
  local tries="$2"
  local i

  for ((i = 0; i < tries; i++)); do
    kill -0 "$pid" 2>/dev/null || return 0
    sleep 1
  done
  # Final look, so an exit during the last sleep is still noticed.
  ! kill -0 "$pid" 2>/dev/null
}

# Stop a process: SIGTERM first, SIGKILL if it outlives the grace period.
#
# Globals:
#   KILL_TIMEOUT  seconds to wait after SIGTERM before escalating
#                 (falls back to 5 when unset or not a number)
# Arguments:
#   $1  label used in the status message
#   $2  PID to stop
# Outputs:
#   one status line on stdout
# Returns:
#   0 if the process is gone (or was not running to begin with);
#   1 if the PID is empty, is not a positive integer, or the process is
#   still present after SIGKILL.
stop_pid() {
  local name="$1"
  local pid="$2"
  local timeout="${KILL_TIMEOUT:-5}"

  if [ -z "$pid" ]; then
    echo "$name PID가 비어 있습니다."
    return 1
  fi

  # Only a plain positive integer may reach kill: "0" and negative values
  # address whole process groups (this script's own included), and garbage
  # would otherwise be misreported as "already terminated".
  if ! [[ "$pid" =~ ^[1-9][0-9]*$ ]]; then
    echo "$name PID가 올바르지 않습니다: $pid"
    return 1
  fi

  if ! [[ "$timeout" =~ ^[0-9]+$ ]]; then
    timeout=5
  fi

  # kill -0 delivers no signal; it only reports whether the PID can be signalled.
  if ! kill -0 "$pid" 2>/dev/null; then
    echo "$name 프로세스가 이미 종료됨 (pid=$pid)"
    return 0
  fi

  kill "$pid" 2>/dev/null || true
  if _pid_gone_within "$pid" "$timeout"; then
    echo "$name 종료됨 (pid=$pid)"
    return 0
  fi

  # SIGKILL is delivered asynchronously, so give the kernel a moment before
  # declaring failure instead of checking only once, immediately.
  kill -9 "$pid" 2>/dev/null || true
  if _pid_gone_within "$pid" 2; then
    echo "$name 강제 종료됨 (pid=$pid)"
    return 0
  fi

  echo "$name 종료 실패 (pid=$pid)"
  return 1
}

# Overall exit status: becomes 1 if either process could not be stopped.
status=0

# The watchdog is stopped before the app it monitors.
if [ -f "$WATCHDOG_PID_FILE" ]; then
  watchdog_pid="$(cat "$WATCHDOG_PID_FILE" || true)"
  stop_pid "watchdog" "$watchdog_pid" || status=1
  rm -f "$WATCHDOG_PID_FILE"
else
  echo "watchdog pid 파일 없음: $WATCHDOG_PID_FILE"
fi

if [ -f "$PID_FILE" ]; then
  app_pid="$(cat "$PID_FILE" || true)"
  stop_pid "app" "$app_pid" || status=1
  rm -f "$PID_FILE"
else
  echo "app pid 파일 없음: $PID_FILE"
fi

if command -v tmux >/dev/null 2>&1; then
  # `tmux ls` exits non-zero when no tmux server is running. Under
  # `set -e -o pipefail` that would abort the script right here and skip the
  # final `exit "$status"`, so the failure is absorbed and treated as
  # "no sessions".
  sessions="$(tmux ls 2>/dev/null \
    | awk -F: -v p="$TMUX_SESSION_PREFIX" '$1 ~ "^" p "_" {print $1}' \
    || true)"

  if [ -n "$sessions" ]; then
    while IFS= read -r s; do
      if [ -z "$s" ]; then
        continue
      fi
      tmux kill-session -t "$s" 2>/dev/null || true
      echo "tmux 세션 종료: $s"
    done <<< "$sessions"
  else
    echo "종료할 tmux 세션 없음 (prefix=${TMUX_SESSION_PREFIX}_)"
  fi
fi

exit "$status"
42
scripts/watchdog.sh
Executable file
42
scripts/watchdog.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
# Simple watchdog for The Ouroboros process.
#
# Environment (all optional):
#   PID_FILE        file containing the PID to watch
#                   (default: data/overnight/app.pid)
#   LOG_FILE        file the watchdog appends to
#                   (default: data/overnight/watchdog.log)
#   CHECK_INTERVAL  seconds between liveness checks (default: 30)
#   STATUS_EVERY    write an "alive" line every N successful checks
#                   (default: 10)

set -euo pipefail

PID_FILE="${PID_FILE:-data/overnight/app.pid}"
LOG_FILE="${LOG_FILE:-data/overnight/watchdog.log}"
CHECK_INTERVAL="${CHECK_INTERVAL:-30}"
STATUS_EVERY="${STATUS_EVERY:-10}"

# Make sure the log's directory exists before the first write.
mkdir -p "$(dirname "$LOG_FILE")"

# Write one timestamped line to stdout and append it to LOG_FILE.
#
# Globals:
#   LOG_FILE  file the line is appended to
# Arguments:
#   $1  message text
# Outputs:
#   "<UTC timestamp, YYYY-MM-DDTHH:MM:SSZ> <message>" on stdout and in LOG_FILE
log() {
  printf '%s %s\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" "$1" | tee -a "$LOG_FILE"
}

# The PID to watch is read once at startup; a missing or empty pid file is
# logged and treated as fatal.
if [ ! -f "$PID_FILE" ]; then
  log "[ERROR] pid file not found: $PID_FILE"
  exit 1
fi

PID="$(cat "$PID_FILE")"
if [ -z "$PID" ]; then
  log "[ERROR] pid file is empty: $PID_FILE"
  exit 1
fi

log "[INFO] watchdog started (pid=$PID, interval=${CHECK_INTERVAL}s)"

# Number of consecutive successful liveness checks so far.
count=0
while true; do
  # kill -0 delivers no signal; it only reports whether the PID can be signalled.
  if kill -0 "$PID" 2>/dev/null; then
    count=$((count + 1))
    # A heartbeat line every STATUS_EVERY checks. The > 0 guard keeps
    # STATUS_EVERY=0 from triggering a division-by-zero that would kill the
    # watchdog; with it, 0 simply disables the heartbeat.
    if [ "$STATUS_EVERY" -gt 0 ] && [ $((count % STATUS_EVERY)) -eq 0 ]; then
      log "[INFO] process alive (pid=$PID)"
    fi
  else
    # The watched process is gone: record it and stop watching.
    log "[ERROR] process stopped (pid=$PID)"
    exit 1
  fi
  sleep "$CHECK_INTERVAL"
done
Reference in New Issue
Block a user