merge: feature/v3-session-policy-stream into main #399
@@ -1,6 +1,6 @@
|
||||
<!--
|
||||
Doc-ID: DOC-REQ-001
|
||||
Version: 1.0.10
|
||||
Version: 1.0.11
|
||||
Status: active
|
||||
Owner: strategy
|
||||
Updated: 2026-03-02
|
||||
@@ -19,7 +19,7 @@ Updated: 2026-03-02
|
||||
- `REQ-V2-005`: 라벨링은 Triple Barrier(Upper/Lower/Time) 방식이어야 한다.
|
||||
- `REQ-V2-006`: 검증은 Walk-forward + Purge/Embargo를 강제한다.
|
||||
- `REQ-V2-007`: 백테스트는 비용/슬리피지/체결실패를 반영하지 않으면 채택 불가다.
|
||||
- `REQ-V2-008`: Kill Switch는 신규주문차단 -> 미체결취소 -> 재조회 -> 리스크축소 -> 스냅샷 순서다.
|
||||
- `REQ-V2-008`: Kill Switch는 신규주문차단 -> 미체결취소 -> 재조회(최대 3회 시도: 초기 1회 + 재시도 2회, 실패 시 1s/2s backoff 후 재시도, 성공 시 즉시 중단) -> 리스크축소 -> 스냅샷 순서다.
|
||||
|
||||
## v3 핵심 요구사항
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ Updated: 2026-03-02
|
||||
| REQ-V2-005 | Triple Barrier 라벨링 | `src/analysis/triple_barrier.py` | ✅ 완료 |
|
||||
| REQ-V2-006 | Walk-Forward + Purge/Embargo 검증 | `src/analysis/walk_forward_split.py` | ✅ 완료 |
|
||||
| REQ-V2-007 | 비용/슬리피지/체결실패 모델 필수 | `src/analysis/backtest_cost_guard.py`, `src/analysis/backtest_pipeline.py` | ✅ 완료 |
|
||||
| REQ-V2-008 | Kill Switch 실행 순서 (Block→Cancel→Refresh→Reduce→Snapshot) | `src/core/kill_switch.py` | ⚠️ 부분 (`#377`) |
|
||||
| REQ-V2-008 | Kill Switch 실행 순서 (Block→Cancel→Refresh(retry)→Reduce→Snapshot) | `src/core/kill_switch.py` | ✅ 완료 |
|
||||
|
||||
### 1.3 v3 구현 상태: 부분 완료 (2026-03-02 기준)
|
||||
|
||||
|
||||
@@ -3,13 +3,14 @@
|
||||
Order is fixed:
|
||||
1) block new orders
|
||||
2) cancel pending orders
|
||||
3) refresh order state
|
||||
3) refresh order state (up to 3 attempts in total, with exponential backoff between attempts)
|
||||
4) reduce risk
|
||||
5) snapshot and notify
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass, field
|
||||
@@ -34,16 +35,55 @@ class KillSwitchOrchestrator:
|
||||
report: KillSwitchReport,
|
||||
name: str,
|
||||
fn: StepCallable | None,
|
||||
) -> None:
|
||||
) -> bool:
|
||||
report.steps.append(name)
|
||||
if fn is None:
|
||||
return
|
||||
return True
|
||||
try:
|
||||
result = fn()
|
||||
if inspect.isawaitable(result):
|
||||
await result
|
||||
if result is False:
|
||||
raise RuntimeError("step returned False")
|
||||
return True
|
||||
except Exception as exc: # pragma: no cover - intentionally resilient
|
||||
report.errors.append(f"{name}: {exc}")
|
||||
return False
|
||||
|
||||
async def _run_refresh_with_retry(
|
||||
self,
|
||||
report: KillSwitchReport,
|
||||
fn: StepCallable | None,
|
||||
*,
|
||||
max_attempts: int,
|
||||
base_delay_sec: float,
|
||||
) -> None:
|
||||
report.steps.append("refresh_order_state")
|
||||
if fn is None:
|
||||
return
|
||||
|
||||
attempts = max(1, max_attempts)
|
||||
delay = max(0.0, base_delay_sec)
|
||||
last_exc: Exception | None = None
|
||||
for attempt in range(1, attempts + 1):
|
||||
try:
|
||||
result = fn()
|
||||
if inspect.isawaitable(result):
|
||||
await result
|
||||
if result is False:
|
||||
raise RuntimeError("step returned False")
|
||||
return
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
if attempt >= attempts:
|
||||
break
|
||||
if delay > 0:
|
||||
await asyncio.sleep(delay * (2 ** (attempt - 1)))
|
||||
if last_exc is not None:
|
||||
report.errors.append(
|
||||
"refresh_order_state: failed after "
|
||||
f"{attempts} attempts ({last_exc})"
|
||||
)
|
||||
|
||||
async def trigger(
|
||||
self,
|
||||
@@ -54,6 +94,8 @@ class KillSwitchOrchestrator:
|
||||
reduce_risk: StepCallable | None = None,
|
||||
snapshot_state: StepCallable | None = None,
|
||||
notify: StepCallable | None = None,
|
||||
refresh_retry_attempts: int = 3,
|
||||
refresh_retry_base_delay_sec: float = 1.0,
|
||||
) -> KillSwitchReport:
|
||||
report = KillSwitchReport(reason=reason)
|
||||
|
||||
@@ -61,7 +103,12 @@ class KillSwitchOrchestrator:
|
||||
report.steps.append("block_new_orders")
|
||||
|
||||
await self._run_step(report, "cancel_pending_orders", cancel_pending_orders)
|
||||
await self._run_step(report, "refresh_order_state", refresh_order_state)
|
||||
await self._run_refresh_with_retry(
|
||||
report,
|
||||
refresh_order_state,
|
||||
max_attempts=refresh_retry_attempts,
|
||||
base_delay_sec=refresh_retry_base_delay_sec,
|
||||
)
|
||||
await self._run_step(report, "reduce_risk", reduce_risk)
|
||||
await self._run_step(report, "snapshot_state", snapshot_state)
|
||||
await self._run_step(report, "notify", notify)
|
||||
|
||||
12
src/main.py
12
src/main.py
@@ -1375,7 +1375,10 @@ async def _cancel_pending_orders_for_kill_switch(
|
||||
)
|
||||
|
||||
if failures:
|
||||
raise RuntimeError("; ".join(failures[:3]))
|
||||
summary = "; ".join(failures[:3])
|
||||
if len(failures) > 3:
|
||||
summary = f"{summary} (+{len(failures) - 3} more)"
|
||||
raise RuntimeError(summary)
|
||||
|
||||
|
||||
async def _refresh_order_state_for_kill_switch(
|
||||
@@ -1384,6 +1387,7 @@ async def _refresh_order_state_for_kill_switch(
|
||||
overseas_broker: OverseasBroker,
|
||||
markets: list[MarketInfo],
|
||||
) -> None:
|
||||
failures: list[str] = []
|
||||
seen_overseas: set[str] = set()
|
||||
for market in markets:
|
||||
try:
|
||||
@@ -1399,6 +1403,12 @@ async def _refresh_order_state_for_kill_switch(
|
||||
market.exchange_code,
|
||||
exc,
|
||||
)
|
||||
failures.append(f"{market.code}/{market.exchange_code}: {exc}")
|
||||
if failures:
|
||||
summary = "; ".join(failures[:3])
|
||||
if len(failures) > 3:
|
||||
summary = f"{summary} (+{len(failures) - 3} more)"
|
||||
raise RuntimeError(summary)
|
||||
|
||||
|
||||
def _reduce_risk_for_kill_switch() -> None:
|
||||
|
||||
@@ -53,3 +53,52 @@ async def test_kill_switch_collects_step_errors() -> None:
|
||||
|
||||
report = await ks.trigger(reason="test", cancel_pending_orders=_boom)
|
||||
assert any(err.startswith("cancel_pending_orders:") for err in report.errors)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_kill_switch_refresh_retries_then_succeeds() -> None:
    """A refresh that fails twice and then succeeds leaves no error behind."""
    attempts_seen: list[int] = []

    def _flaky_refresh() -> None:
        # Record the attempt first so the count includes failing calls.
        attempts_seen.append(len(attempts_seen) + 1)
        if attempts_seen[-1] < 3:
            raise RuntimeError("temporary refresh failure")

    orchestrator = KillSwitchOrchestrator()
    report = await orchestrator.trigger(
        reason="test",
        refresh_order_state=_flaky_refresh,
        refresh_retry_attempts=3,
        refresh_retry_base_delay_sec=0.0,
    )

    assert len(attempts_seen) == 3
    assert report.errors == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_kill_switch_refresh_retry_exhausted_records_error_and_continues() -> None:
    """Exhausted refresh retries are reported and later steps still run."""
    executed: list[str] = []

    def _refresh_fail() -> None:
        raise RuntimeError("persistent refresh failure")

    def _record(step: str):
        # Build a step callable that only notes that it was invoked.
        def _step() -> None:
            executed.append(step)

        return _step

    orchestrator = KillSwitchOrchestrator()
    report = await orchestrator.trigger(
        reason="test",
        refresh_order_state=_refresh_fail,
        reduce_risk=_record("reduce"),
        snapshot_state=_record("snapshot"),
        refresh_retry_attempts=2,
        refresh_retry_base_delay_sec=0.0,
    )

    expected_prefix = "refresh_order_state: failed after 2 attempts"
    matching = [err for err in report.errors if err.startswith(expected_prefix)]
    assert matching
    assert executed == ["reduce", "snapshot"]
|
||||
|
||||
@@ -7154,3 +7154,27 @@ async def test_trigger_emergency_kill_switch_records_cancel_failure() -> None:
|
||||
)
|
||||
|
||||
assert any(err.startswith("cancel_pending_orders:") for err in report.errors)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_refresh_order_state_failure_summary_includes_more_count() -> None:
    """The failure summary ends with "(+1 more)" when four markets fail."""

    def _make_market(code: str, exchange: str) -> MagicMock:
        # Only the attributes set here are configured explicitly.
        stub = MagicMock()
        stub.code = code
        stub.exchange_code = exchange
        stub.is_domestic = code == "KR"
        return stub

    market_specs = (
        ("KR", "KRX"),
        ("US_PRE", "NASD"),
        ("US_DAY", "NYSE"),
        ("JP", "TKSE"),
    )
    markets = [_make_market(code, exchange) for code, exchange in market_specs]

    broker = MagicMock()
    broker.get_balance = AsyncMock(side_effect=RuntimeError("domestic down"))
    overseas_broker = MagicMock()
    overseas_broker.get_overseas_balance = AsyncMock(
        side_effect=RuntimeError("overseas down")
    )

    with pytest.raises(RuntimeError, match=r"\(\+1 more\)$") as exc_info:
        await main_module._refresh_order_state_for_kill_switch(
            broker=broker,
            overseas_broker=overseas_broker,
            markets=markets,
        )

    message = str(exc_info.value)
    assert "KR/KRX" in message
|
||||
|
||||
@@ -121,3 +121,19 @@
|
||||
- next_ticket: #369
|
||||
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||
- risks_or_notes: v2 사양 기준으로 model_exit_signal을 직접 청산 트리거가 아닌 보조 트리거로 정합화하고 테스트/문서를 동기화한다.
|
||||
|
||||
### 2026-03-02 | session=codex-v3-stream-next-ticket-377
|
||||
- branch: feature/v3-session-policy-stream
|
||||
- docs_checked: docs/workflow.md, docs/commands.md, docs/agent-constraints.md
|
||||
- open_issues_reviewed: #377, #370, #371, #375, #376, #381
|
||||
- next_ticket: #377
|
||||
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||
- risks_or_notes: kill switch refresh 재시도 정책(횟수/간격/중단조건)을 코드/테스트/요구사항 원장/감사 문서에 동시 반영한다.
|
||||
|
||||
### 2026-03-02 | session=codex-issue377-start
|
||||
- branch: feature/issue-377-kill-switch-refresh-retry
|
||||
- docs_checked: docs/workflow.md, docs/commands.md, docs/agent-constraints.md
|
||||
- open_issues_reviewed: #377
|
||||
- next_ticket: #377
|
||||
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||
- risks_or_notes: refresh 단계를 최대 3회(초기+재시도2), 실패 시 지수 백오프로 재시도하고 성공 시 즉시 중단, 소진 시 오류를 기록한 뒤 다음 단계를 계속 수행한다.
|
||||
|
||||
Reference in New Issue
Block a user