merge: feature/v3-session-policy-stream into main #399
@@ -1,6 +1,6 @@
|
|||||||
<!--
|
<!--
|
||||||
Doc-ID: DOC-REQ-001
|
Doc-ID: DOC-REQ-001
|
||||||
Version: 1.0.10
|
Version: 1.0.11
|
||||||
Status: active
|
Status: active
|
||||||
Owner: strategy
|
Owner: strategy
|
||||||
Updated: 2026-03-02
|
Updated: 2026-03-02
|
||||||
@@ -19,7 +19,7 @@ Updated: 2026-03-02
|
|||||||
- `REQ-V2-005`: 라벨링은 Triple Barrier(Upper/Lower/Time) 방식이어야 한다.
|
- `REQ-V2-005`: 라벨링은 Triple Barrier(Upper/Lower/Time) 방식이어야 한다.
|
||||||
- `REQ-V2-006`: 검증은 Walk-forward + Purge/Embargo를 강제한다.
|
- `REQ-V2-006`: 검증은 Walk-forward + Purge/Embargo를 강제한다.
|
||||||
- `REQ-V2-007`: 백테스트는 비용/슬리피지/체결실패를 반영하지 않으면 채택 불가다.
|
- `REQ-V2-007`: 백테스트는 비용/슬리피지/체결실패를 반영하지 않으면 채택 불가다.
|
||||||
- `REQ-V2-008`: Kill Switch는 신규주문차단 -> 미체결취소 -> 재조회 -> 리스크축소 -> 스냅샷 순서다.
|
- `REQ-V2-008`: Kill Switch는 신규주문차단 -> 미체결취소 -> 재조회(실패 시 최대 3회, 1s/2s backoff 재시도, 성공 시 즉시 중단) -> 리스크축소 -> 스냅샷 순서다.
|
||||||
|
|
||||||
## v3 핵심 요구사항
|
## v3 핵심 요구사항
|
||||||
|
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ Updated: 2026-03-02
|
|||||||
| REQ-V2-005 | Triple Barrier 라벨링 | `src/analysis/triple_barrier.py` | ✅ 완료 |
|
| REQ-V2-005 | Triple Barrier 라벨링 | `src/analysis/triple_barrier.py` | ✅ 완료 |
|
||||||
| REQ-V2-006 | Walk-Forward + Purge/Embargo 검증 | `src/analysis/walk_forward_split.py` | ✅ 완료 |
|
| REQ-V2-006 | Walk-Forward + Purge/Embargo 검증 | `src/analysis/walk_forward_split.py` | ✅ 완료 |
|
||||||
| REQ-V2-007 | 비용/슬리피지/체결실패 모델 필수 | `src/analysis/backtest_cost_guard.py`, `src/analysis/backtest_pipeline.py` | ✅ 완료 |
|
| REQ-V2-007 | 비용/슬리피지/체결실패 모델 필수 | `src/analysis/backtest_cost_guard.py`, `src/analysis/backtest_pipeline.py` | ✅ 완료 |
|
||||||
| REQ-V2-008 | Kill Switch 실행 순서 (Block→Cancel→Refresh→Reduce→Snapshot) | `src/core/kill_switch.py` | ⚠️ 부분 (`#377`) |
|
| REQ-V2-008 | Kill Switch 실행 순서 (Block→Cancel→Refresh(retry)→Reduce→Snapshot) | `src/core/kill_switch.py` | ✅ 완료 |
|
||||||
|
|
||||||
### 1.3 v3 구현 상태: 부분 완료 (2026-03-02 기준)
|
### 1.3 v3 구현 상태: 부분 완료 (2026-03-02 기준)
|
||||||
|
|
||||||
|
|||||||
@@ -3,13 +3,14 @@
|
|||||||
Order is fixed:
|
Order is fixed:
|
||||||
1) block new orders
|
1) block new orders
|
||||||
2) cancel pending orders
|
2) cancel pending orders
|
||||||
3) refresh order state
|
3) refresh order state (retry up to 3 attempts with exponential backoff)
|
||||||
4) reduce risk
|
4) reduce risk
|
||||||
5) snapshot and notify
|
5) snapshot and notify
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import inspect
|
import inspect
|
||||||
from collections.abc import Awaitable, Callable
|
from collections.abc import Awaitable, Callable
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
@@ -34,16 +35,55 @@ class KillSwitchOrchestrator:
|
|||||||
report: KillSwitchReport,
|
report: KillSwitchReport,
|
||||||
name: str,
|
name: str,
|
||||||
fn: StepCallable | None,
|
fn: StepCallable | None,
|
||||||
) -> None:
|
) -> bool:
|
||||||
report.steps.append(name)
|
report.steps.append(name)
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return
|
return True
|
||||||
try:
|
try:
|
||||||
result = fn()
|
result = fn()
|
||||||
if inspect.isawaitable(result):
|
if inspect.isawaitable(result):
|
||||||
await result
|
await result
|
||||||
|
if result is False:
|
||||||
|
raise RuntimeError("step returned False")
|
||||||
|
return True
|
||||||
except Exception as exc: # pragma: no cover - intentionally resilient
|
except Exception as exc: # pragma: no cover - intentionally resilient
|
||||||
report.errors.append(f"{name}: {exc}")
|
report.errors.append(f"{name}: {exc}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _run_refresh_with_retry(
    self,
    report: KillSwitchReport,
    fn: StepCallable | None,
    *,
    max_attempts: int,
    base_delay_sec: float,
) -> None:
    """Run the refresh step, retrying failed attempts with exponential backoff.

    The step name ``"refresh_order_state"`` is always appended to
    ``report.steps``. When ``fn`` is ``None`` nothing else happens.

    An attempt counts as failed when ``fn`` raises, or when its result
    (awaited first if ``fn`` returned an awaitable) is exactly ``False``.
    The first successful attempt ends the retries immediately. If every
    attempt fails, a single summary entry is appended to ``report.errors``
    and the method returns normally so the caller can continue.

    Args:
        report: Report object whose ``steps`` / ``errors`` lists are updated.
        fn: Sync or async callable performing the refresh, or ``None``.
        max_attempts: Total number of attempts; values below 1 are treated as 1.
        base_delay_sec: Delay before the first retry; it doubles on each
            subsequent retry. Negative values are treated as 0 (no sleeping).
    """
    report.steps.append("refresh_order_state")
    if fn is None:
        return

    attempts = max(1, max_attempts)
    delay = max(0.0, base_delay_sec)
    last_exc: Exception | None = None
    for attempt in range(1, attempts + 1):
        try:
            result = fn()
            if inspect.isawaitable(result):
                # Keep the awaited value: comparing the un-awaited awaitable
                # to False would never match, so an async step returning
                # False would be mistaken for success.
                result = await result
            if result is False:
                raise RuntimeError("step returned False")
            return
        except Exception as exc:
            last_exc = exc
            if attempt >= attempts:
                # Out of attempts: skip the pointless final sleep.
                break
            if delay > 0:
                # Backoff sequence: delay, 2*delay, 4*delay, ...
                await asyncio.sleep(delay * (2 ** (attempt - 1)))
    if last_exc is not None:
        report.errors.append(
            "refresh_order_state: failed after "
            f"{attempts} attempts ({last_exc})"
        )
|
||||||
|
|
||||||
async def trigger(
|
async def trigger(
|
||||||
self,
|
self,
|
||||||
@@ -54,6 +94,8 @@ class KillSwitchOrchestrator:
|
|||||||
reduce_risk: StepCallable | None = None,
|
reduce_risk: StepCallable | None = None,
|
||||||
snapshot_state: StepCallable | None = None,
|
snapshot_state: StepCallable | None = None,
|
||||||
notify: StepCallable | None = None,
|
notify: StepCallable | None = None,
|
||||||
|
refresh_retry_attempts: int = 3,
|
||||||
|
refresh_retry_base_delay_sec: float = 1.0,
|
||||||
) -> KillSwitchReport:
|
) -> KillSwitchReport:
|
||||||
report = KillSwitchReport(reason=reason)
|
report = KillSwitchReport(reason=reason)
|
||||||
|
|
||||||
@@ -61,7 +103,12 @@ class KillSwitchOrchestrator:
|
|||||||
report.steps.append("block_new_orders")
|
report.steps.append("block_new_orders")
|
||||||
|
|
||||||
await self._run_step(report, "cancel_pending_orders", cancel_pending_orders)
|
await self._run_step(report, "cancel_pending_orders", cancel_pending_orders)
|
||||||
await self._run_step(report, "refresh_order_state", refresh_order_state)
|
await self._run_refresh_with_retry(
|
||||||
|
report,
|
||||||
|
refresh_order_state,
|
||||||
|
max_attempts=refresh_retry_attempts,
|
||||||
|
base_delay_sec=refresh_retry_base_delay_sec,
|
||||||
|
)
|
||||||
await self._run_step(report, "reduce_risk", reduce_risk)
|
await self._run_step(report, "reduce_risk", reduce_risk)
|
||||||
await self._run_step(report, "snapshot_state", snapshot_state)
|
await self._run_step(report, "snapshot_state", snapshot_state)
|
||||||
await self._run_step(report, "notify", notify)
|
await self._run_step(report, "notify", notify)
|
||||||
|
|||||||
12
src/main.py
12
src/main.py
@@ -1375,7 +1375,10 @@ async def _cancel_pending_orders_for_kill_switch(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if failures:
|
if failures:
|
||||||
raise RuntimeError("; ".join(failures[:3]))
|
summary = "; ".join(failures[:3])
|
||||||
|
if len(failures) > 3:
|
||||||
|
summary = f"{summary} (+{len(failures) - 3} more)"
|
||||||
|
raise RuntimeError(summary)
|
||||||
|
|
||||||
|
|
||||||
async def _refresh_order_state_for_kill_switch(
|
async def _refresh_order_state_for_kill_switch(
|
||||||
@@ -1384,6 +1387,7 @@ async def _refresh_order_state_for_kill_switch(
|
|||||||
overseas_broker: OverseasBroker,
|
overseas_broker: OverseasBroker,
|
||||||
markets: list[MarketInfo],
|
markets: list[MarketInfo],
|
||||||
) -> None:
|
) -> None:
|
||||||
|
failures: list[str] = []
|
||||||
seen_overseas: set[str] = set()
|
seen_overseas: set[str] = set()
|
||||||
for market in markets:
|
for market in markets:
|
||||||
try:
|
try:
|
||||||
@@ -1399,6 +1403,12 @@ async def _refresh_order_state_for_kill_switch(
|
|||||||
market.exchange_code,
|
market.exchange_code,
|
||||||
exc,
|
exc,
|
||||||
)
|
)
|
||||||
|
failures.append(f"{market.code}/{market.exchange_code}: {exc}")
|
||||||
|
if failures:
|
||||||
|
summary = "; ".join(failures[:3])
|
||||||
|
if len(failures) > 3:
|
||||||
|
summary = f"{summary} (+{len(failures) - 3} more)"
|
||||||
|
raise RuntimeError(summary)
|
||||||
|
|
||||||
|
|
||||||
def _reduce_risk_for_kill_switch() -> None:
|
def _reduce_risk_for_kill_switch() -> None:
|
||||||
|
|||||||
@@ -53,3 +53,52 @@ async def test_kill_switch_collects_step_errors() -> None:
|
|||||||
|
|
||||||
report = await ks.trigger(reason="test", cancel_pending_orders=_boom)
|
report = await ks.trigger(reason="test", cancel_pending_orders=_boom)
|
||||||
assert any(err.startswith("cancel_pending_orders:") for err in report.errors)
|
assert any(err.startswith("cancel_pending_orders:") for err in report.errors)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_kill_switch_refresh_retries_then_succeeds() -> None:
    """A refresh that fails twice and succeeds on the third try records no error."""
    attempts_seen: list[int] = []

    def _flaky_refresh() -> None:
        # Count the attempt before deciding whether it fails.
        attempts_seen.append(len(attempts_seen) + 1)
        if attempts_seen[-1] < 3:
            raise RuntimeError("temporary refresh failure")

    orchestrator = KillSwitchOrchestrator()
    report = await orchestrator.trigger(
        reason="test",
        refresh_order_state=_flaky_refresh,
        refresh_retry_attempts=3,
        refresh_retry_base_delay_sec=0.0,
    )

    assert len(attempts_seen) == 3
    assert report.errors == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_kill_switch_refresh_retry_exhausted_records_error_and_continues() -> None:
    """Exhausted refresh retries are reported, and the later steps still run."""
    executed: list[str] = []

    def _refresh_fail() -> None:
        raise RuntimeError("persistent refresh failure")

    orchestrator = KillSwitchOrchestrator()
    report = await orchestrator.trigger(
        reason="test",
        refresh_order_state=_refresh_fail,
        reduce_risk=lambda: executed.append("reduce"),
        snapshot_state=lambda: executed.append("snapshot"),
        refresh_retry_attempts=2,
        refresh_retry_base_delay_sec=0.0,
    )

    exhausted = [
        err
        for err in report.errors
        if err.startswith("refresh_order_state: failed after 2 attempts")
    ]
    assert exhausted
    # Steps after the failed refresh must run, in order.
    assert executed == ["reduce", "snapshot"]
|
||||||
|
|||||||
@@ -7154,3 +7154,27 @@ async def test_trigger_emergency_kill_switch_records_cancel_failure() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert any(err.startswith("cancel_pending_orders:") for err in report.errors)
|
assert any(err.startswith("cancel_pending_orders:") for err in report.errors)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_refresh_order_state_failure_summary_includes_more_count() -> None:
    """With four failing markets the raised summary ends with a "(+1 more)" suffix."""
    market_specs = (
        ("KR", "KRX"),
        ("US_PRE", "NASD"),
        ("US_DAY", "NYSE"),
        ("JP", "TKSE"),
    )
    markets = [
        MagicMock(code=code, exchange_code=exchange, is_domestic=(code == "KR"))
        for code, exchange in market_specs
    ]

    # Both balance lookups are mocked to fail.
    broker = MagicMock(
        get_balance=AsyncMock(side_effect=RuntimeError("domestic down")),
    )
    overseas_broker = MagicMock(
        get_overseas_balance=AsyncMock(side_effect=RuntimeError("overseas down")),
    )

    with pytest.raises(RuntimeError, match=r"\(\+1 more\)$") as exc_info:
        await main_module._refresh_order_state_for_kill_switch(
            broker=broker,
            overseas_broker=overseas_broker,
            markets=markets,
        )

    message = str(exc_info.value)
    assert "KR/KRX" in message
|
||||||
|
|||||||
@@ -121,3 +121,19 @@
|
|||||||
- next_ticket: #369
|
- next_ticket: #369
|
||||||
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||||
- risks_or_notes: v2 사양 기준으로 model_exit_signal을 직접 청산 트리거가 아닌 보조 트리거로 정합화하고 테스트/문서를 동기화한다.
|
- risks_or_notes: v2 사양 기준으로 model_exit_signal을 직접 청산 트리거가 아닌 보조 트리거로 정합화하고 테스트/문서를 동기화한다.
|
||||||
|
|
||||||
|
### 2026-03-02 | session=codex-v3-stream-next-ticket-377
|
||||||
|
- branch: feature/v3-session-policy-stream
|
||||||
|
- docs_checked: docs/workflow.md, docs/commands.md, docs/agent-constraints.md
|
||||||
|
- open_issues_reviewed: #377, #370, #371, #375, #376, #381
|
||||||
|
- next_ticket: #377
|
||||||
|
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||||
|
- risks_or_notes: kill switch refresh 재시도 정책(횟수/간격/중단조건)을 코드/테스트/요구사항 원장/감사 문서에 동시 반영한다.
|
||||||
|
|
||||||
|
### 2026-03-02 | session=codex-issue377-start
|
||||||
|
- branch: feature/issue-377-kill-switch-refresh-retry
|
||||||
|
- docs_checked: docs/workflow.md, docs/commands.md, docs/agent-constraints.md
|
||||||
|
- open_issues_reviewed: #377
|
||||||
|
- next_ticket: #377
|
||||||
|
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||||
|
- risks_or_notes: refresh 단계를 최대 3회(초기+재시도2), 실패 시 지수 백오프로 재시도하고 성공 시 즉시 중단, 소진 시 오류를 기록한 뒤 다음 단계를 계속 수행한다.
|
||||||
|
|||||||
Reference in New Issue
Block a user