diff --git a/docs/ouroboros/80_implementation_audit.md b/docs/ouroboros/80_implementation_audit.md index 583d6eb..0624f14 100644 --- a/docs/ouroboros/80_implementation_audit.md +++ b/docs/ouroboros/80_implementation_audit.md @@ -35,7 +35,7 @@ Updated: 2026-03-02 | REQ-V2-004 | 4중 청산 로직 (Hard/BE/ATR Trailing/Model) | `src/strategy/exit_rules.py` | ⚠️ 부분 (`#369`) | | REQ-V2-005 | Triple Barrier 라벨링 | `src/analysis/triple_barrier.py` | ✅ 완료 | | REQ-V2-006 | Walk-Forward + Purge/Embargo 검증 | `src/analysis/walk_forward_split.py` | ✅ 완료 | -| REQ-V2-007 | 비용/슬리피지/체결실패 모델 필수 | `src/analysis/backtest_cost_guard.py` | ⚠️ 부분 (`#368`) | +| REQ-V2-007 | 비용/슬리피지/체결실패 모델 필수 | `src/analysis/backtest_cost_guard.py`, `src/analysis/backtest_pipeline.py` | ✅ 완료 | | REQ-V2-008 | Kill Switch 실행 순서 (Block→Cancel→Refresh→Reduce→Snapshot) | `src/core/kill_switch.py` | ⚠️ 부분 (`#377`) | ### 1.3 v3 구현 상태: 부분 완료 (2026-03-02 기준) diff --git a/src/analysis/backtest_cost_guard.py b/src/analysis/backtest_cost_guard.py index 97e1cd3..c7248c8 100644 --- a/src/analysis/backtest_cost_guard.py +++ b/src/analysis/backtest_cost_guard.py @@ -11,6 +11,7 @@ class BacktestCostModel: commission_bps: float | None = None slippage_bps_by_session: dict[str, float] | None = None failure_rate_by_session: dict[str, float] | None = None + partial_fill_rate_by_session: dict[str, float] | None = None unfavorable_fill_required: bool = True @@ -31,6 +32,7 @@ def validate_backtest_cost_model( slippage = model.slippage_bps_by_session or {} failure = model.failure_rate_by_session or {} + partial = model.partial_fill_rate_by_session or {} missing_slippage = [s for s in required_sessions if s not in slippage] if missing_slippage: @@ -43,6 +45,12 @@ def validate_backtest_cost_model( raise ValueError( f"missing failure_rate_by_session for sessions: {', '.join(missing_failure)}" ) + missing_partial = [s for s in required_sessions if s not in partial] + if missing_partial: + raise ValueError( + "missing 
partial_fill_rate_by_session for sessions: " + f"{', '.join(missing_partial)}" + ) for sess, bps in slippage.items(): if not math.isfinite(bps) or bps < 0: @@ -50,3 +58,6 @@ def validate_backtest_cost_model( for sess, rate in failure.items(): if not math.isfinite(rate) or rate < 0 or rate > 1: raise ValueError(f"failure rate must be within [0,1] for session={sess}") + for sess, rate in partial.items(): + if not math.isfinite(rate) or rate < 0 or rate > 1: + raise ValueError(f"partial fill rate must be within [0,1] for session={sess}") diff --git a/src/analysis/backtest_pipeline.py b/src/analysis/backtest_pipeline.py index 985e0e0..16a7734 100644 --- a/src/analysis/backtest_pipeline.py +++ b/src/analysis/backtest_pipeline.py @@ -13,6 +13,11 @@ from statistics import mean from typing import Literal, cast from src.analysis.backtest_cost_guard import BacktestCostModel, validate_backtest_cost_model +from src.analysis.backtest_execution_model import ( + BacktestExecutionModel, + ExecutionAssumptions, + ExecutionRequest, +) from src.analysis.triple_barrier import TripleBarrierSpec, label_with_triple_barrier from src.analysis.walk_forward_split import WalkForwardFold, generate_walk_forward_splits @@ -50,6 +55,10 @@ class BacktestFoldResult: train_label_distribution: dict[int, int] test_label_distribution: dict[int, int] baseline_scores: list[BaselineScore] + execution_adjusted_avg_return_bps: float + execution_adjusted_trade_count: int + execution_rejected_count: int + execution_partial_count: int @dataclass(frozen=True) @@ -84,6 +93,14 @@ def run_v2_backtest_pipeline( else sorted({bar.session_id for bar in bars}) ) validate_backtest_cost_model(model=cost_model, required_sessions=resolved_sessions) + execution_model = BacktestExecutionModel( + ExecutionAssumptions( + slippage_bps_by_session=cost_model.slippage_bps_by_session or {}, + failure_rate_by_session=cost_model.failure_rate_by_session or {}, + partial_fill_rate_by_session=cost_model.partial_fill_rate_by_session or 
def _simulate_execution_adjusted_return_bps(
    *,
    execution_model: BacktestExecutionModel,
    bar: BacktestBar,
    label: int,
    side: int,
    spec: TripleBarrierSpec,
    commission_bps: float,
) -> dict[str, float | str]:
    """Simulate one entry/exit round trip and report its net return in bps.

    The entry order is simulated at the bar's close. The exit order is
    simulated at a price implied by the triple-barrier label: ``label == 1``
    uses ``spec.take_profit_pct``, ``label == -1`` uses ``spec.stop_loss_pct``
    as a loss, and any other label is treated as a flat (0 bps) outcome.

    Args:
        execution_model: Model whose ``simulate`` is called once for the entry
            and, if the entry is not rejected, once for the exit.
        bar: Entry bar; supplies ``session_id`` and the ``close`` price.
        label: Triple-barrier label for this entry.
        side: ``1`` is handled as a long (BUY then SELL); any other value is
            handled as a short (SELL then BUY).
        spec: Barrier spec providing the take-profit / stop-loss percentages.
        commission_bps: Per-leg commission; charged twice (entry and exit).

    Returns:
        A dict with ``"status"`` (``"REJECTED"``, ``"PARTIAL"`` or ``"FILLED"``)
        and ``"return_bps"``. A rejection on either leg yields
        ``{"status": "REJECTED", "return_bps": 0.0}``.
    """
    intended_qty = 100
    is_long = side == 1
    entry_price = float(bar.close)

    entry_fill = execution_model.simulate(
        ExecutionRequest(
            side="BUY" if is_long else "SELL",
            session_id=bar.session_id,
            qty=intended_qty,
            reference_price=entry_price,
        )
    )
    if entry_fill.status == "REJECTED":
        return {"status": "REJECTED", "return_bps": 0.0}

    # Gross outcome of the position, expressed from the trader's point of view
    # (positive = profit) regardless of direction.
    if label == 1:
        gross_bps = spec.take_profit_pct * 10000.0
    elif label == -1:
        gross_bps = -spec.stop_loss_pct * 10000.0
    else:
        gross_bps = 0.0

    # A profitable long exits above the entry price; a profitable short exits
    # below it, hence the mirrored sign.
    price_move = gross_bps / 10000.0
    target_price = entry_price * ((1.0 + price_move) if is_long else (1.0 - price_move))

    exit_fill = execution_model.simulate(
        ExecutionRequest(
            side="SELL" if is_long else "BUY",
            session_id=bar.session_id,
            # Only what was actually filled on entry can be closed out.
            qty=entry_fill.filled_qty,
            # Floor the reference price so it never reaches zero or below.
            reference_price=max(0.01, target_price),
        )
    )
    if exit_fill.status == "REJECTED":
        return {"status": "REJECTED", "return_bps": 0.0}

    matched_qty = min(entry_fill.filled_qty, exit_fill.filled_qty)
    total_cost_bps = (
        float(entry_fill.slippage_bps)
        + float(exit_fill.slippage_bps)
        + (2.0 * float(commission_bps))
    )
    # Gross return is scaled by the share of the intended size that completed
    # the round trip; costs are deducted in full.
    net_bps = (gross_bps * (matched_qty / intended_qty)) - total_cost_bps

    any_partial = any(fill.status == "PARTIAL" for fill in (entry_fill, exit_fill))
    return {
        "status": "PARTIAL" if any_partial else "FILLED",
        "return_bps": net_bps,
    }
slippage_bps_by_session={"KRX_REG": 10.0}, failure_rate_by_session={"KRX_REG": 0.01, "US_PRE": 0.08}, + partial_fill_rate_by_session={"KRX_REG": 0.1, "US_PRE": 0.2}, unfavorable_fill_required=True, ) with pytest.raises(ValueError, match="missing slippage_bps_by_session.*US_PRE"): @@ -31,6 +33,7 @@ def test_missing_required_failure_rate_session_raises() -> None: commission_bps=5.0, slippage_bps_by_session={"KRX_REG": 10.0, "US_PRE": 50.0}, failure_rate_by_session={"KRX_REG": 0.01}, + partial_fill_rate_by_session={"KRX_REG": 0.1, "US_PRE": 0.2}, unfavorable_fill_required=True, ) with pytest.raises(ValueError, match="missing failure_rate_by_session.*US_PRE"): @@ -42,6 +45,7 @@ def test_invalid_failure_rate_range_raises() -> None: commission_bps=5.0, slippage_bps_by_session={"KRX_REG": 10.0}, failure_rate_by_session={"KRX_REG": 1.2}, + partial_fill_rate_by_session={"KRX_REG": 0.1}, unfavorable_fill_required=True, ) with pytest.raises(ValueError, match="failure rate must be within"): @@ -53,6 +57,7 @@ def test_unfavorable_fill_requirement_cannot_be_disabled() -> None: commission_bps=5.0, slippage_bps_by_session={"KRX_REG": 10.0}, failure_rate_by_session={"KRX_REG": 0.02}, + partial_fill_rate_by_session={"KRX_REG": 0.1}, unfavorable_fill_required=False, ) with pytest.raises(ValueError, match="unfavorable_fill_required must be True"): @@ -65,6 +70,7 @@ def test_non_finite_commission_rejected(bad_commission: float) -> None: commission_bps=bad_commission, slippage_bps_by_session={"KRX_REG": 10.0}, failure_rate_by_session={"KRX_REG": 0.02}, + partial_fill_rate_by_session={"KRX_REG": 0.1}, unfavorable_fill_required=True, ) with pytest.raises(ValueError, match="commission_bps"): @@ -77,7 +83,33 @@ def test_non_finite_slippage_rejected(bad_slippage: float) -> None: commission_bps=5.0, slippage_bps_by_session={"KRX_REG": bad_slippage}, failure_rate_by_session={"KRX_REG": 0.02}, + partial_fill_rate_by_session={"KRX_REG": 0.1}, unfavorable_fill_required=True, ) with 
def test_missing_required_partial_fill_session_raises() -> None:
    """A required session without a partial-fill rate must fail validation."""
    model = BacktestCostModel(
        commission_bps=5.0,
        slippage_bps_by_session={"KRX_REG": 10.0, "US_PRE": 50.0},
        failure_rate_by_session={"KRX_REG": 0.01, "US_PRE": 0.08},
        # US_PRE is deliberately absent here while still being required below.
        partial_fill_rate_by_session={"KRX_REG": 0.1},
        unfavorable_fill_required=True,
    )
    with pytest.raises(ValueError, match="missing partial_fill_rate_by_session.*US_PRE"):
        validate_backtest_cost_model(model=model, required_sessions=["KRX_REG", "US_PRE"])


@pytest.mark.parametrize(
    "bad_rate",
    [
        -0.1,  # below the [0, 1] range
        1.1,  # above the [0, 1] range
        float("nan"),  # non-finite
        float("inf"),  # non-finite; rejected by the validator's isfinite check
        float("-inf"),  # non-finite and negative
    ],
)
def test_invalid_partial_fill_rate_range_raises(bad_rate: float) -> None:
    """Partial-fill rates outside [0, 1] or non-finite must fail validation."""
    model = BacktestCostModel(
        commission_bps=5.0,
        slippage_bps_by_session={"KRX_REG": 10.0},
        failure_rate_by_session={"KRX_REG": 0.02},
        partial_fill_rate_by_session={"KRX_REG": bad_rate},
        unfavorable_fill_required=True,
    )
    with pytest.raises(ValueError, match="partial fill rate must be within"):
        validate_backtest_cost_model(model=model, required_sessions=["KRX_REG"])
def test_pipeline_execution_adjusted_returns_reflect_cost_and_fill_assumptions() -> None:
    """Harsher cost/fill assumptions must not improve execution-adjusted results.

    Runs the pipeline twice on identical inputs, once with a frictionless cost
    model and once with a conservative one, and compares the per-fold
    execution-adjusted metrics.
    """
    base_cfg = dict(
        bars=_bars(),
        entry_indices=[0, 1, 2, 3, 4, 5, 6, 7],
        side=1,
        triple_barrier_spec=TripleBarrierSpec(
            take_profit_pct=0.02,
            stop_loss_pct=0.01,
            max_holding_minutes=3,
        ),
        walk_forward=WalkForwardConfig(
            train_size=4,
            test_size=2,
            step_size=2,
            purge_size=1,
            embargo_size=1,
            min_train_size=3,
        ),
    )

    optimistic = BacktestCostModel(
        commission_bps=0.0,
        slippage_bps_by_session={"KRX_REG": 0.0, "US_PRE": 0.0},
        failure_rate_by_session={"KRX_REG": 0.0, "US_PRE": 0.0},
        partial_fill_rate_by_session={"KRX_REG": 0.0, "US_PRE": 0.0},
        unfavorable_fill_required=True,
    )
    conservative = BacktestCostModel(
        commission_bps=10.0,
        slippage_bps_by_session={"KRX_REG": 20.0, "US_PRE": 60.0},
        failure_rate_by_session={"KRX_REG": 0.2, "US_PRE": 0.4},
        partial_fill_rate_by_session={"KRX_REG": 0.5, "US_PRE": 0.7},
        unfavorable_fill_required=True,
    )

    opt_out = run_v2_backtest_pipeline(cost_model=optimistic, **base_cfg)
    cons_out = run_v2_backtest_pipeline(cost_model=conservative, **base_cfg)

    # Guard the averages below: without this an empty fold list would surface
    # as ZeroDivisionError instead of a readable assertion failure.
    assert len(opt_out.folds) > 0
    assert len(cons_out.folds) > 0

    opt_avg = sum(
        f.execution_adjusted_avg_return_bps for f in opt_out.folds
    ) / len(opt_out.folds)
    cons_avg = sum(
        f.execution_adjusted_avg_return_bps for f in cons_out.folds
    ) / len(cons_out.folds)
    assert cons_avg < opt_avg

    opt_trades = sum(f.execution_adjusted_trade_count for f in opt_out.folds)
    cons_trades = sum(f.execution_adjusted_trade_count for f in cons_out.folds)
    cons_rejected = sum(f.execution_rejected_count for f in cons_out.folds)
    assert cons_trades <= opt_trades
    assert cons_rejected >= 0

    # A partial fill is only counted for a trade whose return was recorded, so
    # the partial count can never exceed the trade count within a fold.
    for fold in (*opt_out.folds, *cons_out.folds):
        assert fold.execution_partial_count <= fold.execution_adjusted_trade_count