analysis: apply execution-adjusted cost model in v2 backtest pipeline (#368)
This commit is contained in:
@@ -35,7 +35,7 @@ Updated: 2026-03-02
|
||||
| REQ-V2-004 | 4중 청산 로직 (Hard/BE/ATR Trailing/Model) | `src/strategy/exit_rules.py` | ⚠️ 부분 (`#369`) |
|
||||
| REQ-V2-005 | Triple Barrier 라벨링 | `src/analysis/triple_barrier.py` | ✅ 완료 |
|
||||
| REQ-V2-006 | Walk-Forward + Purge/Embargo 검증 | `src/analysis/walk_forward_split.py` | ✅ 완료 |
|
||||
| REQ-V2-007 | 비용/슬리피지/체결실패 모델 필수 | `src/analysis/backtest_cost_guard.py` | ⚠️ 부분 (`#368`) |
|
||||
| REQ-V2-007 | 비용/슬리피지/체결실패 모델 필수 | `src/analysis/backtest_cost_guard.py`, `src/analysis/backtest_pipeline.py` | ✅ 완료 |
|
||||
| REQ-V2-008 | Kill Switch 실행 순서 (Block→Cancel→Refresh→Reduce→Snapshot) | `src/core/kill_switch.py` | ⚠️ 부분 (`#377`) |
|
||||
|
||||
### 1.3 v3 구현 상태: 부분 완료 (2026-03-02 기준)
|
||||
|
||||
@@ -11,6 +11,7 @@ class BacktestCostModel:
|
||||
commission_bps: float | None = None
|
||||
slippage_bps_by_session: dict[str, float] | None = None
|
||||
failure_rate_by_session: dict[str, float] | None = None
|
||||
partial_fill_rate_by_session: dict[str, float] | None = None
|
||||
unfavorable_fill_required: bool = True
|
||||
|
||||
|
||||
@@ -31,6 +32,7 @@ def validate_backtest_cost_model(
|
||||
|
||||
slippage = model.slippage_bps_by_session or {}
|
||||
failure = model.failure_rate_by_session or {}
|
||||
partial = model.partial_fill_rate_by_session or {}
|
||||
|
||||
missing_slippage = [s for s in required_sessions if s not in slippage]
|
||||
if missing_slippage:
|
||||
@@ -43,6 +45,12 @@ def validate_backtest_cost_model(
|
||||
raise ValueError(
|
||||
f"missing failure_rate_by_session for sessions: {', '.join(missing_failure)}"
|
||||
)
|
||||
missing_partial = [s for s in required_sessions if s not in partial]
|
||||
if missing_partial:
|
||||
raise ValueError(
|
||||
"missing partial_fill_rate_by_session for sessions: "
|
||||
f"{', '.join(missing_partial)}"
|
||||
)
|
||||
|
||||
for sess, bps in slippage.items():
|
||||
if not math.isfinite(bps) or bps < 0:
|
||||
@@ -50,3 +58,6 @@ def validate_backtest_cost_model(
|
||||
for sess, rate in failure.items():
|
||||
if not math.isfinite(rate) or rate < 0 or rate > 1:
|
||||
raise ValueError(f"failure rate must be within [0,1] for session={sess}")
|
||||
for sess, rate in partial.items():
|
||||
if not math.isfinite(rate) or rate < 0 or rate > 1:
|
||||
raise ValueError(f"partial fill rate must be within [0,1] for session={sess}")
|
||||
|
||||
@@ -13,6 +13,11 @@ from statistics import mean
|
||||
from typing import Literal, cast
|
||||
|
||||
from src.analysis.backtest_cost_guard import BacktestCostModel, validate_backtest_cost_model
|
||||
from src.analysis.backtest_execution_model import (
|
||||
BacktestExecutionModel,
|
||||
ExecutionAssumptions,
|
||||
ExecutionRequest,
|
||||
)
|
||||
from src.analysis.triple_barrier import TripleBarrierSpec, label_with_triple_barrier
|
||||
from src.analysis.walk_forward_split import WalkForwardFold, generate_walk_forward_splits
|
||||
|
||||
@@ -50,6 +55,10 @@ class BacktestFoldResult:
|
||||
train_label_distribution: dict[int, int]
|
||||
test_label_distribution: dict[int, int]
|
||||
baseline_scores: list[BaselineScore]
|
||||
execution_adjusted_avg_return_bps: float
|
||||
execution_adjusted_trade_count: int
|
||||
execution_rejected_count: int
|
||||
execution_partial_count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -84,6 +93,14 @@ def run_v2_backtest_pipeline(
|
||||
else sorted({bar.session_id for bar in bars})
|
||||
)
|
||||
validate_backtest_cost_model(model=cost_model, required_sessions=resolved_sessions)
|
||||
execution_model = BacktestExecutionModel(
|
||||
ExecutionAssumptions(
|
||||
slippage_bps_by_session=cost_model.slippage_bps_by_session or {},
|
||||
failure_rate_by_session=cost_model.failure_rate_by_session or {},
|
||||
partial_fill_rate_by_session=cost_model.partial_fill_rate_by_session or {},
|
||||
seed=0,
|
||||
)
|
||||
)
|
||||
|
||||
highs = [float(bar.high) for bar in bars]
|
||||
lows = [float(bar.low) for bar in bars]
|
||||
@@ -131,6 +148,26 @@ def run_v2_backtest_pipeline(
|
||||
test_labels = [ordered_labels[i] for i in fold.test_indices]
|
||||
if not test_labels:
|
||||
continue
|
||||
execution_returns_bps: list[float] = []
|
||||
execution_rejected = 0
|
||||
execution_partial = 0
|
||||
for rel_idx in fold.test_indices:
|
||||
entry_bar_index = normalized_entries[rel_idx]
|
||||
bar = bars[entry_bar_index]
|
||||
trade = _simulate_execution_adjusted_return_bps(
|
||||
execution_model=execution_model,
|
||||
bar=bar,
|
||||
label=ordered_labels[rel_idx],
|
||||
side=side,
|
||||
spec=triple_barrier_spec,
|
||||
commission_bps=float(cost_model.commission_bps or 0.0),
|
||||
)
|
||||
if trade["status"] == "REJECTED":
|
||||
execution_rejected += 1
|
||||
continue
|
||||
execution_returns_bps.append(float(trade["return_bps"]))
|
||||
if trade["status"] == "PARTIAL":
|
||||
execution_partial += 1
|
||||
fold_results.append(
|
||||
BacktestFoldResult(
|
||||
fold_index=fold_idx,
|
||||
@@ -146,6 +183,12 @@ def run_v2_backtest_pipeline(
|
||||
accuracy=_score_constant(_m1_pred(train_labels), test_labels),
|
||||
),
|
||||
],
|
||||
execution_adjusted_avg_return_bps=(
|
||||
mean(execution_returns_bps) if execution_returns_bps else 0.0
|
||||
),
|
||||
execution_adjusted_trade_count=len(execution_returns_bps),
|
||||
execution_rejected_count=execution_rejected,
|
||||
execution_partial_count=execution_partial,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -198,3 +241,58 @@ def _build_run_id(*, n_entries: int, n_folds: int, sessions: Sequence[str]) -> s
|
||||
def fold_has_leakage(fold: WalkForwardFold) -> bool:
    """Report whether a walk-forward fold reuses an index in train and test.

    Meant for tests and verification: the result is True when at least one
    index appears in both ``fold.train_indices`` and ``fold.test_indices``,
    and False otherwise (including when either side is empty).
    """
    train_index_set = set(fold.train_indices)
    return not train_index_set.isdisjoint(fold.test_indices)
|
||||
|
||||
|
||||
def _simulate_execution_adjusted_return_bps(
    *,
    execution_model: BacktestExecutionModel,
    bar: BacktestBar,
    label: int,
    side: int,
    spec: TripleBarrierSpec,
    commission_bps: float,
) -> dict[str, float | str]:
    """Simulate one entry/exit round trip and return its net return in bps.

    The gross outcome comes from the triple-barrier label: ``1`` earns
    ``spec.take_profit_pct``, ``-1`` loses ``spec.stop_loss_pct``, and any
    other label is treated as flat. Both legs go through
    ``execution_model.simulate`` with ``bar.session_id`` and a reference
    price derived from ``bar.close``.

    Args:
        execution_model: Model whose ``simulate`` yields a fill per request.
        bar: Entry bar; supplies the session id and the close price.
        label: Triple-barrier label for this entry.
        side: ``1`` opens with a BUY and closes with a SELL; any other value
            opens with a SELL and closes with a BUY.
        spec: Supplies ``take_profit_pct`` and ``stop_loss_pct``.
        commission_bps: Per-leg commission in bps; it is charged twice.

    Returns:
        A dict with ``"status"`` (``"REJECTED"``, ``"PARTIAL"`` or
        ``"FILLED"``) and ``"return_bps"``. A rejection on either leg yields
        ``"REJECTED"`` with a return of ``0.0``. Otherwise the gross return
        is scaled by the matched fill fraction, and slippage of both legs
        plus commission are subtracted in full.
    """
    order_qty = 100
    is_long = side == 1
    close_price = float(bar.close)

    entry_fill = execution_model.simulate(
        ExecutionRequest(
            side="BUY" if is_long else "SELL",
            session_id=bar.session_id,
            qty=order_qty,
            reference_price=close_price,
        )
    )
    if entry_fill.status == "REJECTED":
        return {"status": "REJECTED", "return_bps": 0.0}

    # Label-implied gross move, signed from the trade's point of view.
    gross_bps = 0.0
    if label == 1:
        gross_bps = spec.take_profit_pct * 10000.0
    elif label == -1:
        gross_bps = -spec.stop_loss_pct * 10000.0

    # A long exits above the entry close on a gain; a short exits below it.
    price_move = gross_bps / 10000.0
    price_factor = (1.0 + price_move) if is_long else (1.0 - price_move)

    # The exit leg can only close what the entry leg actually filled.
    exit_fill = execution_model.simulate(
        ExecutionRequest(
            side="SELL" if is_long else "BUY",
            session_id=bar.session_id,
            qty=entry_fill.filled_qty,
            reference_price=max(0.01, close_price * price_factor),
        )
    )
    if exit_fill.status == "REJECTED":
        return {"status": "REJECTED", "return_bps": 0.0}

    matched_qty = min(entry_fill.filled_qty, exit_fill.filled_qty)
    filled_fraction = matched_qty / order_qty

    slippage_cost_bps = float(entry_fill.slippage_bps) + float(exit_fill.slippage_bps)
    total_cost_bps = slippage_cost_bps + (2.0 * float(commission_bps))
    net_bps = (gross_bps * filled_fraction) - total_cost_bps

    outcome = "PARTIAL" if "PARTIAL" in (entry_fill.status, exit_fill.status) else "FILLED"
    return {"status": outcome, "return_bps": net_bps}
|
||||
|
||||
@@ -10,6 +10,7 @@ def test_valid_backtest_cost_model_passes() -> None:
|
||||
commission_bps=5.0,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0, "US_PRE": 50.0},
|
||||
failure_rate_by_session={"KRX_REG": 0.01, "US_PRE": 0.08},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1, "US_PRE": 0.2},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
validate_backtest_cost_model(model=model, required_sessions=["KRX_REG", "US_PRE"])
|
||||
@@ -20,6 +21,7 @@ def test_missing_required_slippage_session_raises() -> None:
|
||||
commission_bps=5.0,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0},
|
||||
failure_rate_by_session={"KRX_REG": 0.01, "US_PRE": 0.08},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1, "US_PRE": 0.2},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
with pytest.raises(ValueError, match="missing slippage_bps_by_session.*US_PRE"):
|
||||
@@ -31,6 +33,7 @@ def test_missing_required_failure_rate_session_raises() -> None:
|
||||
commission_bps=5.0,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0, "US_PRE": 50.0},
|
||||
failure_rate_by_session={"KRX_REG": 0.01},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1, "US_PRE": 0.2},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
with pytest.raises(ValueError, match="missing failure_rate_by_session.*US_PRE"):
|
||||
@@ -42,6 +45,7 @@ def test_invalid_failure_rate_range_raises() -> None:
|
||||
commission_bps=5.0,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0},
|
||||
failure_rate_by_session={"KRX_REG": 1.2},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
with pytest.raises(ValueError, match="failure rate must be within"):
|
||||
@@ -53,6 +57,7 @@ def test_unfavorable_fill_requirement_cannot_be_disabled() -> None:
|
||||
commission_bps=5.0,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0},
|
||||
failure_rate_by_session={"KRX_REG": 0.02},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1},
|
||||
unfavorable_fill_required=False,
|
||||
)
|
||||
with pytest.raises(ValueError, match="unfavorable_fill_required must be True"):
|
||||
@@ -65,6 +70,7 @@ def test_non_finite_commission_rejected(bad_commission: float) -> None:
|
||||
commission_bps=bad_commission,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0},
|
||||
failure_rate_by_session={"KRX_REG": 0.02},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
with pytest.raises(ValueError, match="commission_bps"):
|
||||
@@ -77,7 +83,33 @@ def test_non_finite_slippage_rejected(bad_slippage: float) -> None:
|
||||
commission_bps=5.0,
|
||||
slippage_bps_by_session={"KRX_REG": bad_slippage},
|
||||
failure_rate_by_session={"KRX_REG": 0.02},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
with pytest.raises(ValueError, match="slippage bps"):
|
||||
validate_backtest_cost_model(model=model, required_sessions=["KRX_REG"])
|
||||
|
||||
|
||||
def test_missing_required_partial_fill_session_raises() -> None:
    """Validation fails when a required session has no partial-fill rate."""
    required_sessions = ["KRX_REG", "US_PRE"]
    # US_PRE is deliberately absent from partial_fill_rate_by_session only;
    # slippage and failure rates cover both sessions.
    incomplete_model = BacktestCostModel(
        commission_bps=5.0,
        slippage_bps_by_session={"KRX_REG": 10.0, "US_PRE": 50.0},
        failure_rate_by_session={"KRX_REG": 0.01, "US_PRE": 0.08},
        partial_fill_rate_by_session={"KRX_REG": 0.1},
        unfavorable_fill_required=True,
    )
    expected_message = "missing partial_fill_rate_by_session.*US_PRE"

    with pytest.raises(ValueError, match=expected_message):
        validate_backtest_cost_model(
            model=incomplete_model,
            required_sessions=required_sessions,
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bad_rate", [-0.1, 1.1, float("nan")])
def test_invalid_partial_fill_rate_range_raises(bad_rate: float) -> None:
    """Partial-fill rates below 0, above 1, or NaN are rejected by validation."""
    session = "KRX_REG"
    # Only the partial-fill rate is invalid; every other field is well-formed.
    invalid_model = BacktestCostModel(
        commission_bps=5.0,
        slippage_bps_by_session={session: 10.0},
        failure_rate_by_session={session: 0.02},
        partial_fill_rate_by_session={session: bad_rate},
        unfavorable_fill_required=True,
    )
    expected_message = "partial fill rate must be within"

    with pytest.raises(ValueError, match=expected_message):
        validate_backtest_cost_model(model=invalid_model, required_sessions=[session])
|
||||
|
||||
@@ -35,6 +35,7 @@ def _cost_model() -> BacktestCostModel:
|
||||
commission_bps=3.0,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0, "US_PRE": 50.0},
|
||||
failure_rate_by_session={"KRX_REG": 0.01, "US_PRE": 0.08},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1, "US_PRE": 0.2},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
|
||||
@@ -71,6 +72,9 @@ def test_pipeline_happy_path_returns_fold_and_artifact_contract() -> None:
|
||||
assert names == {"B0", "B1", "M1"}
|
||||
for score in fold.baseline_scores:
|
||||
assert 0.0 <= score.accuracy <= 1.0
|
||||
assert fold.execution_adjusted_trade_count >= 0
|
||||
assert fold.execution_rejected_count >= 0
|
||||
assert fold.execution_partial_count >= 0
|
||||
|
||||
|
||||
def test_pipeline_cost_guard_fail_fast() -> None:
|
||||
@@ -78,6 +82,7 @@ def test_pipeline_cost_guard_fail_fast() -> None:
|
||||
commission_bps=3.0,
|
||||
slippage_bps_by_session={"KRX_REG": 10.0},
|
||||
failure_rate_by_session={"KRX_REG": 0.01},
|
||||
partial_fill_rate_by_session={"KRX_REG": 0.1},
|
||||
unfavorable_fill_required=True,
|
||||
)
|
||||
try:
|
||||
@@ -166,3 +171,56 @@ def test_pipeline_rejects_minutes_spec_when_timestamp_missing() -> None:
|
||||
assert "BacktestBar.timestamp is required" in str(exc)
|
||||
else:
|
||||
raise AssertionError("expected timestamp validation error")
|
||||
|
||||
|
||||
def test_pipeline_execution_adjusted_returns_reflect_cost_and_fill_assumptions() -> None:
    """Harsher cost/fill assumptions must lower execution-adjusted results.

    The same bars, entries, barrier spec and walk-forward config are run
    twice: once with a frictionless cost model and once with a conservative
    one. The conservative run must report a lower mean execution-adjusted
    return and no more executed trades than the optimistic run.
    """
    base_cfg = dict(
        bars=_bars(),
        entry_indices=[0, 1, 2, 3, 4, 5, 6, 7],
        side=1,
        triple_barrier_spec=TripleBarrierSpec(
            take_profit_pct=0.02,
            stop_loss_pct=0.01,
            max_holding_minutes=3,
        ),
        walk_forward=WalkForwardConfig(
            train_size=4,
            test_size=2,
            step_size=2,
            purge_size=1,
            embargo_size=1,
            min_train_size=3,
        ),
    )

    optimistic = BacktestCostModel(
        commission_bps=0.0,
        slippage_bps_by_session={"KRX_REG": 0.0, "US_PRE": 0.0},
        failure_rate_by_session={"KRX_REG": 0.0, "US_PRE": 0.0},
        partial_fill_rate_by_session={"KRX_REG": 0.0, "US_PRE": 0.0},
        unfavorable_fill_required=True,
    )
    conservative = BacktestCostModel(
        commission_bps=10.0,
        slippage_bps_by_session={"KRX_REG": 20.0, "US_PRE": 60.0},
        failure_rate_by_session={"KRX_REG": 0.2, "US_PRE": 0.4},
        partial_fill_rate_by_session={"KRX_REG": 0.5, "US_PRE": 0.7},
        unfavorable_fill_required=True,
    )

    opt_out = run_v2_backtest_pipeline(cost_model=optimistic, **base_cfg)
    cons_out = run_v2_backtest_pipeline(cost_model=conservative, **base_cfg)

    # Fail with a readable message instead of a ZeroDivisionError below.
    assert opt_out.folds, "optimistic run produced no folds"
    assert cons_out.folds, "conservative run produced no folds"

    opt_avg = sum(
        f.execution_adjusted_avg_return_bps for f in opt_out.folds
    ) / len(opt_out.folds)
    cons_avg = sum(
        f.execution_adjusted_avg_return_bps for f in cons_out.folds
    ) / len(cons_out.folds)
    assert cons_avg < opt_avg

    opt_trades = sum(f.execution_adjusted_trade_count for f in opt_out.folds)
    opt_rejected = sum(f.execution_rejected_count for f in opt_out.folds)
    cons_trades = sum(f.execution_adjusted_trade_count for f in cons_out.folds)
    cons_rejected = sum(f.execution_rejected_count for f in cons_out.folds)
    assert cons_trades <= opt_trades

    # Each test entry ends up either as an executed trade or as a rejection,
    # and both runs share the same folds, so the cost model may only shift
    # entries between the two buckets; it must never create or drop any.
    assert cons_trades + cons_rejected == opt_trades + opt_rejected
|
||||
|
||||
@@ -89,3 +89,19 @@
|
||||
- next_ticket: #316
|
||||
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||
- risks_or_notes: 모니터 판정을 liveness 중심에서 policy invariant(FORBIDDEN) 중심으로 전환
|
||||
|
||||
### 2026-03-01 | session=codex-v3-stream-next-ticket
|
||||
- branch: feature/v3-session-policy-stream
|
||||
- docs_checked: docs/workflow.md, docs/commands.md, docs/agent-constraints.md
|
||||
- open_issues_reviewed: #368, #369, #370, #371, #374, #375, #376, #377, #381
|
||||
- next_ticket: #368
|
||||
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||
- risks_or_notes: 비블로킹 소견은 합당성(정확성/안정성/유지보수성) 기준으로 반영하고, 미반영 시 근거를 코멘트로 남긴다.
|
||||
|
||||
### 2026-03-01 | session=codex-issue368-start
|
||||
- branch: feature/issue-368-backtest-cost-execution
|
||||
- docs_checked: docs/workflow.md, docs/commands.md, docs/agent-constraints.md
|
||||
- open_issues_reviewed: #368
|
||||
- next_ticket: #368
|
||||
- process_gate_checked: process_ticket=#306,#308 merged_to_feature_branch=yes
|
||||
- risks_or_notes: TASK-V2-012 구현 갭 보완을 위해 cost guard + execution-adjusted fold metric + 회귀 테스트를 함께 반영한다.
|
||||
|
||||
Reference in New Issue
Block a user