#!/usr/bin/env python3 """Validate PR body formatting to prevent escaped-newline artifacts.""" from __future__ import annotations import argparse import json import os import shutil import re import subprocess import sys from pathlib import Path HEADER_PATTERN = re.compile(r"^##\s+\S+", re.MULTILINE) LIST_ITEM_PATTERN = re.compile(r"^\s*(?:-|\*|\d+\.)\s+\S+", re.MULTILINE) FENCED_CODE_PATTERN = re.compile(r"```.*?```", re.DOTALL) INLINE_CODE_PATTERN = re.compile(r"`[^`]*`") REQ_ID_PATTERN = re.compile(r"\bREQ-[A-Z0-9-]+-\d{3}\b") TASK_ID_PATTERN = re.compile(r"\bTASK-[A-Z0-9-]+-\d{3}\b") TEST_ID_PATTERN = re.compile(r"\bTEST-[A-Z0-9-]+-\d{3}\b") def _strip_code_segments(text: str) -> str: without_fences = FENCED_CODE_PATTERN.sub("", text) return INLINE_CODE_PATTERN.sub("", without_fences) def resolve_tea_binary() -> str: tea_from_path = shutil.which("tea") if tea_from_path: return tea_from_path tea_home = Path.home() / "bin" / "tea" if tea_home.exists() and tea_home.is_file() and os.access(tea_home, os.X_OK): return str(tea_home) raise RuntimeError("tea binary not found (checked PATH and ~/bin/tea)") def validate_pr_body_text(text: str, *, check_governance: bool = True) -> list[str]: errors: list[str] = [] searchable = _strip_code_segments(text) if "\\n" in searchable: errors.append("body contains escaped newline sequence (\\n)") if text.count("```") % 2 != 0: errors.append("body has unbalanced fenced code blocks (``` count is odd)") if not HEADER_PATTERN.search(text): errors.append("body is missing markdown section headers (e.g. '## Summary')") if not LIST_ITEM_PATTERN.search(text): errors.append("body is missing markdown list items") if check_governance: # Check governance IDs against code-stripped text so IDs hidden in code # blocks or inline code are not counted (prevents spoof via code fences). if not REQ_ID_PATTERN.search(searchable): errors.append("body is missing REQ-ID traceability (e.g. REQ-OPS-001)") if not TASK_ID_PATTERN.search(searchable): errors.append("body is missing TASK-ID traceability (e.g. TASK-OPS-001)") if not TEST_ID_PATTERN.search(searchable): errors.append("body is missing TEST-ID traceability (e.g. TEST-OPS-001)") return errors def fetch_pr_body(pr_number: int) -> str: tea_binary = resolve_tea_binary() try: completed = subprocess.run( [ tea_binary, "api", "-R", "origin", f"repos/{{owner}}/{{repo}}/pulls/{pr_number}", ], check=True, capture_output=True, text=True, ) except (subprocess.CalledProcessError, FileNotFoundError, PermissionError) as exc: raise RuntimeError(f"failed to fetch PR #{pr_number}: {exc}") from exc try: payload = json.loads(completed.stdout) except json.JSONDecodeError as exc: raise RuntimeError(f"failed to parse PR payload for #{pr_number}: {exc}") from exc body = payload.get("body", "") if not isinstance(body, str): raise RuntimeError(f"unexpected PR body type for #{pr_number}: {type(body).__name__}") return body def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Validate PR body markdown formatting, escaped-newline artifacts, and governance traceability." ) group = parser.add_mutually_exclusive_group(required=True) group.add_argument("--pr", type=int, help="PR number to fetch via `tea api`") group.add_argument("--body-file", type=Path, help="Path to markdown body file") parser.add_argument( "--no-governance", action="store_true", help="Skip REQ-ID/TASK-ID/TEST-ID governance traceability checks", ) return parser.parse_args() def main() -> int: args = parse_args() if args.body_file is not None: if not args.body_file.exists(): print(f"[FAIL] body file not found: {args.body_file}") return 1 body = args.body_file.read_text(encoding="utf-8") source = f"file:{args.body_file}" else: body = fetch_pr_body(args.pr) source = f"pr:{args.pr}" errors = validate_pr_body_text(body, check_governance=not args.no_governance) if errors: print("[FAIL] PR body validation failed") print(f"- source: {source}") for err in errors: print(f"- {err}") return 1 print("[OK] PR body validation passed") print(f"- source: {source}") return 0 if __name__ == "__main__": sys.exit(main())