#!/usr/bin/env python3
"""Pre-commit hook for validating workflow files.

Checks workflow files for security anti-patterns:
- Full webhook data in environment variables
- Missing input validation
- Unsafe shell operations
"""

import re
import sys
from pathlib import Path

import yaml

# Each check is a dict with the following keys:
#   name       - short title printed in the report
#   pattern    - regex searched for in the raw text of the workflow file
#   severity   - "HIGH" makes the hook exit non-zero; "MEDIUM" is report-only
#   message    - remediation advice printed in the report
#   exclude_if - optional regex; when it matches anywhere in the file, the
#                check is skipped for that whole file
SECURITY_CHECKS = [
    {
        "name": "Full webhook data in env vars",
        "pattern": r"toJSON\(github\.event\)|toJSON\(gitea\.event\)",
        "severity": "HIGH",
        "message": "Do not pass full webhook data to environment variables. Use minimal extraction instead.",
    },
    {
        "name": "Unvalidated repository input",
        "pattern": r"\$\{\{\s*(?:github|gitea)\.repository\s*\}\}",
        "severity": "MEDIUM",
        "message": "Repository name should be validated before use. Add format validation.",
        "exclude_if": r"grep -qE.*repository",  # OK if validation present
    },
    {
        "name": "Direct user input in shell",
        "pattern": r"\$\{\{\s*(?:github|gitea)\.event\.comment\.body\s*\}\}",
        "severity": "MEDIUM",
        "message": "Comment body should be properly escaped. Use jq -Rs for JSON escaping.",
        "exclude_if": r"jq -Rs",  # OK if using jq for escaping
    },
    {
        "name": "Inline Python without validation",
        "pattern": r"python -c.*json\.loads\(os\.environ",
        "severity": "HIGH",
        "message": "Use utils/safe_dispatch.py instead of inline Python with env vars.",
    },
]

# Symbol printed in front of each finding, keyed by severity.
SEVERITY_SYMBOLS = {
    "HIGH": "🔴",
    "MEDIUM": "🟡",
    "LOW": "🔵",
    "ERROR": "❌",
}
DEFAULT_SEVERITY_SYMBOL = "⚪"

# Severities that cause main() to return a non-zero exit status.
BLOCKING_SEVERITIES = frozenset({"HIGH", "ERROR"})


def check_workflow_file(filepath: str) -> list[dict]:
    """Check a workflow file for security issues.

    Args:
        filepath: Path to workflow YAML file

    Returns:
        List of findings. A file that cannot be read or is not valid YAML
        yields a single finding with severity "ERROR" (containing only
        "severity" and "message"). Otherwise each finding has the keys
        "name", "severity", "message", "line" (1-based) and "match"
        (the matched text, truncated to 80 characters).
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or a path containing an embedded NUL.
        return [{"severity": "ERROR", "message": f"Could not read file: {e}"}]

    # Parse YAML to ensure it's valid; the parsed document itself is not
    # used, the checks below run on the raw text.
    try:
        yaml.safe_load(content)
    except yaml.YAMLError as e:
        return [{"severity": "ERROR", "message": f"Invalid YAML: {e}"}]

    findings = []
    for check in SECURITY_CHECKS:
        # The exclusion applies to the whole file, so evaluate it once per
        # check instead of once per match.
        exclude_pattern = check.get("exclude_if")
        if exclude_pattern is not None and re.search(exclude_pattern, content):
            continue  # Validation present, skip this check

        for match in re.finditer(check["pattern"], content, re.MULTILINE):
            # Line number = newlines before the match start, plus one.
            line_num = content.count("\n", 0, match.start()) + 1
            findings.append(
                {
                    "name": check["name"],
                    "severity": check["severity"],
                    "message": check["message"],
                    "line": line_num,
                    "match": match.group(0)[:80],  # First 80 chars
                }
            )

    return findings


def main() -> int:
    """Run workflow validation.

    Validates every file path given on the command line and prints a
    report of the findings.

    Returns:
        1 if any finding has severity "HIGH" or "ERROR", otherwise 0.
    """
    files = sys.argv[1:]
    if not files:
        print("No workflow files to validate")
        return 0

    has_high_severity = False
    total_findings = 0

    for filepath in files:
        findings = check_workflow_file(filepath)
        if not findings:
            continue

        total_findings += len(findings)
        print(f"\n{'=' * 60}")
        print(f"Workflow security issues in: {filepath}")
        print("=" * 60)

        for finding in findings:
            severity = finding.get("severity", "UNKNOWN")
            severity_symbol = SEVERITY_SYMBOLS.get(severity, DEFAULT_SEVERITY_SYMBOL)

            # ERROR findings carry no "name"/"line"/"match", hence the
            # defaults and the membership test below.
            print(f"\n{severity_symbol} [{severity}] {finding.get('name', 'Issue')}")
            print(f" Line: {finding.get('line', 'N/A')}")
            print(f" {finding['message']}")
            if "match" in finding:
                print(f" Match: {finding['match']}")

            if severity in BLOCKING_SEVERITIES:
                has_high_severity = True

    if total_findings > 0:
        print(f"\n{'=' * 60}")
        print(f"Total findings: {total_findings}")
        print("=" * 60)

    if has_high_severity:
        print("\n❌ COMMIT BLOCKED: Critical workflow security issues found")
        print("Please fix the issues above before committing.")
        print("\nSee SECURITY.md for workflow security best practices.")
        return 1

    if total_findings > 0:
        print("\n⚠️ Medium severity issues found - review recommended")

    return 0


if __name__ == "__main__":
    sys.exit(main())