Files
openrabbit/tools/ai-review/security/validate_workflows.py
latte f94d21580c
All checks were successful
Enterprise AI Code Review / ai-review (pull_request) Successful in 26s
security fixes
2025-12-28 19:55:05 +00:00

158 lines
4.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""Pre-commit hook for validating workflow files.
Checks workflow files for security anti-patterns:
- Full webhook data in environment variables
- Missing input validation
- Unsafe shell operations
"""
import re
import sys
from pathlib import Path
import yaml
# Regex-based security checks applied to the raw text of each workflow file.
#
# Every entry has:
#   name      - short label printed in the report
#   pattern   - regex searched for in the file content
#   severity  - "HIGH" or "MEDIUM"
#   message   - remediation advice printed with the finding
# and optionally:
#   exclude_if - regex; if it matches anywhere in the same file, findings
#                for this check are skipped
SECURITY_CHECKS: list[dict[str, str]] = [
    {
        "name": "Full webhook data in env vars",
        "pattern": r"toJSON\(github\.event\)|toJSON\(gitea\.event\)",
        "severity": "HIGH",
        "message": "Do not pass full webhook data to environment variables. Use minimal extraction instead.",
    },
    {
        "name": "Unvalidated repository input",
        "pattern": r"\$\{\{\s*(?:github|gitea)\.repository\s*\}\}",
        "severity": "MEDIUM",
        "message": "Repository name should be validated before use. Add format validation.",
        "exclude_if": r"grep -qE.*repository",  # OK if validation present
    },
    {
        "name": "Direct user input in shell",
        "pattern": r"\$\{\{\s*(?:github|gitea)\.event\.comment\.body\s*\}\}",
        "severity": "MEDIUM",
        "message": "Comment body should be properly escaped. Use jq -Rs for JSON escaping.",
        "exclude_if": r"jq -Rs",  # OK if using jq for escaping
    },
    {
        "name": "Inline Python without validation",
        "pattern": r"python -c.*json\.loads\(os\.environ",
        "severity": "HIGH",
        "message": "Use utils/safe_dispatch.py instead of inline Python with env vars.",
    },
]
def check_workflow_file(filepath: str) -> list[dict]:
    """Check a workflow file for security issues.

    The file is read as UTF-8, parsed with ``yaml.safe_load`` purely to
    confirm it is valid YAML, and then its raw text is scanned with every
    regex in ``SECURITY_CHECKS``.

    Args:
        filepath: Path to workflow YAML file.

    Returns:
        List of findings. A read or parse failure yields a single finding
        holding only ``severity`` (``"ERROR"``) and ``message``. Pattern hits
        carry ``name``, ``severity``, ``message``, ``line`` (1-based) and
        ``match`` (the matched text truncated to 80 characters).
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file. ValueError: bytes that are not
        # valid UTF-8 (UnicodeDecodeError) or a malformed path string.
        return [{"severity": "ERROR", "message": f"Could not read file: {e}"}]

    # Parse YAML to ensure it's valid; the parsed document itself is unused.
    try:
        yaml.safe_load(content)
    except yaml.YAMLError as e:
        return [{"severity": "ERROR", "message": f"Invalid YAML: {e}"}]

    findings = []
    for check in SECURITY_CHECKS:
        # The exclusion is searched against the whole file, so its outcome is
        # the same for every match of this check: decide it once up front.
        if "exclude_if" in check and re.search(check["exclude_if"], content):
            continue  # Validation present, skip this check's findings

        for match in re.finditer(check["pattern"], content, re.MULTILINE):
            # 1-based line number: newlines before the match start, plus one.
            line_num = content.count("\n", 0, match.start()) + 1
            findings.append(
                {
                    "name": check["name"],
                    "severity": check["severity"],
                    "message": check["message"],
                    "line": line_num,
                    "match": match.group(0)[:80],  # First 80 chars
                }
            )
    return findings
def main() -> int:
    """Run workflow validation.

    Every command-line argument (``sys.argv[1:]``) is treated as a workflow
    file path and passed to ``check_workflow_file``; the findings for each
    file are printed as a report.

    Returns:
        ``1`` when at least one finding has severity ``HIGH`` or ``ERROR``,
        otherwise ``0`` (including when no files were given or only
        lower-severity findings were reported).
    """
    files = sys.argv[1:]
    if not files:
        print("No workflow files to validate")
        return 0

    # Loop invariants: built once instead of once per finding / per file.
    severity_symbols = {
        "HIGH": "🔴",
        "MEDIUM": "🟡",
        "LOW": "🔵",
        "ERROR": "",
    }
    blocking_severities = ("HIGH", "ERROR")
    banner = "=" * 60

    has_high_severity = False
    total_findings = 0
    for filepath in files:
        findings = check_workflow_file(filepath)
        if not findings:
            continue
        total_findings += len(findings)
        print(f"\n{banner}")
        print(f"Workflow security issues in: {filepath}")
        print(banner)
        for finding in findings:
            severity = finding.get("severity", "UNKNOWN")
            severity_symbol = severity_symbols.get(severity, "")
            # ERROR findings carry no name/line, hence the .get() fallbacks.
            print(f"\n{severity_symbol} [{severity}] {finding.get('name', 'Issue')}")
            print(f" Line: {finding.get('line', 'N/A')}")
            print(f" {finding['message']}")
            if "match" in finding:
                print(f" Match: {finding['match']}")
            if severity in blocking_severities:
                has_high_severity = True

    # Nothing found: no summary to print.
    if total_findings == 0:
        return 0

    print(f"\n{banner}")
    print(f"Total findings: {total_findings}")
    print(banner)
    if has_high_severity:
        print("\n❌ COMMIT BLOCKED: Critical workflow security issues found")
        print("Please fix the issues above before committing.")
        print("\nSee SECURITY.md for workflow security best practices.")
        return 1
    print("\n⚠️ Medium severity issues found - review recommended")
    return 0
if __name__ == "__main__":