security fixes

2025-12-28 19:55:05 +00:00
parent 4a3ddec68c
commit f94d21580c
15 changed files with 2549 additions and 46 deletions
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+"""Pre-commit hook for detecting hardcoded secrets.
+
+Checks files for common secret patterns:
+- API keys
+- AWS credentials
+- Private keys
+- Passwords
+- Tokens
+"""
+
+import re
+import sys
+from pathlib import Path
+
+# Each entry describes one kind of credential to look for:
+#   name     - label printed when the pattern matches
+#   pattern  - regular expression tested against each scanned line
+#   severity - level printed alongside the finding
+SECRET_PATTERNS = [
+    {
+        "name": "OpenAI API Key",
+        "pattern": r"sk-[a-zA-Z0-9]{32,}",
+        "severity": "HIGH",
+    },
+    {
+        "name": "AWS Access Key",
+        "pattern": r"AKIA[0-9A-Z]{16}",
+        "severity": "HIGH",
+    },
+    {
+        "name": "Private Key",
+        "pattern": r"-----BEGIN[A-Z ]+PRIVATE KEY-----",
+        "severity": "HIGH",
+    },
+    {
+        "name": "Generic API Key",
+        "pattern": r'(?i)(api[_-]?key|apikey)\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']',
+        "severity": "HIGH",
+    },
+    {
+        "name": "Password in Code",
+        # The negative lookahead ignores values containing {...}
+        # (format-string style placeholders).
+        "pattern": r'(?i)password\s*[:=]\s*["\'](?!.*\{.*\})([^"\']{8,})["\']',
+        "severity": "HIGH",
+    },
+    {
+        "name": "Bearer Token",
+        # Matched case-insensitively: the scheme is normally written
+        # "Bearer", which a lowercase-only pattern would miss.
+        "pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.]{20,}",
+        "severity": "HIGH",
+    },
+    {
+        "name": "GitHub Token",
+        "pattern": r"gh[pousr]_[a-zA-Z0-9]{36,}",
+        "severity": "HIGH",
+    },
+    {
+        "name": "Slack Token",
+        "pattern": r"xox[baprs]-[a-zA-Z0-9-]{10,}",
+        "severity": "HIGH",
+    },
+]
+
+
+# Regexes marking a line as a likely false positive; a line matching any
+# of them is not checked for secrets.
+EXCLUDE_PATTERNS = [
+    # Documentation placeholders
+    r"example\.com",
+    r"your[_-]?api[_-]?key",
+    r"your[_-]?password",
+    r"<API[_-]?KEY>",
+    r"\[API[_-]?KEY\]",
+    # Values looked up at runtime rather than hardcoded
+    r"\$\{",  # Environment variable substitution
+    r"os\.environ",  # Reading from env vars
+    r"secrets\.",  # GitHub secrets
+    r"getenv",  # Reading from env
+]
+
+
+def is_false_positive(line: str) -> bool:
+    """Report whether a line matches any known-benign pattern.
+
+    Args:
+        line: A single line of text from the file being scanned
+
+    Returns:
+        True if any entry of EXCLUDE_PATTERNS matches the line
+        (case-insensitively), otherwise False
+    """
+    return any(
+        re.search(benign, line, re.IGNORECASE) is not None
+        for benign in EXCLUDE_PATTERNS
+    )
+
+
+def check_file_for_secrets(filepath: str) -> list[dict]:
+    """Check a file for hardcoded secrets.
+
+    The file is scanned line by line. Lines that look like comments and
+    lines flagged by is_false_positive() are skipped; every remaining line
+    is tested against each entry of SECRET_PATTERNS.
+
+    Args:
+        filepath: Path to file to check
+
+    Returns:
+        List of findings, each a dict with "name", "severity", "line"
+        (1-based) and "match" (the matched text, cut to 50 characters plus
+        "..." when longer). Empty if the file cannot be read.
+    """
+    try:
+        with open(filepath, "r", encoding="utf-8") as f:
+            content = f.read()
+    except (OSError, ValueError):
+        # Skip files we can't read. ValueError covers UnicodeDecodeError
+        # (binary or non-UTF-8 content).
+        return []
+
+    comment_prefixes = ("#", "//", "/*", "*", "--")
+    findings = []
+
+    for i, line in enumerate(content.split("\n"), start=1):
+        stripped = line.strip()
+        # Skip comments in common languages. PEM armor lines
+        # ("-----BEGIN ... PRIVATE KEY-----") also start with "--", so they
+        # are exempted; otherwise the Private Key pattern could never match
+        # a key header at the start of a line.
+        if stripped.startswith(comment_prefixes) and not stripped.startswith(
+            "-----BEGIN"
+        ):
+            continue
+
+        # Skip if line is a false positive
+        if is_false_positive(line):
+            continue
+
+        for pattern_info in SECRET_PATTERNS:
+            for match in re.finditer(pattern_info["pattern"], line):
+                text = match.group(0)
+                if len(text) > 50:
+                    # Keep the report short and avoid echoing whole secrets
+                    text = text[:50] + "..."
+                findings.append(
+                    {
+                        "name": pattern_info["name"],
+                        "severity": pattern_info["severity"],
+                        "line": i,
+                        "match": text,
+                    }
+                )
+
+    return findings
+
+
+def main():
+    """Scan the files named on the command line for hardcoded secrets.
+
+    Prints a report for every file with findings.
+
+    Returns:
+        1 if any file produced findings, otherwise 0
+    """
+    banner = "=" * 60
+    total_findings = 0
+
+    for filepath in sys.argv[1:]:
+        findings = check_file_for_secrets(filepath)
+        if not findings:
+            continue
+
+        total_findings += len(findings)
+
+        print(f"\n{banner}")
+        print(f"🔐 Potential secrets detected in: {filepath}")
+        print(banner)
+
+        for finding in findings:
+            print(f"\n🔴 [{finding['severity']}] {finding['name']}")
+            print(f"   Line: {finding['line']}")
+            print(f"   Match: {finding['match']}")
+
+    # Nothing found (or no files given): allow the commit
+    if total_findings == 0:
+        return 0
+
+    print(f"\n{banner}")
+    print(f"Total potential secrets: {total_findings}")
+    print(banner)
+    print("\n❌ COMMIT BLOCKED: Potential hardcoded secrets detected")
+    print("\nIf these are false positives:")
+    print("  1. Use environment variables: os.environ.get('API_KEY')")
+    print("  2. Use a secrets manager")
+    print("  3. Add to .gitignore if it's a config file")
+    print("\nTo bypass (not recommended): git commit --no-verify")
+    return 1
+
+
+if __name__ == "__main__":
+    # Exit with main()'s return value (1 when secrets were found, else 0).
+    sys.exit(main())
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Pre-commit hook for security scanning.
+
+Scans staged files for security vulnerabilities before commit.
+Fails if HIGH severity issues are found.
+"""
+
+import sys
+from pathlib import Path
+
+from security_scanner import SecurityScanner
+
+
+def main():
+    """Scan the files named on the command line with SecurityScanner.
+
+    Prints every finding; unreadable files produce a warning and are
+    skipped.
+
+    Returns:
+        1 if at least one HIGH severity finding was reported, otherwise 0
+    """
+    scanner = SecurityScanner()
+
+    # pre-commit passes the files to check as command-line arguments
+    files = sys.argv[1:]
+    if not files:
+        print("No files to scan")
+        return 0
+
+    symbols = {
+        "HIGH": "🔴",
+        "MEDIUM": "🟡",
+        "LOW": "🔵",
+    }
+    banner = "=" * 60
+    blocked = False
+    total_findings = 0
+
+    for filepath in files:
+        try:
+            with open(filepath, "r", encoding="utf-8") as f:
+                content = f.read()
+        except Exception as e:
+            print(f"Warning: Could not read {filepath}: {e}")
+            continue
+
+        findings = list(scanner.scan_content(content, filepath))
+        if not findings:
+            continue
+
+        total_findings += len(findings)
+
+        print(f"\n{banner}")
+        print(f"Security findings in: {filepath}")
+        print(banner)
+
+        for finding in findings:
+            marker = symbols.get(finding.severity, "⚪")
+
+            print(f"\n{marker} [{finding.severity}] {finding.name}")
+            print(f"   Category: {finding.category}")
+            print(f"   CWE: {finding.cwe}")
+            print(f"   Line: {finding.line}")
+            print(f"   Description: {finding.description}")
+            print(f"   Recommendation: {finding.recommendation}")
+
+            # Only HIGH severity findings block the commit
+            if finding.severity == "HIGH":
+                blocked = True
+
+    if total_findings == 0:
+        return 0
+
+    print(f"\n{banner}")
+    print(f"Total findings: {total_findings}")
+    print(banner)
+
+    if blocked:
+        print("\n❌ COMMIT BLOCKED: HIGH severity security issues found")
+        print("Please fix the issues above before committing.")
+        print("\nTo bypass (not recommended): git commit --no-verify")
+        return 1
+
+    print("\n⚠️  Medium/Low severity issues found - review recommended")
+    return 0
+
+
+if __name__ == "__main__":
+    # Use main()'s return value as the process exit status.
+    sys.exit(main())
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+"""Pre-commit hook for validating workflow files.
+
+Checks workflow files for security anti-patterns:
+- Full webhook data in environment variables
+- Missing input validation
+- Unsafe shell operations
+"""
+
+import re
+import sys
+from pathlib import Path
+
+import yaml
+
+# Each entry describes one anti-pattern to look for in a workflow file:
+#   name       - label printed with the finding
+#   pattern    - regular expression searched in the file content
+#   severity   - level printed with the finding
+#   message    - remediation advice printed with the finding
+#   exclude_if - optional regex; if it matches anywhere in the file the
+#                check is treated as mitigated and not reported
+SECURITY_CHECKS = [
+    {
+        "name": "Full webhook data in env vars",
+        "pattern": r"toJSON\(github\.event\)|toJSON\(gitea\.event\)",
+        "severity": "HIGH",
+        "message": "Do not pass full webhook data to environment variables. Use minimal extraction instead.",
+    },
+    {
+        "name": "Unvalidated repository input",
+        "pattern": r"\$\{\{\s*(?:github|gitea)\.repository\s*\}\}",
+        "severity": "MEDIUM",
+        "message": "Repository name should be validated before use. Add format validation.",
+        "exclude_if": r"grep -qE.*repository",  # OK if validation present
+    },
+    {
+        "name": "Direct user input in shell",
+        "pattern": r"\$\{\{\s*(?:github|gitea)\.event\.comment\.body\s*\}\}",
+        "severity": "MEDIUM",
+        "message": "Comment body should be properly escaped. Use jq -Rs for JSON escaping.",
+        "exclude_if": r"jq -Rs",  # OK if using jq for escaping
+    },
+    {
+        "name": "Inline Python without validation",
+        # Accept both "python" and "python3" so the common python3 -c form
+        # is not missed.
+        "pattern": r"python3?\s+-c.*json\.loads\(os\.environ",
+        "severity": "HIGH",
+        "message": "Use utils/safe_dispatch.py instead of inline Python with env vars.",
+    },
+]
+
+
+def check_workflow_file(filepath: str) -> list[dict]:
+    """Scan one workflow file against every entry of SECURITY_CHECKS.
+
+    Args:
+        filepath: Path to workflow YAML file
+
+    Returns:
+        List of findings. A file that cannot be read or is not valid YAML
+        yields a single finding with severity "ERROR" and a "message";
+        otherwise each finding has "name", "severity", "message", "line"
+        and "match" (at most 80 characters of the matched text).
+    """
+    try:
+        with open(filepath, "r", encoding="utf-8") as f:
+            content = f.read()
+    except Exception as e:
+        return [{"severity": "ERROR", "message": f"Could not read file: {e}"}]
+
+    # The parsed document is not used; loading only proves the YAML is valid
+    try:
+        yaml.safe_load(content)
+    except yaml.YAMLError as e:
+        return [{"severity": "ERROR", "message": f"Invalid YAML: {e}"}]
+
+    findings = []
+
+    for check in SECURITY_CHECKS:
+        # The exclusion is tested against the whole file, so when it
+        # matches, none of this check's matches are reported
+        if "exclude_if" in check and re.search(check["exclude_if"], content):
+            continue
+
+        findings.extend(
+            {
+                "name": check["name"],
+                "severity": check["severity"],
+                "message": check["message"],
+                # 1-based line: newlines before the match, plus one
+                "line": content.count("\n", 0, hit.start()) + 1,
+                "match": hit.group(0)[:80],  # First 80 chars
+            }
+            for hit in re.finditer(check["pattern"], content, re.MULTILINE)
+        )
+
+    return findings
+
+
+def main():
+    """Validate the workflow files named on the command line.
+
+    Prints every finding returned by check_workflow_file().
+
+    Returns:
+        1 if any finding has severity "HIGH" or "ERROR", otherwise 0
+    """
+    files = sys.argv[1:]
+    if not files:
+        print("No workflow files to validate")
+        return 0
+
+    symbols = {
+        "HIGH": "🔴",
+        "MEDIUM": "🟡",
+        "LOW": "🔵",
+        "ERROR": "❌",
+    }
+    banner = "=" * 60
+    blocked = False
+    total_findings = 0
+
+    for filepath in files:
+        findings = check_workflow_file(filepath)
+        if not findings:
+            continue
+
+        total_findings += len(findings)
+
+        print(f"\n{banner}")
+        print(f"Workflow security issues in: {filepath}")
+        print(banner)
+
+        for finding in findings:
+            # ERROR findings carry no "name"/"line"/"match", hence the
+            # fallbacks below
+            severity = finding.get("severity", "UNKNOWN")
+            marker = symbols.get(severity, "⚪")
+
+            print(f"\n{marker} [{severity}] {finding.get('name', 'Issue')}")
+            print(f"   Line: {finding.get('line', 'N/A')}")
+            print(f"   {finding['message']}")
+
+            if "match" in finding:
+                print(f"   Match: {finding['match']}")
+
+            if severity in ("HIGH", "ERROR"):
+                blocked = True
+
+    if total_findings == 0:
+        return 0
+
+    print(f"\n{banner}")
+    print(f"Total findings: {total_findings}")
+    print(banner)
+
+    if blocked:
+        print("\n❌ COMMIT BLOCKED: Critical workflow security issues found")
+        print("Please fix the issues above before committing.")
+        print("\nSee SECURITY.md for workflow security best practices.")
+        return 1
+
+    print("\n⚠️  Medium severity issues found - review recommended")
+    return 0
+
+
+if __name__ == "__main__":