#!/usr/bin/env python3
"""Pre-commit hook for detecting hardcoded secrets.

Checks files for common secret patterns:
- API keys
- AWS credentials
- Private keys
- Passwords
- Tokens
"""

import re
import sys
from pathlib import Path

# One entry per detector: a display name, the regex searched for on each
# line, and the severity label printed in the report.
SECRET_PATTERNS = [
    {
        "name": "OpenAI API Key",
        "pattern": r"sk-[a-zA-Z0-9]{32,}",
        "severity": "HIGH",
    },
    {
        "name": "AWS Access Key",
        "pattern": r"AKIA[0-9A-Z]{16}",
        "severity": "HIGH",
    },
    {
        "name": "Private Key",
        "pattern": r"-----BEGIN[A-Z ]+PRIVATE KEY-----",
        "severity": "HIGH",
    },
    {
        "name": "Generic API Key",
        "pattern": r'(?i)(api[_-]?key|apikey)\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']',
        "severity": "HIGH",
    },
    {
        "name": "Password in Code",
        "pattern": r'(?i)password\s*[:=]\s*["\'](?!.*\{.*\})([^"\']{8,})["\']',
        "severity": "HIGH",
    },
    {
        "name": "Bearer Token",
        # Case-insensitive: HTTP headers normally spell the scheme "Bearer".
        "pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.]{20,}",
        "severity": "HIGH",
    },
    {
        "name": "GitHub Token",
        "pattern": r"gh[pousr]_[a-zA-Z0-9]{36,}",
        "severity": "HIGH",
    },
    {
        "name": "Slack Token",
        "pattern": r"xox[baprs]-[a-zA-Z0-9-]{10,}",
        "severity": "HIGH",
    },
]

# Patterns to exclude (common false positives)
# NOTE(review): an empty pattern used to sit in this list (presumably a
# "<...>" placeholder whose text was stripped). An empty regex matches every
# line, which silently disabled the whole scanner, so it has been dropped.
EXCLUDE_PATTERNS = [
    r"example\.com",
    r"your[_-]?api[_-]?key",
    r"your[_-]?password",
    r"\[API[_-]?KEY\]",
    r"\$\{",  # Environment variable substitution
    r"os\.environ",  # Reading from env vars
    r"secrets\.",  # GitHub secrets
    r"getenv",  # Reading from env
]

# Line prefixes treated as comments in common languages.
_COMMENT_PREFIXES = ("#", "//", "/*", "*", "--")

# PEM armor starts with dashes and would otherwise be mistaken for a "--"
# comment, hiding private keys from the scanner.
_PEM_HEADER_PREFIX = "-----BEGIN"

# Longest match text echoed in a finding before it is shortened.
_MAX_MATCH_DISPLAY = 50


def is_false_positive(line: str) -> bool:
    """Check if a line is likely a false positive.

    Args:
        line: A single line of text from the file being scanned.

    Returns:
        True when any pattern in EXCLUDE_PATTERNS matches the line
        (case-insensitively), False otherwise.
    """
    # Empty patterns are skipped: they would match every line and turn the
    # whole scan into a no-op.
    return any(
        pattern and re.search(pattern, line, re.IGNORECASE)
        for pattern in EXCLUDE_PATTERNS
    )


def _truncate(text: str, limit: int = _MAX_MATCH_DISPLAY) -> str:
    """Return *text*, shortened to *limit* characters plus "..." if longer."""
    return text[:limit] + "..." if len(text) > limit else text


def _is_comment_line(stripped: str) -> bool:
    """Tell whether a stripped line starts with a comment marker."""
    if stripped.startswith(_PEM_HEADER_PREFIX):
        return False
    return stripped.startswith(_COMMENT_PREFIXES)


def check_file_for_secrets(filepath: str) -> list[dict]:
    """Check a file for hardcoded secrets.

    Comment lines and lines matching EXCLUDE_PATTERNS are skipped; every
    remaining line is searched with each entry of SECRET_PATTERNS.

    Args:
        filepath: Path to file to check

    Returns:
        List of findings, one dict per match with the keys "name",
        "severity", "line" (1-based) and "match" (the matched text, cut to
        50 characters plus "..." when longer). Files that cannot be opened
        or decoded as UTF-8 yield an empty list.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, ValueError):
        # Unreadable or non-UTF-8 (e.g. binary) files are skipped on purpose;
        # UnicodeDecodeError is a ValueError subclass.
        return []

    findings = []
    for line_number, line in enumerate(content.split("\n"), start=1):
        if _is_comment_line(line.strip()):
            continue
        if is_false_positive(line):
            continue

        for pattern_info in SECRET_PATTERNS:
            for match in re.finditer(pattern_info["pattern"], line):
                findings.append(
                    {
                        "name": pattern_info["name"],
                        "severity": pattern_info["severity"],
                        "line": line_number,
                        "match": _truncate(match.group(0)),
                    }
                )

    return findings


def main(argv=None) -> int:
    """Run secret detection.

    Args:
        argv: File paths to scan. When None (the default), the paths are
            taken from sys.argv[1:].

    Returns:
        1 if any potential secret was found (after printing a report),
        0 otherwise, including when no files were given.
    """
    files = sys.argv[1:] if argv is None else list(argv)
    if not files:
        return 0

    separator = "=" * 60
    total_findings = 0

    for filepath in files:
        findings = check_file_for_secrets(filepath)
        if not findings:
            continue

        total_findings += len(findings)

        print(f"\n{separator}")
        print(f"🔍 Potential secrets detected in: {filepath}")
        print(separator)

        for finding in findings:
            print(f"\n🔴 [{finding['severity']}] {finding['name']}")
            print(f" Line: {finding['line']}")
            print(f" Match: {finding['match']}")

    if not total_findings:
        return 0

    print(f"\n{separator}")
    print(f"Total potential secrets: {total_findings}")
    print(separator)
    print("\n❌ COMMIT BLOCKED: Potential hardcoded secrets detected")
    print("\nIf these are false positives:")
    print(" 1. Use environment variables: os.environ.get('API_KEY')")
    print(" 2. Use a secrets manager")
    print(" 3. Add to .gitignore if it's a config file")
    print("\nTo bypass (not recommended): git commit --no-verify")
    return 1


if __name__ == "__main__":
    # NOTE(review): the guard's body lay outside the reviewed excerpt; the
    # conventional exit-code hand-off is assumed here - confirm against the
    # full file.
    sys.exit(main())