security fixes
All checks were successful
Enterprise AI Code Review / ai-review (pull_request) Successful in 26s

This commit is contained in:
2025-12-28 19:55:05 +00:00
parent 4a3ddec68c
commit f94d21580c
15 changed files with 2549 additions and 46 deletions

0
tools/ai-review/security/__init__.py Normal file → Executable file
View File

View File

@@ -0,0 +1,172 @@
#!/usr/bin/env python3
"""Pre-commit hook for detecting hardcoded secrets.
Checks files for common secret patterns:
- API keys
- AWS credentials
- Private keys
- Passwords
- Tokens
"""
import re
import sys
from pathlib import Path
# Credential signatures that must never be committed. Each entry carries a
# human-readable name, the detection regex, and the severity reported on a
# match; check_file_for_secrets() applies every regex to each scanned line.
SECRET_PATTERNS = [
    {
        "name": "OpenAI API Key",
        "pattern": r"sk-[a-zA-Z0-9]{32,}",
        "severity": "HIGH",
    },
    {
        "name": "AWS Access Key",
        "pattern": r"AKIA[0-9A-Z]{16}",
        "severity": "HIGH",
    },
    {
        "name": "Private Key",
        "pattern": r"-----BEGIN[A-Z ]+PRIVATE KEY-----",
        "severity": "HIGH",
    },
    {
        "name": "Generic API Key",
        "pattern": r'(?i)(api[_-]?key|apikey)\s*[:=]\s*["\']([a-zA-Z0-9_\-]{20,})["\']',
        "severity": "HIGH",
    },
    {
        "name": "Password in Code",
        # The negative lookahead ignores values containing {...}, i.e.
        # format-string placeholders rather than literal passwords.
        "pattern": r'(?i)password\s*[:=]\s*["\'](?!.*\{.*\})([^"\']{8,})["\']',
        "severity": "HIGH",
    },
    {
        "name": "Bearer Token",
        # Case-insensitive: the scheme is conventionally written "Bearer",
        # which a lowercase-only pattern would never match.
        "pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.]{20,}",
        "severity": "HIGH",
    },
    {
        "name": "GitHub Token",
        "pattern": r"gh[pousr]_[a-zA-Z0-9]{36,}",
        "severity": "HIGH",
    },
    {
        "name": "Slack Token",
        "pattern": r"xox[baprs]-[a-zA-Z0-9-]{10,}",
        "severity": "HIGH",
    },
]
# Regexes marking a line as a likely false positive. is_false_positive()
# searches each one case-insensitively; a single hit exempts the whole line.
EXCLUDE_PATTERNS = [
    # Documentation placeholders
    r"example\.com",
    r"your[_-]?api[_-]?key",
    r"your[_-]?password",
    r"<API[_-]?KEY>",
    r"\[API[_-]?KEY\]",
    # Values looked up at runtime instead of being hardcoded
    r"\$\{",  # Environment variable substitution
    r"os\.environ",  # Reading from env vars
    r"secrets\.",  # GitHub secrets
    r"getenv",  # Reading from env
]
def is_false_positive(line: str) -> bool:
    """Report whether a line carries a known false-positive marker.

    Args:
        line: A single line of text from the file being scanned

    Returns:
        True if any regex in ``EXCLUDE_PATTERNS`` is found in the line
        (searched case-insensitively), otherwise False
    """
    return any(
        re.search(marker, line, re.IGNORECASE) is not None
        for marker in EXCLUDE_PATTERNS
    )
def check_file_for_secrets(filepath: str) -> list[dict]:
    """Check a file, line by line, for hardcoded secrets.

    Lines that start with a common comment marker and lines accepted by
    ``is_false_positive`` are not scanned. Every other line is matched
    against each regex in ``SECRET_PATTERNS``.

    Args:
        filepath: Path to file to check

    Returns:
        List of findings, one dict per regex match, with the keys ``name``,
        ``severity``, ``line`` (1-based line number) and ``match`` (the
        matched text, cut to 50 characters plus "..." when longer). A file
        that cannot be opened or decoded as UTF-8 yields an empty list.
    """
    comment_prefixes = ("#", "//", "/*", "*", "--")
    pem_header = "-----BEGIN"
    max_match_len = 50

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError):
        return []  # Skip files we can't read (missing, unreadable, binary)

    findings = []
    for line_no, line in enumerate(content.split("\n"), start=1):
        stripped = line.strip()
        # Skip comments in common languages. A PEM header also begins with
        # "--", so it is exempted; without the exemption the "Private Key"
        # pattern could never match a line that starts with the header.
        if stripped.startswith(comment_prefixes) and not stripped.startswith(
            pem_header
        ):
            continue
        # Skip if line is a false positive
        if is_false_positive(line):
            continue
        for pattern_info in SECRET_PATTERNS:
            for match in re.finditer(pattern_info["pattern"], line):
                text = match.group(0)
                if len(text) > max_match_len:
                    text = text[:max_match_len] + "..."
                findings.append(
                    {
                        "name": pattern_info["name"],
                        "severity": pattern_info["severity"],
                        "line": line_no,
                        "match": text,
                    }
                )
    return findings
def main():
    """Scan every file named on the command line and print a report.

    Returns:
        1 if any file produced at least one finding, 0 otherwise
        (including when no files were passed)
    """
    rule = "=" * 60
    total_findings = 0

    # An empty argument list simply skips the loop and falls through to 0.
    for filepath in sys.argv[1:]:
        findings = check_file_for_secrets(filepath)
        if not findings:
            continue
        total_findings += len(findings)
        print(f"\n{rule}")
        print(f"🔐 Potential secrets detected in: {filepath}")
        print(rule)
        for finding in findings:
            print(f"\n🔴 [{finding['severity']}] {finding['name']}")
            print(f" Line: {finding['line']}")
            print(f" Match: {finding['match']}")

    if not total_findings:
        return 0

    print(f"\n{rule}")
    print(f"Total potential secrets: {total_findings}")
    print(rule)
    print("\n❌ COMMIT BLOCKED: Potential hardcoded secrets detected")
    print("\nIf these are false positives:")
    print(" 1. Use environment variables: os.environ.get('API_KEY')")
    print(" 2. Use a secrets manager")
    print(" 3. Add to .gitignore if it's a config file")
    print("\nTo bypass (not recommended): git commit --no-verify")
    return 1
if __name__ == "__main__":

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""Pre-commit hook for security scanning.
Scans staged files for security vulnerabilities before commit.
Fails if HIGH severity issues are found.
"""
import sys
from pathlib import Path
from security_scanner import SecurityScanner
def main():
    """Scan the files named on the command line with ``SecurityScanner``.

    Files that cannot be read are reported with a warning and skipped.

    Returns:
        1 if any finding has severity "HIGH", 0 otherwise (including when
        no files were passed)
    """
    scanner = SecurityScanner()
    # Get files from command line (pre-commit passes them)
    files = sys.argv[1:]
    if not files:
        print("No files to scan")
        return 0

    rule = "=" * 60
    symbols = {
        "HIGH": "🔴",
        "MEDIUM": "🟡",
        "LOW": "🔵",
    }
    has_high_severity = False
    total_findings = 0

    for filepath in files:
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                content = f.read()
        except Exception as e:
            print(f"Warning: Could not read {filepath}: {e}")
            continue

        findings = list(scanner.scan_content(content, filepath))
        if not findings:
            continue
        total_findings += len(findings)

        print(f"\n{rule}")
        print(f"Security findings in: {filepath}")
        print(rule)
        for finding in findings:
            severity_symbol = symbols.get(finding.severity, "")
            print(f"\n{severity_symbol} [{finding.severity}] {finding.name}")
            print(f" Category: {finding.category}")
            print(f" CWE: {finding.cwe}")
            print(f" Line: {finding.line}")
            print(f" Description: {finding.description}")
            print(f" Recommendation: {finding.recommendation}")
            if finding.severity == "HIGH":
                has_high_severity = True

    # Nothing found anywhere: stay silent and succeed.
    if total_findings == 0:
        return 0

    print(f"\n{rule}")
    print(f"Total findings: {total_findings}")
    print(rule)
    if has_high_severity:
        print("\n❌ COMMIT BLOCKED: HIGH severity security issues found")
        print("Please fix the issues above before committing.")
        print("\nTo bypass (not recommended): git commit --no-verify")
        return 1
    print("\n⚠️ Medium/Low severity issues found - review recommended")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())

0
tools/ai-review/security/security_scanner.py Normal file → Executable file
View File

View File

@@ -0,0 +1,157 @@
#!/usr/bin/env python3
"""Pre-commit hook for validating workflow files.
Checks workflow files for security anti-patterns:
- Full webhook data in environment variables
- Missing input validation
- Unsafe shell operations
"""
import re
import sys
from pathlib import Path
import yaml
# Anti-patterns searched for in workflow files. Each entry has a name, the
# detection regex, a severity and the message shown to the user. The optional
# "exclude_if" regex suppresses the check when it is found anywhere in the
# same file (see check_workflow_file()).
SECURITY_CHECKS = [
    {
        "name": "Full webhook data in env vars",
        "pattern": r"toJSON\(github\.event\)|toJSON\(gitea\.event\)",
        "severity": "HIGH",
        "message": "Do not pass full webhook data to environment variables. Use minimal extraction instead.",
    },
    {
        "name": "Unvalidated repository input",
        "pattern": r"\$\{\{\s*(?:github|gitea)\.repository\s*\}\}",
        "severity": "MEDIUM",
        "message": "Repository name should be validated before use. Add format validation.",
        "exclude_if": r"grep -qE.*repository",  # OK if validation present
    },
    {
        "name": "Direct user input in shell",
        "pattern": r"\$\{\{\s*(?:github|gitea)\.event\.comment\.body\s*\}\}",
        "severity": "MEDIUM",
        "message": "Comment body should be properly escaped. Use jq -Rs for JSON escaping.",
        "exclude_if": r"jq -Rs",  # OK if using jq for escaping
    },
    {
        "name": "Inline Python without validation",
        # Also accepts versioned interpreter names such as python3 or
        # python3.11; a literal "python -c" alone would miss them.
        "pattern": r"python[\d.]*\s+-c.*json\.loads\(os\.environ",
        "severity": "HIGH",
        "message": "Use utils/safe_dispatch.py instead of inline Python with env vars.",
    },
]
def check_workflow_file(filepath: str) -> list[dict]:
    """Check a workflow file against every entry in ``SECURITY_CHECKS``.

    Args:
        filepath: Path to workflow YAML file

    Returns:
        List of findings. A file that cannot be read or is not valid YAML
        yields a single dict with ``severity`` "ERROR" and a ``message``.
        Otherwise each regex match yields a dict with ``name``,
        ``severity``, ``message``, ``line`` (1-based) and ``match`` (at most
        the first 80 characters of the matched text).
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
    except Exception as e:
        return [{"severity": "ERROR", "message": f"Could not read file: {e}"}]

    # The parsed document is discarded; parsing only proves the YAML is valid.
    try:
        yaml.safe_load(content)
    except yaml.YAMLError as e:
        return [{"severity": "ERROR", "message": f"Invalid YAML: {e}"}]

    findings = []
    for check in SECURITY_CHECKS:
        # The exclusion is tested against the whole file, so it either
        # suppresses every match of this check or none of them.
        if "exclude_if" in check and re.search(check["exclude_if"], content):
            continue
        for hit in re.finditer(check["pattern"], content, re.MULTILINE):
            findings.append(
                {
                    "name": check["name"],
                    "severity": check["severity"],
                    "message": check["message"],
                    # Newlines before the match start, plus one.
                    "line": content.count("\n", 0, hit.start()) + 1,
                    "match": hit.group(0)[:80],  # First 80 chars
                }
            )
    return findings
def main():
    """Validate the workflow files named on the command line.

    Returns:
        1 if any finding has severity "HIGH" or "ERROR", 0 otherwise
        (including when no files were passed)
    """
    files = sys.argv[1:]
    if not files:
        print("No workflow files to validate")
        return 0

    rule = "=" * 60
    symbols = {
        "HIGH": "🔴",
        "MEDIUM": "🟡",
        "LOW": "🔵",
        "ERROR": "",
    }
    has_high_severity = False
    total_findings = 0

    for filepath in files:
        findings = check_workflow_file(filepath)
        if not findings:
            continue
        total_findings += len(findings)

        print(f"\n{rule}")
        print(f"Workflow security issues in: {filepath}")
        print(rule)
        for finding in findings:
            # ERROR findings carry only severity and message, hence .get().
            severity = finding.get("severity", "UNKNOWN")
            severity_symbol = symbols.get(severity, "")
            print(f"\n{severity_symbol} [{severity}] {finding.get('name', 'Issue')}")
            print(f" Line: {finding.get('line', 'N/A')}")
            print(f" {finding['message']}")
            if "match" in finding:
                print(f" Match: {finding['match']}")
            if severity in ("HIGH", "ERROR"):
                has_high_severity = True

    # Nothing found anywhere: stay silent and succeed.
    if total_findings == 0:
        return 0

    print(f"\n{rule}")
    print(f"Total findings: {total_findings}")
    print(rule)
    if has_high_severity:
        print("\n❌ COMMIT BLOCKED: Critical workflow security issues found")
        print("Please fix the issues above before committing.")
        print("\nSee SECURITY.md for workflow security best practices.")
        return 1
    print("\n⚠️ Medium severity issues found - review recommended")
    return 0
if __name__ == "__main__":