Files
openrabbit/tools/ai-review/security/security_scanner.py
latte f94d21580c
All checks were successful
Enterprise AI Code Review / ai-review (pull_request) Successful in 26s
security fixes
2025-12-28 19:55:05 +00:00

336 lines
13 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Security Scanner
Pattern-based security vulnerability detection for code analysis.
Covers OWASP Top 10 and common security anti-patterns.
"""
import logging
import os
import re
from dataclasses import dataclass
from typing import Iterator

import yaml
@dataclass
class SecurityFinding:
"""A single security finding."""
rule_id: str
rule_name: str
severity: str # HIGH, MEDIUM, LOW
category: str # OWASP category
file: str
line: int
code_snippet: str
description: str
recommendation: str
cwe: str | None = None # CWE reference
class SecurityScanner:
"""Security scanner using pattern matching and rules."""
# Default rules covering OWASP Top 10
DEFAULT_RULES = [
# A01:2021 Broken Access Control
{
"id": "SEC001",
"name": "Hardcoded Credentials",
"pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
"severity": "HIGH",
"category": "A01:2021 Broken Access Control",
"cwe": "CWE-798",
"description": "Hardcoded credentials detected in source code",
"recommendation": "Use environment variables or a secrets management system",
},
{
"id": "SEC002",
"name": "Exposed Private Key",
"pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
"severity": "HIGH",
"category": "A01:2021 Broken Access Control",
"cwe": "CWE-321",
"description": "Private key embedded in source code",
"recommendation": "Never commit private keys. Use secure key management",
},
# A02:2021 Cryptographic Failures
{
"id": "SEC003",
"name": "Weak Crypto Algorithm",
"pattern": r"(?i)\b(md5|sha1)\s*\(",
"severity": "MEDIUM",
"category": "A02:2021 Cryptographic Failures",
"cwe": "CWE-328",
"description": "Use of weak cryptographic hash function",
"recommendation": "Use SHA-256 or stronger hashing algorithms",
},
{
"id": "SEC004",
"name": "Insecure Random",
"pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
"severity": "MEDIUM",
"category": "A02:2021 Cryptographic Failures",
"cwe": "CWE-330",
"description": "Use of non-cryptographic random number generator for security purposes",
"recommendation": "Use secrets module or os.urandom() for security-critical randomness",
},
# A03:2021 Injection
{
"id": "SEC005",
"name": "SQL Injection",
"pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
"severity": "HIGH",
"category": "A03:2021 Injection",
"cwe": "CWE-89",
"description": "Potential SQL injection through string formatting",
"recommendation": "Use parameterized queries with placeholders",
},
{
"id": "SEC006",
"name": "Command Injection",
"pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
"severity": "HIGH",
"category": "A03:2021 Injection",
"cwe": "CWE-78",
"description": "Potential command injection through string concatenation",
"recommendation": "Use subprocess with shell=False and pass arguments as list",
},
{
"id": "SEC007",
"name": "Eval Usage",
"pattern": r"\beval\s*\(",
"severity": "HIGH",
"category": "A03:2021 Injection",
"cwe": "CWE-95",
"description": "Use of eval() can lead to code injection",
"recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
},
{
"id": "SEC008",
"name": "XSS Risk",
"pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
"severity": "MEDIUM",
"category": "A03:2021 Injection",
"cwe": "CWE-79",
"description": "Direct DOM manipulation may allow XSS",
"recommendation": "Use textContent or proper sanitization libraries",
},
# A04:2021 Insecure Design
{
"id": "SEC009",
"name": "Debug Mode",
"pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
"severity": "MEDIUM",
"category": "A04:2021 Insecure Design",
"cwe": "CWE-489",
"description": "Debug mode enabled in code",
"recommendation": "Ensure debug mode is disabled in production",
},
# A05:2021 Security Misconfiguration
{
"id": "SEC010",
"name": "CORS Wildcard",
"pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
"severity": "MEDIUM",
"category": "A05:2021 Security Misconfiguration",
"cwe": "CWE-942",
"description": "CORS configured to allow all origins",
"recommendation": "Specify allowed origins explicitly",
},
{
"id": "SEC011",
"name": "SSL Verification Disabled",
"pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
"severity": "HIGH",
"category": "A05:2021 Security Misconfiguration",
"cwe": "CWE-295",
"description": "SSL certificate verification disabled",
"recommendation": "Always verify SSL certificates in production",
},
# A07:2021 Identification and Authentication Failures
{
"id": "SEC012",
"name": "Hardcoded JWT Secret",
"pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
"severity": "HIGH",
"category": "A07:2021 Authentication Failures",
"cwe": "CWE-798",
"description": "JWT secret hardcoded in source code",
"recommendation": "Use environment variables for JWT secrets",
},
# A08:2021 Software and Data Integrity Failures
{
"id": "SEC013",
"name": "Pickle Usage",
"pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
"severity": "MEDIUM",
"category": "A08:2021 Integrity Failures",
"cwe": "CWE-502",
"description": "Pickle can execute arbitrary code during deserialization",
"recommendation": "Use JSON or other safe serialization formats",
},
# A09:2021 Security Logging and Monitoring Failures
{
"id": "SEC014",
"name": "Sensitive Data Logging",
"pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
"severity": "MEDIUM",
"category": "A09:2021 Logging Failures",
"cwe": "CWE-532",
"description": "Potentially logging sensitive information",
"recommendation": "Never log passwords, tokens, or secrets",
},
# A10:2021 Server-Side Request Forgery
{
"id": "SEC015",
"name": "SSRF Risk",
"pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
"severity": "MEDIUM",
"category": "A10:2021 SSRF",
"cwe": "CWE-918",
"description": "URL constructed from user input may allow SSRF",
"recommendation": "Validate and sanitize URLs, use allowlists",
},
# Additional common issues
{
"id": "SEC016",
"name": "Hardcoded IP Address",
"pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
"severity": "LOW",
"category": "Configuration",
"cwe": "CWE-547",
"description": "Hardcoded IP address found",
"recommendation": "Use configuration files or environment variables for IP addresses",
},
{
"id": "SEC017",
"name": "TODO/FIXME Security",
"pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
"severity": "MEDIUM",
"category": "Code Quality",
"cwe": None,
"description": "Security-related TODO/FIXME comment found",
"recommendation": "Address security-related TODO items before deployment",
},
]
def __init__(self, rules_file: str | None = None):
"""Initialize scanner with rules.
Args:
rules_file: Optional path to custom rules YAML file.
"""
self.rules = self.DEFAULT_RULES.copy()
if rules_file and os.path.exists(rules_file):
try:
with open(rules_file) as f:
custom_rules = yaml.safe_load(f)
if custom_rules and "rules" in custom_rules:
self.rules.extend(custom_rules["rules"])
except Exception:
pass # Use defaults if custom rules fail to load
# Compile patterns for efficiency
self._compiled_rules = []
for rule in self.rules:
try:
self._compiled_rules.append(
{**rule, "_pattern": re.compile(rule["pattern"])}
)
except re.error:
pass # Skip invalid patterns
def scan_content(
self,
content: str,
filename: str,
) -> Iterator[SecurityFinding]:
"""Scan content for security issues.
Args:
content: File content to scan.
filename: Name of the file (for reporting).
Yields:
SecurityFinding for each detected issue.
"""
lines = content.splitlines()
for line_num, line in enumerate(lines, 1):
for rule in self._compiled_rules:
if rule["_pattern"].search(line):
yield SecurityFinding(
rule_id=rule["id"],
rule_name=rule["name"],
severity=rule["severity"],
category=rule["category"],
file=filename,
line=line_num,
code_snippet=line.strip()[:120],
description=rule["description"],
recommendation=rule["recommendation"],
cwe=rule.get("cwe"),
)
def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
"""Scan a git diff for security issues.
Only scans added lines (lines starting with +).
Args:
diff: Git diff content.
Yields:
SecurityFinding for each detected issue.
"""
current_file = None
current_line = 0
for line in diff.splitlines():
# Track current file
if line.startswith("diff --git"):
match = re.search(r"b/(.+)$", line)
if match:
current_file = match.group(1)
current_line = 0
# Track line numbers
elif line.startswith("@@"):
match = re.search(r"\+(\d+)", line)
if match:
current_line = int(match.group(1)) - 1
# Check added lines
elif line.startswith("+") and not line.startswith("+++"):
current_line += 1
for finding in self.scan_content(line[1:], current_file or "unknown"):
finding.line = current_line
yield finding
elif not line.startswith("-"):
current_line += 1
def get_summary(self, findings: list[SecurityFinding]) -> dict:
"""Get summary statistics for findings.
Args:
findings: List of security findings.
Returns:
Summary dictionary with counts by severity and category.
"""
summary = {
"total": len(findings),
"by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0},
"by_category": {},
}
for finding in findings:
summary["by_severity"][finding.severity] = (
summary["by_severity"].get(finding.severity, 0) + 1
)
summary["by_category"][finding.category] = (
summary["by_category"].get(finding.category, 0) + 1
)
return summary