336 lines
13 KiB
Python
336 lines
13 KiB
Python
"""Security Scanner

Pattern-based security vulnerability detection for code analysis.
Covers OWASP Top 10 and common security anti-patterns.
"""
|
||
|
||
import logging
import os
import re
from dataclasses import dataclass
from typing import Iterator

import yaml
|
||
|
||
|
||
@dataclass
|
||
class SecurityFinding:
|
||
"""A single security finding."""
|
||
|
||
rule_id: str
|
||
rule_name: str
|
||
severity: str # HIGH, MEDIUM, LOW
|
||
category: str # OWASP category
|
||
file: str
|
||
line: int
|
||
code_snippet: str
|
||
description: str
|
||
recommendation: str
|
||
cwe: str | None = None # CWE reference
|
||
|
||
|
||
class SecurityScanner:
|
||
"""Security scanner using pattern matching and rules."""
|
||
|
||
# Default rules covering OWASP Top 10
|
||
DEFAULT_RULES = [
|
||
# A01:2021 – Broken Access Control
|
||
{
|
||
"id": "SEC001",
|
||
"name": "Hardcoded Credentials",
|
||
"pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
|
||
"severity": "HIGH",
|
||
"category": "A01:2021 Broken Access Control",
|
||
"cwe": "CWE-798",
|
||
"description": "Hardcoded credentials detected in source code",
|
||
"recommendation": "Use environment variables or a secrets management system",
|
||
},
|
||
{
|
||
"id": "SEC002",
|
||
"name": "Exposed Private Key",
|
||
"pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
|
||
"severity": "HIGH",
|
||
"category": "A01:2021 Broken Access Control",
|
||
"cwe": "CWE-321",
|
||
"description": "Private key embedded in source code",
|
||
"recommendation": "Never commit private keys. Use secure key management",
|
||
},
|
||
# A02:2021 – Cryptographic Failures
|
||
{
|
||
"id": "SEC003",
|
||
"name": "Weak Crypto Algorithm",
|
||
"pattern": r"(?i)\b(md5|sha1)\s*\(",
|
||
"severity": "MEDIUM",
|
||
"category": "A02:2021 Cryptographic Failures",
|
||
"cwe": "CWE-328",
|
||
"description": "Use of weak cryptographic hash function",
|
||
"recommendation": "Use SHA-256 or stronger hashing algorithms",
|
||
},
|
||
{
|
||
"id": "SEC004",
|
||
"name": "Insecure Random",
|
||
"pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
|
||
"severity": "MEDIUM",
|
||
"category": "A02:2021 Cryptographic Failures",
|
||
"cwe": "CWE-330",
|
||
"description": "Use of non-cryptographic random number generator for security purposes",
|
||
"recommendation": "Use secrets module or os.urandom() for security-critical randomness",
|
||
},
|
||
# A03:2021 – Injection
|
||
{
|
||
"id": "SEC005",
|
||
"name": "SQL Injection",
|
||
"pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
|
||
"severity": "HIGH",
|
||
"category": "A03:2021 Injection",
|
||
"cwe": "CWE-89",
|
||
"description": "Potential SQL injection through string formatting",
|
||
"recommendation": "Use parameterized queries with placeholders",
|
||
},
|
||
{
|
||
"id": "SEC006",
|
||
"name": "Command Injection",
|
||
"pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
|
||
"severity": "HIGH",
|
||
"category": "A03:2021 Injection",
|
||
"cwe": "CWE-78",
|
||
"description": "Potential command injection through string concatenation",
|
||
"recommendation": "Use subprocess with shell=False and pass arguments as list",
|
||
},
|
||
{
|
||
"id": "SEC007",
|
||
"name": "Eval Usage",
|
||
"pattern": r"\beval\s*\(",
|
||
"severity": "HIGH",
|
||
"category": "A03:2021 Injection",
|
||
"cwe": "CWE-95",
|
||
"description": "Use of eval() can lead to code injection",
|
||
"recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
|
||
},
|
||
{
|
||
"id": "SEC008",
|
||
"name": "XSS Risk",
|
||
"pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
|
||
"severity": "MEDIUM",
|
||
"category": "A03:2021 Injection",
|
||
"cwe": "CWE-79",
|
||
"description": "Direct DOM manipulation may allow XSS",
|
||
"recommendation": "Use textContent or proper sanitization libraries",
|
||
},
|
||
# A04:2021 – Insecure Design
|
||
{
|
||
"id": "SEC009",
|
||
"name": "Debug Mode",
|
||
"pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
|
||
"severity": "MEDIUM",
|
||
"category": "A04:2021 Insecure Design",
|
||
"cwe": "CWE-489",
|
||
"description": "Debug mode enabled in code",
|
||
"recommendation": "Ensure debug mode is disabled in production",
|
||
},
|
||
# A05:2021 – Security Misconfiguration
|
||
{
|
||
"id": "SEC010",
|
||
"name": "CORS Wildcard",
|
||
"pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
|
||
"severity": "MEDIUM",
|
||
"category": "A05:2021 Security Misconfiguration",
|
||
"cwe": "CWE-942",
|
||
"description": "CORS configured to allow all origins",
|
||
"recommendation": "Specify allowed origins explicitly",
|
||
},
|
||
{
|
||
"id": "SEC011",
|
||
"name": "SSL Verification Disabled",
|
||
"pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
|
||
"severity": "HIGH",
|
||
"category": "A05:2021 Security Misconfiguration",
|
||
"cwe": "CWE-295",
|
||
"description": "SSL certificate verification disabled",
|
||
"recommendation": "Always verify SSL certificates in production",
|
||
},
|
||
# A07:2021 – Identification and Authentication Failures
|
||
{
|
||
"id": "SEC012",
|
||
"name": "Hardcoded JWT Secret",
|
||
"pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
|
||
"severity": "HIGH",
|
||
"category": "A07:2021 Authentication Failures",
|
||
"cwe": "CWE-798",
|
||
"description": "JWT secret hardcoded in source code",
|
||
"recommendation": "Use environment variables for JWT secrets",
|
||
},
|
||
# A08:2021 – Software and Data Integrity Failures
|
||
{
|
||
"id": "SEC013",
|
||
"name": "Pickle Usage",
|
||
"pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
|
||
"severity": "MEDIUM",
|
||
"category": "A08:2021 Integrity Failures",
|
||
"cwe": "CWE-502",
|
||
"description": "Pickle can execute arbitrary code during deserialization",
|
||
"recommendation": "Use JSON or other safe serialization formats",
|
||
},
|
||
# A09:2021 – Security Logging and Monitoring Failures
|
||
{
|
||
"id": "SEC014",
|
||
"name": "Sensitive Data Logging",
|
||
"pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
|
||
"severity": "MEDIUM",
|
||
"category": "A09:2021 Logging Failures",
|
||
"cwe": "CWE-532",
|
||
"description": "Potentially logging sensitive information",
|
||
"recommendation": "Never log passwords, tokens, or secrets",
|
||
},
|
||
# A10:2021 – Server-Side Request Forgery
|
||
{
|
||
"id": "SEC015",
|
||
"name": "SSRF Risk",
|
||
"pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
|
||
"severity": "MEDIUM",
|
||
"category": "A10:2021 SSRF",
|
||
"cwe": "CWE-918",
|
||
"description": "URL constructed from user input may allow SSRF",
|
||
"recommendation": "Validate and sanitize URLs, use allowlists",
|
||
},
|
||
# Additional common issues
|
||
{
|
||
"id": "SEC016",
|
||
"name": "Hardcoded IP Address",
|
||
"pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
|
||
"severity": "LOW",
|
||
"category": "Configuration",
|
||
"cwe": "CWE-547",
|
||
"description": "Hardcoded IP address found",
|
||
"recommendation": "Use configuration files or environment variables for IP addresses",
|
||
},
|
||
{
|
||
"id": "SEC017",
|
||
"name": "TODO/FIXME Security",
|
||
"pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
|
||
"severity": "MEDIUM",
|
||
"category": "Code Quality",
|
||
"cwe": None,
|
||
"description": "Security-related TODO/FIXME comment found",
|
||
"recommendation": "Address security-related TODO items before deployment",
|
||
},
|
||
]
|
||
|
||
def __init__(self, rules_file: str | None = None):
|
||
"""Initialize scanner with rules.
|
||
|
||
Args:
|
||
rules_file: Optional path to custom rules YAML file.
|
||
"""
|
||
self.rules = self.DEFAULT_RULES.copy()
|
||
|
||
if rules_file and os.path.exists(rules_file):
|
||
try:
|
||
with open(rules_file) as f:
|
||
custom_rules = yaml.safe_load(f)
|
||
if custom_rules and "rules" in custom_rules:
|
||
self.rules.extend(custom_rules["rules"])
|
||
except Exception:
|
||
pass # Use defaults if custom rules fail to load
|
||
|
||
# Compile patterns for efficiency
|
||
self._compiled_rules = []
|
||
for rule in self.rules:
|
||
try:
|
||
self._compiled_rules.append(
|
||
{**rule, "_pattern": re.compile(rule["pattern"])}
|
||
)
|
||
except re.error:
|
||
pass # Skip invalid patterns
|
||
|
||
def scan_content(
|
||
self,
|
||
content: str,
|
||
filename: str,
|
||
) -> Iterator[SecurityFinding]:
|
||
"""Scan content for security issues.
|
||
|
||
Args:
|
||
content: File content to scan.
|
||
filename: Name of the file (for reporting).
|
||
|
||
Yields:
|
||
SecurityFinding for each detected issue.
|
||
"""
|
||
lines = content.splitlines()
|
||
|
||
for line_num, line in enumerate(lines, 1):
|
||
for rule in self._compiled_rules:
|
||
if rule["_pattern"].search(line):
|
||
yield SecurityFinding(
|
||
rule_id=rule["id"],
|
||
rule_name=rule["name"],
|
||
severity=rule["severity"],
|
||
category=rule["category"],
|
||
file=filename,
|
||
line=line_num,
|
||
code_snippet=line.strip()[:120],
|
||
description=rule["description"],
|
||
recommendation=rule["recommendation"],
|
||
cwe=rule.get("cwe"),
|
||
)
|
||
|
||
def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
|
||
"""Scan a git diff for security issues.
|
||
|
||
Only scans added lines (lines starting with +).
|
||
|
||
Args:
|
||
diff: Git diff content.
|
||
|
||
Yields:
|
||
SecurityFinding for each detected issue.
|
||
"""
|
||
current_file = None
|
||
current_line = 0
|
||
|
||
for line in diff.splitlines():
|
||
# Track current file
|
||
if line.startswith("diff --git"):
|
||
match = re.search(r"b/(.+)$", line)
|
||
if match:
|
||
current_file = match.group(1)
|
||
current_line = 0
|
||
# Track line numbers
|
||
elif line.startswith("@@"):
|
||
match = re.search(r"\+(\d+)", line)
|
||
if match:
|
||
current_line = int(match.group(1)) - 1
|
||
# Check added lines
|
||
elif line.startswith("+") and not line.startswith("+++"):
|
||
current_line += 1
|
||
for finding in self.scan_content(line[1:], current_file or "unknown"):
|
||
finding.line = current_line
|
||
yield finding
|
||
elif not line.startswith("-"):
|
||
current_line += 1
|
||
|
||
def get_summary(self, findings: list[SecurityFinding]) -> dict:
|
||
"""Get summary statistics for findings.
|
||
|
||
Args:
|
||
findings: List of security findings.
|
||
|
||
Returns:
|
||
Summary dictionary with counts by severity and category.
|
||
"""
|
||
summary = {
|
||
"total": len(findings),
|
||
"by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0},
|
||
"by_category": {},
|
||
}
|
||
|
||
for finding in findings:
|
||
summary["by_severity"][finding.severity] = (
|
||
summary["by_severity"].get(finding.severity, 0) + 1
|
||
)
|
||
summary["by_category"][finding.category] = (
|
||
summary["by_category"].get(finding.category, 0) + 1
|
||
)
|
||
|
||
return summary
|