first commit

This commit is contained in:
2025-12-21 13:42:30 +01:00
parent 823b825acb
commit f9b24fe248
47 changed files with 8222 additions and 1 deletions

View File

@@ -0,0 +1,9 @@
"""Security scanning package.

This package contains security scanning utilities for detecting
vulnerabilities in code. ``SecurityScanner`` is re-exported here from
``security.security_scanner`` and is the only name listed in ``__all__``.
"""
from security.security_scanner import SecurityScanner
__all__ = ["SecurityScanner"]

View File

@@ -0,0 +1,335 @@
"""Security scanner.

Pattern-based security vulnerability detection for code analysis.
The built-in rules are grouped by OWASP Top 10 (2021) category (there is no
rule for A06) and also flag a few common security anti-patterns.
"""
import logging
import os
import re
from dataclasses import dataclass
from typing import Iterator

import yaml
@dataclass
class SecurityFinding:
"""A single security finding."""
rule_id: str
rule_name: str
severity: str # HIGH, MEDIUM, LOW
category: str # OWASP category
file: str
line: int
code_snippet: str
description: str
recommendation: str
cwe: str | None = None # CWE reference
class SecurityScanner:
"""Security scanner using pattern matching and rules."""
# Default rules covering OWASP Top 10
DEFAULT_RULES = [
# A01:2021 Broken Access Control
{
"id": "SEC001",
"name": "Hardcoded Credentials",
"pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
"severity": "HIGH",
"category": "A01:2021 Broken Access Control",
"cwe": "CWE-798",
"description": "Hardcoded credentials detected in source code",
"recommendation": "Use environment variables or a secrets management system",
},
{
"id": "SEC002",
"name": "Exposed Private Key",
"pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
"severity": "HIGH",
"category": "A01:2021 Broken Access Control",
"cwe": "CWE-321",
"description": "Private key embedded in source code",
"recommendation": "Never commit private keys. Use secure key management",
},
# A02:2021 Cryptographic Failures
{
"id": "SEC003",
"name": "Weak Crypto Algorithm",
"pattern": r"(?i)\b(md5|sha1)\s*\(",
"severity": "MEDIUM",
"category": "A02:2021 Cryptographic Failures",
"cwe": "CWE-328",
"description": "Use of weak cryptographic hash function",
"recommendation": "Use SHA-256 or stronger hashing algorithms",
},
{
"id": "SEC004",
"name": "Insecure Random",
"pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
"severity": "MEDIUM",
"category": "A02:2021 Cryptographic Failures",
"cwe": "CWE-330",
"description": "Use of non-cryptographic random number generator for security purposes",
"recommendation": "Use secrets module or os.urandom() for security-critical randomness",
},
# A03:2021 Injection
{
"id": "SEC005",
"name": "SQL Injection",
"pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
"severity": "HIGH",
"category": "A03:2021 Injection",
"cwe": "CWE-89",
"description": "Potential SQL injection through string formatting",
"recommendation": "Use parameterized queries with placeholders",
},
{
"id": "SEC006",
"name": "Command Injection",
"pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
"severity": "HIGH",
"category": "A03:2021 Injection",
"cwe": "CWE-78",
"description": "Potential command injection through string concatenation",
"recommendation": "Use subprocess with shell=False and pass arguments as list",
},
{
"id": "SEC007",
"name": "Eval Usage",
"pattern": r"\beval\s*\(",
"severity": "HIGH",
"category": "A03:2021 Injection",
"cwe": "CWE-95",
"description": "Use of eval() can lead to code injection",
"recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
},
{
"id": "SEC008",
"name": "XSS Risk",
"pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
"severity": "MEDIUM",
"category": "A03:2021 Injection",
"cwe": "CWE-79",
"description": "Direct DOM manipulation may allow XSS",
"recommendation": "Use textContent or proper sanitization libraries",
},
# A04:2021 Insecure Design
{
"id": "SEC009",
"name": "Debug Mode",
"pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
"severity": "MEDIUM",
"category": "A04:2021 Insecure Design",
"cwe": "CWE-489",
"description": "Debug mode enabled in code",
"recommendation": "Ensure debug mode is disabled in production",
},
# A05:2021 Security Misconfiguration
{
"id": "SEC010",
"name": "CORS Wildcard",
"pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
"severity": "MEDIUM",
"category": "A05:2021 Security Misconfiguration",
"cwe": "CWE-942",
"description": "CORS configured to allow all origins",
"recommendation": "Specify allowed origins explicitly",
},
{
"id": "SEC011",
"name": "SSL Verification Disabled",
"pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
"severity": "HIGH",
"category": "A05:2021 Security Misconfiguration",
"cwe": "CWE-295",
"description": "SSL certificate verification disabled",
"recommendation": "Always verify SSL certificates in production",
},
# A07:2021 Identification and Authentication Failures
{
"id": "SEC012",
"name": "Hardcoded JWT Secret",
"pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
"severity": "HIGH",
"category": "A07:2021 Authentication Failures",
"cwe": "CWE-798",
"description": "JWT secret hardcoded in source code",
"recommendation": "Use environment variables for JWT secrets",
},
# A08:2021 Software and Data Integrity Failures
{
"id": "SEC013",
"name": "Pickle Usage",
"pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
"severity": "MEDIUM",
"category": "A08:2021 Integrity Failures",
"cwe": "CWE-502",
"description": "Pickle can execute arbitrary code during deserialization",
"recommendation": "Use JSON or other safe serialization formats",
},
# A09:2021 Security Logging and Monitoring Failures
{
"id": "SEC014",
"name": "Sensitive Data Logging",
"pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
"severity": "MEDIUM",
"category": "A09:2021 Logging Failures",
"cwe": "CWE-532",
"description": "Potentially logging sensitive information",
"recommendation": "Never log passwords, tokens, or secrets",
},
# A10:2021 Server-Side Request Forgery
{
"id": "SEC015",
"name": "SSRF Risk",
"pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
"severity": "MEDIUM",
"category": "A10:2021 SSRF",
"cwe": "CWE-918",
"description": "URL constructed from user input may allow SSRF",
"recommendation": "Validate and sanitize URLs, use allowlists",
},
# Additional common issues
{
"id": "SEC016",
"name": "Hardcoded IP Address",
"pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
"severity": "LOW",
"category": "Configuration",
"cwe": "CWE-547",
"description": "Hardcoded IP address found",
"recommendation": "Use configuration files or environment variables for IP addresses",
},
{
"id": "SEC017",
"name": "TODO/FIXME Security",
"pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
"severity": "MEDIUM",
"category": "Code Quality",
"cwe": None,
"description": "Security-related TODO/FIXME comment found",
"recommendation": "Address security-related TODO items before deployment",
},
]
def __init__(self, rules_file: str | None = None):
"""Initialize scanner with rules.
Args:
rules_file: Optional path to custom rules YAML file.
"""
self.rules = self.DEFAULT_RULES.copy()
if rules_file and os.path.exists(rules_file):
try:
with open(rules_file) as f:
custom_rules = yaml.safe_load(f)
if custom_rules and "rules" in custom_rules:
self.rules.extend(custom_rules["rules"])
except Exception:
pass # Use defaults if custom rules fail to load
# Compile patterns for efficiency
self._compiled_rules = []
for rule in self.rules:
try:
self._compiled_rules.append(
{**rule, "_pattern": re.compile(rule["pattern"])}
)
except re.error:
pass # Skip invalid patterns
def scan_content(
self,
content: str,
filename: str,
) -> Iterator[SecurityFinding]:
"""Scan content for security issues.
Args:
content: File content to scan.
filename: Name of the file (for reporting).
Yields:
SecurityFinding for each detected issue.
"""
lines = content.splitlines()
for line_num, line in enumerate(lines, 1):
for rule in self._compiled_rules:
if rule["_pattern"].search(line):
yield SecurityFinding(
rule_id=rule["id"],
rule_name=rule["name"],
severity=rule["severity"],
category=rule["category"],
file=filename,
line=line_num,
code_snippet=line.strip()[:120],
description=rule["description"],
recommendation=rule["recommendation"],
cwe=rule.get("cwe"),
)
def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
"""Scan a git diff for security issues.
Only scans added lines (lines starting with +).
Args:
diff: Git diff content.
Yields:
SecurityFinding for each detected issue.
"""
current_file = None
current_line = 0
for line in diff.splitlines():
# Track current file
if line.startswith("diff --git"):
match = re.search(r"b/(.+)$", line)
if match:
current_file = match.group(1)
current_line = 0
# Track line numbers
elif line.startswith("@@"):
match = re.search(r"\+(\d+)", line)
if match:
current_line = int(match.group(1)) - 1
# Check added lines
elif line.startswith("+") and not line.startswith("+++"):
current_line += 1
for finding in self.scan_content(line[1:], current_file or "unknown"):
finding.line = current_line
yield finding
elif not line.startswith("-"):
current_line += 1
def get_summary(self, findings: list[SecurityFinding]) -> dict:
"""Get summary statistics for findings.
Args:
findings: List of security findings.
Returns:
Summary dictionary with counts by severity and category.
"""
summary = {
"total": len(findings),
"by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0},
"by_category": {},
}
for finding in findings:
summary["by_severity"][finding.severity] = (
summary["by_severity"].get(finding.severity, 0) + 1
)
summary["by_category"][finding.category] = (
summary["by_category"].get(finding.category, 0) + 1
)
return summary