first commit
This commit is contained in:
9
tools/ai-review/security/__init__.py
Normal file
9
tools/ai-review/security/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Security Scanning Package
|
||||
|
||||
This package contains security scanning utilities for
|
||||
detecting vulnerabilities in code.
|
||||
"""
|
||||
|
||||
from security.security_scanner import SecurityScanner
|
||||
|
||||
__all__ = ["SecurityScanner"]
|
||||
335
tools/ai-review/security/security_scanner.py
Normal file
335
tools/ai-review/security/security_scanner.py
Normal file
@@ -0,0 +1,335 @@
|
||||
"""Security Scanner
|
||||
|
||||
Pattern-based security vulnerability detection for code analysis.
|
||||
Covers OWASP Top 10 and common security anti-patterns.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterator
|
||||
|
||||
import yaml
|
||||
import os
|
||||
|
||||
|
||||
@dataclass
|
||||
class SecurityFinding:
|
||||
"""A single security finding."""
|
||||
|
||||
rule_id: str
|
||||
rule_name: str
|
||||
severity: str # HIGH, MEDIUM, LOW
|
||||
category: str # OWASP category
|
||||
file: str
|
||||
line: int
|
||||
code_snippet: str
|
||||
description: str
|
||||
recommendation: str
|
||||
cwe: str | None = None # CWE reference
|
||||
|
||||
|
||||
class SecurityScanner:
|
||||
"""Security scanner using pattern matching and rules."""
|
||||
|
||||
# Default rules covering OWASP Top 10
|
||||
DEFAULT_RULES = [
|
||||
# A01:2021 – Broken Access Control
|
||||
{
|
||||
"id": "SEC001",
|
||||
"name": "Hardcoded Credentials",
|
||||
"pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
|
||||
"severity": "HIGH",
|
||||
"category": "A01:2021 Broken Access Control",
|
||||
"cwe": "CWE-798",
|
||||
"description": "Hardcoded credentials detected in source code",
|
||||
"recommendation": "Use environment variables or a secrets management system",
|
||||
},
|
||||
{
|
||||
"id": "SEC002",
|
||||
"name": "Exposed Private Key",
|
||||
"pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
|
||||
"severity": "HIGH",
|
||||
"category": "A01:2021 Broken Access Control",
|
||||
"cwe": "CWE-321",
|
||||
"description": "Private key embedded in source code",
|
||||
"recommendation": "Never commit private keys. Use secure key management",
|
||||
},
|
||||
# A02:2021 – Cryptographic Failures
|
||||
{
|
||||
"id": "SEC003",
|
||||
"name": "Weak Crypto Algorithm",
|
||||
"pattern": r"(?i)\b(md5|sha1)\s*\(",
|
||||
"severity": "MEDIUM",
|
||||
"category": "A02:2021 Cryptographic Failures",
|
||||
"cwe": "CWE-328",
|
||||
"description": "Use of weak cryptographic hash function",
|
||||
"recommendation": "Use SHA-256 or stronger hashing algorithms",
|
||||
},
|
||||
{
|
||||
"id": "SEC004",
|
||||
"name": "Insecure Random",
|
||||
"pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
|
||||
"severity": "MEDIUM",
|
||||
"category": "A02:2021 Cryptographic Failures",
|
||||
"cwe": "CWE-330",
|
||||
"description": "Use of non-cryptographic random number generator for security purposes",
|
||||
"recommendation": "Use secrets module or os.urandom() for security-critical randomness",
|
||||
},
|
||||
# A03:2021 – Injection
|
||||
{
|
||||
"id": "SEC005",
|
||||
"name": "SQL Injection",
|
||||
"pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
|
||||
"severity": "HIGH",
|
||||
"category": "A03:2021 Injection",
|
||||
"cwe": "CWE-89",
|
||||
"description": "Potential SQL injection through string formatting",
|
||||
"recommendation": "Use parameterized queries with placeholders",
|
||||
},
|
||||
{
|
||||
"id": "SEC006",
|
||||
"name": "Command Injection",
|
||||
"pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
|
||||
"severity": "HIGH",
|
||||
"category": "A03:2021 Injection",
|
||||
"cwe": "CWE-78",
|
||||
"description": "Potential command injection through string concatenation",
|
||||
"recommendation": "Use subprocess with shell=False and pass arguments as list",
|
||||
},
|
||||
{
|
||||
"id": "SEC007",
|
||||
"name": "Eval Usage",
|
||||
"pattern": r"\beval\s*\(",
|
||||
"severity": "HIGH",
|
||||
"category": "A03:2021 Injection",
|
||||
"cwe": "CWE-95",
|
||||
"description": "Use of eval() can lead to code injection",
|
||||
"recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
|
||||
},
|
||||
{
|
||||
"id": "SEC008",
|
||||
"name": "XSS Risk",
|
||||
"pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
|
||||
"severity": "MEDIUM",
|
||||
"category": "A03:2021 Injection",
|
||||
"cwe": "CWE-79",
|
||||
"description": "Direct DOM manipulation may allow XSS",
|
||||
"recommendation": "Use textContent or proper sanitization libraries",
|
||||
},
|
||||
# A04:2021 – Insecure Design
|
||||
{
|
||||
"id": "SEC009",
|
||||
"name": "Debug Mode",
|
||||
"pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
|
||||
"severity": "MEDIUM",
|
||||
"category": "A04:2021 Insecure Design",
|
||||
"cwe": "CWE-489",
|
||||
"description": "Debug mode enabled in code",
|
||||
"recommendation": "Ensure debug mode is disabled in production",
|
||||
},
|
||||
# A05:2021 – Security Misconfiguration
|
||||
{
|
||||
"id": "SEC010",
|
||||
"name": "CORS Wildcard",
|
||||
"pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
|
||||
"severity": "MEDIUM",
|
||||
"category": "A05:2021 Security Misconfiguration",
|
||||
"cwe": "CWE-942",
|
||||
"description": "CORS configured to allow all origins",
|
||||
"recommendation": "Specify allowed origins explicitly",
|
||||
},
|
||||
{
|
||||
"id": "SEC011",
|
||||
"name": "SSL Verification Disabled",
|
||||
"pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
|
||||
"severity": "HIGH",
|
||||
"category": "A05:2021 Security Misconfiguration",
|
||||
"cwe": "CWE-295",
|
||||
"description": "SSL certificate verification disabled",
|
||||
"recommendation": "Always verify SSL certificates in production",
|
||||
},
|
||||
# A07:2021 – Identification and Authentication Failures
|
||||
{
|
||||
"id": "SEC012",
|
||||
"name": "Hardcoded JWT Secret",
|
||||
"pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
|
||||
"severity": "HIGH",
|
||||
"category": "A07:2021 Authentication Failures",
|
||||
"cwe": "CWE-798",
|
||||
"description": "JWT secret hardcoded in source code",
|
||||
"recommendation": "Use environment variables for JWT secrets",
|
||||
},
|
||||
# A08:2021 – Software and Data Integrity Failures
|
||||
{
|
||||
"id": "SEC013",
|
||||
"name": "Pickle Usage",
|
||||
"pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
|
||||
"severity": "MEDIUM",
|
||||
"category": "A08:2021 Integrity Failures",
|
||||
"cwe": "CWE-502",
|
||||
"description": "Pickle can execute arbitrary code during deserialization",
|
||||
"recommendation": "Use JSON or other safe serialization formats",
|
||||
},
|
||||
# A09:2021 – Security Logging and Monitoring Failures
|
||||
{
|
||||
"id": "SEC014",
|
||||
"name": "Sensitive Data Logging",
|
||||
"pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
|
||||
"severity": "MEDIUM",
|
||||
"category": "A09:2021 Logging Failures",
|
||||
"cwe": "CWE-532",
|
||||
"description": "Potentially logging sensitive information",
|
||||
"recommendation": "Never log passwords, tokens, or secrets",
|
||||
},
|
||||
# A10:2021 – Server-Side Request Forgery
|
||||
{
|
||||
"id": "SEC015",
|
||||
"name": "SSRF Risk",
|
||||
"pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
|
||||
"severity": "MEDIUM",
|
||||
"category": "A10:2021 SSRF",
|
||||
"cwe": "CWE-918",
|
||||
"description": "URL constructed from user input may allow SSRF",
|
||||
"recommendation": "Validate and sanitize URLs, use allowlists",
|
||||
},
|
||||
# Additional common issues
|
||||
{
|
||||
"id": "SEC016",
|
||||
"name": "Hardcoded IP Address",
|
||||
"pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
|
||||
"severity": "LOW",
|
||||
"category": "Configuration",
|
||||
"cwe": "CWE-547",
|
||||
"description": "Hardcoded IP address found",
|
||||
"recommendation": "Use configuration files or environment variables for IP addresses",
|
||||
},
|
||||
{
|
||||
"id": "SEC017",
|
||||
"name": "TODO/FIXME Security",
|
||||
"pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
|
||||
"severity": "MEDIUM",
|
||||
"category": "Code Quality",
|
||||
"cwe": None,
|
||||
"description": "Security-related TODO/FIXME comment found",
|
||||
"recommendation": "Address security-related TODO items before deployment",
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, rules_file: str | None = None):
|
||||
"""Initialize scanner with rules.
|
||||
|
||||
Args:
|
||||
rules_file: Optional path to custom rules YAML file.
|
||||
"""
|
||||
self.rules = self.DEFAULT_RULES.copy()
|
||||
|
||||
if rules_file and os.path.exists(rules_file):
|
||||
try:
|
||||
with open(rules_file) as f:
|
||||
custom_rules = yaml.safe_load(f)
|
||||
if custom_rules and "rules" in custom_rules:
|
||||
self.rules.extend(custom_rules["rules"])
|
||||
except Exception:
|
||||
pass # Use defaults if custom rules fail to load
|
||||
|
||||
# Compile patterns for efficiency
|
||||
self._compiled_rules = []
|
||||
for rule in self.rules:
|
||||
try:
|
||||
self._compiled_rules.append(
|
||||
{**rule, "_pattern": re.compile(rule["pattern"])}
|
||||
)
|
||||
except re.error:
|
||||
pass # Skip invalid patterns
|
||||
|
||||
def scan_content(
|
||||
self,
|
||||
content: str,
|
||||
filename: str,
|
||||
) -> Iterator[SecurityFinding]:
|
||||
"""Scan content for security issues.
|
||||
|
||||
Args:
|
||||
content: File content to scan.
|
||||
filename: Name of the file (for reporting).
|
||||
|
||||
Yields:
|
||||
SecurityFinding for each detected issue.
|
||||
"""
|
||||
lines = content.splitlines()
|
||||
|
||||
for line_num, line in enumerate(lines, 1):
|
||||
for rule in self._compiled_rules:
|
||||
if rule["_pattern"].search(line):
|
||||
yield SecurityFinding(
|
||||
rule_id=rule["id"],
|
||||
rule_name=rule["name"],
|
||||
severity=rule["severity"],
|
||||
category=rule["category"],
|
||||
file=filename,
|
||||
line=line_num,
|
||||
code_snippet=line.strip()[:120],
|
||||
description=rule["description"],
|
||||
recommendation=rule["recommendation"],
|
||||
cwe=rule.get("cwe"),
|
||||
)
|
||||
|
||||
def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
|
||||
"""Scan a git diff for security issues.
|
||||
|
||||
Only scans added lines (lines starting with +).
|
||||
|
||||
Args:
|
||||
diff: Git diff content.
|
||||
|
||||
Yields:
|
||||
SecurityFinding for each detected issue.
|
||||
"""
|
||||
current_file = None
|
||||
current_line = 0
|
||||
|
||||
for line in diff.splitlines():
|
||||
# Track current file
|
||||
if line.startswith("diff --git"):
|
||||
match = re.search(r"b/(.+)$", line)
|
||||
if match:
|
||||
current_file = match.group(1)
|
||||
current_line = 0
|
||||
# Track line numbers
|
||||
elif line.startswith("@@"):
|
||||
match = re.search(r"\+(\d+)", line)
|
||||
if match:
|
||||
current_line = int(match.group(1)) - 1
|
||||
# Check added lines
|
||||
elif line.startswith("+") and not line.startswith("+++"):
|
||||
current_line += 1
|
||||
for finding in self.scan_content(line[1:], current_file or "unknown"):
|
||||
finding.line = current_line
|
||||
yield finding
|
||||
elif not line.startswith("-"):
|
||||
current_line += 1
|
||||
|
||||
def get_summary(self, findings: list[SecurityFinding]) -> dict:
|
||||
"""Get summary statistics for findings.
|
||||
|
||||
Args:
|
||||
findings: List of security findings.
|
||||
|
||||
Returns:
|
||||
Summary dictionary with counts by severity and category.
|
||||
"""
|
||||
summary = {
|
||||
"total": len(findings),
|
||||
"by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0},
|
||||
"by_category": {},
|
||||
}
|
||||
|
||||
for finding in findings:
|
||||
summary["by_severity"][finding.severity] = (
|
||||
summary["by_severity"].get(finding.severity, 0) + 1
|
||||
)
|
||||
summary["by_category"][finding.category] = (
|
||||
summary["by_category"].get(finding.category, 0) + 1
|
||||
)
|
||||
|
||||
return summary
|
||||
Reference in New Issue
Block a user