first commit

2025-12-21 13:42:30 +01:00
parent 823b825acb
commit f9b24fe248
47 changed files with 8222 additions and 1 deletions
@@ -0,0 +1,9 @@
+"""Security Scanning Package
+
+This package contains security scanning utilities for
+detecting vulnerabilities in code.
+
+``SecurityScanner`` is re-exported here from
+``security.security_scanner`` and is the only name listed in ``__all__``.
+"""
+
+from security.security_scanner import SecurityScanner
+
+# Names made available by a star-import of this package.
+__all__ = ["SecurityScanner"]
@@ -0,0 +1,335 @@
+"""Security Scanner
+
+Pattern-based security vulnerability detection for code analysis.
+Covers OWASP Top 10 and common security anti-patterns.
+"""
+
+import logging
+import os
+import re
+from dataclasses import dataclass
+from typing import Iterator
+
+import yaml
+
+
+@dataclass
+class SecurityFinding:
+    """A single security finding."""
+
+    rule_id: str
+    rule_name: str
+    severity: str  # HIGH, MEDIUM, LOW
+    category: str  # OWASP category
+    file: str
+    line: int
+    code_snippet: str
+    description: str
+    recommendation: str
+    cwe: str | None = None  # CWE reference
+
+
+class SecurityScanner:
+    """Security scanner using pattern matching and rules."""
+
+    # Default rules covering OWASP Top 10
+    DEFAULT_RULES = [
+        # A01:2021 – Broken Access Control
+        {
+            "id": "SEC001",
+            "name": "Hardcoded Credentials",
+            "pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
+            "severity": "HIGH",
+            "category": "A01:2021 Broken Access Control",
+            "cwe": "CWE-798",
+            "description": "Hardcoded credentials detected in source code",
+            "recommendation": "Use environment variables or a secrets management system",
+        },
+        {
+            "id": "SEC002",
+            "name": "Exposed Private Key",
+            "pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
+            "severity": "HIGH",
+            "category": "A01:2021 Broken Access Control",
+            "cwe": "CWE-321",
+            "description": "Private key embedded in source code",
+            "recommendation": "Never commit private keys. Use secure key management",
+        },
+        # A02:2021 – Cryptographic Failures
+        {
+            "id": "SEC003",
+            "name": "Weak Crypto Algorithm",
+            "pattern": r"(?i)\b(md5|sha1)\s*\(",
+            "severity": "MEDIUM",
+            "category": "A02:2021 Cryptographic Failures",
+            "cwe": "CWE-328",
+            "description": "Use of weak cryptographic hash function",
+            "recommendation": "Use SHA-256 or stronger hashing algorithms",
+        },
+        {
+            "id": "SEC004",
+            "name": "Insecure Random",
+            "pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
+            "severity": "MEDIUM",
+            "category": "A02:2021 Cryptographic Failures",
+            "cwe": "CWE-330",
+            "description": "Use of non-cryptographic random number generator for security purposes",
+            "recommendation": "Use secrets module or os.urandom() for security-critical randomness",
+        },
+        # A03:2021 – Injection
+        {
+            "id": "SEC005",
+            "name": "SQL Injection",
+            "pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
+            "severity": "HIGH",
+            "category": "A03:2021 Injection",
+            "cwe": "CWE-89",
+            "description": "Potential SQL injection through string formatting",
+            "recommendation": "Use parameterized queries with placeholders",
+        },
+        {
+            "id": "SEC006",
+            "name": "Command Injection",
+            "pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
+            "severity": "HIGH",
+            "category": "A03:2021 Injection",
+            "cwe": "CWE-78",
+            "description": "Potential command injection through string concatenation",
+            "recommendation": "Use subprocess with shell=False and pass arguments as list",
+        },
+        {
+            "id": "SEC007",
+            "name": "Eval Usage",
+            "pattern": r"\beval\s*\(",
+            "severity": "HIGH",
+            "category": "A03:2021 Injection",
+            "cwe": "CWE-95",
+            "description": "Use of eval() can lead to code injection",
+            "recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
+        },
+        {
+            "id": "SEC008",
+            "name": "XSS Risk",
+            "pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
+            "severity": "MEDIUM",
+            "category": "A03:2021 Injection",
+            "cwe": "CWE-79",
+            "description": "Direct DOM manipulation may allow XSS",
+            "recommendation": "Use textContent or proper sanitization libraries",
+        },
+        # A04:2021 – Insecure Design
+        {
+            "id": "SEC009",
+            "name": "Debug Mode",
+            "pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
+            "severity": "MEDIUM",
+            "category": "A04:2021 Insecure Design",
+            "cwe": "CWE-489",
+            "description": "Debug mode enabled in code",
+            "recommendation": "Ensure debug mode is disabled in production",
+        },
+        # A05:2021 – Security Misconfiguration
+        {
+            "id": "SEC010",
+            "name": "CORS Wildcard",
+            "pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
+            "severity": "MEDIUM",
+            "category": "A05:2021 Security Misconfiguration",
+            "cwe": "CWE-942",
+            "description": "CORS configured to allow all origins",
+            "recommendation": "Specify allowed origins explicitly",
+        },
+        {
+            "id": "SEC011",
+            "name": "SSL Verification Disabled",
+            "pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
+            "severity": "HIGH",
+            "category": "A05:2021 Security Misconfiguration",
+            "cwe": "CWE-295",
+            "description": "SSL certificate verification disabled",
+            "recommendation": "Always verify SSL certificates in production",
+        },
+        # A07:2021 – Identification and Authentication Failures
+        {
+            "id": "SEC012",
+            "name": "Hardcoded JWT Secret",
+            "pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
+            "severity": "HIGH",
+            "category": "A07:2021 Authentication Failures",
+            "cwe": "CWE-798",
+            "description": "JWT secret hardcoded in source code",
+            "recommendation": "Use environment variables for JWT secrets",
+        },
+        # A08:2021 – Software and Data Integrity Failures
+        {
+            "id": "SEC013",
+            "name": "Pickle Usage",
+            "pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
+            "severity": "MEDIUM",
+            "category": "A08:2021 Integrity Failures",
+            "cwe": "CWE-502",
+            "description": "Pickle can execute arbitrary code during deserialization",
+            "recommendation": "Use JSON or other safe serialization formats",
+        },
+        # A09:2021 – Security Logging and Monitoring Failures
+        {
+            "id": "SEC014",
+            "name": "Sensitive Data Logging",
+            "pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
+            "severity": "MEDIUM",
+            "category": "A09:2021 Logging Failures",
+            "cwe": "CWE-532",
+            "description": "Potentially logging sensitive information",
+            "recommendation": "Never log passwords, tokens, or secrets",
+        },
+        # A10:2021 – Server-Side Request Forgery
+        {
+            "id": "SEC015",
+            "name": "SSRF Risk",
+            "pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
+            "severity": "MEDIUM",
+            "category": "A10:2021 SSRF",
+            "cwe": "CWE-918",
+            "description": "URL constructed from user input may allow SSRF",
+            "recommendation": "Validate and sanitize URLs, use allowlists",
+        },
+        # Additional common issues
+        {
+            "id": "SEC016",
+            "name": "Hardcoded IP Address",
+            "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
+            "severity": "LOW",
+            "category": "Configuration",
+            "cwe": "CWE-547",
+            "description": "Hardcoded IP address found",
+            "recommendation": "Use configuration files or environment variables for IP addresses",
+        },
+        {
+            "id": "SEC017",
+            "name": "TODO/FIXME Security",
+            "pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
+            "severity": "MEDIUM",
+            "category": "Code Quality",
+            "cwe": None,
+            "description": "Security-related TODO/FIXME comment found",
+            "recommendation": "Address security-related TODO items before deployment",
+        },
+    ]
+
+    def __init__(self, rules_file: str | None = None):
+        """Initialize scanner with rules.
+
+        Args:
+            rules_file: Optional path to custom rules YAML file.
+        """
+        self.rules = self.DEFAULT_RULES.copy()
+
+        if rules_file and os.path.exists(rules_file):
+            try:
+                with open(rules_file) as f:
+                    custom_rules = yaml.safe_load(f)
+                    if custom_rules and "rules" in custom_rules:
+                        self.rules.extend(custom_rules["rules"])
+            except Exception:
+                pass  # Use defaults if custom rules fail to load
+
+        # Compile patterns for efficiency
+        self._compiled_rules = []
+        for rule in self.rules:
+            try:
+                self._compiled_rules.append(
+                    {**rule, "_pattern": re.compile(rule["pattern"])}
+                )
+            except re.error:
+                pass  # Skip invalid patterns
+
+    def scan_content(
+        self,
+        content: str,
+        filename: str,
+    ) -> Iterator[SecurityFinding]:
+        """Scan content for security issues.
+
+        Args:
+            content: File content to scan.
+            filename: Name of the file (for reporting).
+
+        Yields:
+            SecurityFinding for each detected issue.
+        """
+        lines = content.splitlines()
+
+        for line_num, line in enumerate(lines, 1):
+            for rule in self._compiled_rules:
+                if rule["_pattern"].search(line):
+                    yield SecurityFinding(
+                        rule_id=rule["id"],
+                        rule_name=rule["name"],
+                        severity=rule["severity"],
+                        category=rule["category"],
+                        file=filename,
+                        line=line_num,
+                        code_snippet=line.strip()[:120],
+                        description=rule["description"],
+                        recommendation=rule["recommendation"],
+                        cwe=rule.get("cwe"),
+                    )
+
+    def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
+        """Scan a git diff for security issues.
+
+        Only scans added lines (lines starting with +).
+
+        Args:
+            diff: Git diff content.
+
+        Yields:
+            SecurityFinding for each detected issue.
+        """
+        current_file = None
+        current_line = 0
+
+        for line in diff.splitlines():
+            # Track current file
+            if line.startswith("diff --git"):
+                match = re.search(r"b/(.+)$", line)
+                if match:
+                    current_file = match.group(1)
+                    current_line = 0
+            # Track line numbers
+            elif line.startswith("@@"):
+                match = re.search(r"\+(\d+)", line)
+                if match:
+                    current_line = int(match.group(1)) - 1
+            # Check added lines
+            elif line.startswith("+") and not line.startswith("+++"):
+                current_line += 1
+                for finding in self.scan_content(line[1:], current_file or "unknown"):
+                    finding.line = current_line
+                    yield finding
+            elif not line.startswith("-"):
+                current_line += 1
+
+    def get_summary(self, findings: list[SecurityFinding]) -> dict:
+        """Get summary statistics for findings.
+
+        Args:
+            findings: List of security findings.
+
+        Returns:
+            Summary dictionary with counts by severity and category.
+        """
+        summary = {
+            "total": len(findings),
+            "by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0},
+            "by_category": {},
+        }
+
+        for finding in findings:
+            summary["by_severity"][finding.severity] = (
+                summary["by_severity"].get(finding.severity, 0) + 1
+            )
+            summary["by_category"][finding.category] = (
+                summary["by_category"].get(finding.category, 0) + 1
+            )
+
+        return summary