just why not

2026-01-07 21:19:46 +01:00
parent a1fe47cdf4
commit e8d28225e0
24 changed files with 6431 additions and 250 deletions
@@ -0,0 +1,14 @@
+"""Compliance Module
+
+Provides audit trail, compliance reporting, and regulatory checks.
+"""
+
+from compliance.audit_trail import AuditEvent, AuditLogger, AuditTrail
+from compliance.codeowners import CodeownersChecker
+
+# Names exported by ``from compliance import *``; each one is imported above
+# from the audit_trail or codeowners submodule.
+__all__ = [
+    "AuditTrail",
+    "AuditLogger",
+    "AuditEvent",
+    "CodeownersChecker",
+]
@@ -0,0 +1,430 @@
+"""Audit Trail
+
+Provides comprehensive audit logging for compliance requirements.
+Supports HIPAA, SOC2, and other regulatory frameworks.
+"""
+
+import hashlib
+import json
+import logging
+import os
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+
+class AuditAction(Enum):
+    """Types of auditable actions.
+
+    Every member's value is a lowercase snake_case string. Members are grouped
+    below by the area of the system they describe.
+    """
+
+    # Review actions
+    REVIEW_STARTED = "review_started"
+    REVIEW_COMPLETED = "review_completed"
+    REVIEW_FAILED = "review_failed"
+
+    # Security actions
+    SECURITY_SCAN_STARTED = "security_scan_started"
+    SECURITY_SCAN_COMPLETED = "security_scan_completed"
+    SECURITY_FINDING_DETECTED = "security_finding_detected"
+    SECURITY_FINDING_RESOLVED = "security_finding_resolved"
+
+    # Comment actions
+    COMMENT_POSTED = "comment_posted"
+    COMMENT_UPDATED = "comment_updated"
+    COMMENT_DELETED = "comment_deleted"
+
+    # Label actions
+    LABEL_ADDED = "label_added"
+    LABEL_REMOVED = "label_removed"
+
+    # Configuration actions
+    CONFIG_LOADED = "config_loaded"
+    CONFIG_CHANGED = "config_changed"
+
+    # Access actions
+    API_CALL = "api_call"
+    AUTHENTICATION = "authentication"
+
+    # Approval actions
+    APPROVAL_GRANTED = "approval_granted"
+    APPROVAL_REVOKED = "approval_revoked"
+    CHANGES_REQUESTED = "changes_requested"
+
+
+@dataclass
+class AuditEvent:
+    """A single auditable event with a SHA-256 integrity checksum.
+
+    When no ``checksum`` is supplied, one is computed on construction from
+    every field except ``correlation_id`` and ``checksum`` itself.
+    """
+
+    action: AuditAction
+    timestamp: str
+    actor: str
+    resource_type: str
+    resource_id: str
+    repository: str
+    details: dict[str, Any] = field(default_factory=dict)
+    outcome: str = "success"
+    error: str | None = None
+    correlation_id: str | None = None
+    checksum: str | None = None
+
+    def __post_init__(self):
+        """Fill in the checksum unless the caller already provided one."""
+        if self.checksum:
+            return
+        self.checksum = self._calculate_checksum()
+
+    def _calculate_checksum(self) -> str:
+        """Return the SHA-256 hex digest of the event's canonical JSON form.
+
+        Returns:
+            Hex digest of the key-sorted JSON serialization of the event data.
+        """
+        # The action may be an enum member or an already-plain value.
+        action_value = self.action
+        if isinstance(action_value, AuditAction):
+            action_value = action_value.value
+
+        payload = {
+            "action": action_value,
+            "timestamp": self.timestamp,
+            "actor": self.actor,
+            "resource_type": self.resource_type,
+            "resource_id": self.resource_id,
+            "repository": self.repository,
+            "details": self.details,
+            "outcome": self.outcome,
+            "error": self.error,
+        }
+        # Sorted keys make the digest independent of dict insertion order.
+        canonical = json.dumps(payload, sort_keys=True)
+        digest = hashlib.sha256(canonical.encode())
+        return digest.hexdigest()
+
+    def to_dict(self) -> dict:
+        """Return the event as a plain dictionary.
+
+        Returns:
+            All dataclass fields, with an enum ``action`` replaced by its value.
+        """
+        serialized = asdict(self)
+        if isinstance(self.action, AuditAction):
+            serialized["action"] = self.action.value
+        return serialized
+
+    def to_json(self) -> str:
+        """Return the event serialized as a JSON string."""
+        as_mapping = self.to_dict()
+        return json.dumps(as_mapping)
+
+
+class AuditLogger:
+    """Logger that writes audit events as JSON lines.
+
+    All instances share the ``logging`` logger named ``"audit"``. Creating a
+    new instance removes and closes the handlers installed by any earlier
+    instance, so the most recently created one decides where events go.
+    """
+
+    def __init__(
+        self,
+        log_file: str | None = None,
+        log_to_stdout: bool = False,
+        log_level: str = "INFO",
+    ):
+        """Initialize audit logger.
+
+        Args:
+            log_file: Path to audit log file. Missing parent directories are
+                created.
+            log_to_stdout: Also log to stdout, prefixed with ``[AUDIT]``.
+            log_level: Logging level name. Unknown names fall back to INFO.
+                Events are emitted at INFO, so a stricter level drops them.
+        """
+        # Function-scope import: ``sys`` is only needed for the stdout handler.
+        import sys
+
+        self.log_file = log_file
+        self.log_to_stdout = log_to_stdout
+        self.logger = logging.getLogger("audit")
+
+        level = getattr(logging, log_level.upper(), logging.INFO)
+        if not isinstance(level, int):
+            # The name resolved to some other attribute of the logging module
+            # (for example "BASIC_FORMAT"), which setLevel() would reject.
+            level = logging.INFO
+        self.logger.setLevel(level)
+
+        # Remove existing handlers and close them so that a previously opened
+        # log file does not stay open for the lifetime of the process.
+        for stale_handler in list(self.logger.handlers):
+            self.logger.removeHandler(stale_handler)
+            stale_handler.close()
+
+        # Add file handler if specified
+        if log_file:
+            log_dir = os.path.dirname(log_file)
+            if log_dir:
+                os.makedirs(log_dir, exist_ok=True)
+            file_handler = logging.FileHandler(log_file, encoding="utf-8")
+            file_handler.setFormatter(
+                logging.Formatter("%(message)s")  # JSON lines format
+            )
+            self.logger.addHandler(file_handler)
+
+        # Add stdout handler if requested. The stream is passed explicitly
+        # because StreamHandler() without arguments writes to stderr.
+        if log_to_stdout:
+            stdout_handler = logging.StreamHandler(sys.stdout)
+            stdout_handler.setFormatter(logging.Formatter("[AUDIT] %(message)s"))
+            self.logger.addHandler(stdout_handler)
+
+    def log(self, event: AuditEvent):
+        """Log an audit event.
+
+        Args:
+            event: The audit event to log. It is serialized with
+                ``event.to_json()`` and emitted at INFO level.
+        """
+        self.logger.info(event.to_json())
+
+    def log_action(
+        self,
+        action: AuditAction,
+        actor: str,
+        resource_type: str,
+        resource_id: str,
+        repository: str,
+        details: dict | None = None,
+        outcome: str = "success",
+        error: str | None = None,
+        correlation_id: str | None = None,
+    ):
+        """Build an audit event stamped with the current UTC time and log it.
+
+        Args:
+            action: The action being performed.
+            actor: Who performed the action.
+            resource_type: Type of resource affected.
+            resource_id: ID of the resource.
+            repository: Repository context.
+            details: Additional details; ``None`` is stored as an empty dict.
+            outcome: success, failure, or partial.
+            error: Error message if failed.
+            correlation_id: ID to correlate related events.
+        """
+        event = AuditEvent(
+            action=action,
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            actor=actor,
+            resource_type=resource_type,
+            resource_id=resource_id,
+            repository=repository,
+            details=details or {},
+            outcome=outcome,
+            error=error,
+            correlation_id=correlation_id,
+        )
+        self.log(event)
+
+
+class AuditTrail:
+    """High-level audit trail management.
+
+    Wraps an ``AuditLogger`` configured from ``config["compliance"]["audit"]``,
+    offers convenience methods for common events, and can build a report by
+    reading the JSON-lines log file back.
+    """
+
+    def __init__(self, config: dict):
+        """Initialize audit trail.
+
+        Args:
+            config: Configuration dictionary. Settings are read from
+                ``config["compliance"]["audit"]``: ``enabled`` (default
+                False), ``log_file`` (default "audit.log"), ``log_to_stdout``
+                (default False) and ``retention_days`` (default 90).
+        """
+        self.config = config
+        compliance_config = config.get("compliance", {})
+        audit_config = compliance_config.get("audit", {})
+
+        self.enabled = audit_config.get("enabled", False)
+        self.log_file = audit_config.get("log_file", "audit.log")
+        self.log_to_stdout = audit_config.get("log_to_stdout", False)
+        # Stored only; nothing in this class prunes the log by age.
+        self.retention_days = audit_config.get("retention_days", 90)
+
+        # A logger (and therefore the log file) is only created when enabled.
+        if self.enabled:
+            self.logger = AuditLogger(
+                log_file=self.log_file,
+                log_to_stdout=self.log_to_stdout,
+            )
+        else:
+            self.logger = None
+
+        self._correlation_id = None
+
+    def set_correlation_id(self, correlation_id: str):
+        """Set correlation ID for subsequent events.
+
+        Args:
+            correlation_id: ID to correlate related events.
+        """
+        self._correlation_id = correlation_id
+
+    def log(
+        self,
+        action: AuditAction,
+        actor: str,
+        resource_type: str,
+        resource_id: str,
+        repository: str,
+        details: dict | None = None,
+        outcome: str = "success",
+        error: str | None = None,
+    ):
+        """Log an audit event; does nothing when auditing is disabled.
+
+        Args:
+            action: The action being performed.
+            actor: Who performed the action.
+            resource_type: Type of resource (pr, issue, comment, etc).
+            resource_id: ID of the resource.
+            repository: Repository (owner/repo).
+            details: Additional details.
+            outcome: success, failure, or partial.
+            error: Error message if failed.
+        """
+        if not self.enabled or not self.logger:
+            return
+
+        self.logger.log_action(
+            action=action,
+            actor=actor,
+            resource_type=resource_type,
+            resource_id=resource_id,
+            repository=repository,
+            details=details,
+            outcome=outcome,
+            error=error,
+            correlation_id=self._correlation_id,
+        )
+
+    def log_review_started(
+        self,
+        repository: str,
+        pr_number: int,
+        reviewer: str = "openrabbit",
+    ):
+        """Log that a review has started on a pull request."""
+        self.log(
+            action=AuditAction.REVIEW_STARTED,
+            actor=reviewer,
+            resource_type="pull_request",
+            resource_id=str(pr_number),
+            repository=repository,
+        )
+
+    def log_review_completed(
+        self,
+        repository: str,
+        pr_number: int,
+        recommendation: str,
+        findings_count: int,
+        reviewer: str = "openrabbit",
+    ):
+        """Log that a review has completed, with its recommendation and finding count."""
+        self.log(
+            action=AuditAction.REVIEW_COMPLETED,
+            actor=reviewer,
+            resource_type="pull_request",
+            resource_id=str(pr_number),
+            repository=repository,
+            details={
+                "recommendation": recommendation,
+                "findings_count": findings_count,
+            },
+        )
+
+    def log_security_finding(
+        self,
+        repository: str,
+        pr_number: int,
+        finding: dict,
+        scanner: str = "openrabbit",
+    ):
+        """Log a security finding.
+
+        Only the ``severity``, ``category``, ``file``, ``line`` and ``cwe``
+        keys of ``finding`` are recorded; missing keys are stored as None.
+        """
+        self.log(
+            action=AuditAction.SECURITY_FINDING_DETECTED,
+            actor=scanner,
+            resource_type="pull_request",
+            resource_id=str(pr_number),
+            repository=repository,
+            details={
+                "severity": finding.get("severity"),
+                "category": finding.get("category"),
+                "file": finding.get("file"),
+                "line": finding.get("line"),
+                "cwe": finding.get("cwe"),
+            },
+        )
+
+    def log_approval(
+        self,
+        repository: str,
+        pr_number: int,
+        approver: str,
+        approval_type: str = "ai",
+    ):
+        """Log an approval action on a pull request."""
+        self.log(
+            action=AuditAction.APPROVAL_GRANTED,
+            actor=approver,
+            resource_type="pull_request",
+            resource_id=str(pr_number),
+            repository=repository,
+            details={"approval_type": approval_type},
+        )
+
+    def log_changes_requested(
+        self,
+        repository: str,
+        pr_number: int,
+        requester: str,
+        reason: str | None = None,
+    ):
+        """Log a changes requested action; ``reason`` is recorded only when given."""
+        self.log(
+            action=AuditAction.CHANGES_REQUESTED,
+            actor=requester,
+            resource_type="pull_request",
+            resource_id=str(pr_number),
+            repository=repository,
+            details={"reason": reason} if reason else {},
+        )
+
+    @staticmethod
+    def _as_utc_aware(moment: datetime | None) -> datetime | None:
+        """Return ``moment`` as timezone-aware, treating naive values as UTC."""
+        if moment is not None and moment.tzinfo is None:
+            return moment.replace(tzinfo=timezone.utc)
+        return moment
+
+    @classmethod
+    def _parse_event_line(cls, line: str) -> tuple[dict, datetime] | None:
+        """Parse one log line into ``(event, event_time)``, or None if unusable."""
+        try:
+            event = json.loads(line.strip())
+            event_time = datetime.fromisoformat(
+                event["timestamp"].replace("Z", "+00:00")
+            )
+        except (ValueError, KeyError, TypeError, AttributeError):
+            # ValueError: invalid JSON or an unparseable timestamp.
+            # KeyError: no "timestamp" field.
+            # TypeError/AttributeError: the line is valid JSON but not an
+            # object, or its timestamp is not a string.
+            return None
+        return event, cls._as_utc_aware(event_time)
+
+    def generate_report(
+        self,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+        repository: str | None = None,
+    ) -> dict:
+        """Generate an audit report from the log file.
+
+        Lines that cannot be parsed as an event with a valid timestamp are
+        skipped. A missing log file yields a report with zero events, in the
+        same shape as any other report.
+
+        Args:
+            start_date: Start of reporting period (inclusive). A naive
+                datetime is interpreted as UTC, matching the UTC timestamps
+                written by the logger.
+            end_date: End of reporting period (inclusive). A naive datetime
+                is interpreted as UTC.
+            repository: Filter by repository.
+
+        Returns:
+            Report dictionary with ``events``, ``statistics`` and ``period``.
+        """
+        # Log timestamps are timezone-aware; comparing them with a naive bound
+        # would raise TypeError, so normalize the bounds first.
+        period_start = self._as_utc_aware(start_date)
+        period_end = self._as_utc_aware(end_date)
+
+        events = []
+        if self.log_file and os.path.exists(self.log_file):
+            with open(self.log_file, encoding="utf-8") as f:
+                for line in f:
+                    parsed = self._parse_event_line(line)
+                    if parsed is None:
+                        continue
+                    event, event_time = parsed
+
+                    # Apply filters
+                    if period_start and event_time < period_start:
+                        continue
+                    if period_end and event_time > period_end:
+                        continue
+                    if repository and event.get("repository") != repository:
+                        continue
+
+                    events.append(event)
+
+        # Calculate statistics
+        action_counts = {}
+        # Only these three outcomes are tallied; other values are ignored.
+        outcome_counts = {"success": 0, "failure": 0, "partial": 0}
+        security_findings = 0
+
+        for event in events:
+            action = event.get("action", "unknown")
+            action_counts[action] = action_counts.get(action, 0) + 1
+
+            outcome = event.get("outcome", "success")
+            if outcome in outcome_counts:
+                outcome_counts[outcome] += 1
+
+            if action == "security_finding_detected":
+                security_findings += 1
+
+        return {
+            "events": events,
+            "statistics": {
+                "total_events": len(events),
+                "action_counts": action_counts,
+                "outcome_counts": outcome_counts,
+                "security_findings": security_findings,
+            },
+            "period": {
+                "start": start_date.isoformat() if start_date else None,
+                "end": end_date.isoformat() if end_date else None,
+            },
+        }
@@ -0,0 +1,314 @@
+"""CODEOWNERS Checker
+
+Parses and validates CODEOWNERS files for compliance enforcement.
+"""
+
+import fnmatch
+import logging
+import os
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class CodeOwnerRule:
+    """A CODEOWNERS rule: a path pattern and the owners assigned to it.
+
+    Patterns follow gitignore-style matching as used by CODEOWNERS files:
+
+    * a leading ``/`` or a ``/`` in the middle anchors the pattern to the
+      repository root; a single name (``*.js``, ``docs/``) matches at any depth;
+    * a trailing ``/`` matches everything below that directory;
+    * ``*`` and ``?`` never cross a ``/``; ``**`` spans any number of
+      directories;
+    * a pattern whose last segment is exactly ``*`` matches direct children
+      only, while any other last segment also matches everything beneath it.
+
+    Only ``*``, ``?`` and ``**`` are wildcards; every other character,
+    including ``[`` and ``]``, is matched literally.
+    """
+
+    pattern: str
+    owners: list[str]
+    line_number: int
+    is_negation: bool = False
+
+    def matches(self, path: str) -> bool:
+        """Check if a path matches this rule.
+
+        Args:
+            path: File path to check, relative to the repository root. A
+                leading ``/`` is ignored.
+
+        Returns:
+            True if the path matches.
+        """
+        regex = self._pattern_regex()
+        if regex is None:
+            return False
+        return re.fullmatch(regex, path.lstrip("/")) is not None
+
+    def _pattern_regex(self) -> str | None:
+        """Translate the pattern to a regular expression, or None if it is empty."""
+        anchored = self.pattern.startswith("/")
+        directory_only = self.pattern.endswith("/")
+        segments = [part for part in self.pattern.split("/") if part]
+        if not segments:
+            # "" and "/" match nothing.
+            return None
+
+        # A single unanchored name may match at any depth.
+        if not anchored and len(segments) == 1 and segments[0] != "**":
+            segments.insert(0, "**")
+        # "dir/" means everything underneath dir.
+        if directory_only:
+            segments.append("**")
+
+        # Adjacent "**" segments are equivalent to a single one.
+        collapsed: list[str] = []
+        for segment in segments:
+            if segment == "**" and collapsed and collapsed[-1] == "**":
+                continue
+            collapsed.append(segment)
+
+        last = len(collapsed) - 1
+        pieces: list[str] = []
+        need_separator = False
+        for index, segment in enumerate(collapsed):
+            if segment == "**":
+                if last == 0:
+                    pieces.append(".+")  # pattern is just "**"
+                elif index == 0:
+                    pieces.append("(?:.+/)?")  # any leading directories
+                elif index == last:
+                    pieces.append("/.*")  # anything below
+                else:
+                    pieces.append("(?:/.+)?")  # zero or more directories
+                continue
+
+            if need_separator:
+                pieces.append("/")
+            if segment == "*":
+                # A bare "*" segment stands for exactly one path component.
+                pieces.append("[^/]+")
+            else:
+                pieces.append(self._segment_regex(segment))
+                if index == last:
+                    # A trailing name may be a directory: match its contents.
+                    pieces.append("(?:/.*)?")
+            need_separator = True
+
+        return "".join(pieces)
+
+    @staticmethod
+    def _segment_regex(segment: str) -> str:
+        """Translate one path segment, escaping everything but ``*`` and ``?``."""
+        translated = []
+        for char in segment:
+            if char == "*":
+                translated.append("[^/]*")
+            elif char == "?":
+                translated.append("[^/]")
+            else:
+                translated.append(re.escape(char))
+        return "".join(translated)
+
+
+class CodeownersChecker:
+    """Checker for CODEOWNERS file compliance.
+
+    Rules are loaded from the first CODEOWNERS file found under ``repo_root``
+    (see ``CODEOWNERS_LOCATIONS``) or from a string via ``from_content``.
+    When several rules match a path, the last matching rule wins.
+    """
+
+    # Searched in order; the first existing file is used.
+    CODEOWNERS_LOCATIONS = [
+        "CODEOWNERS",
+        ".github/CODEOWNERS",
+        ".gitea/CODEOWNERS",
+        "docs/CODEOWNERS",
+    ]
+
+    def __init__(self, repo_root: str | None = None):
+        """Initialize CODEOWNERS checker.
+
+        Args:
+            repo_root: Repository root path. Defaults to the current working
+                directory.
+        """
+        self.repo_root = repo_root or os.getcwd()
+        self.rules: list[CodeOwnerRule] = []
+        self.codeowners_path: str | None = None
+        self.logger = logging.getLogger(__name__)
+
+        self._load_codeowners()
+
+    def _load_codeowners(self):
+        """Load the first CODEOWNERS file found in the repository, if any."""
+        for location in self.CODEOWNERS_LOCATIONS:
+            path = os.path.join(self.repo_root, location)
+            # isfile, not exists: a directory with that name cannot be parsed.
+            if os.path.isfile(path):
+                self.codeowners_path = path
+                self._parse_codeowners(path)
+                break
+
+    def _parse_codeowners(self, path: str):
+        """Parse a CODEOWNERS file and append its rules to ``self.rules``.
+
+        Args:
+            path: Path to CODEOWNERS file.
+        """
+        with open(path, encoding="utf-8") as f:
+            self._parse_lines(f)
+
+    def _parse_lines(self, lines):
+        """Append one rule per meaningful line; line numbers start at 1."""
+        for line_num, line in enumerate(lines, 1):
+            parsed = self._parse_line(line)
+            if parsed is None:
+                continue
+            pattern, owners, is_negation = parsed
+            self.rules.append(
+                CodeOwnerRule(
+                    pattern=pattern,
+                    owners=owners,
+                    line_number=line_num,
+                    is_negation=is_negation,
+                )
+            )
+
+    @staticmethod
+    def _parse_line(line: str) -> tuple[str, list[str], bool] | None:
+        """Split one CODEOWNERS line into ``(pattern, owners, is_negation)``.
+
+        Returns None for blank lines, comment lines, and a bare ``!``. A
+        pattern without owners is kept with an empty owner list, so that it
+        can unset the ownership assigned by an earlier rule.
+        """
+        tokens = line.split()
+        if not tokens or tokens[0].startswith("#"):
+            return None
+
+        pattern = tokens[0]
+        owners = []
+        for token in tokens[1:]:
+            if token.startswith("#"):
+                break  # the rest of the line is an inline comment
+            owners.append(token)
+
+        # Check for negation (optional syntax)
+        is_negation = pattern.startswith("!")
+        if is_negation:
+            pattern = pattern[1:]
+        if not pattern:
+            return None
+
+        return pattern, owners, is_negation
+
+    def get_owners(self, path: str) -> list[str]:
+        """Get owners for a file path.
+
+        Args:
+            path: File path to check.
+
+        Returns:
+            List of owner usernames/teams (a new list on every call). Empty
+            when no rule matches or the last matching rule clears ownership.
+        """
+        owners: list[str] = []
+
+        # Apply rules in order (later rules override earlier ones)
+        for rule in self.rules:
+            if rule.matches(path):
+                if rule.is_negation:
+                    owners = []  # Clear owners for negation
+                else:
+                    owners = rule.owners
+
+        # Copy so that callers cannot mutate the rule's own owner list.
+        return list(owners)
+
+    def get_owners_for_files(self, files: list[str]) -> dict[str, list[str]]:
+        """Get owners for multiple files.
+
+        Args:
+            files: List of file paths.
+
+        Returns:
+            Dict mapping file paths to owner lists.
+        """
+        return {f: self.get_owners(f) for f in files}
+
+    def get_required_reviewers(self, files: list[str]) -> set[str]:
+        """Get all required reviewers for a set of files.
+
+        Args:
+            files: List of file paths.
+
+        Returns:
+            Set of all required reviewer usernames/teams.
+        """
+        reviewers: set[str] = set()
+        for f in files:
+            reviewers.update(self.get_owners(f))
+        return reviewers
+
+    def check_approval(
+        self,
+        files: list[str],
+        approvers: list[str],
+    ) -> dict:
+        """Check if files have required approvals.
+
+        Every individual owner of every changed file must be among the
+        approvers. Team owners (names containing ``/``) cannot be resolved
+        here and are reported as pending without blocking approval.
+
+        Args:
+            files: List of changed files.
+            approvers: List of users who approved.
+
+        Returns:
+            Dict with approval status and missing approvers. All lists are
+            sorted and have any leading ``@`` removed.
+        """
+        # Normalize @ prefixes
+        required_normalized = {
+            r.lstrip("@") for r in self.get_required_reviewers(files)
+        }
+        approvers_normalized = {a.lstrip("@") for a in approvers}
+
+        missing = required_normalized - approvers_normalized
+
+        # Check for team approvals (simplified - actual implementation
+        # would need API calls to check team membership)
+        teams = {r for r in missing if "/" in r}
+        missing_users = missing - teams
+
+        return {
+            "approved": not missing_users,
+            "required_reviewers": sorted(required_normalized),
+            "actual_approvers": sorted(approvers_normalized),
+            "missing_approvers": sorted(missing_users),
+            "pending_teams": sorted(teams),
+        }
+
+    def get_coverage_report(self, files: list[str]) -> dict:
+        """Generate a coverage report for files.
+
+        Args:
+            files: List of file paths.
+
+        Returns:
+            Coverage report with owned and unowned files. The coverage
+            percentage is 0 when ``files`` is empty.
+        """
+        owned = []
+        unowned = []
+
+        for f in files:
+            owners = self.get_owners(f)
+            if owners:
+                owned.append({"file": f, "owners": owners})
+            else:
+                unowned.append(f)
+
+        return {
+            "total_files": len(files),
+            "owned_files": len(owned),
+            "unowned_files": len(unowned),
+            "coverage_percent": (len(owned) / len(files) * 100) if files else 0,
+            "owned": owned,
+            "unowned": unowned,
+        }
+
+    def validate_codeowners(self) -> dict:
+        """Validate the CODEOWNERS file.
+
+        Returns:
+            Validation result with warnings and errors. Without a CODEOWNERS
+            file the result is invalid and has no ``rules_count`` or
+            ``file_path`` entries.
+        """
+        if not self.codeowners_path:
+            return {
+                "valid": False,
+                "errors": ["No CODEOWNERS file found"],
+                "warnings": [],
+            }
+
+        errors: list[str] = []
+        warnings: list[str] = []
+
+        # An owner-less pattern is legal (it leaves matching files unowned),
+        # so it is reported as a warning rather than an error.
+        for rule in self.rules:
+            if not rule.owners and not rule.is_negation:
+                warnings.append(
+                    f"Line {rule.line_number}: Pattern '{rule.pattern}' has no owners"
+                )
+
+        # Check for invalid owner formats. "@user", "@org/team", "org/team"
+        # and e-mail addresses are accepted.
+        for rule in self.rules:
+            for owner in rule.owners:
+                if "@" not in owner and "/" not in owner:
+                    warnings.append(
+                        f"Line {rule.line_number}: Owner '{owner}' should start with @ or be a team (org/team)"
+                    )
+
+        # Check for duplicated patterns
+        patterns_seen: dict[str, int] = {}
+        for rule in self.rules:
+            if rule.pattern in patterns_seen:
+                warnings.append(
+                    f"Line {rule.line_number}: Pattern '{rule.pattern}' duplicates line {patterns_seen[rule.pattern]}"
+                )
+            patterns_seen[rule.pattern] = rule.line_number
+
+        return {
+            "valid": len(errors) == 0,
+            "errors": errors,
+            "warnings": warnings,
+            "rules_count": len(self.rules),
+            "file_path": self.codeowners_path,
+        }
+
+    @classmethod
+    def from_content(cls, content: str) -> "CodeownersChecker":
+        """Create checker from CODEOWNERS content string.
+
+        No file system access takes place; ``codeowners_path`` is set to the
+        placeholder ``"<string>"`` and ``repo_root`` to None.
+
+        Args:
+            content: CODEOWNERS file content.
+
+        Returns:
+            CodeownersChecker instance.
+        """
+        # Bypass __init__, which would search the file system.
+        checker = cls.__new__(cls)
+        checker.repo_root = None
+        checker.rules = []
+        checker.codeowners_path = "<string>"
+        checker.logger = logging.getLogger(__name__)
+
+        checker._parse_lines(content.split("\n"))
+
+        return checker