just why not
All checks were successful
AI Codebase Quality Review / ai-codebase-review (push) Successful in 39s

This commit is contained in:
2026-01-07 21:19:46 +01:00
parent a1fe47cdf4
commit e8d28225e0
24 changed files with 6431 additions and 250 deletions

View File

@@ -0,0 +1,14 @@
"""Compliance Module
Provides audit trail, compliance reporting, and regulatory checks.
"""
from compliance.audit_trail import AuditEvent, AuditLogger, AuditTrail
from compliance.codeowners import CodeownersChecker
# Public names re-exported by the compliance package; this list is what
# `from compliance import *` exposes.
__all__ = [
"AuditTrail",
"AuditLogger",
"AuditEvent",
"CodeownersChecker",
]

View File

@@ -0,0 +1,430 @@
"""Audit Trail
Provides comprehensive audit logging for compliance requirements.
Supports HIPAA, SOC2, and other regulatory frameworks.
"""
import hashlib
import json
import logging
import os
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any
class AuditAction(Enum):
    """Closed set of action kinds that an audit event can describe.

    Every member's value is a lowercase snake_case string identifying the
    action.
    """

    # -- Review lifecycle -------------------------------------------------
    REVIEW_STARTED = "review_started"
    REVIEW_COMPLETED = "review_completed"
    REVIEW_FAILED = "review_failed"

    # -- Security scanning ------------------------------------------------
    SECURITY_SCAN_STARTED = "security_scan_started"
    SECURITY_SCAN_COMPLETED = "security_scan_completed"
    SECURITY_FINDING_DETECTED = "security_finding_detected"
    SECURITY_FINDING_RESOLVED = "security_finding_resolved"

    # -- Comments ---------------------------------------------------------
    COMMENT_POSTED = "comment_posted"
    COMMENT_UPDATED = "comment_updated"
    COMMENT_DELETED = "comment_deleted"

    # -- Labels -----------------------------------------------------------
    LABEL_ADDED = "label_added"
    LABEL_REMOVED = "label_removed"

    # -- Configuration ----------------------------------------------------
    CONFIG_LOADED = "config_loaded"
    CONFIG_CHANGED = "config_changed"

    # -- Access -----------------------------------------------------------
    API_CALL = "api_call"
    AUTHENTICATION = "authentication"

    # -- Approvals --------------------------------------------------------
    APPROVAL_GRANTED = "approval_granted"
    APPROVAL_REVOKED = "approval_revoked"
    CHANGES_REQUESTED = "changes_requested"
@dataclass
class AuditEvent:
    """An auditable event carrying a SHA-256 checksum of its own content.

    The checksum is computed once on construction (unless one is supplied)
    and can be re-validated later with :meth:`verify_checksum`.
    """

    # Kind of action; an Enum member or its raw string value.
    action: AuditAction
    # Time of the event as a string (ISO-8601 when produced by AuditLogger).
    timestamp: str
    # Who performed the action.
    actor: str
    # Type and identifier of the affected resource.
    resource_type: str
    resource_id: str
    # Repository context of the event.
    repository: str
    # Free-form additional details.
    details: dict[str, Any] = field(default_factory=dict)
    outcome: str = "success"
    error: str | None = None
    # NOTE: correlation_id is not part of the checksummed payload.
    correlation_id: str | None = None
    checksum: str | None = None

    def __post_init__(self):
        """Fill in the checksum when the caller did not provide one."""
        if not self.checksum:
            self.checksum = self._calculate_checksum()

    def _action_value(self):
        """Return the serializable form of ``action`` (enum value or raw)."""
        if isinstance(self.action, Enum):
            return self.action.value
        return self.action

    def _calculate_checksum(self) -> str:
        """Calculate the SHA-256 checksum of the event data.

        Returns:
            Hex digest over a key-sorted JSON rendering of the event fields
            (excluding ``correlation_id`` and ``checksum``).
        """
        data = {
            "action": self._action_value(),
            "timestamp": self.timestamp,
            "actor": self.actor,
            "resource_type": self.resource_type,
            "resource_id": self.resource_id,
            "repository": self.repository,
            "details": self.details,
            "outcome": self.outcome,
            "error": self.error,
        }
        # default=str: details may hold values json cannot encode natively
        # (datetime, Path, ...). Audit logging must not crash the audited
        # operation, so such values are stringified instead of raising.
        json_str = json.dumps(data, sort_keys=True, default=str)
        return hashlib.sha256(json_str.encode()).hexdigest()

    def verify_checksum(self) -> bool:
        """Check that the stored checksum still matches the event content.

        Returns:
            True if recomputing the checksum yields the stored value.
        """
        return self.checksum == self._calculate_checksum()

    def to_dict(self) -> dict:
        """Convert the event to a dictionary with ``action`` as a plain value."""
        data = asdict(self)
        data["action"] = self._action_value()
        return data

    def to_json(self) -> str:
        """Convert the event to a single-line JSON string.

        Values json cannot encode natively are stringified (same policy as
        the checksum computation).
        """
        return json.dumps(self.to_dict(), default=str)
class AuditLogger:
    """Logger that writes audit events as JSON lines.

    All instances share the process-wide ``logging`` logger named "audit";
    constructing a new instance replaces (and closes) the handlers installed
    by a previous one.
    """

    def __init__(
        self,
        log_file: str | None = None,
        log_to_stdout: bool = False,
        log_level: str = "INFO",
    ):
        """Initialize audit logger.

        Args:
            log_file: Path to audit log file; parent directories are created.
            log_to_stdout: Also log to stdout, prefixed with "[AUDIT] ".
            log_level: Logging level name; unknown names fall back to INFO.
        """
        import sys  # local import: only needed to target stdout explicitly

        self.log_file = log_file
        self.log_to_stdout = log_to_stdout
        self.logger = logging.getLogger("audit")

        # getattr can return non-level attributes (e.g. "Formatter"); only
        # accept real integer levels.
        level = getattr(logging, log_level.upper(), None)
        if not isinstance(level, int):
            level = logging.INFO
        self.logger.setLevel(level)

        # Drop handlers from any previous instance, closing them so their
        # file descriptors are released rather than leaked.
        self.close()

        # Add file handler if specified
        if log_file:
            log_dir = os.path.dirname(log_file)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            file_handler = logging.FileHandler(log_file, encoding="utf-8")
            file_handler.setFormatter(
                logging.Formatter("%(message)s")  # JSON lines format
            )
            self.logger.addHandler(file_handler)

        # Add stdout handler if requested. StreamHandler() without an
        # argument writes to stderr, so the stream is passed explicitly.
        if log_to_stdout:
            stdout_handler = logging.StreamHandler(sys.stdout)
            stdout_handler.setFormatter(logging.Formatter("[AUDIT] %(message)s"))
            self.logger.addHandler(stdout_handler)

    def close(self):
        """Detach and close every handler on the shared audit logger."""
        for handler in list(self.logger.handlers):
            self.logger.removeHandler(handler)
            handler.close()

    def log(self, event: AuditEvent):
        """Log an audit event at INFO level.

        Args:
            event: The audit event to log; its ``to_json()`` output is the
                log message.
        """
        self.logger.info(event.to_json())

    def log_action(
        self,
        action: AuditAction,
        actor: str,
        resource_type: str,
        resource_id: str,
        repository: str,
        details: dict | None = None,
        outcome: str = "success",
        error: str | None = None,
        correlation_id: str | None = None,
    ):
        """Build an audit event stamped with the current UTC time and log it.

        Args:
            action: The action being performed.
            actor: Who performed the action.
            resource_type: Type of resource affected.
            resource_id: ID of the resource.
            repository: Repository context.
            details: Additional details.
            outcome: success, failure, or partial.
            error: Error message if failed.
            correlation_id: ID to correlate related events.
        """
        event = AuditEvent(
            action=action,
            timestamp=datetime.now(timezone.utc).isoformat(),
            actor=actor,
            resource_type=resource_type,
            resource_id=resource_id,
            repository=repository,
            details=details or {},
            outcome=outcome,
            error=error,
            correlation_id=correlation_id,
        )
        self.log(event)
class AuditTrail:
    """High-level audit trail management.

    Reads its settings from ``config["compliance"]["audit"]`` and, when
    enabled, forwards events to an :class:`AuditLogger`.
    """

    def __init__(self, config: dict):
        """Initialize audit trail.

        Args:
            config: Configuration dictionary. Recognised keys under
                ``compliance.audit``: ``enabled`` (default False),
                ``log_file`` (default "audit.log"), ``log_to_stdout``
                (default False), ``retention_days`` (default 90).
        """
        self.config = config
        compliance_config = config.get("compliance", {})
        audit_config = compliance_config.get("audit", {})
        self.enabled = audit_config.get("enabled", False)
        self.log_file = audit_config.get("log_file", "audit.log")
        self.log_to_stdout = audit_config.get("log_to_stdout", False)
        self.retention_days = audit_config.get("retention_days", 90)
        if self.enabled:
            self.logger = AuditLogger(
                log_file=self.log_file,
                log_to_stdout=self.log_to_stdout,
            )
        else:
            self.logger = None
        self._correlation_id = None

    def set_correlation_id(self, correlation_id: str):
        """Set correlation ID for subsequent events.

        Args:
            correlation_id: ID to correlate related events.
        """
        self._correlation_id = correlation_id

    def log(
        self,
        action: AuditAction,
        actor: str,
        resource_type: str,
        resource_id: str,
        repository: str,
        details: dict | None = None,
        outcome: str = "success",
        error: str | None = None,
    ):
        """Log an audit event; a no-op when auditing is disabled.

        Args:
            action: The action being performed.
            actor: Who performed the action.
            resource_type: Type of resource (pr, issue, comment, etc).
            resource_id: ID of the resource.
            repository: Repository (owner/repo).
            details: Additional details.
            outcome: success, failure, or partial.
            error: Error message if failed.
        """
        if not self.enabled or not self.logger:
            return
        self.logger.log_action(
            action=action,
            actor=actor,
            resource_type=resource_type,
            resource_id=resource_id,
            repository=repository,
            details=details,
            outcome=outcome,
            error=error,
            correlation_id=self._correlation_id,
        )

    def log_review_started(
        self,
        repository: str,
        pr_number: int,
        reviewer: str = "openrabbit",
    ):
        """Log that a review has started on a pull request."""
        self.log(
            action=AuditAction.REVIEW_STARTED,
            actor=reviewer,
            resource_type="pull_request",
            resource_id=str(pr_number),
            repository=repository,
        )

    def log_review_completed(
        self,
        repository: str,
        pr_number: int,
        recommendation: str,
        findings_count: int,
        reviewer: str = "openrabbit",
    ):
        """Log that a review has completed, with its recommendation and finding count."""
        self.log(
            action=AuditAction.REVIEW_COMPLETED,
            actor=reviewer,
            resource_type="pull_request",
            resource_id=str(pr_number),
            repository=repository,
            details={
                "recommendation": recommendation,
                "findings_count": findings_count,
            },
        )

    def log_security_finding(
        self,
        repository: str,
        pr_number: int,
        finding: dict,
        scanner: str = "openrabbit",
    ):
        """Log a security finding.

        Only the ``severity``, ``category``, ``file``, ``line`` and ``cwe``
        keys of ``finding`` are recorded; missing keys are stored as None.
        """
        self.log(
            action=AuditAction.SECURITY_FINDING_DETECTED,
            actor=scanner,
            resource_type="pull_request",
            resource_id=str(pr_number),
            repository=repository,
            details={
                "severity": finding.get("severity"),
                "category": finding.get("category"),
                "file": finding.get("file"),
                "line": finding.get("line"),
                "cwe": finding.get("cwe"),
            },
        )

    def log_approval(
        self,
        repository: str,
        pr_number: int,
        approver: str,
        approval_type: str = "ai",
    ):
        """Log an approval action."""
        self.log(
            action=AuditAction.APPROVAL_GRANTED,
            actor=approver,
            resource_type="pull_request",
            resource_id=str(pr_number),
            repository=repository,
            details={"approval_type": approval_type},
        )

    def log_changes_requested(
        self,
        repository: str,
        pr_number: int,
        requester: str,
        reason: str | None = None,
    ):
        """Log a changes requested action, recording ``reason`` when given."""
        self.log(
            action=AuditAction.CHANGES_REQUESTED,
            actor=requester,
            resource_type="pull_request",
            resource_id=str(pr_number),
            repository=repository,
            details={"reason": reason} if reason else {},
        )

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """Return ``moment`` as timezone-aware, treating naive values as UTC."""
        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    def generate_report(
        self,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        repository: str | None = None,
    ) -> dict:
        """Generate an audit report from the JSON-lines log file.

        Lines that are not JSON objects, lack a ``timestamp`` or carry an
        unparseable one are skipped rather than aborting the report.

        Args:
            start_date: Start of reporting period (inclusive). Naive values
                are interpreted as UTC.
            end_date: End of reporting period (inclusive). Naive values are
                interpreted as UTC.
            repository: Filter by repository.

        Returns:
            Report dictionary with ``events``, ``statistics`` and ``period``.
            When the log file does not exist, only empty ``events`` and
            ``statistics`` are returned.
        """
        if not self.log_file or not os.path.exists(self.log_file):
            return {"events": [], "statistics": {}}

        # Normalise the bounds once: comparing a naive bound with the
        # tz-aware timestamps written by the logger would raise TypeError.
        lower = self._as_utc(start_date) if start_date is not None else None
        upper = self._as_utc(end_date) if end_date is not None else None

        events = []
        with open(self.log_file, encoding="utf-8") as f:
            for line in f:
                try:
                    event = json.loads(line.strip())
                    if not isinstance(event, dict):
                        continue
                    event_time = self._as_utc(
                        datetime.fromisoformat(
                            event["timestamp"].replace("Z", "+00:00")
                        )
                    )
                except (ValueError, KeyError, TypeError, AttributeError):
                    # ValueError covers both json.JSONDecodeError and a
                    # malformed ISO timestamp; the others cover a missing or
                    # non-string timestamp.
                    continue
                # Apply filters
                if lower is not None and event_time < lower:
                    continue
                if upper is not None and event_time > upper:
                    continue
                if repository and event.get("repository") != repository:
                    continue
                events.append(event)

        # Calculate statistics
        action_counts = {}
        outcome_counts = {"success": 0, "failure": 0, "partial": 0}
        security_findings = 0
        for event in events:
            action = event.get("action", "unknown")
            action_counts[action] = action_counts.get(action, 0) + 1
            outcome = event.get("outcome", "success")
            if outcome in outcome_counts:
                outcome_counts[outcome] += 1
            if action == "security_finding_detected":
                security_findings += 1

        return {
            "events": events,
            "statistics": {
                "total_events": len(events),
                "action_counts": action_counts,
                "outcome_counts": outcome_counts,
                "security_findings": security_findings,
            },
            "period": {
                "start": start_date.isoformat() if start_date else None,
                "end": end_date.isoformat() if end_date else None,
            },
        }

View File

@@ -0,0 +1,314 @@
"""CODEOWNERS Checker
Parses and validates CODEOWNERS files for compliance enforcement.
"""
import fnmatch
import logging
import os
import re
from dataclasses import dataclass
from pathlib import Path
@dataclass
class CodeOwnerRule:
    """A single CODEOWNERS rule: a path pattern and the owners it assigns."""

    # Glob-style path pattern as written in the file (without a leading "!").
    pattern: str
    # Owner tokens listed after the pattern.
    owners: list[str]
    # 1-based line number of the rule in the CODEOWNERS source.
    line_number: int
    # True when the pattern was prefixed with "!".
    is_negation: bool = False

    @staticmethod
    def _glob_to_regex(pattern: str) -> str:
        """Translate a glob containing ``**`` into a regex source string.

        ``**/`` at the start of a path segment matches zero or more whole
        directories, any other ``**`` matches anything (including "/"),
        ``*`` and ``?`` stay within one path segment, and every other
        character is matched literally.
        """
        pieces = []
        i = 0
        length = len(pattern)
        while i < length:
            at_segment_start = i == 0 or pattern[i - 1] == "/"
            if at_segment_start and pattern.startswith("**/", i):
                pieces.append("(?:.*/)?")
                i += 3
            elif pattern.startswith("**", i):
                pieces.append(".*")
                i += 2
            elif pattern[i] == "*":
                pieces.append("[^/]*")
                i += 1
            elif pattern[i] == "?":
                pieces.append("[^/]")
                i += 1
            else:
                # Escape so that e.g. "." only matches a literal dot.
                pieces.append(re.escape(pattern[i]))
                i += 1
        return "".join(pieces)

    def matches(self, path: str) -> bool:
        """Check if a path matches this rule.

        Leading slashes on both the path and the pattern are ignored.

        Args:
            path: File path to check.

        Returns:
            True if the path matches.
        """
        path = path.lstrip("/")
        pattern = self.pattern.lstrip("/")

        # Directory patterns ("docs/") match everything beneath them.
        if pattern.endswith("/"):
            return path.startswith(pattern) or fnmatch.fnmatch(path, pattern + "*")

        # "**" patterns need a real translation: chaining str.replace calls
        # would rewrite the ".*" produced for "**" when handling "*".
        if "**" in pattern:
            return re.fullmatch(self._glob_to_regex(pattern), path) is not None

        # Plain globs: match the whole path, or the pattern at any depth.
        return fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(path, f"**/{pattern}")
class CodeownersChecker:
    """Checker for CODEOWNERS file compliance.

    Rules are evaluated in file order; the last matching rule decides the
    owners of a path.
    """

    # Searched in this order, relative to the repository root; first hit wins.
    CODEOWNERS_LOCATIONS = [
        "CODEOWNERS",
        ".github/CODEOWNERS",
        ".gitea/CODEOWNERS",
        "docs/CODEOWNERS",
    ]

    def __init__(self, repo_root: str | None = None):
        """Initialize CODEOWNERS checker and load rules from disk.

        Args:
            repo_root: Repository root path; defaults to the current
                working directory.
        """
        self.repo_root = repo_root or os.getcwd()
        self.rules: list[CodeOwnerRule] = []
        self.codeowners_path: str | None = None
        self.logger = logging.getLogger(__name__)
        self._load_codeowners()

    def _load_codeowners(self):
        """Load the first CODEOWNERS file found in the known locations."""
        for location in self.CODEOWNERS_LOCATIONS:
            path = os.path.join(self.repo_root, location)
            # isfile rather than exists: a directory named CODEOWNERS must
            # not be opened as a file.
            if os.path.isfile(path):
                self.codeowners_path = path
                self._parse_codeowners(path)
                break

    @staticmethod
    def _split_rule(line: str):
        """Split one CODEOWNERS line into its components.

        Args:
            line: Raw line text.

        Returns:
            ``(pattern, owners, is_negation)``, or None for blank lines,
            comment lines and lines that list no owners.
        """
        line = line.strip()
        if not line or line.startswith("#"):
            return None
        pattern, *tokens = line.split()
        owners = []
        for token in tokens:
            # A token starting with "#" begins an inline comment; it and
            # everything after it are not owners.
            if token.startswith("#"):
                break
            owners.append(token)
        if not owners:
            return None
        # Check for negation (optional syntax)
        is_negation = pattern.startswith("!")
        if is_negation:
            pattern = pattern[1:]
        return pattern, owners, is_negation

    def _add_rules(self, lines):
        """Append a rule for every parseable line, numbering lines from 1."""
        for line_num, line in enumerate(lines, 1):
            parsed = self._split_rule(line)
            if parsed is None:
                continue
            pattern, owners, is_negation = parsed
            self.rules.append(
                CodeOwnerRule(
                    pattern=pattern,
                    owners=owners,
                    line_number=line_num,
                    is_negation=is_negation,
                )
            )

    def _parse_codeowners(self, path: str):
        """Parse a CODEOWNERS file into ``self.rules``.

        Args:
            path: Path to CODEOWNERS file.
        """
        with open(path, encoding="utf-8") as f:
            self._add_rules(f)

    def get_owners(self, path: str) -> list[str]:
        """Get owners for a file path.

        Args:
            path: File path to check.

        Returns:
            List of owner usernames/teams (a fresh list the caller may
            modify); empty when no rule applies or the last matching rule
            is a negation.
        """
        owners = []
        # Apply rules in order (later rules override earlier ones)
        for rule in self.rules:
            if rule.matches(path):
                if rule.is_negation:
                    owners = []  # Clear owners for negation
                else:
                    owners = rule.owners
        # Copy so callers cannot mutate the rule's own owner list.
        return list(owners)

    def get_owners_for_files(self, files: list[str]) -> dict[str, list[str]]:
        """Get owners for multiple files.

        Args:
            files: List of file paths.

        Returns:
            Dict mapping file paths to owner lists.
        """
        return {f: self.get_owners(f) for f in files}

    def get_required_reviewers(self, files: list[str]) -> set[str]:
        """Get all required reviewers for a set of files.

        Args:
            files: List of file paths.

        Returns:
            Set of all required reviewer usernames/teams.
        """
        reviewers = set()
        for f in files:
            reviewers.update(self.get_owners(f))
        return reviewers

    def check_approval(
        self,
        files: list[str],
        approvers: list[str],
    ) -> dict:
        """Check if files have required approvals.

        Every owner of every changed file is treated as required. Owners
        containing "/" are treated as teams: they are reported under
        ``pending_teams`` and do not block ``approved``, because team
        membership cannot be resolved here.

        Args:
            files: List of changed files.
            approvers: List of users who approved.

        Returns:
            Dict with approval status and missing approvers. All lists are
            sorted so the result is deterministic.
        """
        required = self.get_required_reviewers(files)
        # Normalize @ prefixes
        required_normalized = {r.lstrip("@") for r in required}
        approvers_normalized = {a.lstrip("@") for a in approvers}
        missing = required_normalized - approvers_normalized
        # Check for team approvals (simplified - actual implementation
        # would need API calls to check team membership)
        teams = {r for r in missing if "/" in r}
        missing_users = missing - teams
        return {
            "approved": not missing_users,
            "required_reviewers": sorted(required_normalized),
            "actual_approvers": sorted(approvers_normalized),
            "missing_approvers": sorted(missing_users),
            "pending_teams": sorted(teams),
        }

    def get_coverage_report(self, files: list[str]) -> dict:
        """Generate a coverage report for files.

        Args:
            files: List of file paths.

        Returns:
            Coverage report with owned and unowned files;
            ``coverage_percent`` is 0 for an empty file list.
        """
        owned = []
        unowned = []
        for f in files:
            owners = self.get_owners(f)
            if owners:
                owned.append({"file": f, "owners": owners})
            else:
                unowned.append(f)
        return {
            "total_files": len(files),
            "owned_files": len(owned),
            "unowned_files": len(unowned),
            "coverage_percent": (len(owned) / len(files) * 100) if files else 0,
            "owned": owned,
            "unowned": unowned,
        }

    def validate_codeowners(self) -> dict:
        """Validate the CODEOWNERS file.

        Returns:
            Validation result with warnings and errors.
        """
        if not self.codeowners_path:
            return {
                "valid": False,
                "errors": ["No CODEOWNERS file found"],
                "warnings": [],
            }
        errors = []
        warnings = []
        # Check for empty rules
        for rule in self.rules:
            if not rule.owners:
                errors.append(
                    f"Line {rule.line_number}: Pattern '{rule.pattern}' has no owners"
                )
        # Check for invalid owner formats. "@user", "org/team" and e-mail
        # addresses (anything containing "@") are accepted.
        for rule in self.rules:
            for owner in rule.owners:
                if "@" not in owner and "/" not in owner:
                    warnings.append(
                        f"Line {rule.line_number}: Owner '{owner}' should start with @ or be a team (org/team)"
                    )
        # Check for repeated patterns (exact text match only)
        patterns_seen = {}
        for rule in self.rules:
            if rule.pattern in patterns_seen:
                warnings.append(
                    f"Line {rule.line_number}: Pattern '{rule.pattern}' duplicates line {patterns_seen[rule.pattern]}"
                )
            patterns_seen[rule.pattern] = rule.line_number
        return {
            "valid": len(errors) == 0,
            "errors": errors,
            "warnings": warnings,
            "rules_count": len(self.rules),
            "file_path": self.codeowners_path,
        }

    @classmethod
    def from_content(cls, content: str) -> "CodeownersChecker":
        """Create checker from CODEOWNERS content string.

        No file system access is performed; ``codeowners_path`` is set to
        the placeholder "<string>".

        Args:
            content: CODEOWNERS file content.

        Returns:
            CodeownersChecker instance.
        """
        checker = cls.__new__(cls)  # bypass __init__, which reads from disk
        checker.repo_root = None
        checker.rules = []
        checker.codeowners_path = "<string>"
        checker.logger = logging.getLogger(__name__)
        checker._add_rules(content.split("\n"))
        return checker