first commit

This commit is contained in:
2025-12-21 13:42:30 +01:00
parent 823b825acb
commit f9b24fe248
47 changed files with 8222 additions and 1 deletions

View File

@@ -0,0 +1,10 @@
"""Enterprise Features Package
This package contains enterprise-grade features like
audit logging and metrics collection.
"""
from enterprise.audit_logger import AuditLogger
from enterprise.metrics import MetricsCollector
__all__ = ["AuditLogger", "MetricsCollector"]

View File

@@ -0,0 +1,303 @@
"""Audit Logger
Enterprise audit logging for tracking all AI agent actions,
decisions, and interactions for compliance and debugging.
"""
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
class AuditLogger:
"""Audit logger for enterprise compliance."""
def __init__(
self,
log_path: str | None = None,
enabled: bool = True,
):
"""Initialize the audit logger.
Args:
log_path: Directory to write audit logs.
enabled: Whether audit logging is enabled.
"""
self.enabled = enabled
self.log_path = Path(
log_path or os.environ.get("AI_AUDIT_PATH", "/var/log/ai-review/")
)
self.logger = logging.getLogger("audit")
if self.enabled:
self._ensure_log_dir()
def _ensure_log_dir(self):
"""Ensure the log directory exists."""
try:
self.log_path.mkdir(parents=True, exist_ok=True)
except Exception as e:
self.logger.warning(f"Could not create audit log directory: {e}")
self.enabled = False
def _get_log_file(self) -> Path:
"""Get the current log file path (daily rotation)."""
date_str = datetime.utcnow().strftime("%Y-%m-%d")
return self.log_path / f"audit-{date_str}.jsonl"
def log(
self,
action: str,
agent: str,
owner: str,
repo: str,
details: dict[str, Any] | None = None,
success: bool = True,
error: str | None = None,
):
"""Log an audit event.
Args:
action: Action performed (e.g., "review_pr", "triage_issue").
agent: Agent name that performed the action.
owner: Repository owner.
repo: Repository name.
details: Additional details about the action.
success: Whether the action succeeded.
error: Error message if failed.
"""
if not self.enabled:
return
event = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"action": action,
"agent": agent,
"repository": f"{owner}/{repo}",
"success": success,
"details": details or {},
}
if error:
event["error"] = error
try:
log_file = self._get_log_file()
with open(log_file, "a") as f:
f.write(json.dumps(event) + "\n")
except Exception as e:
self.logger.error(f"Failed to write audit log: {e}")
def log_llm_call(
self,
agent: str,
owner: str,
repo: str,
provider: str,
model: str,
tokens_used: int | None = None,
duration_ms: int | None = None,
):
"""Log an LLM API call.
Args:
agent: Agent making the call.
owner: Repository owner.
repo: Repository name.
provider: LLM provider used.
model: Model name.
tokens_used: Number of tokens consumed.
duration_ms: Call duration in milliseconds.
"""
self.log(
action="llm_call",
agent=agent,
owner=owner,
repo=repo,
details={
"provider": provider,
"model": model,
"tokens_used": tokens_used,
"duration_ms": duration_ms,
},
)
def log_comment_posted(
self,
agent: str,
owner: str,
repo: str,
issue_number: int,
comment_type: str,
):
"""Log a comment being posted.
Args:
agent: Agent posting the comment.
owner: Repository owner.
repo: Repository name.
issue_number: Issue or PR number.
comment_type: Type of comment (triage, review, response).
"""
self.log(
action="comment_posted",
agent=agent,
owner=owner,
repo=repo,
details={
"issue_number": issue_number,
"comment_type": comment_type,
},
)
def log_labels_applied(
self,
agent: str,
owner: str,
repo: str,
issue_number: int,
labels: list[str],
):
"""Log labels being applied.
Args:
agent: Agent applying labels.
owner: Repository owner.
repo: Repository name.
issue_number: Issue or PR number.
labels: Labels applied.
"""
self.log(
action="labels_applied",
agent=agent,
owner=owner,
repo=repo,
details={
"issue_number": issue_number,
"labels": labels,
},
)
def get_logs(
self,
start_date: str | None = None,
end_date: str | None = None,
action: str | None = None,
repository: str | None = None,
) -> list[dict]:
"""Retrieve audit logs with optional filtering.
Args:
start_date: Start date (YYYY-MM-DD).
end_date: End date (YYYY-MM-DD).
action: Filter by action type.
repository: Filter by repository (owner/repo).
Returns:
List of audit log entries.
"""
if not self.enabled:
return []
logs = []
log_files = sorted(self.log_path.glob("audit-*.jsonl"))
for log_file in log_files:
# Date filter on filename
file_date = log_file.stem.replace("audit-", "")
if start_date and file_date < start_date:
continue
if end_date and file_date > end_date:
continue
try:
with open(log_file) as f:
for line in f:
try:
entry = json.loads(line.strip())
# Apply filters
if action and entry.get("action") != action:
continue
if repository and entry.get("repository") != repository:
continue
logs.append(entry)
except json.JSONDecodeError:
continue
except Exception:
continue
return logs
def generate_report(
self,
start_date: str | None = None,
end_date: str | None = None,
) -> dict:
"""Generate a summary report of audit activity.
Args:
start_date: Report start date.
end_date: Report end date.
Returns:
Summary report dictionary.
"""
logs = self.get_logs(start_date=start_date, end_date=end_date)
report = {
"period": {
"start": start_date or "all",
"end": end_date or "all",
},
"total_events": len(logs),
"by_action": {},
"by_repository": {},
"by_agent": {},
"success_rate": 0.0,
"llm_usage": {
"total_calls": 0,
"total_tokens": 0,
},
}
success_count = 0
for log in logs:
action = log.get("action", "unknown")
repo = log.get("repository", "unknown")
agent = log.get("agent", "unknown")
report["by_action"][action] = report["by_action"].get(action, 0) + 1
report["by_repository"][repo] = report["by_repository"].get(repo, 0) + 1
report["by_agent"][agent] = report["by_agent"].get(agent, 0) + 1
if log.get("success"):
success_count += 1
if action == "llm_call":
report["llm_usage"]["total_calls"] += 1
tokens = log.get("details", {}).get("tokens_used")
if tokens:
report["llm_usage"]["total_tokens"] += tokens
if logs:
report["success_rate"] = success_count / len(logs)
return report
# Process-wide singleton, created lazily on first access.
_audit_logger: AuditLogger | None = None


def get_audit_logger() -> AuditLogger:
    """Return the shared AuditLogger, instantiating it on first use."""
    global _audit_logger
    if _audit_logger is not None:
        return _audit_logger
    _audit_logger = AuditLogger()
    return _audit_logger

View File

@@ -0,0 +1,371 @@
"""Metrics Collector
Observability metrics for AI agent performance monitoring.
Tracks request counts, latencies, errors, and LLM usage.
"""
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from threading import Lock
@dataclass
class MetricPoint:
    """A single metric data point."""
    timestamp: datetime  # when the observation was taken
    value: float  # observed value
    labels: dict = field(default_factory=dict)  # optional metric labels; presumably str -> str — TODO confirm
class Counter:
    """A monotonically increasing metric guarded by a lock."""

    def __init__(self, name: str, description: str = ""):
        """Create the counter.

        Args:
            name: Metric name.
            description: Human-readable description.
        """
        self.name = name
        self.description = description
        self._lock = Lock()
        self._value = 0.0

    def inc(self, value: float = 1.0):
        """Add *value* (default 1) to the counter."""
        with self._lock:
            self._value = self._value + value

    @property
    def value(self) -> float:
        """Current counter total."""
        with self._lock:
            return self._value
class Gauge:
    """A lock-protected metric that can move up, down, or be set."""

    def __init__(self, name: str, description: str = ""):
        """Create the gauge.

        Args:
            name: Metric name.
            description: Human-readable description.
        """
        self.name = name
        self.description = description
        self._lock = Lock()
        self._value = 0.0

    def set(self, value: float):
        """Replace the gauge value outright."""
        with self._lock:
            self._value = value

    def inc(self, value: float = 1.0):
        """Raise the gauge by *value* (default 1)."""
        with self._lock:
            self._value += value

    def dec(self, value: float = 1.0):
        """Lower the gauge by *value* (default 1)."""
        # Decrement is just a negative increment.
        self.inc(-value)

    @property
    def value(self) -> float:
        """Current gauge reading."""
        with self._lock:
            return self._value
class Histogram:
"""Simple histogram for tracking distributions."""
def __init__(
self,
name: str,
description: str = "",
buckets: list[float] | None = None,
):
self.name = name
self.description = description
self.buckets = buckets or [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
self._values: list[float] = []
self._lock = Lock()
def observe(self, value: float):
"""Record an observation."""
with self._lock:
self._values.append(value)
# Keep only last 1000 observations
if len(self._values) > 1000:
self._values = self._values[-1000:]
def get_percentile(self, percentile: float) -> float:
"""Get a percentile value."""
with self._lock:
if not self._values:
return 0.0
sorted_values = sorted(self._values)
idx = int(len(sorted_values) * percentile / 100)
return sorted_values[min(idx, len(sorted_values) - 1)]
@property
def count(self) -> int:
"""Get observation count."""
with self._lock:
return len(self._values)
@property
def sum(self) -> float:
"""Get sum of observations."""
with self._lock:
return sum(self._values)
class MetricsCollector:
    """Central metrics collector for AI agents.

    Aggregates thread-safe counters, gauges and histograms covering
    request handling, LLM usage and agent actions, and renders them as
    a summary dict or in Prometheus text exposition format.
    """

    def __init__(self, enabled: bool = True):
        """Initialize metrics collector.

        Args:
            enabled: Whether metrics collection is enabled. When False,
                every record_* method becomes a no-op.
        """
        self.enabled = enabled
        self._start_time = time.time()
        # Counters
        self.requests_total = Counter(
            "ai_review_requests_total",
            "Total number of review requests processed",
        )
        self.requests_success = Counter(
            "ai_review_requests_success",
            "Number of successful review requests",
        )
        self.requests_failed = Counter(
            "ai_review_requests_failed",
            "Number of failed review requests",
        )
        self.llm_calls_total = Counter(
            "ai_review_llm_calls_total",
            "Total number of LLM API calls",
        )
        self.llm_tokens_total = Counter(
            "ai_review_llm_tokens_total",
            "Total LLM tokens consumed",
        )
        self.comments_posted = Counter(
            "ai_review_comments_posted_total",
            "Total comments posted",
        )
        self.labels_applied = Counter(
            "ai_review_labels_applied_total",
            "Total labels applied",
        )
        self.security_findings = Counter(
            "ai_review_security_findings_total",
            "Total security findings detected",
        )
        # Gauges
        self.active_requests = Gauge(
            "ai_review_active_requests",
            "Currently active review requests",
        )
        # Histograms
        self.request_duration = Histogram(
            "ai_review_request_duration_seconds",
            "Request processing duration",
        )
        self.llm_duration = Histogram(
            "ai_review_llm_duration_seconds",
            "LLM API call duration",
        )
        # Per-agent tallies: agent name -> {"total", "success", "failed"}.
        self._agent_metrics: dict[str, dict] = {}

    def record_request_start(self, agent: str):
        """Record the start of a request.

        Args:
            agent: Name of the agent handling the request.
        """
        if not self.enabled:
            return
        self.requests_total.inc()
        self.active_requests.inc()
        # setdefault gives a single initialization point for the tally.
        stats = self._agent_metrics.setdefault(
            agent, {"total": 0, "success": 0, "failed": 0}
        )
        stats["total"] += 1

    def record_request_end(
        self,
        agent: str,
        success: bool,
        duration_seconds: float,
    ):
        """Record the end of a request.

        Args:
            agent: Name of the agent.
            success: Whether the request succeeded.
            duration_seconds: Request duration in seconds.
        """
        if not self.enabled:
            return
        self.active_requests.dec()
        self.request_duration.observe(duration_seconds)
        if success:
            self.requests_success.inc()
        else:
            self.requests_failed.inc()
        # Only tally agents seen via record_request_start.
        if agent in self._agent_metrics:
            outcome = "success" if success else "failed"
            self._agent_metrics[agent][outcome] += 1

    def record_llm_call(
        self,
        provider: str,
        model: str,
        tokens: int | None,
        duration_seconds: float,
    ):
        """Record an LLM API call.

        Args:
            provider: LLM provider name (currently not recorded per-provider).
            model: Model used (currently not recorded per-model).
            tokens: Tokens consumed; skipped when None or 0.
            duration_seconds: Call duration in seconds.
        """
        if not self.enabled:
            return
        self.llm_calls_total.inc()
        self.llm_duration.observe(duration_seconds)
        if tokens:
            self.llm_tokens_total.inc(tokens)

    def record_comment_posted(self):
        """Record a comment being posted."""
        if self.enabled:
            self.comments_posted.inc()

    def record_labels_applied(self, count: int = 1):
        """Record *count* labels being applied."""
        if self.enabled:
            self.labels_applied.inc(count)

    def record_security_finding(self, severity: str):
        """Record a security finding.

        Args:
            severity: Finding severity. NOTE(review): currently unused —
                Counter has no label support, so findings are counted in
                aggregate only.
        """
        if self.enabled:
            self.security_findings.inc()

    def get_summary(self) -> dict:
        """Get a summary of all metrics.

        Returns:
            Dictionary with metric summaries (counts, success rate, LLM
            usage, latency percentiles, and per-agent tallies).
        """
        uptime = time.time() - self._start_time
        return {
            "uptime_seconds": uptime,
            "requests": {
                "total": self.requests_total.value,
                "success": self.requests_success.value,
                "failed": self.requests_failed.value,
                "active": self.active_requests.value,
                # max(..., 1) guards the zero-requests case.
                "success_rate": (
                    self.requests_success.value / max(self.requests_total.value, 1)
                ),
            },
            "llm": {
                "calls": self.llm_calls_total.value,
                "tokens": self.llm_tokens_total.value,
                "avg_duration_ms": (
                    (self.llm_duration.sum / max(self.llm_duration.count, 1)) * 1000
                ),
                "p50_duration_ms": self.llm_duration.get_percentile(50) * 1000,
                "p95_duration_ms": self.llm_duration.get_percentile(95) * 1000,
            },
            "actions": {
                "comments_posted": self.comments_posted.value,
                "labels_applied": self.labels_applied.value,
                "security_findings": self.security_findings.value,
            },
            "latency": {
                "avg_ms": (
                    (self.request_duration.sum / max(self.request_duration.count, 1))
                    * 1000
                ),
                "p50_ms": self.request_duration.get_percentile(50) * 1000,
                "p95_ms": self.request_duration.get_percentile(95) * 1000,
                "p99_ms": self.request_duration.get_percentile(99) * 1000,
            },
            "by_agent": self._agent_metrics,
        }

    def export_prometheus(self) -> str:
        """Export metrics in Prometheus text exposition format.

        Returns:
            Prometheus-formatted metrics string with HELP/TYPE metadata.
        """
        lines: list[str] = []

        def add_metric(
            name: str,
            value: float,
            help_text: str = "",
            metric_type: str = "counter",
        ):
            # Emit optional HELP plus TYPE metadata before the sample line.
            if help_text:
                lines.append(f"# HELP {name} {help_text}")
            lines.append(f"# TYPE {name} {metric_type}")
            lines.append(f"{name} {value}")

        add_metric(
            "ai_review_requests_total",
            self.requests_total.value,
            "Total review requests",
        )
        add_metric(
            "ai_review_requests_success_total",
            self.requests_success.value,
            "Successful requests",
        )
        add_metric(
            "ai_review_requests_failed_total",
            self.requests_failed.value,
            "Failed requests",
        )
        add_metric(
            "ai_review_llm_calls_total",
            self.llm_calls_total.value,
            "Total LLM calls",
        )
        add_metric(
            "ai_review_llm_tokens_total",
            self.llm_tokens_total.value,
            "Total LLM tokens",
        )
        add_metric(
            "ai_review_comments_posted_total",
            self.comments_posted.value,
            "Comments posted",
        )
        # Previously defined but never exported.
        add_metric(
            "ai_review_labels_applied_total",
            self.labels_applied.value,
            "Labels applied",
        )
        add_metric(
            "ai_review_security_findings_total",
            self.security_findings.value,
            "Security findings detected",
        )
        add_metric(
            "ai_review_active_requests",
            self.active_requests.value,
            "Currently active review requests",
            metric_type="gauge",
        )
        return "\n".join(lines)
# Process-wide singleton, created lazily on first access.
_metrics: MetricsCollector | None = None


def get_metrics() -> MetricsCollector:
    """Return the shared MetricsCollector, instantiating it on first use."""
    global _metrics
    if _metrics is not None:
        return _metrics
    _metrics = MetricsCollector()
    return _metrics