"""Security helpers for secret detection and untrusted content handling.""" from __future__ import annotations import re from dataclasses import dataclass from typing import Any @dataclass(frozen=True) class SecretMatch: """Represents a detected secret-like token.""" secret_type: str value: str _SECRET_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = ( ( "openai_key", re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b"), ), ( "aws_access_key", re.compile(r"\bAKIA[0-9A-Z]{16}\b"), ), ( "github_token", re.compile(r"\bgh[pousr]_[A-Za-z0-9]{20,}\b"), ), ( "jwt", re.compile(r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}\b"), ), ( "private_key", re.compile(r"-----BEGIN (?:RSA |EC |OPENSSH |)PRIVATE KEY-----"), ), ( "generic_api_key", re.compile(r"\b(?:api[_-]?key|token)[\"'=: ]+[A-Za-z0-9_-]{16,}\b", re.IGNORECASE), ), ) def detect_secrets(text: str) -> list[SecretMatch]: """Detect common secret patterns in text. Args: text: Untrusted text to scan. Returns: List of detected secret-like values. """ matches: list[SecretMatch] = [] for secret_type, pattern in _SECRET_PATTERNS: for found in pattern.findall(text): if isinstance(found, tuple): candidate = "".join(found) else: candidate = found matches.append(SecretMatch(secret_type=secret_type, value=candidate)) return matches def mask_secret(value: str) -> str: """Mask a secret value while preserving minimal context. Args: value: Raw secret text. Returns: Masked string that does not reveal the secret. """ if len(value) <= 8: return "[REDACTED]" return f"{value[:4]}...{value[-4:]}" def sanitize_data(value: Any, mode: str = "mask") -> Any: """Recursively sanitize secret-like material from arbitrary data. Args: value: Arbitrary response payload. mode: `mask` to keep redacted content, `block` to fully replace fields. Returns: Sanitized payload value. """ if isinstance(value, dict): return {str(key): sanitize_data(item, mode=mode) for key, item in value.items()} if isinstance(value, list): return [sanitize_data(item, mode=mode) for item in value] if isinstance(value, tuple): return tuple(sanitize_data(item, mode=mode) for item in value) if isinstance(value, str): findings = detect_secrets(value) if not findings: return value if mode == "block": return "[REDACTED_SECRET]" masked = value for finding in findings: masked = masked.replace(finding.value, mask_secret(finding.value)) return masked return value def sanitize_untrusted_text(text: str, max_chars: int) -> str: """Normalize untrusted repository content for display-only usage. Security note: Repository content is always treated as data and never interpreted as executable instructions. This helper enforces a strict length limit to prevent prompt-stuffing through oversized payloads. Args: text: Repository text content. max_chars: Maximum allowed characters in returned text. Returns: Truncated text safe for downstream display. """ if max_chars <= 0: return "" if len(text) <= max_chars: return text return text[:max_chars]