Files
AegisGitea-MCP/src/aegis_gitea_mcp/security.py
T

135 lines
3.5 KiB
Python

"""Security helpers for secret detection and untrusted content handling."""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Any
@dataclass(frozen=True)
class SecretMatch:
    """Immutable record describing one secret-like token found by a scan.

    Attributes:
        secret_type: Name of the detection rule that produced the match.
        value: The raw text reported for the match. This is unmasked, so
            avoid logging or echoing instances of this class verbatim.
    """

    secret_type: str
    value: str
# Ordered table of (rule name, compiled regex) pairs used for secret detection.
#
# Every pattern deliberately uses only non-capturing groups so that both
# ``findall`` and ``finditer`` report the complete matched substring.
_SECRET_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
    (
        # OpenAI-style keys: "sk-" followed by 20+ URL-safe characters.
        "openai_key",
        re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b"),
    ),
    (
        # AWS access key IDs: "AKIA" followed by 16 upper-case alphanumerics.
        "aws_access_key",
        re.compile(r"\bAKIA[0-9A-Z]{16}\b"),
    ),
    (
        # GitHub tokens: ghp_/gho_/ghu_/ghs_/ghr_ prefix plus 20+ alphanumerics.
        "github_token",
        re.compile(r"\bgh[pousr]_[A-Za-z0-9]{20,}\b"),
    ),
    (
        # JSON Web Tokens: three dot-separated base64url segments, the first
        # starting with "eyJ".
        "jwt",
        re.compile(r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}\b"),
    ),
    (
        # PEM private keys. The match extends lazily through the closing
        # "-----END ... PRIVATE KEY-----" marker when one is present, so that
        # masking replaces the key body and not just the header line. If the
        # block is truncated (no END marker) only the header is matched.
        "private_key",
        re.compile(
            r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |)PRIVATE KEY-----"
            r"(?:[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |)PRIVATE KEY-----)?"
        ),
    ),
    (
        # Generic "api_key"/"api-key"/"apikey"/"token" label followed by
        # separators and a 16+ character value (case-insensitive).
        "generic_api_key",
        re.compile(r"\b(?:api[_-]?key|token)[\"'=: ]+[A-Za-z0-9_-]{16,}\b", re.IGNORECASE),
    ),
)
def detect_secrets(text: str) -> list[SecretMatch]:
    """Detect common secret patterns in text.

    Every rule in ``_SECRET_PATTERNS`` is applied to the whole input. Results
    are grouped by rule in table order and, within a rule, by position in
    ``text``. A span matched by several rules is reported once per rule.

    Args:
        text: Untrusted text to scan.

    Returns:
        List of detected secret-like values; empty when nothing matched.
    """
    matches: list[SecretMatch] = []
    for secret_type, pattern in _SECRET_PATTERNS:
        # ``finditer`` + ``group(0)`` always yields the full matched
        # substring. ``findall`` returns only the captured groups once a
        # pattern contains any, which would produce values that do not occur
        # verbatim in ``text`` and so could not be found again for masking.
        matches.extend(
            SecretMatch(secret_type=secret_type, value=found.group(0))
            for found in pattern.finditer(text)
        )
    return matches
def mask_secret(value: str) -> str:
    """Mask a secret value while preserving minimal context.

    Long values keep their first and last four characters around ``...`` so
    they remain recognisable; anything shorter than 16 characters is replaced
    entirely, because showing eight characters of a short value would
    disclose most (or all) of it.

    Args:
        value: Raw secret text.

    Returns:
        ``"[REDACTED]"`` for values shorter than 16 characters, otherwise
        ``"<first 4>...<last 4>"``.
    """
    visible = 4  # characters kept at each end of a long value
    # Require at least as many hidden characters as revealed ones.
    if len(value) < 4 * visible:
        return "[REDACTED]"
    return f"{value[:visible]}...{value[-visible:]}"
def sanitize_data(value: Any, mode: str = "mask") -> Any:
    """Walk a payload and scrub secret-like text from every string in it.

    Dicts, lists and tuples are rebuilt with their contents sanitized
    recursively (dict keys are converted with ``str`` but not scanned).
    Strings are scanned with ``detect_secrets``; any other value is returned
    unchanged.

    Args:
        value: Arbitrary response payload.
        mode: ``"block"`` replaces a string containing any finding with
            ``"[REDACTED_SECRET]"``; any other value masks each finding in
            place via ``mask_secret``.

    Returns:
        Sanitized payload value.
    """
    if isinstance(value, dict):
        cleaned: dict[str, Any] = {}
        for key, item in value.items():
            cleaned[str(key)] = sanitize_data(item, mode=mode)
        return cleaned

    if isinstance(value, (list, tuple)):
        items = [sanitize_data(item, mode=mode) for item in value]
        # Lists stay lists; every tuple (including subclasses) becomes a
        # plain tuple.
        return items if isinstance(value, list) else tuple(items)

    if not isinstance(value, str):
        return value

    findings = detect_secrets(value)
    if not findings:
        return value
    if mode == "block":
        return "[REDACTED_SECRET]"

    result = value
    for hit in findings:
        result = result.replace(hit.value, mask_secret(hit.value))
    return result
def sanitize_untrusted_text(text: str, max_chars: int) -> str:
    """Cap untrusted repository content at a fixed length for display.

    Security note:
        Repository content is always treated as data and never interpreted
        as executable instructions. The hard length limit guards against
        prompt-stuffing through oversized payloads.

    Args:
        text: Repository text content.
        max_chars: Maximum allowed characters in returned text.

    Returns:
        ``text`` unchanged when it already fits, its first ``max_chars``
        characters when it does not, and ``""`` when ``max_chars`` is zero
        or negative.
    """
    if max_chars > 0:
        fits = len(text) <= max_chars
        return text if fits else text[:max_chars]
    return ""