Files
AegisGitea-MCP/src/aegis_gitea_mcp/stdio_app.py
T
Latte 5d4a98d06e feat(stdio): harden local MCP transport and add end-to-end tests
Reserve stdout for the JSON-RPC stream: _configure_stderr_logging() pins all
logging to stderr (and rewrites any stray stdout handler) so a log line can
never corrupt the stdio protocol. Extract a pure, testable build_server() from
_serve(). Add end-to-end tests over the mcp in-memory transport (initialize +
tools/list + tools/call), covering a successful round trip and a policy denial
surfaced as an MCP error.
2026-06-27 15:19:42 +02:00

268 lines
10 KiB
Python

"""Local stdio transport adapter (``aegis-gitea-mcp``).
This is the second transport for the shared core: a single-user, local MCP
server spoken over stdio using the official ``mcp`` SDK. It is meant to be run
like ``uvx aegis-gitea-mcp`` and wired into Claude Desktop / Claude Code,
mirroring the ergonomics of other local MCP servers.
Trust model
-----------
The local operator owns the Gitea Personal Access Token supplied via
``GITEA_TOKEN``; there is no per-user OAuth. At startup the adapter resolves the
PAT owner (``GET /user``) and pins the request context to that single login.
Because the caller *is* the token owner, the per-user repository-permission
probe used by the public HTTP server is intentionally skipped — but the policy
engine, ``WRITE_MODE`` gate, secret sanitization and the tamper-evident audit
log all run exactly as they do on the server. The same tools (including
``gitea_request``) are served from the shared :mod:`aegis_gitea_mcp.registry`.
"""
from __future__ import annotations
import asyncio
import logging
import os
import sys
from pathlib import Path
from typing import Any
from aegis_gitea_mcp.errors import ToolError
class StdioConfigError(RuntimeError):
    """Signal that the local stdio server cannot start as configured.

    Used for problems the operator has to fix in their own environment, such
    as required variables that are absent or a token whose owner cannot be
    resolved.
    """
def _default_audit_log_path() -> Path:
    """Return a per-user audit-log path suitable for local runs.

    The server's container default (``/var/log/aegis-mcp/audit.log``) is not
    writable on a typical workstation, so fall back to an OS-appropriate user
    state directory:

    * Windows: ``%LOCALAPPDATA%``, or ``~/AppData/Local`` when it is unset or
      empty.
    * Elsewhere: ``$XDG_STATE_HOME`` when it holds an absolute path, otherwise
      ``~/.local/state``.

    Returns:
        ``<state dir>/aegis-gitea-mcp/audit.log``. The path is only computed
        here; nothing is created on disk.
    """
    if sys.platform == "win32":
        local_app_data = os.environ.get("LOCALAPPDATA") or str(Path.home() / "AppData" / "Local")
        return Path(local_app_data) / "aegis-gitea-mcp" / "audit.log"

    # The XDG Base Directory spec requires absolute paths and says a relative
    # value must be treated as invalid and ignored. Honouring one would make the
    # audit log land wherever the process happened to be started from.
    xdg_state = os.environ.get("XDG_STATE_HOME")
    if xdg_state and Path(xdg_state).is_absolute():
        state_dir = Path(xdg_state)
    else:
        state_dir = Path.home() / ".local" / "state"
    return state_dir / "aegis-gitea-mcp" / "audit.log"
def _bootstrap_env() -> None:
    """Apply local-mode defaults to the environment before settings load.

    Local mode has no OAuth and no API-key gate (the operator is the trusted PAT
    owner), and writes its audit log to a per-user path when one is not set.
    ``OAUTH_MODE`` is always forced to ``"false"``; every other default applied
    here yields to a value already present in the environment, including one
    that was just loaded from a local ``.env``.

    Loading ``.env`` is best-effort. A missing ``python-dotenv`` is ignored;
    any other failure is reported on stderr and startup continues, so an
    unreadable ``.env`` does not silently turn into a confusing
    "missing variable" error later on.
    """
    # python-dotenv: load a local .env so GITEA_URL/GITEA_TOKEN can live there.
    try:
        from dotenv import load_dotenv

        load_dotenv()
    except ImportError:  # pragma: no cover - dotenv is a core dep, defensive only
        pass
    except Exception as exc:
        # Keep going, but tell the operator. stderr only: stdout is reserved
        # for the JSON-RPC stream of the stdio transport.
        print(f"aegis-gitea-mcp: warning: could not load .env file: {exc}", file=sys.stderr)

    # Local mode is single-user PAT auth: force OAuth off and disable the API-key
    # requirement so the server's API-key/OAuth config validation does not apply.
    os.environ["OAUTH_MODE"] = "false"
    os.environ.setdefault("AUTH_ENABLED", "false")
    os.environ.setdefault("STARTUP_VALIDATE_GITEA", "false")

    # A blank AUDIT_LOG_PATH counts as "not set".
    if not os.environ.get("AUDIT_LOG_PATH", "").strip():
        os.environ["AUDIT_LOG_PATH"] = str(_default_audit_log_path())
def _check_required_env() -> None:
    """Verify that the mandatory Gitea connection variables are present.

    ``GITEA_URL`` and ``GITEA_TOKEN`` must both hold a non-blank value.

    Raises:
        StdioConfigError: Lists every missing variable and shows a short
            example of how to supply them.
    """
    absent = []
    for var in ("GITEA_URL", "GITEA_TOKEN"):
        # Whitespace-only values are treated the same as unset ones.
        if not os.environ.get(var, "").strip():
            absent.append(var)

    if not absent:
        return

    raise StdioConfigError(
        "Missing required environment variable(s): "
        + ", ".join(absent)
        + ".\nSet them in your environment or a local .env file, e.g.:\n"
        " GITEA_URL=https://gitea.example.com\n"
        " GITEA_TOKEN=<a Gitea personal access token>\n"
    )
# The PAT owner login. ``_serve`` resolves it once at startup and stores it
# here; ``_dispatch`` pins it onto the request context of each call while it is
# set. It stays ``None`` until ``_serve`` has assigned it.
_owner_login: str | None = None
async def _resolve_owner_login() -> str:
    """Resolve the Gitea login that owns the configured PAT.

    Asks Gitea for the current user with ``settings.gitea_token`` and returns
    that user's login. Nothing is cached here; the caller keeps the value.

    Returns:
        The non-empty, whitespace-trimmed login name.

    Raises:
        StdioConfigError: If the response carries no usable login.
    """
    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.gitea_client import GiteaClient

    settings = get_settings()
    async with GiteaClient(token=settings.gitea_token) as gitea:
        user = await gitea.get_current_user()

    # ``or ""`` matters twice: an empty/None payload must not raise
    # AttributeError, and a JSON ``null`` login must not become the literal
    # string "None" via ``str()`` and be accepted as a real identity.
    login = str((user or {}).get("login") or "").strip()
    if not login:
        raise StdioConfigError(
            "Could not resolve the Gitea user for the supplied GITEA_TOKEN. "
            "Verify the token is valid and has API access."
        )
    return login
async def _dispatch(tool_name: str, arguments: dict[str, Any]) -> dict[str, Any]:
    """Run one tool call through the local authorize/audit/execute/sanitize path.

    Sequence: pin the PAT owner on the request context (when known), look up
    the tool and its handler, ask the policy engine for a decision, record the
    invocation in the audit log, run the handler against a Gitea client built
    from the configured token, sanitize the result, and record success.

    No per-user repository-permission probe is performed here: the local
    operator is the PAT owner.

    Args:
        tool_name: Registry name of the tool to run.
        arguments: Tool arguments as supplied by the caller.

    Returns:
        The handler's result, passed through ``sanitize_data`` unless
        ``secret_detection_mode`` is ``"off"``.

    Raises:
        ToolError: With status 404 for an unknown tool, 500 for a tool without
            a handler, and 403 when the policy engine denies the request.
    """
    from aegis_gitea_mcp.audit import get_audit_logger
    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.gitea_client import GiteaClient
    from aegis_gitea_mcp.policy import get_policy_engine
    from aegis_gitea_mcp.registry import get_tool_by_name, get_tool_handler
    from aegis_gitea_mcp.request_context import set_gitea_user_login
    from aegis_gitea_mcp.security import sanitize_data
    from aegis_gitea_mcp.tools.arguments import extract_repository, extract_target_path

    # Pin identity to the trusted PAT owner for every call (e.g. list_repositories
    # scopes its results to this login in service-PAT mode).
    owner = _owner_login
    if owner:
        set_gitea_user_login(owner)

    settings = get_settings()
    audit_log = get_audit_logger()

    definition = get_tool_by_name(tool_name)
    if definition is None:
        raise ToolError(f"Tool '{tool_name}' not found", status_code=404)

    tool_handler = get_tool_handler(tool_name)
    if tool_handler is None:
        raise ToolError(f"Tool '{tool_name}' has no handler implementation", status_code=500)

    # Authorization happens before anything is sent to Gitea.
    repo = extract_repository(arguments)
    path = extract_target_path(arguments)
    verdict = get_policy_engine().authorize(
        tool_name=tool_name,
        is_write=definition.write_operation,
        repository=repo,
        target_path=path,
    )
    if not verdict.allowed:
        audit_log.log_access_denied(tool_name=tool_name, repository=repo, reason=verdict.reason)
        raise ToolError(f"Policy denied request: {verdict.reason}", status_code=403)

    # The first audit entry yields the correlation id that ties it to the
    # completion entry written below.
    correlation_id = audit_log.log_tool_invocation(tool_name=tool_name, params=arguments)

    async with GiteaClient(token=settings.gitea_token) as gitea:
        outcome = await tool_handler(gitea, arguments)

    if settings.secret_detection_mode != "off":
        outcome = sanitize_data(outcome, mode=settings.secret_detection_mode)

    audit_log.log_tool_invocation(
        tool_name=tool_name, correlation_id=correlation_id, result_status="success"
    )
    return outcome
def _configure_stderr_logging() -> None:
    """Pin all logging to stderr so the stdout JSON-RPC channel stays clean.

    The stdio MCP transport speaks JSON-RPC over stdout; a single stray log line
    on stdout corrupts the stream and breaks the client. After calling
    ``configure_logging`` (expected to target stderr already), every existing
    logger is scanned and any stream handler bound to stdout is re-pointed at
    stderr. This covers the root logger and named loggers that carry their own
    handlers, for example ones a library attached directly.

    Handlers created after this call are not affected.
    """
    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.logging_utils import configure_logging

    configure_logging(get_settings().log_level)

    # ``sys.stdout`` may have been rebound, so also match the interpreter's
    # original ``sys.__stdout__``. ``None`` is dropped so that a handler whose
    # stream is not open yet (``stream is None``) is never taken for stdout.
    stdout_streams = [stream for stream in (sys.stdout, sys.__stdout__) if stream is not None]

    # Handlers attached to a named logger are not in the root handler list, so
    # the root alone is not enough. ``loggerDict`` also holds placeholder
    # entries, hence the isinstance filter.
    root = logging.getLogger()
    named_loggers = [
        candidate
        for candidate in root.manager.loggerDict.values()
        if isinstance(candidate, logging.Logger)
    ]
    for logger in [root, *named_loggers]:
        for handler in logger.handlers:
            if not isinstance(handler, logging.StreamHandler):
                continue
            if any(handler.stream is stream for stream in stdout_streams):
                handler.setStream(sys.stderr)
def build_server() -> Any:
    """Create the stdio MCP ``Server`` and register its handlers, without running it.

    Split out from :func:`_serve` so the server can be driven in-process (for
    example by tests over an in-memory transport) without opening real stdio
    streams.

    Returns:
        A ``Server`` named ``"aegis-gitea-mcp"`` whose tool listing comes from
        the shared registry and whose tool calls are routed to ``_dispatch``.
    """
    import mcp.types as mcp_types
    from mcp.server import Server

    from aegis_gitea_mcp.registry import list_tool_definitions

    server: Any = Server("aegis-gitea-mcp")

    @server.list_tools()
    async def list_tools() -> list[mcp_types.Tool]:
        # Translate each registry definition into the SDK's Tool model.
        advertised = []
        for definition in list_tool_definitions():
            advertised.append(
                mcp_types.Tool(
                    name=definition.name,
                    description=definition.description,
                    inputSchema=definition.input_schema,
                )
            )
        return advertised

    @server.call_tool()
    async def call_tool(name: str, arguments: dict[str, Any]) -> dict[str, Any]:
        # Returning a dict yields structured content plus a JSON text block.
        payload = await _dispatch(name, arguments)
        return payload

    return server
async def _serve() -> None:
    """Resolve the PAT owner, then run the MCP server over real stdin/stdout.

    Settings and the policy engine are loaded first so that a bad configuration
    fails before the stdio transport is opened. The resolved login is stored in
    the module-level ``_owner_login``.
    """
    global _owner_login

    from mcp.server.stdio import stdio_server

    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.policy import get_policy_engine

    # Fail fast on bad settings/policy before opening the transport.
    get_settings()
    get_policy_engine()

    _owner_login = await _resolve_owner_login()

    app = build_server()
    async with stdio_server() as (reader, writer):
        init_options = app.create_initialization_options()
        await app.run(reader, writer, init_options)
def main() -> None:
    """Console-script entry point for the local stdio MCP server.

    In order: apply local-mode environment defaults, check the required
    variables, load settings, route logging to stderr, then run ``_serve``.
    A ``StdioConfigError`` or a settings failure is reported on stderr and ends
    the process with exit status 2. ``KeyboardInterrupt`` ends it quietly.
    """

    def _exit_with(line: str, cause: Exception) -> None:
        # Shared "explain on stderr, exit 2" path; stdout is never written to.
        print(line, file=sys.stderr)
        raise SystemExit(2) from cause

    _bootstrap_env()

    try:
        _check_required_env()
    except StdioConfigError as exc:
        _exit_with(f"aegis-gitea-mcp: {exc}", exc)

    try:
        from aegis_gitea_mcp.config import get_settings

        get_settings()
    except Exception as exc:  # pydantic ValidationError or PolicyError
        _exit_with(f"aegis-gitea-mcp: invalid configuration: {exc}", exc)

    # Keep stdout reserved for the JSON-RPC stream; all logs go to stderr.
    _configure_stderr_logging()

    try:
        asyncio.run(_serve())
    except StdioConfigError as exc:
        _exit_with(f"aegis-gitea-mcp: {exc}", exc)
    except KeyboardInterrupt:  # pragma: no cover - interactive shutdown
        pass
# Names exported by a star-import of this module: the console-script entry
# point and the configuration error type defined above.
__all__ = ["main", "StdioConfigError"]