# AegisGitea-MCP/src/aegis_gitea_mcp/stdio_app.py

"""Local stdio transport adapter (``aegis-gitea-mcp``).

This is the second transport for the shared core: a single-user, local MCP
server spoken over stdio using the official ``mcp`` SDK. It is meant to be run
like ``uvx aegis-gitea-mcp`` and wired into Claude Desktop / Claude Code,
mirroring the ergonomics of other local MCP servers.

Trust model
-----------
The local operator owns the Gitea Personal Access Token supplied via
``GITEA_TOKEN``; there is no per-user OAuth. At startup the adapter resolves the
PAT owner (``GET /user``) and pins the request context to that single login.
Because the caller *is* the token owner, the per-user repository-permission
probe used by the public HTTP server is intentionally skipped — but the policy
engine, ``WRITE_MODE`` gate, secret sanitization and the tamper-evident audit
log all run exactly as they do on the server. The same tools (including
``gitea_request``) are served from the shared :mod:`aegis_gitea_mcp.registry`.
"""

from __future__ import annotations

import asyncio
import logging
import os
import sys
from pathlib import Path
from typing import Any

from aegis_gitea_mcp.errors import ToolError


class StdioConfigError(RuntimeError):
    """Signal that the local stdio server cannot start as configured.

    Used for operator-fixable problems, such as a missing ``GITEA_URL`` /
    ``GITEA_TOKEN`` or a token whose owning user cannot be resolved.
    """


def _default_audit_log_path() -> Path:
    """Return a writable per-user audit-log path for local runs.

    The server's container default (``/var/log/aegis-mcp/audit.log``) is not
    writable on a typical workstation, so fall back to an OS-appropriate user
    state directory:

    * Windows: ``%LOCALAPPDATA%``, or ``~/AppData/Local`` when it is unset or
      empty.
    * Elsewhere: ``$XDG_STATE_HOME``, or ``~/.local/state`` when it is unset,
      empty, or not an absolute path.

    The XDG Base Directory specification requires relative values to be
    treated as invalid and ignored; honouring one would make the audit-log
    location depend on the process's current working directory.

    Returns:
        ``<base>/aegis-gitea-mcp/audit.log`` under the selected base directory.
    """
    leaf = Path("aegis-gitea-mcp") / "audit.log"

    if sys.platform == "win32":
        local_app_data = os.environ.get("LOCALAPPDATA")
        base = Path(local_app_data) if local_app_data else Path.home() / "AppData" / "Local"
        return base / leaf

    xdg_state = os.environ.get("XDG_STATE_HOME", "")
    if xdg_state:
        candidate = Path(xdg_state)
        # Only an absolute XDG_STATE_HOME is valid per the XDG spec.
        if candidate.is_absolute():
            return candidate / leaf
    return Path.home() / ".local" / "state" / leaf


def _bootstrap_env() -> None:
    """Apply local-mode defaults to the environment before settings load.

    Local mode has no OAuth and no API-key gate (the operator is the trusted PAT
    owner), and writes its audit log to a per-user path when one is not set. User
    overrides via real env vars or ``.env`` always win for everything else.

    Effects on ``os.environ``:

    * ``OAUTH_MODE`` is always forced to ``"false"``.
    * ``AUTH_ENABLED`` and ``STARTUP_VALIDATE_GITEA`` default to ``"false"``
      only when not already set.
    * ``AUDIT_LOG_PATH`` is filled with the per-user default when it is unset
      or blank.
    """
    # python-dotenv: load a local .env so GITEA_URL/GITEA_TOKEN can live there.
    try:
        from dotenv import find_dotenv, load_dotenv

        # ``load_dotenv()`` with no path searches upward from the directory of
        # the *calling module*; for an installed package that is site-packages,
        # so a .env next to where the operator launched the server is never
        # seen. Search from the current working directory first, and fall back
        # to the library's default lookup (``None``) when nothing is found.
        load_dotenv(find_dotenv(usecwd=True) or None)
    except Exception:  # pragma: no cover - dotenv is a core dep, defensive only
        pass

    # Local mode is single-user PAT auth: force OAuth off and disable the API-key
    # requirement so the server's API-key/OAuth config validation does not apply.
    os.environ["OAUTH_MODE"] = "false"
    os.environ.setdefault("AUTH_ENABLED", "false")
    os.environ.setdefault("STARTUP_VALIDATE_GITEA", "false")

    # A blank value counts as "not configured".
    if not os.environ.get("AUDIT_LOG_PATH", "").strip():
        os.environ["AUDIT_LOG_PATH"] = str(_default_audit_log_path())


def _check_required_env() -> None:
    """Verify that ``GITEA_URL`` and ``GITEA_TOKEN`` are present and non-blank.

    Raises:
        StdioConfigError: Listing every missing variable together with an
            example of how to set them.
    """
    absent: list[str] = []
    for variable in ("GITEA_URL", "GITEA_TOKEN"):
        # Whitespace-only values are treated the same as unset.
        if os.environ.get(variable, "").strip():
            continue
        absent.append(variable)

    if not absent:
        return

    names = ", ".join(absent)
    raise StdioConfigError(
        f"Missing required environment variable(s): {names}"
        ".\nSet them in your environment or a local .env file, e.g.:\n"
        "  GITEA_URL=https://gitea.example.com\n"
        "  GITEA_TOKEN=<a Gitea personal access token>\n"
    )


# Gitea login of the PAT owner. Stays ``None`` until ``_serve`` resolves it at
# startup; when set, ``_dispatch`` pins it onto the request context on every call.
_owner_login: str | None = None


async def _resolve_owner_login() -> str:
    """Resolve the Gitea login that owns the configured PAT.

    Asks Gitea for the current user with ``settings.gitea_token`` and returns
    the trimmed ``login`` field. The value is not cached here; the caller is
    responsible for storing it.

    Returns:
        The non-empty login of the token owner.

    Raises:
        StdioConfigError: If the response carries no usable ``login``.
    """
    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.gitea_client import GiteaClient

    settings = get_settings()
    async with GiteaClient(token=settings.gitea_token) as gitea:
        user = await gitea.get_current_user()

    # ``or ""`` matters: a present-but-null ``login`` would otherwise become the
    # literal string "None" via ``str(None)`` and be pinned as the identity.
    raw_login = user.get("login") if isinstance(user, dict) else None
    login = str(raw_login or "").strip()
    if not login:
        raise StdioConfigError(
            "Could not resolve the Gitea user for the supplied GITEA_TOKEN. "
            "Verify the token is valid and has API access."
        )
    return login


async def _dispatch(tool_name: str, arguments: dict[str, Any]) -> dict[str, Any]:
    """Execute a tool with the same policy/audit/sanitize guarantees as the server.

    The per-user repository-permission probe is intentionally omitted: the local
    operator is the PAT owner. Everything else — policy engine, ``WRITE_MODE``,
    the ``gitea_request`` per-method authorization, secret sanitization and audit
    logging — runs identically to the HTTP adapter.

    Args:
        tool_name: Registry name of the tool to run.
        arguments: Tool arguments as supplied by the MCP client.

    Returns:
        The handler's result, passed through ``sanitize_data`` unless
        ``secret_detection_mode`` is ``"off"``.

    Raises:
        ToolError: 404 when the tool is unknown, 500 when it has no handler,
            403 when the policy engine denies the request.
        Exception: Anything raised by the handler or sanitizer is re-raised
            after the failure has been written to the audit log.
    """
    from aegis_gitea_mcp.audit import get_audit_logger
    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.gitea_client import GiteaClient
    from aegis_gitea_mcp.policy import get_policy_engine
    from aegis_gitea_mcp.registry import get_tool_by_name, get_tool_handler
    from aegis_gitea_mcp.request_context import set_gitea_user_login
    from aegis_gitea_mcp.security import sanitize_data
    from aegis_gitea_mcp.tools.arguments import extract_repository, extract_target_path

    # Pin identity to the trusted PAT owner for every call (e.g. list_repositories
    # scopes its results to this login in service-PAT mode).
    if _owner_login:
        set_gitea_user_login(_owner_login)

    settings = get_settings()
    audit = get_audit_logger()

    tool_def = get_tool_by_name(tool_name)
    if tool_def is None:
        raise ToolError(f"Tool '{tool_name}' not found", status_code=404)
    handler = get_tool_handler(tool_name)
    if handler is None:
        raise ToolError(f"Tool '{tool_name}' has no handler implementation", status_code=500)

    # Authorize before touching Gitea; denials are audited and surfaced as 403.
    repository = extract_repository(arguments)
    target_path = extract_target_path(arguments)
    decision = get_policy_engine().authorize(
        tool_name=tool_name,
        is_write=tool_def.write_operation,
        repository=repository,
        target_path=target_path,
    )
    if not decision.allowed:
        audit.log_access_denied(tool_name=tool_name, repository=repository, reason=decision.reason)
        raise ToolError(f"Policy denied request: {decision.reason}", status_code=403)

    # First audit record opens the invocation; its correlation id ties the
    # outcome record below to it.
    correlation_id = audit.log_tool_invocation(tool_name=tool_name, params=arguments)
    try:
        async with GiteaClient(token=settings.gitea_token) as gitea:
            result = await handler(gitea, arguments)

        if settings.secret_detection_mode != "off":
            result = sanitize_data(result, mode=settings.secret_detection_mode)
    except Exception:
        # Close the audit trail for failed calls too; otherwise the log holds an
        # invocation with no recorded outcome. The exception text is deliberately
        # not logged here because it has not been through secret sanitization.
        audit.log_tool_invocation(
            tool_name=tool_name, correlation_id=correlation_id, result_status="error"
        )
        raise

    audit.log_tool_invocation(
        tool_name=tool_name, correlation_id=correlation_id, result_status="success"
    )
    return result


def _configure_stderr_logging() -> None:
    """Route every root log handler away from stdout.

    The stdio MCP transport carries JSON-RPC on stdout, so one stray log line
    there corrupts the stream and breaks the client. After applying the
    project's ``configure_logging`` (expected to target stderr already), any
    root-logger stream handler still bound to ``sys.stdout`` (for instance
    from a library that called ``basicConfig``) is re-pointed at ``sys.stderr``.
    """
    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.logging_utils import configure_logging

    level = get_settings().log_level
    configure_logging(level)

    stdout_bound = [
        candidate
        for candidate in logging.getLogger().handlers
        if isinstance(candidate, logging.StreamHandler) and candidate.stream is sys.stdout
    ]
    for stream_handler in stdout_bound:
        stream_handler.setStream(sys.stderr)


def build_server() -> Any:
    """Construct the stdio MCP ``Server`` without starting it.

    Tools are advertised from the shared registry and every call is routed
    through :func:`_dispatch`. Construction is kept apart from :func:`_serve`
    so tests can drive the server in-process over an in-memory transport
    instead of real stdio streams.
    """
    import mcp.types as mcp_types
    from mcp.server import Server

    from aegis_gitea_mcp.registry import list_tool_definitions

    server: Any = Server("aegis-gitea-mcp")

    @server.list_tools()
    async def list_tools() -> list[mcp_types.Tool]:
        # Translate each registry definition into the SDK's Tool model.
        advertised: list[mcp_types.Tool] = []
        for definition in list_tool_definitions():
            advertised.append(
                mcp_types.Tool(
                    name=definition.name,
                    description=definition.description,
                    inputSchema=definition.input_schema,
                )
            )
        return advertised

    @server.call_tool()
    async def call_tool(name: str, arguments: dict[str, Any]) -> dict[str, Any]:
        # A dict return value yields structured content plus a JSON text block.
        outcome = await _dispatch(name, arguments)
        return outcome

    return server


async def _serve() -> None:
    """Validate configuration, pin the PAT owner, then serve over stdin/stdout.

    Settings and the policy engine are loaded first so misconfiguration
    surfaces before the transport is opened. The resolved owner login is
    stored in the module-level ``_owner_login``.
    """
    global _owner_login

    from mcp.server.stdio import stdio_server

    from aegis_gitea_mcp.config import get_settings
    from aegis_gitea_mcp.policy import get_policy_engine

    # Called only for their fail-fast side effect; the return values are unused.
    get_settings()
    get_policy_engine()

    _owner_login = await _resolve_owner_login()

    mcp_server = build_server()
    async with stdio_server() as (incoming, outgoing):
        init_options = mcp_server.create_initialization_options()
        await mcp_server.run(incoming, outgoing, init_options)


def main() -> None:
    """Run the local stdio MCP server (console-script entry point).

    Bootstraps local-mode environment defaults, validates required variables
    and settings, pins logging to stderr, and then serves until the transport
    closes. Configuration problems are reported on stderr and exit with
    status 2; Ctrl-C exits quietly.
    """

    def _exit_with(message: str, cause: BaseException) -> None:
        # Report on stderr (stdout belongs to JSON-RPC) and exit with status 2.
        print(message, file=sys.stderr)
        raise SystemExit(2) from cause

    _bootstrap_env()

    try:
        _check_required_env()
    except StdioConfigError as exc:
        _exit_with(f"aegis-gitea-mcp: {exc}", exc)

    try:
        from aegis_gitea_mcp.config import get_settings

        get_settings()
    except Exception as exc:  # pydantic ValidationError or PolicyError
        _exit_with(f"aegis-gitea-mcp: invalid configuration: {exc}", exc)

    # Keep stdout reserved for the JSON-RPC stream; all logs go to stderr.
    _configure_stderr_logging()

    try:
        asyncio.run(_serve())
    except StdioConfigError as exc:
        _exit_with(f"aegis-gitea-mcp: {exc}", exc)
    except KeyboardInterrupt:  # pragma: no cover - interactive shutdown
        return


# Public API: the console-script entry point and the configuration error type.
__all__ = ["main", "StdioConfigError"]