diff --git a/docs/observability.md b/docs/observability.md index 8203702..392134d 100644 --- a/docs/observability.md +++ b/docs/observability.md @@ -6,6 +6,26 @@ - Request correlation via `X-Request-ID`. - Security events and policy denials are audit logged. +### Structured event helpers + +`logging_utils` exposes reusable helpers so endpoints emit consistent, +secret-safe structured events instead of ad-hoc inline logging: + +- `log_event(logger, level, event, **context)` — emit a named event with a + `context` mapping; keys in `SENSITIVE_CONTEXT_KEYS` (e.g. `token`, + `authorization`, `password`) are masked as `***`. +- `log_nullable_field(logger, event, field, value)` — record whether a parsed + response field is `None` and its runtime type, without dumping its contents. +- `sanitize_context(context)` — the masking primitive used by the above. + +The `context` mapping is serialized into the JSON log payload under a `context` +key. These run at `DEBUG`, so they are silent unless `LOG_LEVEL=DEBUG`. + +`get_issue` is instrumented with these helpers (`get_issue.start`, +`get_issue.payload_shape`, `get_issue.field_check`) to make nullable-field +parsing failures diagnosable. The same pattern can be reused for other +parsing-heavy endpoints (`get_pull_request`, `list_issues`, `get_commit_diff`). + ## Metrics Prometheus-compatible endpoint: `GET /metrics`. diff --git a/src/aegis_gitea_mcp/logging_utils.py b/src/aegis_gitea_mcp/logging_utils.py index 89d7cf3..309f187 100644 --- a/src/aegis_gitea_mcp/logging_utils.py +++ b/src/aegis_gitea_mcp/logging_utils.py @@ -5,16 +5,22 @@ from __future__ import annotations import json import logging from datetime import datetime, timezone +from typing import Any from aegis_gitea_mcp.request_context import get_request_id +# Context keys whose values must never be written to logs verbatim. +SENSITIVE_CONTEXT_KEYS = frozenset( + {"token", "authorization", "cookie", "password", "secret", "api_key"} +) + class JsonLogFormatter(logging.Formatter): """Format log records as JSON documents.""" def format(self, record: logging.LogRecord) -> str: """Serialize a log record to JSON.""" - payload = { + payload: dict[str, Any] = { "timestamp": datetime.now(timezone.utc).isoformat(), "level": record.levelname, "logger": record.name, @@ -22,6 +28,10 @@ class JsonLogFormatter(logging.Formatter): "request_id": get_request_id(), } + context = getattr(record, "context", None) + if isinstance(context, dict) and context: + payload["context"] = context + if record.exc_info: # Security decision: include only exception type to avoid stack leakage. exception_type = record.exc_info[0] @@ -46,3 +56,55 @@ def configure_logging(level: str) -> None: stream_handler = logging.StreamHandler() stream_handler.setFormatter(JsonLogFormatter()) logger.addHandler(stream_handler) + + +def sanitize_context(context: dict[str, Any]) -> dict[str, Any]: + """Redact sensitive values from a logging context mapping. + + Keys whose lower-cased name is in ``SENSITIVE_CONTEXT_KEYS`` have their + value replaced with ``"***"`` so credentials never reach the log sink. + + Args: + context: Arbitrary key/value pairs intended for structured logging. + + Returns: + A new mapping with sensitive values masked. + """ + return { + key: ("***" if key.lower() in SENSITIVE_CONTEXT_KEYS else value) + for key, value in context.items() + } + + +def log_event(logger: logging.Logger, level: int, event: str, **context: Any) -> None: + """Emit a structured log event with a sanitized context payload. + + Args: + logger: Logger to emit on. + level: Standard ``logging`` level (e.g. ``logging.DEBUG``). + event: Stable, machine-friendly event name (e.g. ``get_issue.start``). + **context: Extra structured fields; sensitive keys are masked. + """ + logger.log(level, event, extra={"context": sanitize_context(context)}) + + +def log_nullable_field(logger: logging.Logger, event: str, field_name: str, value: Any) -> None: + """Log whether a parsed response field is ``None`` and its runtime type. + + Makes null/nullable field failures (such as a ``null`` ``labels`` array) + obvious in logs without dumping the field's full contents. + + Args: + logger: Logger to emit on. + event: Stable event name for the field check. + field_name: Name of the field being inspected. + value: The parsed value to characterize. + """ + log_event( + logger, + logging.DEBUG, + event, + field=field_name, + is_none=value is None, + value_type=(type(value).__name__ if value is not None else None), + ) diff --git a/src/aegis_gitea_mcp/tools/read_tools.py b/src/aegis_gitea_mcp/tools/read_tools.py index ca439a2..34d44b2 100644 --- a/src/aegis_gitea_mcp/tools/read_tools.py +++ b/src/aegis_gitea_mcp/tools/read_tools.py @@ -2,6 +2,7 @@ from __future__ import annotations +import logging from typing import Any from aegis_gitea_mcp.gitea_client import ( @@ -10,6 +11,7 @@ from aegis_gitea_mcp.gitea_client import ( GiteaClient, GiteaError, ) +from aegis_gitea_mcp.logging_utils import log_event, log_nullable_field from aegis_gitea_mcp.response_limits import limit_items, limit_text from aegis_gitea_mcp.tools.arguments import ( CommitDiffArgs, @@ -38,6 +40,8 @@ from aegis_gitea_mcp.tools.arguments import ( SearchCodeArgs, ) +logger = logging.getLogger(__name__) + async def search_code_tool(gitea: GiteaClient, arguments: dict[str, Any]) -> dict[str, Any]: """Search repository code and return bounded result snippets.""" @@ -265,8 +269,26 @@ async def list_issues_tool(gitea: GiteaClient, arguments: dict[str, Any]) -> dic async def get_issue_tool(gitea: GiteaClient, arguments: dict[str, Any]) -> dict[str, Any]: """Get issue details.""" parsed = IssueArgs.model_validate(arguments) + log_event( + logger, + logging.DEBUG, + "get_issue.start", + owner=parsed.owner, + repo=parsed.repo, + issue_number=parsed.issue_number, + ) try: issue = await gitea.get_issue(parsed.owner, parsed.repo, parsed.issue_number) + log_event( + logger, + logging.DEBUG, + "get_issue.payload_shape", + top_level_keys=sorted(issue.keys()) if issue else None, + ) + # Surface nullable collections that previously broke parsing (see #13). + log_nullable_field(logger, "get_issue.field_check", "labels", issue.get("labels")) + log_nullable_field(logger, "get_issue.field_check", "assignees", issue.get("assignees")) + log_nullable_field(logger, "get_issue.field_check", "user", issue.get("user")) return { "number": issue.get("number", 0), "title": limit_text(str(issue.get("title", ""))), diff --git a/tests/test_logging_utils.py b/tests/test_logging_utils.py new file mode 100644 index 0000000..f5a62fa --- /dev/null +++ b/tests/test_logging_utils.py @@ -0,0 +1,120 @@ +"""Tests for structured logging helpers and get_issue instrumentation (#14).""" + +from __future__ import annotations + +import json +import logging + +import pytest + +from aegis_gitea_mcp.config import reset_settings +from aegis_gitea_mcp.logging_utils import ( + JsonLogFormatter, + log_event, + log_nullable_field, + sanitize_context, +) +from aegis_gitea_mcp.tools.read_tools import get_issue_tool + +READ_TOOLS_LOGGER = "aegis_gitea_mcp.tools.read_tools" + + +@pytest.fixture(autouse=True) +def tool_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Provide minimal settings environment for response limit helpers.""" + reset_settings() + monkeypatch.setenv("GITEA_URL", "https://gitea.example.com") + monkeypatch.setenv("GITEA_TOKEN", "test-token") + monkeypatch.setenv("MCP_API_KEYS", "a" * 64) + monkeypatch.setenv("ENVIRONMENT", "test") + + +def test_sanitize_context_masks_sensitive_keys() -> None: + """Sensitive keys are masked case-insensitively; others pass through.""" + cleaned = sanitize_context( + {"owner": "acme", "Token": "abc", "password": "x", "issue_number": 7} + ) + + assert cleaned["owner"] == "acme" + assert cleaned["issue_number"] == 7 + assert cleaned["Token"] == "***" + assert cleaned["password"] == "***" + + +def test_json_formatter_includes_context() -> None: + """The formatter serializes a record's context mapping into the payload.""" + record = logging.LogRecord( + "test", logging.DEBUG, __file__, 1, "get_issue.field_check", None, None + ) + record.context = {"field": "labels", "is_none": True} + + payload = json.loads(JsonLogFormatter().format(record)) + + assert payload["message"] == "get_issue.field_check" + assert payload["context"] == {"field": "labels", "is_none": True} + + +def test_log_event_emits_sanitized_context(caplog: pytest.LogCaptureFixture) -> None: + """log_event records the event name and masks sensitive context values.""" + logger = logging.getLogger("test.log_event") + with caplog.at_level(logging.DEBUG, logger="test.log_event"): + log_event(logger, logging.DEBUG, "evt", owner="acme", token="secret") + + record = caplog.records[-1] + assert record.getMessage() == "evt" + assert record.context == {"owner": "acme", "token": "***"} + + +def test_log_nullable_field_characterizes_value(caplog: pytest.LogCaptureFixture) -> None: + """log_nullable_field reports None-ness and runtime type without dumping data.""" + logger = logging.getLogger("test.nullable") + with caplog.at_level(logging.DEBUG, logger="test.nullable"): + log_nullable_field(logger, "evt", "labels", None) + log_nullable_field(logger, "evt", "labels", [1, 2]) + + assert caplog.records[0].context == { + "field": "labels", + "is_none": True, + "value_type": None, + } + assert caplog.records[1].context == { + "field": "labels", + "is_none": False, + "value_type": "list", + } + + +class _NullIssueGitea: + """Minimal stub returning an issue with null collection fields.""" + + async def get_issue(self, owner: str, repo: str, index: int) -> dict[str, object]: + return { + "number": index, + "title": "Issue", + "body": "Body", + "state": "open", + "labels": None, + "assignees": None, + "user": None, + } + + +@pytest.mark.asyncio +async def test_get_issue_tool_emits_debug_events(caplog: pytest.LogCaptureFixture) -> None: + """get_issue_tool emits start/shape/field-check debug events and still parses.""" + with caplog.at_level(logging.DEBUG, logger=READ_TOOLS_LOGGER): + result = await get_issue_tool( + _NullIssueGitea(), {"owner": "acme", "repo": "app", "issue_number": 7} + ) + + events = [r.getMessage() for r in caplog.records] + assert "get_issue.start" in events + assert "get_issue.payload_shape" in events + + field_checks = [r.context for r in caplog.records if r.getMessage() == "get_issue.field_check"] + assert {"field": "labels", "is_none": True, "value_type": None} in field_checks + assert {"field": "user", "is_none": True, "value_type": None} in field_checks + + # Logging must not change behaviour: null collections still parse to empties. + assert result["labels"] == [] + assert result["assignees"] == [] diff --git a/tests/test_tools_extended.py b/tests/test_tools_extended.py index 5749c75..0a97d70 100644 --- a/tests/test_tools_extended.py +++ b/tests/test_tools_extended.py @@ -3,7 +3,7 @@ import pytest from aegis_gitea_mcp.config import reset_settings -from aegis_gitea_mcp.gitea_client import GiteaError +from aegis_gitea_mcp.gitea_client import GiteaAuthenticationError, GiteaError from aegis_gitea_mcp.tools.read_tools import ( compare_refs_tool, get_branch_tool, @@ -402,3 +402,62 @@ def test_create_label_args_reject_invalid_color() -> None: with pytest.raises(pydantic.ValidationError): CreateLabelArgs(owner="o", repo="r", name="bug", color="red") + + +# (tool, valid_args) for every write tool, used to exercise error branches. +WRITE_TOOL_ERROR_CASES = [ + (create_issue_tool, {"owner": "acme", "repo": "app", "title": "Issue"}), + (update_issue_tool, {"owner": "acme", "repo": "app", "issue_number": 1, "title": "x"}), + (create_issue_comment_tool, {"owner": "acme", "repo": "app", "issue_number": 1, "body": "c"}), + (create_pr_comment_tool, {"owner": "acme", "repo": "app", "pull_number": 1, "body": "c"}), + (add_labels_tool, {"owner": "acme", "repo": "app", "issue_number": 1, "labels": ["bug"]}), + (assign_issue_tool, {"owner": "acme", "repo": "app", "issue_number": 1, "assignees": ["al"]}), + (create_label_tool, {"owner": "acme", "repo": "app", "name": "bug", "color": "#ff0000"}), + (update_label_tool, {"owner": "acme", "repo": "app", "name": "bug", "new_name": "defect"}), + (remove_labels_tool, {"owner": "acme", "repo": "app", "issue_number": 1, "labels": ["bug"]}), + ( + create_pull_request_tool, + {"owner": "acme", "repo": "app", "title": "PR", "head": "feature", "base": "main"}, + ), + (create_release_tool, {"owner": "acme", "repo": "app", "tag_name": "v1.0.0"}), + (edit_release_tool, {"owner": "acme", "repo": "app", "release_id": 3, "name": "x"}), + (create_branch_tool, {"owner": "acme", "repo": "app", "new_branch_name": "feature/x"}), + (create_milestone_tool, {"owner": "acme", "repo": "app", "title": "M1"}), + (edit_issue_comment_tool, {"owner": "acme", "repo": "app", "comment_id": 5, "body": "e"}), +] + + +class _WriteBackendErrorGitea: + """Stub whose every method raises a generic Gitea backend error.""" + + def __getattr__(self, name: str): + async def _raise(*args: object, **kwargs: object) -> object: + raise GiteaError("backend failure") + + return _raise + + +class _WriteAuthErrorGitea: + """Stub whose every method raises an authentication error.""" + + def __getattr__(self, name: str): + async def _raise(*args: object, **kwargs: object) -> object: + raise GiteaAuthenticationError("token expired") + + return _raise + + +@pytest.mark.asyncio +@pytest.mark.parametrize("tool,args", WRITE_TOOL_ERROR_CASES) +async def test_write_tools_wrap_backend_errors(tool, args) -> None: + """Every write tool wraps a backend GiteaError as a RuntimeError.""" + with pytest.raises(RuntimeError): + await tool(_WriteBackendErrorGitea(), args) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("tool,args", WRITE_TOOL_ERROR_CASES) +async def test_write_tools_propagate_auth_errors(tool, args) -> None: + """Every write tool lets auth failures surface for re-authorization.""" + with pytest.raises(GiteaAuthenticationError): + await tool(_WriteAuthErrorGitea(), args)