From f53e1a3a5ac18d63fb430307e4f41f45f4648e07 Mon Sep 17 00:00:00 2001
From: latte <latte@hiddenden.cafe>
Date: Mon, 22 Jun 2026 15:09:59 +0200
Subject: [PATCH] feat: add structured logging helpers and instrument get_issue
 (#14)

Adds reusable, secret-safe logging helpers to `logging_utils`:
- `log_event(logger, level, event, **context)` emits a named event with a
  sanitized `context` mapping (sensitive keys masked as `***`).
- `log_nullable_field(...)` records whether a parsed field is None plus its
  runtime type, without dumping its contents.
- `sanitize_context(...)` is the shared masking primitive.

The JSON formatter now serializes a record's `context` into the payload.

`get_issue_tool` is instrumented at DEBUG (`get_issue.start`,
`get_issue.payload_shape`, `get_issue.field_check` for labels/assignees/user)
so the nullable-field parsing that caused #13 is diagnosable going forward.

Adds tests for the helpers, the formatter, and the get_issue instrumentation,
and documents the pattern in docs/observability.md.
---
 docs/observability.md                   |  20 ++++
 src/aegis_gitea_mcp/logging_utils.py    |  64 ++++++++++++-
 src/aegis_gitea_mcp/tools/read_tools.py |  22 +++++
 tests/test_logging_utils.py             | 120 ++++++++++++++++++++++++
 4 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_logging_utils.py

diff --git a/docs/observability.md b/docs/observability.md
index 8203702..392134d 100644
--- a/docs/observability.md
+++ b/docs/observability.md
@@ -6,6 +6,26 @@
 - Request correlation via `X-Request-ID`.
 - Security events and policy denials are audit logged.
 
+### Structured event helpers
+
+`logging_utils` exposes reusable helpers so endpoints emit consistent,
+secret-safe structured events instead of ad-hoc inline logging:
+
+- `log_event(logger, level, event, **context)` — emit a named event with a
+  `context` mapping; keys in `SENSITIVE_CONTEXT_KEYS` (e.g. `token`,
+  `authorization`, `password`) are masked as `***`.
+- `log_nullable_field(logger, event, field, value)` — record whether a parsed
+  response field is `None` and its runtime type, without dumping its contents.
+- `sanitize_context(context)` — the masking primitive used by the above.
+
+The `context` mapping is serialized into the JSON log payload under a `context`
+key. These run at `DEBUG`, so they are silent unless `LOG_LEVEL=DEBUG`.
+
+`get_issue` is instrumented with these helpers (`get_issue.start`,
+`get_issue.payload_shape`, `get_issue.field_check`) to make nullable-field
+parsing failures diagnosable. The same pattern can be reused for other
+parsing-heavy endpoints (`get_pull_request`, `list_issues`, `get_commit_diff`).
+
 ## Metrics
 
 Prometheus-compatible endpoint: `GET /metrics`.
diff --git a/src/aegis_gitea_mcp/logging_utils.py b/src/aegis_gitea_mcp/logging_utils.py
index 89d7cf3..309f187 100644
--- a/src/aegis_gitea_mcp/logging_utils.py
+++ b/src/aegis_gitea_mcp/logging_utils.py
@@ -5,16 +5,22 @@ from __future__ import annotations
 import json
 import logging
 from datetime import datetime, timezone
+from typing import Any
 
 from aegis_gitea_mcp.request_context import get_request_id
 
+# Context keys whose values must never be written to logs verbatim.
+SENSITIVE_CONTEXT_KEYS = frozenset(
+    {"token", "authorization", "cookie", "password", "secret", "api_key"}
+)
+
 
 class JsonLogFormatter(logging.Formatter):
     """Format log records as JSON documents."""
 
     def format(self, record: logging.LogRecord) -> str:
         """Serialize a log record to JSON."""
-        payload = {
+        payload: dict[str, Any] = {
             "timestamp": datetime.now(timezone.utc).isoformat(),
             "level": record.levelname,
             "logger": record.name,
@@ -22,6 +28,10 @@ class JsonLogFormatter(logging.Formatter):
             "request_id": get_request_id(),
         }
 
+        context = getattr(record, "context", None)
+        if isinstance(context, dict) and context:
+            payload["context"] = context
+
         if record.exc_info:
             # Security decision: include only exception type to avoid stack leakage.
             exception_type = record.exc_info[0]
@@ -46,3 +56,55 @@ def configure_logging(level: str) -> None:
     stream_handler = logging.StreamHandler()
     stream_handler.setFormatter(JsonLogFormatter())
     logger.addHandler(stream_handler)
+
+
+def sanitize_context(context: dict[str, Any]) -> dict[str, Any]:
+    """Redact sensitive values from a logging context mapping.
+
+    Keys whose lower-cased name is in ``SENSITIVE_CONTEXT_KEYS`` have their
+    value replaced with ``"***"`` so credentials never reach the log sink.
+
+    Args:
+        context: Arbitrary key/value pairs intended for structured logging.
+
+    Returns:
+        A new mapping with sensitive values masked.
+    """
+    return {
+        key: ("***" if key.lower() in SENSITIVE_CONTEXT_KEYS else value)
+        for key, value in context.items()
+    }
+
+
+def log_event(logger: logging.Logger, level: int, event: str, **context: Any) -> None:
+    """Emit a structured log event with a sanitized context payload.
+
+    Args:
+        logger: Logger to emit on.
+        level: Standard ``logging`` level (e.g. ``logging.DEBUG``).
+        event: Stable, machine-friendly event name (e.g. ``get_issue.start``).
+        **context: Extra structured fields; sensitive keys are masked.
+    """
+    logger.log(level, event, extra={"context": sanitize_context(context)})
+
+
+def log_nullable_field(logger: logging.Logger, event: str, field_name: str, value: Any) -> None:
+    """Log whether a parsed response field is ``None`` and its runtime type.
+
+    Makes null/nullable field failures (such as a ``null`` ``labels`` array)
+    obvious in logs without dumping the field's full contents.
+
+    Args:
+        logger: Logger to emit on.
+        event: Stable event name for the field check.
+        field_name: Name of the field being inspected.
+        value: The parsed value to characterize.
+    """
+    log_event(
+        logger,
+        logging.DEBUG,
+        event,
+        field=field_name,
+        is_none=value is None,
+        value_type=(type(value).__name__ if value is not None else None),
+    )
diff --git a/src/aegis_gitea_mcp/tools/read_tools.py b/src/aegis_gitea_mcp/tools/read_tools.py
index ca439a2..34d44b2 100644
--- a/src/aegis_gitea_mcp/tools/read_tools.py
+++ b/src/aegis_gitea_mcp/tools/read_tools.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import logging
 from typing import Any
 
 from aegis_gitea_mcp.gitea_client import (
@@ -10,6 +11,7 @@ from aegis_gitea_mcp.gitea_client import (
     GiteaClient,
     GiteaError,
 )
+from aegis_gitea_mcp.logging_utils import log_event, log_nullable_field
 from aegis_gitea_mcp.response_limits import limit_items, limit_text
 from aegis_gitea_mcp.tools.arguments import (
     CommitDiffArgs,
@@ -38,6 +40,8 @@ from aegis_gitea_mcp.tools.arguments import (
     SearchCodeArgs,
 )
 
+logger = logging.getLogger(__name__)
+
 
 async def search_code_tool(gitea: GiteaClient, arguments: dict[str, Any]) -> dict[str, Any]:
     """Search repository code and return bounded result snippets."""
@@ -265,8 +269,26 @@ async def list_issues_tool(gitea: GiteaClient, arguments: dict[str, Any]) -> dic
 async def get_issue_tool(gitea: GiteaClient, arguments: dict[str, Any]) -> dict[str, Any]:
     """Get issue details."""
     parsed = IssueArgs.model_validate(arguments)
+    log_event(
+        logger,
+        logging.DEBUG,
+        "get_issue.start",
+        owner=parsed.owner,
+        repo=parsed.repo,
+        issue_number=parsed.issue_number,
+    )
     try:
         issue = await gitea.get_issue(parsed.owner, parsed.repo, parsed.issue_number)
+        log_event(
+            logger,
+            logging.DEBUG,
+            "get_issue.payload_shape",
+            top_level_keys=sorted(issue.keys()) if issue else None,
+        )
+        # Surface nullable collections that previously broke parsing (see #13).
+        log_nullable_field(logger, "get_issue.field_check", "labels", issue.get("labels"))
+        log_nullable_field(logger, "get_issue.field_check", "assignees", issue.get("assignees"))
+        log_nullable_field(logger, "get_issue.field_check", "user", issue.get("user"))
         return {
             "number": issue.get("number", 0),
             "title": limit_text(str(issue.get("title", ""))),
diff --git a/tests/test_logging_utils.py b/tests/test_logging_utils.py
new file mode 100644
index 0000000..f5a62fa
--- /dev/null
+++ b/tests/test_logging_utils.py
@@ -0,0 +1,120 @@
+"""Tests for structured logging helpers and get_issue instrumentation (#14)."""
+
+from __future__ import annotations
+
+import json
+import logging
+
+import pytest
+
+from aegis_gitea_mcp.config import reset_settings
+from aegis_gitea_mcp.logging_utils import (
+    JsonLogFormatter,
+    log_event,
+    log_nullable_field,
+    sanitize_context,
+)
+from aegis_gitea_mcp.tools.read_tools import get_issue_tool
+
+READ_TOOLS_LOGGER = "aegis_gitea_mcp.tools.read_tools"
+
+
+@pytest.fixture(autouse=True)
+def tool_env(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Provide minimal settings environment for response limit helpers."""
+    reset_settings()
+    monkeypatch.setenv("GITEA_URL", "https://gitea.example.com")
+    monkeypatch.setenv("GITEA_TOKEN", "test-token")
+    monkeypatch.setenv("MCP_API_KEYS", "a" * 64)
+    monkeypatch.setenv("ENVIRONMENT", "test")
+
+
+def test_sanitize_context_masks_sensitive_keys() -> None:
+    """Sensitive keys are masked case-insensitively; others pass through."""
+    cleaned = sanitize_context(
+        {"owner": "acme", "Token": "abc", "password": "x", "issue_number": 7}
+    )
+
+    assert cleaned["owner"] == "acme"
+    assert cleaned["issue_number"] == 7
+    assert cleaned["Token"] == "***"
+    assert cleaned["password"] == "***"
+
+
+def test_json_formatter_includes_context() -> None:
+    """The formatter serializes a record's context mapping into the payload."""
+    record = logging.LogRecord(
+        "test", logging.DEBUG, __file__, 1, "get_issue.field_check", None, None
+    )
+    record.context = {"field": "labels", "is_none": True}
+
+    payload = json.loads(JsonLogFormatter().format(record))
+
+    assert payload["message"] == "get_issue.field_check"
+    assert payload["context"] == {"field": "labels", "is_none": True}
+
+
+def test_log_event_emits_sanitized_context(caplog: pytest.LogCaptureFixture) -> None:
+    """log_event records the event name and masks sensitive context values."""
+    logger = logging.getLogger("test.log_event")
+    with caplog.at_level(logging.DEBUG, logger="test.log_event"):
+        log_event(logger, logging.DEBUG, "evt", owner="acme", token="secret")
+
+    record = caplog.records[-1]
+    assert record.getMessage() == "evt"
+    assert record.context == {"owner": "acme", "token": "***"}
+
+
+def test_log_nullable_field_characterizes_value(caplog: pytest.LogCaptureFixture) -> None:
+    """log_nullable_field reports None-ness and runtime type without dumping data."""
+    logger = logging.getLogger("test.nullable")
+    with caplog.at_level(logging.DEBUG, logger="test.nullable"):
+        log_nullable_field(logger, "evt", "labels", None)
+        log_nullable_field(logger, "evt", "labels", [1, 2])
+
+    assert caplog.records[0].context == {
+        "field": "labels",
+        "is_none": True,
+        "value_type": None,
+    }
+    assert caplog.records[1].context == {
+        "field": "labels",
+        "is_none": False,
+        "value_type": "list",
+    }
+
+
+class _NullIssueGitea:
+    """Minimal stub returning an issue with null collection fields."""
+
+    async def get_issue(self, owner: str, repo: str, index: int) -> dict[str, object]:
+        return {
+            "number": index,
+            "title": "Issue",
+            "body": "Body",
+            "state": "open",
+            "labels": None,
+            "assignees": None,
+            "user": None,
+        }
+
+
+@pytest.mark.asyncio
+async def test_get_issue_tool_emits_debug_events(caplog: pytest.LogCaptureFixture) -> None:
+    """get_issue_tool emits start/shape/field-check debug events and still parses."""
+    with caplog.at_level(logging.DEBUG, logger=READ_TOOLS_LOGGER):
+        result = await get_issue_tool(
+            _NullIssueGitea(), {"owner": "acme", "repo": "app", "issue_number": 7}
+        )
+
+    events = [r.getMessage() for r in caplog.records]
+    assert "get_issue.start" in events
+    assert "get_issue.payload_shape" in events
+
+    field_checks = [r.context for r in caplog.records if r.getMessage() == "get_issue.field_check"]
+    assert {"field": "labels", "is_none": True, "value_type": None} in field_checks
+    assert {"field": "user", "is_none": True, "value_type": None} in field_checks
+
+    # Logging must not change behaviour: null collections still parse to empties.
+    assert result["labels"] == []
+    assert result["assignees"] == []