fix: prevent path traversal via Gitea ref/sha/base/head parameters
test / test (push) Successful in 20s
lint / lint (push) Successful in 22s
docker / lint (pull_request) Successful in 33s
docker / test (pull_request) Successful in 25s
test / test (pull_request) Successful in 38s
lint / lint (pull_request) Successful in 40s
docker / docker-test (pull_request) Successful in 15s
docker / docker-publish (pull_request) Has been skipped

The ref-like tool arguments (ref, sha, base, head) were validated only for
length and were interpolated unencoded into Gitea API URL paths (get_tree,
get_commit_diff, compare_refs). Because httpx normalizes URL paths by removing
".." dot segments (RFC 3986, section 5.2.4), a crafted value such as
"../../../../owner/repo/contents/secret" produced a request path that escaped
the declared owner/repo prefix. In service-PAT mode this allowed a user
authorized on one repository to read arbitrary repositories the service token
could reach, and in OAuth mode it bypassed the policy engine's per-repository
rules (which never see ref values).

Two defense layers:
- arguments.py: add _validate_git_ref / GitRef that rejects ".." path
  segments, leading "/", backslashes, null bytes, control chars, whitespace,
  and "?"/"#", while preserving legitimate slash refs (feature/foo, v1.2.3).
  This is what actually closes the traversal.
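- gitea_client.py: defense-in-depth urllib.parse.quote() on owner/repo
  (safe="") and ref/sha/base/head/filepath (safe="/") in every repo URL
  builder, mirroring the existing pattern in server.py. Encoding alone does
  not stop "..": quote() never escapes "." and safe="/" keeps "/", so a
  "../" sequence passes through unchanged. It only neutralizes characters
  such as "?", "#", "%" and whitespace, and for owner/repo also "/".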

Tests: negative cases for traversal/unsafe chars across all four fields,
positive cases for slash-containing refs, length-bound regression, and a
URL-layer confinement check. Full suite green (176 passed), coverage 85.64%.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-14 15:57:52 +02:00
parent 90df37366f
commit 2d95e89035
3 changed files with 179 additions and 30 deletions
+36 -20
View File
@@ -3,6 +3,7 @@
from __future__ import annotations
from typing import Any
from urllib.parse import quote
from httpx import AsyncClient, Response
@@ -175,6 +176,8 @@ class GiteaClient:
async def get_repository(self, owner: str, repo: str) -> dict[str, Any]:
"""Get repository metadata."""
repo_id = f"{owner}/{repo}"
enc_owner = quote(owner, safe="")
enc_repo = quote(repo, safe="")
correlation_id = self.audit.log_tool_invocation(
tool_name="get_repository",
repository=repo_id,
@@ -183,7 +186,7 @@ class GiteaClient:
try:
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}",
f"/api/v1/repos/{enc_owner}/{enc_repo}",
correlation_id=correlation_id,
)
self.audit.log_tool_invocation(
@@ -212,6 +215,9 @@ class GiteaClient:
) -> dict[str, Any]:
"""Get file contents from a repository."""
repo_id = f"{owner}/{repo}"
enc_owner = quote(owner, safe="")
enc_repo = quote(repo, safe="")
enc_filepath = quote(filepath, safe="/")
correlation_id = self.audit.log_tool_invocation(
tool_name="get_file_contents",
repository=repo_id,
@@ -222,7 +228,7 @@ class GiteaClient:
try:
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/contents/{filepath}",
f"/api/v1/repos/{enc_owner}/{enc_repo}/contents/{enc_filepath}",
params={"ref": ref},
correlation_id=correlation_id,
)
@@ -278,6 +284,9 @@ class GiteaClient:
) -> dict[str, Any]:
"""Get repository tree at given ref."""
repo_id = f"{owner}/{repo}"
enc_owner = quote(owner, safe="")
enc_repo = quote(repo, safe="")
enc_ref = quote(ref, safe="/")
correlation_id = self.audit.log_tool_invocation(
tool_name="get_tree",
repository=repo_id,
@@ -287,7 +296,7 @@ class GiteaClient:
try:
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/git/trees/{ref}",
f"/api/v1/repos/{enc_owner}/{enc_repo}/git/trees/{enc_ref}",
params={"recursive": str(recursive).lower()},
correlation_id=correlation_id,
)
@@ -334,7 +343,7 @@ class GiteaClient:
try:
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/search",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/search",
params={"q": query, "page": page, "limit": limit, "ref": ref},
correlation_id=correlation_id,
)
@@ -367,7 +376,7 @@ class GiteaClient:
"""List commits for a repository ref."""
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/commits",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/commits",
params={"sha": ref, "page": page, "limit": limit},
correlation_id=str(
self.audit.log_tool_invocation(tool_name="list_commits", result_status="pending")
@@ -377,9 +386,12 @@ class GiteaClient:
async def get_commit_diff(self, owner: str, repo: str, sha: str) -> dict[str, Any]:
"""Get detailed commit including changed files and patch metadata."""
enc_owner = quote(owner, safe="")
enc_repo = quote(repo, safe="")
enc_sha = quote(sha, safe="/")
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/git/commits/{sha}",
f"/api/v1/repos/{enc_owner}/{enc_repo}/git/commits/{enc_sha}",
correlation_id=str(
self.audit.log_tool_invocation(tool_name="get_commit_diff", result_status="pending")
),
@@ -388,9 +400,13 @@ class GiteaClient:
async def compare_refs(self, owner: str, repo: str, base: str, head: str) -> dict[str, Any]:
"""Compare two refs and return commit/file deltas."""
enc_owner = quote(owner, safe="")
enc_repo = quote(repo, safe="")
enc_base = quote(base, safe="/")
enc_head = quote(head, safe="/")
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/compare/{base}...{head}",
f"/api/v1/repos/{enc_owner}/{enc_repo}/compare/{enc_base}...{enc_head}",
correlation_id=str(
self.audit.log_tool_invocation(tool_name="compare_refs", result_status="pending")
),
@@ -414,7 +430,7 @@ class GiteaClient:
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/issues",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues",
params=params,
correlation_id=str(
self.audit.log_tool_invocation(tool_name="list_issues", result_status="pending")
@@ -426,7 +442,7 @@ class GiteaClient:
"""Get issue details."""
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/issues/{index}",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues/{index}",
correlation_id=str(
self.audit.log_tool_invocation(tool_name="get_issue", result_status="pending")
),
@@ -445,7 +461,7 @@ class GiteaClient:
"""List pull requests for repository."""
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/pulls",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/pulls",
params={"state": state, "page": page, "limit": limit},
correlation_id=str(
self.audit.log_tool_invocation(
@@ -459,7 +475,7 @@ class GiteaClient:
"""Get a single pull request."""
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/pulls/{index}",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/pulls/{index}",
correlation_id=str(
self.audit.log_tool_invocation(
tool_name="get_pull_request", result_status="pending"
@@ -474,7 +490,7 @@ class GiteaClient:
"""List repository labels."""
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/labels",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/labels",
params={"page": page, "limit": limit},
correlation_id=str(
self.audit.log_tool_invocation(tool_name="list_labels", result_status="pending")
@@ -488,7 +504,7 @@ class GiteaClient:
"""List repository tags."""
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/tags",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/tags",
params={"page": page, "limit": limit},
correlation_id=str(
self.audit.log_tool_invocation(tool_name="list_tags", result_status="pending")
@@ -507,7 +523,7 @@ class GiteaClient:
"""List repository releases."""
result = await self._request(
"GET",
f"/api/v1/repos/{owner}/{repo}/releases",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/releases",
params={"page": page, "limit": limit},
correlation_id=str(
self.audit.log_tool_invocation(tool_name="list_releases", result_status="pending")
@@ -533,7 +549,7 @@ class GiteaClient:
payload["assignees"] = assignees
result = await self._request(
"POST",
f"/api/v1/repos/{owner}/{repo}/issues",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues",
json_body=payload,
correlation_id=str(
self.audit.log_tool_invocation(tool_name="create_issue", result_status="pending")
@@ -561,7 +577,7 @@ class GiteaClient:
payload["state"] = state
result = await self._request(
"PATCH",
f"/api/v1/repos/{owner}/{repo}/issues/{index}",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues/{index}",
json_body=payload,
correlation_id=str(
self.audit.log_tool_invocation(tool_name="update_issue", result_status="pending")
@@ -575,7 +591,7 @@ class GiteaClient:
"""Create a comment on issue (and PR discussion if issue index refers to PR)."""
result = await self._request(
"POST",
f"/api/v1/repos/{owner}/{repo}/issues/{index}/comments",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues/{index}/comments",
json_body={"body": body},
correlation_id=str(
self.audit.log_tool_invocation(
@@ -591,7 +607,7 @@ class GiteaClient:
"""Create PR discussion comment."""
result = await self._request(
"POST",
f"/api/v1/repos/{owner}/{repo}/issues/{index}/comments",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues/{index}/comments",
json_body={"body": body},
correlation_id=str(
self.audit.log_tool_invocation(
@@ -611,7 +627,7 @@ class GiteaClient:
"""Add labels to issue/PR."""
result = await self._request(
"POST",
f"/api/v1/repos/{owner}/{repo}/issues/{index}/labels",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues/{index}/labels",
json_body={"labels": labels},
correlation_id=str(
self.audit.log_tool_invocation(tool_name="add_labels", result_status="pending")
@@ -629,7 +645,7 @@ class GiteaClient:
"""Assign users to issue/PR."""
result = await self._request(
"POST",
f"/api/v1/repos/{owner}/{repo}/issues/{index}/assignees",
f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/issues/{index}/assignees",
json_body={"assignees": assignees},
correlation_id=str(
self.audit.log_tool_invocation(tool_name="assign_issue", result_status="pending")