first commit

This commit is contained in:
2025-12-21 13:42:30 +01:00
parent 823b825acb
commit f9b24fe248
47 changed files with 8222 additions and 1 deletions

View File

@@ -0,0 +1,19 @@
"""AI Review Agents Package
This package contains the modular agent implementations for the
enterprise AI code review system.
"""
from agents.base_agent import BaseAgent, AgentContext, AgentResult
from agents.issue_agent import IssueAgent
from agents.pr_agent import PRAgent
from agents.codebase_agent import CodebaseAgent
__all__ = [
"BaseAgent",
"AgentContext",
"AgentResult",
"IssueAgent",
"PRAgent",
"CodebaseAgent",
]

View File

@@ -0,0 +1,257 @@
"""Base Agent
Abstract base class for all AI agents. Provides common functionality
for Gitea API interaction, LLM calls, logging, and rate limiting.
"""
import logging
import os
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any
import yaml
from clients.gitea_client import GiteaClient
from clients.llm_client import LLMClient, LLMResponse
@dataclass
class AgentContext:
    """Context passed to agent during execution.

    Bundles the repository coordinates, the triggering event, and the
    loaded configuration so agents receive a single argument.
    """
    # Repository owner (user or organization login).
    owner: str
    # Repository name.
    repo: str
    # Event type, e.g. "issues", "issue_comment", "chat".
    event_type: str
    # Raw event payload as received from the webhook/dispatcher.
    event_data: dict
    # Parsed configuration mapping; defaults to an empty dict.
    config: dict = field(default_factory=dict)
@dataclass
class AgentResult:
    """Result from agent execution.

    Returned by every agent's execute()/run() to report the outcome and
    the side effects performed (comments posted, labels applied, ...).
    """
    # True when the agent completed its task without error.
    success: bool
    # Human-readable summary of what happened.
    message: str
    # Structured, agent-specific result payload.
    data: dict = field(default_factory=dict)
    # Human-readable log of actions performed against the repository.
    actions_taken: list[str] = field(default_factory=list)
    # Error description when success is False, otherwise None.
    error: str | None = None
class BaseAgent(ABC):
    """Abstract base class for AI agents.

    Provides functionality shared by all agents: Gitea and LLM client
    wiring, configuration and prompt loading, request rate limiting,
    and marker-based comment creation/updating.
    """

    # Hidden HTML marker embedded in AI-generated comments so the bot
    # can later find and update its own comments.
    AI_MARKER = "<!-- AI_CODE_REVIEW -->"

    # Disclaimer text prepended to AI-generated content.
    AI_DISCLAIMER = (
        "**Note:** This review was generated by an AI assistant. "
        "While it aims to be accurate and helpful, it may contain mistakes "
        "or miss important issues. Please verify all findings before taking action."
    )

    def __init__(
        self,
        config: dict | None = None,
        gitea_client: GiteaClient | None = None,
        llm_client: LLMClient | None = None,
    ):
        """Initialize the base agent.

        Args:
            config: Agent configuration dictionary. When omitted, the
                config.yml next to the package root is loaded instead.
            gitea_client: Optional pre-configured Gitea client.
            llm_client: Optional pre-configured LLM client.
        """
        self.config = config or self._load_config()
        self.gitea = gitea_client or GiteaClient()
        self.llm = llm_client or LLMClient.from_config(self.config)
        self.logger = logging.getLogger(self.__class__.__name__)
        # Rate limiting state: timestamp of the last outbound request.
        self._last_request_time = 0.0
        self._min_request_interval = 1.0  # seconds

    @staticmethod
    def _load_config() -> dict:
        """Load configuration from config.yml.

        Returns:
            The parsed configuration mapping, or an empty dict when the
            file is absent or empty.
        """
        config_path = os.path.join(os.path.dirname(__file__), "..", "config.yml")
        if os.path.exists(config_path):
            with open(config_path) as f:
                # BUGFIX: yaml.safe_load returns None for an empty file;
                # normalize to {} so callers can chain .get() safely.
                return yaml.safe_load(f) or {}
        return {}

    def _rate_limit(self):
        """Apply rate limiting between requests.

        Sleeps just long enough to keep consecutive requests at least
        _min_request_interval seconds apart.
        """
        elapsed = time.time() - self._last_request_time
        if elapsed < self._min_request_interval:
            time.sleep(self._min_request_interval - elapsed)
        self._last_request_time = time.time()

    def load_prompt(self, prompt_name: str) -> str:
        """Load a prompt template from the prompts directory.

        Args:
            prompt_name: Name of the prompt file (without extension).

        Returns:
            Prompt template content.

        Raises:
            FileNotFoundError: If prompts/<prompt_name>.md does not exist.
        """
        prompt_path = os.path.join(
            os.path.dirname(__file__), "..", "prompts", f"{prompt_name}.md"
        )
        if not os.path.exists(prompt_path):
            raise FileNotFoundError(f"Prompt not found: {prompt_path}")
        with open(prompt_path) as f:
            return f.read()

    def call_llm(self, prompt: str, **kwargs) -> LLMResponse:
        """Make a rate-limited call to the LLM.

        Args:
            prompt: The prompt to send.
            **kwargs: Additional LLM options.

        Returns:
            LLM response.
        """
        self._rate_limit()
        return self.llm.call(prompt, **kwargs)

    def call_llm_json(self, prompt: str, **kwargs) -> dict:
        """Make a rate-limited call and parse the JSON response.

        Args:
            prompt: The prompt to send.
            **kwargs: Additional LLM options.

        Returns:
            Parsed JSON response.
        """
        self._rate_limit()
        return self.llm.call_json(prompt, **kwargs)

    def find_ai_comment(
        self,
        owner: str,
        repo: str,
        issue_index: int,
        marker: str | None = None,
    ) -> int | None:
        """Find an existing AI comment by marker.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_index: Issue or PR number.
            marker: Custom marker to search for. Defaults to AI_MARKER.

        Returns:
            Comment ID if found, None otherwise.
        """
        marker = marker or self.AI_MARKER
        comments = self.gitea.list_issue_comments(owner, repo, issue_index)
        for comment in comments:
            # First comment containing the marker wins.
            if marker in comment.get("body", ""):
                return comment["id"]
        return None

    def upsert_comment(
        self,
        owner: str,
        repo: str,
        issue_index: int,
        body: str,
        marker: str | None = None,
    ) -> dict:
        """Create or update an AI comment.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_index: Issue or PR number.
            body: Comment body (marker will be prepended if not present).
            marker: Custom marker. Defaults to AI_MARKER.

        Returns:
            Created or updated comment.
        """
        marker = marker or self.AI_MARKER
        # Embed the marker so future runs can find this comment again.
        if marker not in body:
            body = f"{marker}\n{body}"
        # Update in place when a previous AI comment exists, else create.
        existing_id = self.find_ai_comment(owner, repo, issue_index, marker)
        if existing_id:
            return self.gitea.update_issue_comment(owner, repo, existing_id, body)
        return self.gitea.create_issue_comment(owner, repo, issue_index, body)

    def format_with_disclaimer(self, content: str) -> str:
        """Add AI disclaimer to content.

        Args:
            content: The main content.

        Returns:
            Content with the disclaimer and AI marker prepended.
        """
        return f"{self.AI_DISCLAIMER}\n\n{self.AI_MARKER}\n{content}"

    @abstractmethod
    def execute(self, context: AgentContext) -> AgentResult:
        """Execute the agent's main task.

        Args:
            context: Execution context with event data.

        Returns:
            Result of the agent execution.
        """

    @abstractmethod
    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent can handle the given event.

        Args:
            event_type: Type of event (issue, pull_request, etc).
            event_data: Event payload data.

        Returns:
            True if this agent can handle the event.
        """

    def run(self, context: AgentContext) -> AgentResult:
        """Run the agent with error handling.

        Never raises: any exception from execute() is logged and turned
        into a failed AgentResult so the dispatcher keeps running.

        Args:
            context: Execution context.

        Returns:
            Agent result, including any errors.
        """
        try:
            self.logger.info(
                "Running %s for %s/%s",
                self.__class__.__name__,
                context.owner,
                context.repo,
            )
            result = self.execute(context)
            self.logger.info(
                "Completed with success=%s: %s", result.success, result.message
            )
            return result
        except Exception as e:
            self.logger.exception("Agent execution failed: %s", e)
            return AgentResult(
                success=False,
                message="Agent execution failed",
                error=str(e),
            )

View File

@@ -0,0 +1,470 @@
"""Chat Agent (Bartender)
Interactive AI chat agent with tool use capabilities.
Can search the codebase and web to answer user questions.
"""
import base64
import logging
import os
import re
from dataclasses import dataclass
import requests
from agents.base_agent import AgentContext, AgentResult, BaseAgent
from clients.llm_client import ToolCall
@dataclass
class ChatMessage:
    """A message in the chat conversation."""
    # 'user', 'assistant', or 'tool'
    role: str
    # Message text content.
    content: str
    # ID of the tool call this message answers (tool messages only).
    tool_call_id: str | None = None
    # Tool name for tool responses.
    name: str | None = None
class ChatAgent(BaseAgent):
    """Interactive chat agent with tool capabilities.

    Answers free-form @-mentions by running an LLM tool-use loop that
    can search the repository, read files, and query the web via SearXNG.
    """

    # Marker for chat responses (distinct from the generic AI_MARKER).
    CHAT_AI_MARKER = "<!-- AI_CHAT_RESPONSE -->"

    # Tool definitions in OpenAI format
    TOOLS = [
        {
            "type": "function",
            "function": {
                "name": "search_codebase",
                "description": "Search the repository codebase for files, functions, classes, or patterns. Use this to find relevant code.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query - can be a filename, function name, class name, or code pattern",
                        },
                        "file_pattern": {
                            "type": "string",
                            "description": "Optional file pattern to filter results (e.g., '*.py', 'src/*.js')",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "read_file",
                "description": "Read the contents of a specific file from the repository.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "filepath": {
                            "type": "string",
                            "description": "Path to the file to read",
                        },
                    },
                    "required": ["filepath"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the web for information using SearXNG. Use this for external documentation, tutorials, or general knowledge.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query",
                        },
                        "categories": {
                            "type": "string",
                            "description": "Optional: comma-separated categories (general, images, videos, news, science, it)",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
    ]

    # System prompt for the chat agent; {owner}/{repo} filled in execute().
    SYSTEM_PROMPT = """You are Bartender, a helpful AI assistant for code review and development tasks.
You have access to tools to help answer questions:
- search_codebase: Search the repository for code, files, functions, or patterns
- read_file: Read specific files from the repository
- search_web: Search the web for documentation, tutorials, or external information
When helping users:
1. Use tools to gather information before answering questions about code
2. Be concise but thorough in your explanations
3. Provide code examples when helpful
4. If you're unsure, say so and suggest alternatives
Repository context: {owner}/{repo}
"""

    def __init__(self, *args, **kwargs):
        """Initialize the agent and resolve the SearXNG endpoint.

        Config value agents.chat.searxng_url wins; falls back to the
        SEARXNG_URL environment variable, then to "" (disabled).
        """
        super().__init__(*args, **kwargs)
        self._searxng_url = self.config.get("agents", {}).get("chat", {}).get(
            "searxng_url", os.environ.get("SEARXNG_URL", "")
        )

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent handles the given event.

        Handles any @ai-bot mention in an issue comment that is not one
        of the other agents' specific commands, plus direct "chat" events.
        """
        agent_config = self.config.get("agents", {}).get("chat", {})
        if not agent_config.get("enabled", True):
            return False
        # Handle issue comment with @ai-bot chat or just @ai-bot
        if event_type == "issue_comment":
            comment_body = event_data.get("comment", {}).get("body", "")
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            # Any @ai-bot mention that isn't a specific command is a chat request.
            if mention_prefix in comment_body:
                specific_commands = ["summarize", "explain", "suggest", "security", "codebase"]
                body_lower = comment_body.lower()
                for cmd in specific_commands:
                    if f"{mention_prefix} {cmd}" in body_lower:
                        return False
                return True
        # Handle direct chat command
        if event_type == "chat":
            return True
        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Execute the chat agent.

        Extracts the user message from the event, runs the tool-use chat
        loop, and posts the answer back to the issue when applicable.
        """
        self.logger.info(f"Starting chat for {context.owner}/{context.repo}")
        # Extract user message from either an issue comment or a direct event.
        if context.event_type == "issue_comment":
            user_message = context.event_data.get("comment", {}).get("body", "")
            issue_index = context.event_data.get("issue", {}).get("number")
            # Strip the @ai-bot prefix so only the question remains.
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            user_message = user_message.replace(mention_prefix, "").strip()
        else:
            user_message = context.event_data.get("message", "")
            issue_index = context.event_data.get("issue_number")
        if not user_message:
            return AgentResult(
                success=False,
                message="No message provided",
            )
        # Build the conversation: system prompt + the user's question.
        system_prompt = self.SYSTEM_PROMPT.format(
            owner=context.owner,
            repo=context.repo,
        )
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]
        # Run the chat loop with tool execution
        response_content, tools_used = self._run_chat_loop(
            context, messages, max_iterations=5
        )
        actions_taken = []
        if tools_used:
            actions_taken.append(f"Used tools: {', '.join(tools_used)}")
        # Post response if this is an issue comment
        if issue_index:
            comment_body = self._format_response(response_content)
            self.upsert_comment(
                context.owner,
                context.repo,
                issue_index,
                comment_body,
                marker=self.CHAT_AI_MARKER,
            )
            actions_taken.append("Posted chat response")
        return AgentResult(
            success=True,
            message="Chat completed",
            data={"response": response_content, "tools_used": tools_used},
            actions_taken=actions_taken,
        )

    def _run_chat_loop(
        self,
        context: AgentContext,
        messages: list[dict],
        max_iterations: int = 5,
    ) -> tuple[str, list[str]]:
        """Run the chat loop with tool execution.

        Calls the LLM, executes any requested tools, feeds results back,
        and repeats up to max_iterations times.

        Returns:
            Tuple of (final response content, list of tools used).
        """
        import json  # local import: only needed to re-serialize tool arguments

        tools_used = []
        for _ in range(max_iterations):
            self._rate_limit()
            response = self.llm.call_with_tools(messages, tools=self.TOOLS)
            # If no tool calls, we're done
            if not response.tool_calls:
                return response.content, tools_used
            # Echo the assistant turn (with its tool calls) back into history.
            messages.append({
                "role": "assistant",
                "content": response.content or "",
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.name,
                            # BUGFIX: the tool-call schema expects the
                            # arguments as a JSON string; str(dict) emits
                            # Python repr (single quotes) which is not
                            # valid JSON.
                            "arguments": json.dumps(tc.arguments),
                        },
                    }
                    for tc in response.tool_calls
                ],
            })
            # Execute each tool call and append its result as a tool message.
            for tool_call in response.tool_calls:
                tool_result = self._execute_tool(context, tool_call)
                tools_used.append(tool_call.name)
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_result,
                })
        # If we hit max iterations, make one final call without tools
        self._rate_limit()
        final_response = self.llm.call_with_tools(
            messages, tools=None, tool_choice="none"
        )
        return final_response.content, tools_used

    def _execute_tool(self, context: AgentContext, tool_call: ToolCall) -> str:
        """Execute a tool call and return the result as text.

        Errors are returned as strings so the LLM can react to them
        instead of aborting the chat.
        """
        self.logger.info(f"Executing tool: {tool_call.name}")
        try:
            if tool_call.name == "search_codebase":
                return self._tool_search_codebase(
                    context,
                    tool_call.arguments.get("query", ""),
                    tool_call.arguments.get("file_pattern"),
                )
            elif tool_call.name == "read_file":
                return self._tool_read_file(
                    context,
                    tool_call.arguments.get("filepath", ""),
                )
            elif tool_call.name == "search_web":
                return self._tool_search_web(
                    tool_call.arguments.get("query", ""),
                    tool_call.arguments.get("categories"),
                )
            else:
                return f"Unknown tool: {tool_call.name}"
        except Exception as e:
            self.logger.error(f"Tool execution failed: {e}")
            return f"Error executing tool: {e}"

    def _tool_search_codebase(
        self,
        context: AgentContext,
        query: str,
        file_pattern: str | None = None,
    ) -> str:
        """Search the codebase for files matching a query.

        Matches against both file paths and file contents (case-insensitive).
        """
        results = []
        # Get repository file list
        try:
            files = self._collect_files(context.owner, context.repo, file_pattern)
        except Exception as e:
            return f"Error listing files: {e}"
        query_lower = query.lower()
        # Search through files
        for file_info in files[:50]:  # Limit to prevent API exhaustion
            filepath = file_info.get("path", "")
            # A filename match is enough; skip the content fetch.
            if query_lower in filepath.lower():
                results.append(f"File: {filepath}")
                continue
            # Check content for code patterns
            try:
                content_data = self.gitea.get_file_contents(
                    context.owner, context.repo, filepath
                )
                if content_data.get("content"):
                    content = base64.b64decode(content_data["content"]).decode(
                        "utf-8", errors="ignore"
                    )
                    # Collect matching lines with their line numbers.
                    lines = content.splitlines()
                    matching_lines = []
                    for i, line in enumerate(lines, 1):
                        if query_lower in line.lower():
                            matching_lines.append(f"  L{i}: {line.strip()[:100]}")
                    if matching_lines:
                        results.append(f"File: {filepath}")
                        results.extend(matching_lines[:5])  # Max 5 matches per file
            except Exception:
                # Best-effort search: unreadable files are silently skipped.
                pass
        if not results:
            return f"No results found for '{query}'"
        return "\n".join(results[:30])  # Limit total results

    def _collect_files(
        self,
        owner: str,
        repo: str,
        file_pattern: str | None = None,
    ) -> list[dict]:
        """Collect code files from the repository via recursive traversal.

        Filters by known code extensions, skips vendored/build paths, and
        optionally applies a glob-style file_pattern. Capped at 100 files.
        """
        files = []
        # Code extensions to search
        code_extensions = {
            ".py", ".js", ".ts", ".go", ".rs", ".java", ".rb",
            ".php", ".c", ".cpp", ".h", ".cs", ".swift", ".kt",
            ".md", ".yml", ".yaml", ".json", ".toml",
        }
        # Patterns to ignore
        ignore_patterns = [
            "node_modules/", "vendor/", ".git/", "__pycache__/",
            ".venv/", "dist/", "build/", ".min.js", ".min.css",
        ]

        def traverse(path: str = ""):
            # Recursively walk directories via the Gitea contents API.
            try:
                contents = self.gitea.get_file_contents(owner, repo, path or ".")
                if isinstance(contents, list):
                    for item in contents:
                        item_path = item.get("path", "")
                        if any(p in item_path for p in ignore_patterns):
                            continue
                        if item.get("type") == "file":
                            ext = os.path.splitext(item_path)[1]
                            if ext in code_extensions:
                                # Check file pattern if provided
                                if file_pattern:
                                    if not self._match_pattern(item_path, file_pattern):
                                        continue
                                files.append(item)
                        elif item.get("type") == "dir":
                            traverse(item_path)
            except Exception as e:
                self.logger.warning(f"Failed to list {path}: {e}")

        traverse()
        return files[:100]  # Limit to prevent API exhaustion

    def _match_pattern(self, filepath: str, pattern: str) -> bool:
        """Check if filepath matches a simple glob pattern."""
        import fnmatch
        return fnmatch.fnmatch(filepath, pattern)

    def _tool_read_file(self, context: AgentContext, filepath: str) -> str:
        """Read a file from the repository, truncated to 8000 characters."""
        try:
            content_data = self.gitea.get_file_contents(
                context.owner, context.repo, filepath
            )
            if content_data.get("content"):
                content = base64.b64decode(content_data["content"]).decode(
                    "utf-8", errors="ignore"
                )
                # Truncate if too long
                if len(content) > 8000:
                    content = content[:8000] + "\n... (truncated)"
                return f"File: {filepath}\n\n```\n{content}\n```"
            return f"File not found: {filepath}"
        except Exception as e:
            return f"Error reading file: {e}"

    def _tool_search_web(
        self,
        query: str,
        categories: str | None = None,
    ) -> str:
        """Search the web using SearXNG and format the top 5 results."""
        if not self._searxng_url:
            return "Web search is not configured. Set SEARXNG_URL environment variable."
        try:
            params = {
                "q": query,
                "format": "json",
            }
            if categories:
                params["categories"] = categories
            response = requests.get(
                f"{self._searxng_url}/search",
                params=params,
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            if not results:
                return f"No web results found for '{query}'"
            # Format results
            output = []
            for i, result in enumerate(results[:5], 1):  # Top 5 results
                title = result.get("title", "No title")
                url = result.get("url", "")
                content = result.get("content", "")[:200]
                output.append(f"{i}. **{title}**\n   {url}\n   {content}")
            return "\n\n".join(output)
        except requests.exceptions.RequestException as e:
            return f"Web search failed: {e}"

    def _format_response(self, content: str) -> str:
        """Format the chat response with the AI disclaimer prepended."""
        lines = [
            f"{self.AI_DISCLAIMER}",
            "",
            "---",
            "",
            content,
        ]
        return "\n".join(lines)

View File

@@ -0,0 +1,457 @@
"""Codebase Quality Agent
AI agent for analyzing overall codebase health, architecture,
technical debt, and documentation coverage.
"""
import base64
import os
from dataclasses import dataclass, field
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class CodebaseMetrics:
    """Metrics collected from codebase analysis."""
    # Number of code files discovered (after ignore-pattern filtering).
    total_files: int = 0
    # Sum of line counts across the analyzed files.
    total_lines: int = 0
    # Mapping of language name -> number of files.
    languages: dict = field(default_factory=dict)
    # Occurrences of "TODO" in file contents (case-insensitive).
    todo_count: int = 0
    # Occurrences of "FIXME" in file contents (case-insensitive).
    fixme_count: int = 0
    # Occurrences of "DEPRECATED" in file contents (case-insensitive).
    deprecated_count: int = 0
    # Python files containing `def ` but no triple-quoted docstring at all.
    missing_docstrings: int = 0
@dataclass
class CodebaseReport:
    """Complete codebase analysis report."""
    # Overall assessment in a few sentences.
    summary: str
    health_score: float  # 0-100
    # Raw metrics the report was built from.
    metrics: CodebaseMetrics
    # Issue dicts with severity/category/description/recommendation keys.
    issues: list[dict]
    # Top actionable recommendations.
    recommendations: list[str]
    # Observations about code structure and patterns.
    architecture_notes: list[str]
class CodebaseAgent(BaseAgent):
    """Agent for codebase quality analysis.

    Collects repository metrics, asks the LLM for an assessment, and
    publishes the result as a (re-usable) report issue.
    """

    # Marker for codebase reports
    CODEBASE_AI_MARKER = "<!-- AI_CODEBASE_REVIEW -->"

    # File extensions to analyze, mapped to a display language name.
    CODE_EXTENSIONS = {
        ".py": "Python",
        ".js": "JavaScript",
        ".ts": "TypeScript",
        ".go": "Go",
        ".rs": "Rust",
        ".java": "Java",
        ".rb": "Ruby",
        ".php": "PHP",
        ".c": "C",
        ".cpp": "C++",
        ".h": "C/C++ Header",
        ".cs": "C#",
        ".swift": "Swift",
        ".kt": "Kotlin",
    }

    # Path fragments to ignore (vendored, generated, or build output).
    IGNORE_PATTERNS = [
        "node_modules/",
        "vendor/",
        ".git/",
        "__pycache__/",
        ".venv/",
        "dist/",
        "build/",
        ".min.js",
        ".min.css",
    ]

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent handles the given event.

        Triggers on manual/scheduled runs and on the "@ai-bot codebase"
        comment command.
        """
        agent_config = self.config.get("agents", {}).get("codebase", {})
        if not agent_config.get("enabled", True):
            return False
        # Handle manual trigger via workflow_dispatch or schedule
        if event_type in ("workflow_dispatch", "schedule"):
            return True
        # Handle special issue command
        if event_type == "issue_comment":
            comment_body = event_data.get("comment", {}).get("body", "")
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            if f"{mention_prefix} codebase" in comment_body.lower():
                return True
        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Execute codebase analysis end-to-end.

        Collects files, computes metrics, runs the AI analysis, and
        creates/updates the report issue.
        """
        self.logger.info(f"Starting codebase analysis for {context.owner}/{context.repo}")
        actions_taken = []
        # Step 1: Collect file list from repository
        files = self._collect_files(context.owner, context.repo)
        self.logger.info(f"Found {len(files)} files to analyze")
        # Step 2: Analyze metrics
        metrics = self._analyze_metrics(context.owner, context.repo, files)
        actions_taken.append(f"Analyzed {metrics.total_files} files")
        # Step 3: Run AI analysis on key files
        report = self._run_ai_analysis(context, files, metrics)
        actions_taken.append("Generated AI analysis report")
        # Step 4: Create or update report issue
        issue_number = self._create_report_issue(context, report)
        actions_taken.append(f"Created/updated report issue #{issue_number}")
        return AgentResult(
            success=True,
            message=f"Codebase analysis complete - Health Score: {report.health_score:.0f}/100",
            data={
                "health_score": report.health_score,
                "total_files": metrics.total_files,
                "issues_found": len(report.issues),
            },
            actions_taken=actions_taken,
        )

    def _collect_files(self, owner: str, repo: str) -> list[dict]:
        """Collect list of code files from the repository.

        Recursively traverses the repo via the contents API, skipping
        ignored paths and non-code extensions. Capped at 100 files.
        """
        files = []

        def traverse(path: str = ""):
            try:
                contents = self.gitea.get_file_contents(owner, repo, path or ".")
                if isinstance(contents, list):
                    for item in contents:
                        item_path = item.get("path", "")
                        # Skip ignored patterns
                        if any(p in item_path for p in self.IGNORE_PATTERNS):
                            continue
                        if item.get("type") == "file":
                            ext = os.path.splitext(item_path)[1]
                            if ext in self.CODE_EXTENSIONS:
                                files.append(item)
                        elif item.get("type") == "dir":
                            traverse(item_path)
            except Exception as e:
                self.logger.warning(f"Failed to list {path}: {e}")

        traverse()
        return files[:100]  # Limit to prevent API exhaustion

    def _analyze_metrics(
        self,
        owner: str,
        repo: str,
        files: list[dict],
    ) -> CodebaseMetrics:
        """Analyze metrics (line counts, markers, languages) from files.

        Only the first 50 files are fetched to bound API usage; files
        that fail to download are skipped.
        """
        metrics = CodebaseMetrics()
        metrics.total_files = len(files)
        for file_info in files[:50]:  # Analyze top 50 files
            filepath = file_info.get("path", "")
            ext = os.path.splitext(filepath)[1]
            lang = self.CODE_EXTENSIONS.get(ext, "Unknown")
            metrics.languages[lang] = metrics.languages.get(lang, 0) + 1
            try:
                content_data = self.gitea.get_file_contents(owner, repo, filepath)
                if content_data.get("content"):
                    content = base64.b64decode(content_data["content"]).decode(
                        "utf-8", errors="ignore"
                    )
                    lines = content.splitlines()
                    metrics.total_lines += len(lines)
                    # Count TODO/FIXME/DEPRECATED markers (case-insensitive).
                    for line in lines:
                        line_upper = line.upper()
                        if "TODO" in line_upper:
                            metrics.todo_count += 1
                        if "FIXME" in line_upper:
                            metrics.fixme_count += 1
                        if "DEPRECATED" in line_upper:
                            metrics.deprecated_count += 1
                    # Crude docstring check (Python): file defines functions
                    # but contains no triple-quoted string anywhere.
                    if ext == ".py":
                        if 'def ' in content and '"""' not in content:
                            metrics.missing_docstrings += 1
            except Exception as e:
                self.logger.debug(f"Could not analyze {filepath}: {e}")
        return metrics

    def _run_ai_analysis(
        self,
        context: AgentContext,
        files: list[dict],
        metrics: CodebaseMetrics,
    ) -> CodebaseReport:
        """Run AI analysis on the codebase.

        Falls back to a heuristic metrics-only report when the LLM call
        or its JSON parsing fails.
        """
        # Prepare context for AI
        file_list = "\n".join([f"- {f.get('path', '')}" for f in files[:30]])
        language_breakdown = "\n".join(
            [f"- {lang}: {count} files" for lang, count in metrics.languages.items()]
        )
        # Sample some key files for deeper analysis
        key_files_content = self._get_key_files_content(
            context.owner, context.repo, files
        )
        prompt = f"""Analyze this codebase and provide a comprehensive quality assessment.
## Repository: {context.owner}/{context.repo}
## Metrics
- Total Files: {metrics.total_files}
- Total Lines: {metrics.total_lines}
- TODO Comments: {metrics.todo_count}
- FIXME Comments: {metrics.fixme_count}
- Deprecated Markers: {metrics.deprecated_count}
## Language Breakdown
{language_breakdown}
## File Structure (sample)
{file_list}
## Key Files Content
{key_files_content}
## Analysis Required
Provide your analysis as JSON with this structure:
```json
{{
"summary": "Overall assessment in 2-3 sentences",
"health_score": 0-100,
"issues": [
{{
"severity": "HIGH|MEDIUM|LOW",
"category": "Architecture|Code Quality|Security|Testing|Documentation",
"description": "Issue description",
"recommendation": "How to fix"
}}
],
"recommendations": ["Top 3-5 actionable recommendations"],
"architecture_notes": ["Observations about code structure and patterns"]
}}
```
Be constructive and actionable. Focus on the most impactful improvements.
"""
        try:
            result = self.call_llm_json(prompt)
            return CodebaseReport(
                summary=result.get("summary", "Analysis complete"),
                # Clamp to the documented 0-100 range in case the model
                # returns an out-of-range value.
                health_score=min(100.0, max(0.0, float(result.get("health_score", 50)))),
                metrics=metrics,
                issues=result.get("issues", []),
                recommendations=result.get("recommendations", []),
                architecture_notes=result.get("architecture_notes", []),
            )
        except Exception as e:
            self.logger.error(f"AI analysis failed: {e}")
            # Fallback: derive a rough health score from the metrics alone.
            health_score = 70
            if metrics.todo_count > 10:
                health_score -= 10
            if metrics.fixme_count > 5:
                health_score -= 10
            return CodebaseReport(
                summary=f"Basic analysis complete (AI unavailable: {e})",
                health_score=health_score,
                metrics=metrics,
                issues=[],
                recommendations=["Manual review recommended"],
                architecture_notes=[],
            )

    def _get_key_files_content(
        self,
        owner: str,
        repo: str,
        files: list[dict],
    ) -> str:
        """Get content of key project files (README, manifests) for AI analysis.

        Returns at most 5 files, each truncated to 2000 characters.
        """
        key_file_names = [
            "README.md",
            "setup.py",
            "pyproject.toml",
            "package.json",
            "Cargo.toml",
            "go.mod",
            "Makefile",
            "Dockerfile",
        ]
        content_parts = []
        for file_info in files:
            filepath = file_info.get("path", "")
            filename = os.path.basename(filepath)
            if filename in key_file_names:
                try:
                    content_data = self.gitea.get_file_contents(owner, repo, filepath)
                    if content_data.get("content"):
                        content = base64.b64decode(content_data["content"]).decode(
                            "utf-8", errors="ignore"
                        )
                        # Truncate long files
                        if len(content) > 2000:
                            content = content[:2000] + "\n... (truncated)"
                        content_parts.append(f"### {filepath}\n```\n{content}\n```")
                except Exception:
                    # Best-effort: skip files that cannot be fetched.
                    pass
        return "\n\n".join(content_parts[:5]) or "No key configuration files found."

    def _create_report_issue(
        self,
        context: AgentContext,
        report: CodebaseReport,
    ) -> int:
        """Create or update the report issue.

        Returns:
            The issue number, or 0 if creation failed.
        """
        # Generate issue body
        body = self._generate_report_body(report)
        # Prefer updating an existing open report issue (found by marker).
        try:
            issues = self.gitea.list_issues(
                context.owner, context.repo, state="open", labels=["ai-codebase-report"]
            )
            for issue in issues:
                if self.CODEBASE_AI_MARKER in issue.get("body", ""):
                    self.gitea.update_issue(
                        context.owner,
                        context.repo,
                        issue["number"],
                        body=body,
                    )
                    return issue["number"]
        except Exception as e:
            self.logger.warning(f"Failed to check for existing report: {e}")
        # Create new issue
        try:
            # Resolve the label name to its ID, if the label exists.
            labels = []
            try:
                repo_labels = self.gitea.get_repo_labels(context.owner, context.repo)
                for label in repo_labels:
                    if label["name"] == "ai-codebase-report":
                        labels.append(label["id"])
                        break
            except Exception:
                pass
            issue = self.gitea.create_issue(
                context.owner,
                context.repo,
                title=f"AI Codebase Report - {context.repo}",
                body=body,
                labels=labels,
            )
            return issue["number"]
        except Exception as e:
            self.logger.error(f"Failed to create report issue: {e}")
            return 0

    def _generate_report_body(self, report: CodebaseReport) -> str:
        """Generate the Markdown body of the report issue."""
        # NOTE: health_emoji is currently informational only; the heading
        # shows the numeric score.
        health_emoji = "🟢" if report.health_score >= 80 else ("🟡" if report.health_score >= 60 else "🔴")
        lines = [
            f"{self.AI_DISCLAIMER}",
            "",
            "# AI Codebase Quality Report",
            "",
            f"## Health Score: {report.health_score:.0f}/100",
            "",
            report.summary,
            "",
            "---",
            "",
            "## Metrics",
            "",
            "| Metric | Value |",
            "|--------|-------|",
            f"| Total Files | {report.metrics.total_files} |",
            f"| Total Lines | {report.metrics.total_lines:,} |",
            f"| TODO Comments | {report.metrics.todo_count} |",
            f"| FIXME Comments | {report.metrics.fixme_count} |",
            f"| Deprecated | {report.metrics.deprecated_count} |",
            "",
        ]
        # Languages
        if report.metrics.languages:
            lines.append("### Languages")
            lines.append("")
            for lang, count in sorted(
                report.metrics.languages.items(), key=lambda x: -x[1]
            ):
                lines.append(f"- **{lang}**: {count} files")
            lines.append("")
        # Issues (top 10 only)
        if report.issues:
            lines.append("## Issues Found")
            lines.append("")
            for issue in report.issues[:10]:
                severity = issue.get("severity", "MEDIUM")
                lines.append(f"### [{severity}] {issue.get('category', 'General')}")
                lines.append("")
                lines.append(issue.get("description", ""))
                lines.append("")
                lines.append(f"**Recommendation:** {issue.get('recommendation', '')}")
                lines.append("")
        # Recommendations (top 5 only)
        if report.recommendations:
            lines.append("## Recommendations")
            lines.append("")
            for i, rec in enumerate(report.recommendations[:5], 1):
                lines.append(f"{i}. {rec}")
            lines.append("")
        # Architecture notes (top 5 only)
        if report.architecture_notes:
            lines.append("## Architecture Notes")
            lines.append("")
            for note in report.architecture_notes[:5]:
                lines.append(f"- {note}")
            lines.append("")
        lines.append("---")
        lines.append("*Generated by AI Codebase Agent*")
        return "\n".join(lines)

View File

@@ -0,0 +1,392 @@
"""Issue Review Agent
AI agent for triaging, labeling, and responding to issues.
Handles issue.opened, issue.labeled, and issue_comment events.
"""
import logging
from dataclasses import dataclass
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class TriageResult:
    """Result of issue triage analysis."""
    # Classified issue type (e.g. bug / feature — exact vocabulary is
    # produced by the LLM prompt; confirm against _triage_issue).
    issue_type: str
    # Assessed priority level.
    priority: str
    # Model confidence in the classification; presumably 0.0-1.0 — verify.
    confidence: float
    # Short summary of the issue.
    summary: str
    # Labels the agent suggests applying.
    suggested_labels: list[str]
    # Whether the issue appears to duplicate an existing one.
    is_duplicate: bool
    # Number of the duplicated issue, if any.
    duplicate_of: int | None
    # Whether the report lacks information needed to act on it.
    needs_more_info: bool
    # Descriptions of the missing pieces of information.
    missing_info: list[str]
    # Affected components/areas of the codebase.
    components: list[str]
    # Model's reasoning behind the triage decision.
    reasoning: str
class IssueAgent(BaseAgent):
    """Agent for handling issue events.

    Triages newly opened issues, reacts to label additions, and handles
    @-mentions in issue comments.
    """
    # Marker specific to issue triage comments posted by this agent.
    ISSUE_AI_MARKER = "<!-- AI_ISSUE_TRIAGE -->"
def can_handle(self, event_type: str, event_data: dict) -> bool:
"""Check if this agent handles the given event."""
# Check if agent is enabled
agent_config = self.config.get("agents", {}).get("issue", {})
if not agent_config.get("enabled", True):
return False
# Handle issue events
if event_type == "issues":
action = event_data.get("action", "")
allowed_events = agent_config.get("events", ["opened", "labeled"])
if action not in allowed_events:
return False
# Ignore our own codebase reports to prevent double-commenting
issue = event_data.get("issue", {})
title = issue.get("title", "")
labels = [l.get("name") for l in issue.get("labels", [])]
if "AI Codebase Report" in title or "ai-codebase-report" in labels:
return False
return True
# Handle issue comment events (for @mentions)
if event_type == "issue_comment":
action = event_data.get("action", "")
if action == "created":
comment_body = event_data.get("comment", {}).get("body", "")
mention_prefix = self.config.get("interaction", {}).get(
"mention_prefix", "@ai-bot"
)
return mention_prefix in comment_body
return False
def execute(self, context: AgentContext) -> AgentResult:
"""Execute the issue agent."""
event_data = context.event_data
action = event_data.get("action", "")
if context.event_type == "issues":
if action == "opened":
return self._handle_issue_opened(context)
elif action == "labeled":
return self._handle_issue_labeled(context)
if context.event_type == "issue_comment":
return self._handle_issue_comment(context)
return AgentResult(
success=False,
message=f"Unknown action: {action}",
)
def _handle_issue_opened(self, context: AgentContext) -> AgentResult:
"""Handle a newly opened issue."""
issue = context.event_data.get("issue", {})
issue_index = issue.get("number")
title = issue.get("title", "")
body = issue.get("body", "")
author = issue.get("user", {}).get("login", "unknown")
existing_labels = [l.get("name", "") for l in issue.get("labels", [])]
self.logger.info(f"Triaging issue #{issue_index}: {title}")
# Step 1: Triage the issue
triage = self._triage_issue(title, body, author, existing_labels)
actions_taken = []
# Step 2: Apply labels if auto-label is enabled
agent_config = self.config.get("agents", {}).get("issue", {})
if agent_config.get("auto_label", True):
labels_applied = self._apply_labels(
context.owner, context.repo, issue_index, triage
)
if labels_applied:
actions_taken.append(f"Applied labels: {labels_applied}")
# Step 3: Post triage comment
comment = self._generate_triage_comment(triage, issue)
self.upsert_comment(
context.owner,
context.repo,
issue_index,
comment,
marker=self.ISSUE_AI_MARKER,
)
actions_taken.append("Posted triage comment")
return AgentResult(
success=True,
message=f"Triaged issue #{issue_index} as {triage.issue_type} ({triage.priority} priority)",
data={
"triage": {
"type": triage.issue_type,
"priority": triage.priority,
"confidence": triage.confidence,
}
},
actions_taken=actions_taken,
)
def _handle_issue_labeled(self, context: AgentContext) -> AgentResult:
"""Handle label addition to an issue."""
# Could be used for specific label-triggered actions
issue = context.event_data.get("issue", {})
label = context.event_data.get("label", {})
return AgentResult(
success=True,
message=f"Noted label '{label.get('name')}' added to issue #{issue.get('number')}",
)
def _handle_issue_comment(self, context: AgentContext) -> AgentResult:
"""Handle @mention in issue comment."""
issue = context.event_data.get("issue", {})
comment = context.event_data.get("comment", {})
issue_index = issue.get("number")
comment_body = comment.get("body", "")
# Parse command from mention
command = self._parse_command(comment_body)
if command:
response = self._handle_command(context, issue, command)
self.gitea.create_issue_comment(
context.owner, context.repo, issue_index, response
)
return AgentResult(
success=True,
message=f"Responded to command: {command}",
actions_taken=["Posted command response"],
)
return AgentResult(
success=True,
message="No actionable command found in mention",
)
def _triage_issue(
self,
title: str,
body: str,
author: str,
existing_labels: list[str],
) -> TriageResult:
"""Use LLM to triage the issue."""
prompt_template = self.load_prompt("issue_triage")
prompt = prompt_template.format(
title=title,
body=body or "(no description provided)",
author=author,
existing_labels=", ".join(existing_labels) if existing_labels else "none",
)
try:
result = self.call_llm_json(prompt)
return TriageResult(
issue_type=result.get("type", "question"),
priority=result.get("priority", "medium"),
confidence=result.get("confidence", 0.5),
summary=result.get("summary", title),
suggested_labels=result.get("suggested_labels", []),
is_duplicate=result.get("is_duplicate", False),
duplicate_of=result.get("duplicate_of"),
needs_more_info=result.get("needs_more_info", False),
missing_info=result.get("missing_info", []),
components=result.get("components", []),
reasoning=result.get("reasoning", ""),
)
except Exception as e:
self.logger.warning(f"LLM triage failed: {e}")
# Return default triage on failure
return TriageResult(
issue_type="question",
priority="medium",
confidence=0.3,
summary=title,
suggested_labels=[],
is_duplicate=False,
duplicate_of=None,
needs_more_info=True,
missing_info=["Unable to parse issue automatically"],
components=[],
reasoning="Automatic triage failed, needs human review",
)
def _apply_labels(
self,
owner: str,
repo: str,
issue_index: int,
triage: TriageResult,
) -> list[str]:
"""Apply labels based on triage result."""
labels_config = self.config.get("labels", {})
# Get all repo labels
try:
repo_labels = self.gitea.get_repo_labels(owner, repo)
label_map = {l["name"]: l["id"] for l in repo_labels}
except Exception as e:
self.logger.warning(f"Failed to get repo labels: {e}")
return []
labels_to_add = []
# Map priority
priority_labels = labels_config.get("priority", {})
priority_label = priority_labels.get(triage.priority)
if priority_label and priority_label in label_map:
labels_to_add.append(label_map[priority_label])
# Map type
type_labels = labels_config.get("type", {})
type_label = type_labels.get(triage.issue_type)
if type_label and type_label in label_map:
labels_to_add.append(label_map[type_label])
# Add AI reviewed label
status_labels = labels_config.get("status", {})
reviewed_label = status_labels.get("ai_reviewed")
if reviewed_label and reviewed_label in label_map:
labels_to_add.append(label_map[reviewed_label])
if labels_to_add:
try:
self.gitea.add_issue_labels(owner, repo, issue_index, labels_to_add)
return [
name for name, id in label_map.items() if id in labels_to_add
]
except Exception as e:
self.logger.warning(f"Failed to add labels: {e}")
return []
def _generate_triage_comment(self, triage: TriageResult, issue: dict) -> str:
"""Generate a triage summary comment."""
lines = [
f"{self.AI_DISCLAIMER}",
"",
"## AI Issue Triage",
"",
f"| Field | Value |",
f"|-------|--------|",
f"| **Type** | {triage.issue_type.capitalize()} |",
f"| **Priority** | {triage.priority.capitalize()} |",
f"| **Confidence** | {triage.confidence:.0%} |",
"",
]
if triage.summary != issue.get("title"):
lines.append(f"**Summary:** {triage.summary}")
lines.append("")
if triage.components:
lines.append(f"**Components:** {', '.join(triage.components)}")
lines.append("")
if triage.needs_more_info and triage.missing_info:
lines.append("### Additional Information Needed")
lines.append("")
for info in triage.missing_info:
lines.append(f"- {info}")
lines.append("")
if triage.is_duplicate and triage.duplicate_of:
lines.append(f"### Possible Duplicate")
lines.append(f"This issue may be a duplicate of #{triage.duplicate_of}")
lines.append("")
lines.append("---")
lines.append(f"*{triage.reasoning}*")
return "\n".join(lines)
def _parse_command(self, body: str) -> str | None:
"""Parse a command from a comment body."""
mention_prefix = self.config.get("interaction", {}).get(
"mention_prefix", "@ai-bot"
)
commands = self.config.get("interaction", {}).get(
"commands", ["explain", "suggest", "security", "summarize"]
)
for command in commands:
if f"{mention_prefix} {command}" in body.lower():
return command
return None
def _handle_command(self, context: AgentContext, issue: dict, command: str) -> str:
"""Handle a command from an @mention."""
title = issue.get("title", "")
body = issue.get("body", "")
if command == "summarize":
return self._command_summarize(title, body)
elif command == "explain":
return self._command_explain(title, body)
elif command == "suggest":
return self._command_suggest(title, body)
return f"{self.AI_DISCLAIMER}\n\nSorry, I don't understand the command `{command}`."
def _command_summarize(self, title: str, body: str) -> str:
"""Generate a summary of the issue."""
prompt = f"""Summarize the following issue in 2-3 concise sentences:
Title: {title}
Body: {body}
Provide only the summary, no additional formatting."""
try:
response = self.call_llm(prompt)
return f"{self.AI_DISCLAIMER}\n\n**Summary:**\n{response.content}"
except Exception as e:
return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate a summary. Error: {e}"
def _command_explain(self, title: str, body: str) -> str:
"""Explain the issue in more detail."""
prompt = f"""Analyze this issue and provide a clear explanation of what the user is asking for or reporting:
Title: {title}
Body: {body}
Provide:
1. What the issue is about
2. What the user expects
3. Any technical context that might be relevant
Be concise and helpful."""
try:
response = self.call_llm(prompt)
return f"{self.AI_DISCLAIMER}\n\n**Explanation:**\n{response.content}"
except Exception as e:
return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to explain this issue. Error: {e}"
def _command_suggest(self, title: str, body: str) -> str:
"""Suggest solutions for the issue."""
prompt = f"""Based on this issue, suggest potential solutions or next steps:
Title: {title}
Body: {body}
Provide 2-3 actionable suggestions. If this is a bug, suggest debugging steps. If this is a feature request, suggest implementation approaches.
Be practical and concise."""
try:
response = self.call_llm(prompt)
return f"{self.AI_DISCLAIMER}\n\n**Suggestions:**\n{response.content}"
except Exception as e:
return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate suggestions. Error: {e}"

View File

@@ -0,0 +1,436 @@
"""Pull Request Review Agent
Enhanced AI agent for comprehensive PR reviews with inline comments,
security scanning, and automatic label management.
"""
import re
from dataclasses import dataclass, field
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class ReviewIssue:
    """A single issue found in the PR."""

    file: str                        # path of the file the issue was found in
    line: int | None                 # line number in the new file, when known
    severity: str  # HIGH, MEDIUM, LOW
    category: str  # Security, Correctness, Performance, etc.
    description: str                 # what is wrong
    recommendation: str              # how to address it
    code_snippet: str | None = None  # excerpt of the offending line, if captured
@dataclass
class PRReviewResult:
    """Result of a PR review."""

    summary: str               # human-readable overview from the AI review
    issues: list[ReviewIssue]  # findings from the AI review
    overall_severity: str      # aggregate severity ("UNKNOWN" when the review errored)
    approval: bool             # True when the AI recommends approval
    # Findings from the regex-based security scan, kept separate from `issues`.
    security_issues: list[ReviewIssue] = field(default_factory=list)
class PRAgent(BaseAgent):
    """Agent for handling pull request reviews.

    Fetches the PR diff, runs a regex-based security scan plus an
    LLM-based review, posts inline comments and a summary comment, and
    applies approval/changes-required labels.
    """

    # Marker specific to PR reviews; upsert_comment uses it to update our
    # previous summary comment instead of posting a duplicate.
    PR_AI_MARKER = "<!-- AI_PR_REVIEW -->"

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent handles the given event."""
        agent_config = self.config.get("agents", {}).get("pr", {})
        if not agent_config.get("enabled", True):
            return False
        if event_type == "pull_request":
            action = event_data.get("action", "")
            allowed_events = agent_config.get("events", ["opened", "synchronize"])
            return action in allowed_events
        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Run the full review pipeline for the PR in *context*."""
        pr = context.event_data.get("pull_request", {})
        pr_number = pr.get("number")
        self.logger.info(f"Reviewing PR #{pr_number}: {pr.get('title')}")
        actions_taken = []

        # Step 1: Get PR diff.
        diff = self._get_diff(context.owner, context.repo, pr_number)
        if not diff.strip():
            return AgentResult(
                success=True,
                message="PR has no changes to review",
            )

        # Step 2: Parse changed files out of the unified diff.
        changed_files = self._parse_diff_files(diff)

        # Step 3: Run security scan if enabled.
        security_issues = []
        agent_config = self.config.get("agents", {}).get("pr", {})
        if agent_config.get("security_scan", True):
            security_issues = self._run_security_scan(changed_files, diff)
            if security_issues:
                actions_taken.append(f"Found {len(security_issues)} security issues")

        # Step 4: Run AI review (security findings are passed as context).
        review_result = self._run_ai_review(diff, context, security_issues)

        # Step 5: Post inline comments if enabled.
        if agent_config.get("inline_comments", True) and review_result.issues:
            inline_count = self._post_inline_comments(
                context.owner, context.repo, pr_number, review_result
            )
            actions_taken.append(f"Posted {inline_count} inline comments")

        # Step 6: Post (or update) the summary comment.
        summary_comment = self._generate_summary_comment(review_result)
        self.upsert_comment(
            context.owner,
            context.repo,
            pr_number,
            summary_comment,
            marker=self.PR_AI_MARKER,
        )
        actions_taken.append("Posted summary comment")

        # Step 7: Apply labels.
        labels_applied = self._apply_review_labels(
            context.owner, context.repo, pr_number, review_result
        )
        if labels_applied:
            actions_taken.append(f"Applied labels: {labels_applied}")

        return AgentResult(
            success=True,
            message=f"Reviewed PR #{pr_number}: {review_result.overall_severity} severity",
            data={
                "severity": review_result.overall_severity,
                "approval": review_result.approval,
                "issues_count": len(review_result.issues),
                "security_issues_count": len(review_result.security_issues),
            },
            actions_taken=actions_taken,
        )

    def _get_diff(self, owner: str, repo: str, pr_number: int) -> str:
        """Get the PR diff, truncated to `review.max_diff_lines` if needed.

        Returns "" on fetch failure so the caller can short-circuit.
        """
        max_lines = self.config.get("review", {}).get("max_diff_lines", 800)
        try:
            diff = self.gitea.get_pull_request_diff(owner, repo, pr_number)
            lines = diff.splitlines()
            if len(lines) > max_lines:
                # Log the truncation so partial reviews are explainable.
                self.logger.info(
                    f"Diff truncated from {len(lines)} to {max_lines} lines"
                )
                return "\n".join(lines[:max_lines])
            return diff
        except Exception as e:
            self.logger.error(f"Failed to get diff: {e}")
            return ""

    def _parse_diff_files(self, diff: str) -> dict[str, str]:
        """Parse a unified diff into a {file path: hunk text} mapping."""
        files = {}
        current_file = None
        current_content = []
        for line in diff.splitlines():
            if line.startswith("diff --git"):
                if current_file:
                    files[current_file] = "\n".join(current_content)
                # Take everything after the last " b/" separator of the
                # "diff --git a/path b/path" header. A bare
                # re.search(r"b/(.+)$") anchors on the FIRST "b/", which can
                # occur inside the a/-path (e.g. "a/lib/x b/lib/x" matches at
                # "li[b/]x" and captures "x b/lib/x").
                current_file = None
                current_content = []
                if " b/" in line:
                    current_file = line.rsplit(" b/", 1)[1]
            elif current_file:
                current_content.append(line)
        if current_file:
            files[current_file] = "\n".join(current_content)
        return files

    def _run_security_scan(
        self, changed_files: dict[str, str], diff: str
    ) -> list[ReviewIssue]:
        """Run regex-based security pattern scanning on added lines only."""
        issues = []
        # Security patterns to detect.
        patterns = [
            {
                "name": "Hardcoded Secrets",
                "pattern": r'(?i)(api_key|apikey|secret|password|token|auth)\s*[=:]\s*["\'][^"\']{8,}["\']',
                "severity": "HIGH",
                "category": "Security",
                "description": "Potential hardcoded secret or API key detected",
                "recommendation": "Move secrets to environment variables or a secrets manager",
            },
            {
                "name": "SQL Injection",
                "pattern": r'(?i)(execute|query)\s*\([^)]*\+[^)]*\)|f["\'].*\{.*\}.*(?:SELECT|INSERT|UPDATE|DELETE)',
                "severity": "HIGH",
                "category": "Security",
                "description": "Potential SQL injection vulnerability - string concatenation in query",
                "recommendation": "Use parameterized queries or prepared statements",
            },
            {
                "name": "Hardcoded IP",
                "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
                "severity": "LOW",
                "category": "Security",
                "description": "Hardcoded IP address detected",
                "recommendation": "Consider using configuration or DNS names instead",
            },
            {
                "name": "Eval Usage",
                "pattern": r'\beval\s*\(',
                "severity": "HIGH",
                "category": "Security",
                "description": "Use of eval() detected - potential code injection risk",
                "recommendation": "Avoid eval() - use safer alternatives like ast.literal_eval() for Python",
            },
            {
                "name": "Shell Injection",
                "pattern": r'(?i)(?:subprocess\.call|os\.system|shell\s*=\s*True)',
                "severity": "MEDIUM",
                "category": "Security",
                "description": "Potential shell command execution - verify input is sanitized",
                "recommendation": "Use subprocess with shell=False and pass arguments as a list",
            },
        ]
        # Pre-compile once; the same patterns are applied to every line.
        compiled = [
            (re.compile(p["pattern"]), p) for p in patterns
        ]
        for filename, content in changed_files.items():
            # Only check added lines (starting with "+"), tracking their
            # line numbers in the NEW file.
            added_lines = []
            current_line = 0
            for line in content.splitlines():
                if line.startswith("@@"):
                    # Parse the new-file start from "@@ -x,y +a,b @@".
                    match = re.search(r"\+(\d+)", line)
                    if match:
                        current_line = int(match.group(1)) - 1
                elif line.startswith("+") and not line.startswith("+++"):
                    current_line += 1
                    added_lines.append((current_line, line[1:]))
                elif line.startswith("\\"):
                    # "\ No newline at end of file" markers occupy no line in
                    # the new file; counting them would skew line numbers.
                    continue
                elif not line.startswith("-"):
                    current_line += 1
            # Check patterns against each added line.
            for line_num, line_content in added_lines:
                for regex, pattern_def in compiled:
                    if regex.search(line_content):
                        issues.append(
                            ReviewIssue(
                                file=filename,
                                line=line_num,
                                severity=pattern_def["severity"],
                                category=pattern_def["category"],
                                description=pattern_def["description"],
                                recommendation=pattern_def["recommendation"],
                                code_snippet=line_content.strip()[:100],
                            )
                        )
        return issues

    def _run_ai_review(
        self,
        diff: str,
        context: AgentContext,
        security_issues: list[ReviewIssue],
    ) -> PRReviewResult:
        """Run the LLM-based code review; degrade gracefully on failure."""
        prompt_template = self.load_prompt("base")
        # Add security context if issues were found, so the LLM does not
        # duplicate what the scanner already reported.
        security_context = ""
        if security_issues:
            security_context = "\n\nSECURITY SCAN RESULTS (already detected):\n"
            for issue in security_issues[:5]:  # Limit to first 5.
                security_context += f"- [{issue.severity}] {issue.file}:{issue.line} - {issue.description}\n"
        prompt = f"{prompt_template}\n{security_context}\nDIFF:\n{diff}"
        try:
            result = self.call_llm_json(prompt)
            issues = [
                ReviewIssue(
                    file=issue_data.get("file", "unknown"),
                    line=issue_data.get("line"),
                    severity=issue_data.get("severity", "MEDIUM"),
                    category=issue_data.get("category", "General"),
                    description=issue_data.get("description", ""),
                    recommendation=issue_data.get("recommendation", ""),
                    code_snippet=issue_data.get("code_snippet"),
                )
                for issue_data in result.get("issues", [])
            ]
            return PRReviewResult(
                summary=result.get("summary", "Review completed"),
                issues=issues,
                overall_severity=result.get("overall_severity", "LOW"),
                approval=result.get("approval", True),
                security_issues=security_issues,
            )
        except Exception as e:
            self.logger.error(f"AI review failed: {e}")
            # Withhold approval when the review itself failed.
            return PRReviewResult(
                summary=f"AI review encountered an error: {e}",
                issues=[],
                overall_severity="UNKNOWN",
                approval=False,
                security_issues=security_issues,
            )

    def _post_inline_comments(
        self,
        owner: str,
        repo: str,
        pr_number: int,
        review: PRReviewResult,
    ) -> int:
        """Post inline review comments for issues that carry line numbers.

        Returns the number of comments actually submitted (0 on failure).
        """
        comments = []
        all_issues = review.issues + review.security_issues
        for issue in all_issues:
            if issue.line and issue.file:
                comment_body = (
                    f"**[{issue.severity}] {issue.category}**\n\n"
                    f"{issue.description}\n\n"
                    f"**Recommendation:** {issue.recommendation}"
                )
                comments.append(
                    {
                        "path": issue.file,
                        "line": issue.line,
                        "body": comment_body,
                    }
                )
        if not comments:
            return 0
        try:
            # Use Gitea's pull request review API for inline comments.
            self.gitea.create_pull_request_review(
                owner=owner,
                repo=repo,
                index=pr_number,
                body="AI Code Review - Inline Comments",
                event="COMMENT",
                comments=comments[:10],  # Limit to 10 inline comments.
            )
            return min(len(comments), 10)
        except Exception as e:
            self.logger.warning(f"Failed to post inline comments: {e}")
            return 0

    def _generate_summary_comment(self, review: PRReviewResult) -> str:
        """Generate the markdown summary comment for the PR."""
        lines = [
            f"{self.AI_DISCLAIMER}",
            "",
            "## AI Code Review",
            "",
            review.summary,
            "",
        ]
        # Severity statistics over both AI and scanner findings.
        all_issues = review.issues + review.security_issues
        high = sum(1 for i in all_issues if i.severity == "HIGH")
        medium = sum(1 for i in all_issues if i.severity == "MEDIUM")
        low = sum(1 for i in all_issues if i.severity == "LOW")
        lines.append("### Summary")
        lines.append("")
        lines.append("| Severity | Count |")
        lines.append("|----------|-------|")
        lines.append(f"| HIGH | {high} |")
        lines.append(f"| MEDIUM | {medium} |")
        lines.append(f"| LOW | {low} |")
        lines.append("")
        # Security issues section.
        if review.security_issues:
            lines.append("### Security Issues")
            lines.append("")
            for issue in review.security_issues[:5]:
                lines.append(f"- **[{issue.severity}]** `{issue.file}:{issue.line}` - {issue.description}")
            lines.append("")
        # Other issues (display limited to 10).
        other_issues = [i for i in review.issues if i not in review.security_issues]
        if other_issues:
            lines.append("### Review Findings")
            lines.append("")
            for issue in other_issues[:10]:
                loc = f"`{issue.file}:{issue.line}`" if issue.line else f"`{issue.file}`"
                lines.append(f"- **[{issue.severity}]** {loc} - {issue.description}")
            if len(other_issues) > 10:
                lines.append(f"- ...and {len(other_issues) - 10} more issues")
            lines.append("")
        # Verdict.
        lines.append("---")
        lines.append(f"**Overall Severity:** `{review.overall_severity}`")
        if review.approval:
            lines.append("**AI Recommendation:** Approve")
        else:
            lines.append("**AI Recommendation:** Changes Requested")
        return "\n".join(lines)

    def _apply_review_labels(
        self,
        owner: str,
        repo: str,
        pr_number: int,
        review: PRReviewResult,
    ) -> list[str]:
        """Apply approved/changes-required labels based on the review.

        Returns the names of the labels that were added (empty on failure).
        """
        labels_config = self.config.get("labels", {}).get("status", {})
        try:
            repo_labels = self.gitea.get_repo_labels(owner, repo)
            label_map = {label["name"]: label["id"] for label in repo_labels}
        except Exception as e:
            self.logger.warning(f"Failed to get repo labels: {e}")
            return []
        labels_to_add = []
        # Add approval/changes required label.
        if review.approval:
            label_name = labels_config.get("ai_approved", "ai-approved")
        else:
            label_name = labels_config.get("ai_changes_required", "ai-changes-required")
        if label_name in label_map:
            labels_to_add.append(label_map[label_name])
        if labels_to_add:
            try:
                self.gitea.add_issue_labels(owner, repo, pr_number, labels_to_add)
                return [
                    name
                    for name, label_id in label_map.items()
                    if label_id in labels_to_add
                ]
            except Exception as e:
                self.logger.warning(f"Failed to add labels: {e}")
        return []

View File

@@ -0,0 +1,10 @@
"""API Clients Package
This package contains client wrappers for external services
like Gitea API and LLM providers.
"""
from clients.gitea_client import GiteaClient
from clients.llm_client import LLMClient
__all__ = ["GiteaClient", "LLMClient"]

View File

@@ -0,0 +1,447 @@
"""Gitea API Client
A unified client for interacting with the Gitea REST API.
Provides methods for issues, pull requests, comments, and repository operations.
"""
import os
from typing import Any
import requests
class GiteaClient:
"""Client for Gitea API operations."""
def __init__(
self,
api_url: str | None = None,
token: str | None = None,
timeout: int = 30,
):
"""Initialize the Gitea client.
Args:
api_url: Gitea API base URL. Defaults to AI_REVIEW_API_URL env var.
token: API token. Defaults to AI_REVIEW_TOKEN env var.
timeout: Request timeout in seconds.
"""
self.api_url = api_url or os.environ.get("AI_REVIEW_API_URL", "")
self.token = token or os.environ.get("AI_REVIEW_TOKEN", "")
self.timeout = timeout
if not self.api_url:
raise ValueError("Gitea API URL is required")
if not self.token:
raise ValueError("Gitea API token is required")
self.headers = {
"Authorization": f"token {self.token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
def _request(
self,
method: str,
endpoint: str,
json: dict | None = None,
params: dict | None = None,
) -> dict | list:
"""Make an API request.
Args:
method: HTTP method (GET, POST, PATCH, DELETE).
endpoint: API endpoint (without base URL).
json: Request body for POST/PATCH.
params: Query parameters.
Returns:
Response JSON data.
Raises:
requests.HTTPError: If the request fails.
"""
url = f"{self.api_url}{endpoint}"
response = requests.request(
method=method,
url=url,
headers=self.headers,
json=json,
params=params,
timeout=self.timeout,
)
response.raise_for_status()
if response.status_code == 204:
return {}
return response.json()
# -------------------------------------------------------------------------
# Issue Operations
# -------------------------------------------------------------------------
def create_issue(
self,
owner: str,
repo: str,
title: str,
body: str,
labels: list[int] | None = None,
) -> dict:
"""Create a new issue.
Args:
owner: Repository owner.
repo: Repository name.
title: Issue title.
body: Issue body.
labels: Optional list of label IDs.
Returns:
Created issue object.
"""
payload = {
"title": title,
"body": body,
}
if labels:
payload["labels"] = labels
return self._request(
"POST",
f"/repos/{owner}/{repo}/issues",
json=payload,
)
def update_issue(
self,
owner: str,
repo: str,
index: int,
title: str | None = None,
body: str | None = None,
state: str | None = None,
) -> dict:
"""Update an existing issue.
Args:
owner: Repository owner.
repo: Repository name.
index: Issue number.
title: New title.
body: New body.
state: New state (open, closed).
Returns:
Updated issue object.
"""
payload = {}
if title:
payload["title"] = title
if body:
payload["body"] = body
if state:
payload["state"] = state
return self._request(
"PATCH",
f"/repos/{owner}/{repo}/issues/{index}",
json=payload,
)
def list_issues(
self,
owner: str,
repo: str,
state: str = "open",
labels: list[str] | None = None,
page: int = 1,
limit: int = 30,
) -> list[dict]:
"""List issues in a repository.
Args:
owner: Repository owner.
repo: Repository name.
state: Issue state (open, closed, all).
labels: Filter by labels.
page: Page number.
limit: Items per page.
Returns:
List of issue objects.
"""
params = {
"state": state,
"page": page,
"limit": limit,
}
if labels:
params["labels"] = ",".join(labels)
return self._request("GET", f"/repos/{owner}/{repo}/issues", params=params)
def get_issue(self, owner: str, repo: str, index: int) -> dict:
"""Get a single issue.
Args:
owner: Repository owner.
repo: Repository name.
index: Issue number.
Returns:
Issue object.
"""
return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}")
def create_issue_comment(
self,
owner: str,
repo: str,
index: int,
body: str,
) -> dict:
"""Create a comment on an issue.
Args:
owner: Repository owner.
repo: Repository name.
index: Issue number.
body: Comment body.
Returns:
Created comment object.
"""
return self._request(
"POST",
f"/repos/{owner}/{repo}/issues/{index}/comments",
json={"body": body},
)
def update_issue_comment(
self,
owner: str,
repo: str,
comment_id: int,
body: str,
) -> dict:
"""Update an existing comment.
Args:
owner: Repository owner.
repo: Repository name.
comment_id: Comment ID.
body: Updated comment body.
Returns:
Updated comment object.
"""
return self._request(
"PATCH",
f"/repos/{owner}/{repo}/issues/comments/{comment_id}",
json={"body": body},
)
def list_issue_comments(
self,
owner: str,
repo: str,
index: int,
) -> list[dict]:
"""List comments on an issue.
Args:
owner: Repository owner.
repo: Repository name.
index: Issue number.
Returns:
List of comment objects.
"""
return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}/comments")
def add_issue_labels(
self,
owner: str,
repo: str,
index: int,
labels: list[int],
) -> list[dict]:
"""Add labels to an issue.
Args:
owner: Repository owner.
repo: Repository name.
index: Issue number.
labels: List of label IDs to add.
Returns:
List of label objects.
"""
return self._request(
"POST",
f"/repos/{owner}/{repo}/issues/{index}/labels",
json={"labels": labels},
)
def get_repo_labels(self, owner: str, repo: str) -> list[dict]:
"""Get all labels for a repository.
Args:
owner: Repository owner.
repo: Repository name.
Returns:
List of label objects.
"""
return self._request("GET", f"/repos/{owner}/{repo}/labels")
# -------------------------------------------------------------------------
# Pull Request Operations
# -------------------------------------------------------------------------
def get_pull_request(self, owner: str, repo: str, index: int) -> dict:
"""Get a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
Returns:
Pull request object.
"""
return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}")
def get_pull_request_diff(self, owner: str, repo: str, index: int) -> str:
"""Get the diff for a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
Returns:
Diff text.
"""
url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{index}.diff"
response = requests.get(
url,
headers={
"Authorization": f"token {self.token}",
"Accept": "text/plain",
},
timeout=self.timeout,
)
response.raise_for_status()
return response.text
def list_pull_request_files(
self,
owner: str,
repo: str,
index: int,
) -> list[dict]:
"""List files changed in a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
Returns:
List of changed file objects.
"""
return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}/files")
def create_pull_request_review(
self,
owner: str,
repo: str,
index: int,
body: str,
event: str = "COMMENT",
comments: list[dict] | None = None,
) -> dict:
"""Create a review on a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
body: Review body.
event: Review event (APPROVE, REQUEST_CHANGES, COMMENT).
comments: List of inline comments.
Returns:
Created review object.
"""
payload: dict[str, Any] = {
"body": body,
"event": event,
}
if comments:
payload["comments"] = comments
return self._request(
"POST",
f"/repos/{owner}/{repo}/pulls/{index}/reviews",
json=payload,
)
# -------------------------------------------------------------------------
# Repository Operations
# -------------------------------------------------------------------------
def get_repository(self, owner: str, repo: str) -> dict:
"""Get repository information.
Args:
owner: Repository owner.
repo: Repository name.
Returns:
Repository object.
"""
return self._request("GET", f"/repos/{owner}/{repo}")
def get_file_contents(
self,
owner: str,
repo: str,
filepath: str,
ref: str | None = None,
) -> dict:
"""Get file contents from a repository.
Args:
owner: Repository owner.
repo: Repository name.
filepath: Path to file.
ref: Git ref (branch, tag, commit).
Returns:
File content object with base64-encoded content.
"""
params = {}
if ref:
params["ref"] = ref
return self._request(
"GET",
f"/repos/{owner}/{repo}/contents/{filepath}",
params=params,
)
def get_branch(self, owner: str, repo: str, branch: str) -> dict:
"""Get branch information.
Args:
owner: Repository owner.
repo: Repository name.
branch: Branch name.
Returns:
Branch object.
"""
return self._request("GET", f"/repos/{owner}/{repo}/branches/{branch}")

View File

@@ -0,0 +1,482 @@
"""LLM Client
A unified client for interacting with multiple LLM providers.
Supports OpenAI, OpenRouter, Ollama, and extensible for more providers.
"""
import json
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
import requests
@dataclass
class ToolCall:
    """Represents a tool call from the LLM."""

    id: str          # provider-assigned call id (echoed back with tool results)
    name: str        # name of the function/tool the model wants invoked
    arguments: dict  # arguments for the call, parsed from the provider's JSON
@dataclass
class LLMResponse:
    """Response from an LLM call."""

    content: str                     # generated text ("" when only tool calls were returned)
    model: str                       # model identifier reported by the provider
    provider: str                    # provider name, e.g. "openai"
    tokens_used: int | None = None       # total tokens, when the provider reports usage
    finish_reason: str | None = None     # provider-reported finish reason, if any
    tool_calls: list[ToolCall] | None = None  # parsed tool calls, if the model made any
class BaseLLMProvider(ABC):
    """Common interface implemented by every LLM backend."""

    @abstractmethod
    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Send a single prompt to the model.

        Args:
            prompt: The prompt to send.
            **kwargs: Provider-specific options.

        Returns:
            LLMResponse with the generated content.
        """

    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Send a chat with optional tool/function-calling support.

        Optional: providers that do not implement tool calling inherit
        this default, which raises.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            tools: List of tool definitions in OpenAI format.
            **kwargs: Provider-specific options.

        Returns:
            LLMResponse with content and/or tool_calls.

        Raises:
            NotImplementedError: Always, unless overridden by a provider.
        """
        raise NotImplementedError("Tool calling not supported by this provider")
class OpenAIProvider(BaseLLMProvider):
    """OpenAI API provider."""

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "gpt-4o-mini",
        temperature: float = 0,
        max_tokens: int = 4096,
    ):
        """Initialize the provider.

        Args:
            api_key: OpenAI API key. Defaults to the OPENAI_API_KEY env var.
            model: Default model name.
            temperature: Default sampling temperature.
            max_tokens: Default completion token limit.
        """
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.api_url = "https://api.openai.com/v1/chat/completions"

    def _headers(self) -> dict:
        """Build request headers; shared by call() and call_with_tools().

        Raises:
            ValueError: If no API key is configured.
        """
        if not self.api_key:
            raise ValueError("OpenAI API key is required")
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Call OpenAI API with a single user message.

        Args:
            prompt: Prompt text sent as one user message.
            **kwargs: Optional model/temperature/max_tokens overrides.

        Returns:
            LLMResponse with the generated content.

        Raises:
            ValueError: If no API key is configured.
            requests.HTTPError: If the API returns an error status.
        """
        response = requests.post(
            self.api_url,
            headers=self._headers(),
            json={
                "model": kwargs.get("model", self.model),
                "temperature": kwargs.get("temperature", self.temperature),
                "max_tokens": kwargs.get("max_tokens", self.max_tokens),
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=120,
        )
        response.raise_for_status()
        data = response.json()
        choice = data["choices"][0]
        usage = data.get("usage", {})
        return LLMResponse(
            # The API may return null content; normalize to "" so
            # LLMResponse.content is always a str (matches call_with_tools).
            content=choice["message"].get("content") or "",
            model=data["model"],
            provider="openai",
            tokens_used=usage.get("total_tokens"),
            finish_reason=choice.get("finish_reason"),
        )

    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Call OpenAI API with tool/function-calling support.

        Args:
            messages: Chat messages in OpenAI format.
            tools: Tool definitions in OpenAI format.
            **kwargs: Optional model/temperature/max_tokens/tool_choice
                overrides.

        Returns:
            LLMResponse with content and/or parsed tool_calls.

        Raises:
            ValueError: If no API key is configured.
            requests.HTTPError: If the API returns an error status.
        """
        headers = self._headers()
        request_body = {
            "model": kwargs.get("model", self.model),
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens),
            "messages": messages,
        }
        if tools:
            request_body["tools"] = tools
            request_body["tool_choice"] = kwargs.get("tool_choice", "auto")
        response = requests.post(
            self.api_url,
            headers=headers,
            json=request_body,
            timeout=120,
        )
        response.raise_for_status()
        data = response.json()
        choice = data["choices"][0]
        usage = data.get("usage", {})
        message = choice["message"]
        # Parse tool calls if present; arguments arrive as a JSON string.
        tool_calls = None
        if message.get("tool_calls"):
            tool_calls = [
                ToolCall(
                    id=tc["id"],
                    name=tc["function"]["name"],
                    arguments=json.loads(tc["function"]["arguments"]),
                )
                for tc in message["tool_calls"]
            ]
        return LLMResponse(
            content=message.get("content") or "",
            model=data["model"],
            provider="openai",
            tokens_used=usage.get("total_tokens"),
            finish_reason=choice.get("finish_reason"),
            tool_calls=tool_calls,
        )
class OpenRouterProvider(BaseLLMProvider):
    """OpenRouter API provider (OpenAI-compatible chat completions)."""

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "anthropic/claude-3.5-sonnet",
        temperature: float = 0,
        max_tokens: int = 4096,
    ):
        """Store call defaults; the key falls back to OPENROUTER_API_KEY."""
        self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY", "")
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.api_url = "https://openrouter.ai/api/v1/chat/completions"

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Send a single-turn user prompt to OpenRouter.

        Args:
            prompt: The prompt to send.
            **kwargs: Optional model/temperature/max_tokens overrides.
        Returns:
            LLMResponse with the generated content.
        """
        if not self.api_key:
            raise ValueError("OpenRouter API key is required")
        payload = {
            "model": kwargs.get("model", self.model),
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens),
            "messages": [{"role": "user", "content": prompt}],
        }
        auth_headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        resp = requests.post(
            self.api_url, headers=auth_headers, json=payload, timeout=120
        )
        resp.raise_for_status()
        parsed = resp.json()
        top_choice = parsed["choices"][0]
        token_info = parsed.get("usage", {})
        return LLMResponse(
            content=top_choice["message"]["content"],
            # OpenRouter may omit the model field; fall back to our default.
            model=parsed.get("model", self.model),
            provider="openrouter",
            tokens_used=token_info.get("total_tokens"),
            finish_reason=top_choice.get("finish_reason"),
        )

    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Send a full conversation, optionally advertising tools.

        Args:
            messages: Role/content message dicts.
            tools: Tool definitions in OpenAI format.
            **kwargs: Overrides, including ``tool_choice``.
        Returns:
            LLMResponse with content and/or tool_calls.
        """
        if not self.api_key:
            raise ValueError("OpenRouter API key is required")
        payload = {
            "model": kwargs.get("model", self.model),
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens),
            "messages": messages,
        }
        if tools:
            payload["tools"] = tools
            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
        resp = requests.post(
            self.api_url,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=120,
        )
        resp.raise_for_status()
        parsed = resp.json()
        top_choice = parsed["choices"][0]
        token_info = parsed.get("usage", {})
        msg = top_choice["message"]
        # Translate any tool_calls entries into ToolCall objects.
        calls = None
        if msg.get("tool_calls"):
            calls = [
                ToolCall(
                    id=tc["id"],
                    name=tc["function"]["name"],
                    arguments=json.loads(tc["function"]["arguments"]),
                )
                for tc in msg["tool_calls"]
            ]
        return LLMResponse(
            content=msg.get("content") or "",
            model=parsed.get("model", self.model),
            provider="openrouter",
            tokens_used=token_info.get("total_tokens"),
            finish_reason=top_choice.get("finish_reason"),
            tool_calls=calls,
        )
class OllamaProvider(BaseLLMProvider):
    """Ollama (self-hosted) provider using the /api/generate endpoint."""

    def __init__(
        self,
        host: str | None = None,
        model: str = "codellama:13b",
        temperature: float = 0,
    ):
        """Store defaults; host falls back to OLLAMA_HOST, then localhost."""
        self.host = host or os.environ.get("OLLAMA_HOST", "http://localhost:11434")
        self.model = model
        self.temperature = temperature

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Call Ollama API.

        Args:
            prompt: The prompt to send.
            **kwargs: Optional model/temperature overrides.
        Returns:
            LLMResponse with the generated content.
        """
        payload = {
            "model": kwargs.get("model", self.model),
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": kwargs.get("temperature", self.temperature),
            },
        }
        # Local models can be slow to load/generate, hence the long timeout.
        resp = requests.post(f"{self.host}/api/generate", json=payload, timeout=300)
        resp.raise_for_status()
        body = resp.json()
        return LLMResponse(
            content=body["response"],
            model=body.get("model", self.model),
            provider="ollama",
            tokens_used=body.get("eval_count"),
            # Ollama signals completion via "done" rather than a reason string.
            finish_reason="stop" if body.get("done") else None,
        )
class LLMClient:
    """Unified LLM client supporting multiple providers."""

    # Registry mapping provider name -> implementation class.
    PROVIDERS = {
        "openai": OpenAIProvider,
        "openrouter": OpenRouterProvider,
        "ollama": OllamaProvider,
    }

    def __init__(
        self,
        provider: str = "openai",
        config: dict | None = None,
    ):
        """Initialize the LLM client.

        Args:
            provider: Provider name (openai, openrouter, ollama).
            config: Provider-specific configuration.
        Raises:
            ValueError: If the provider name is not registered.
        """
        if provider not in self.PROVIDERS:
            raise ValueError(f"Unknown provider: {provider}. Available: {list(self.PROVIDERS.keys())}")
        self.provider_name = provider
        self.config = config or {}
        self._provider = self.PROVIDERS[provider](**self.config)

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Make a call to the configured LLM provider.

        Args:
            prompt: The prompt to send.
            **kwargs: Provider-specific options.
        Returns:
            LLMResponse with the generated content.
        """
        return self._provider.call(prompt, **kwargs)

    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Make a call with tool/function calling support.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            tools: List of tool definitions in OpenAI format.
            **kwargs: Provider-specific options.
        Returns:
            LLMResponse with content and/or tool_calls.
        """
        return self._provider.call_with_tools(messages, tools, **kwargs)

    def call_json(self, prompt: str, **kwargs) -> dict:
        """Make a call and parse the response as JSON.

        Args:
            prompt: The prompt to send (should request JSON output).
            **kwargs: Provider-specific options.
        Returns:
            Parsed JSON response.
        Raises:
            ValueError: If the response cannot be parsed as JSON
                (raised by _extract_json after all strategies fail).
        """
        response = self.call(prompt, **kwargs)
        return self._extract_json(response.content)

    def _extract_json(self, content: str) -> dict:
        """Extract and parse JSON from an LLM response string.

        Tries, in order: a direct parse; a ```json fenced block; the
        outermost {...} span; and finally the content with //- and
        /*-style comments stripped (LLMs sometimes emit commented
        pseudo-JSON).

        Raises:
            ValueError: If none of the strategies yield valid JSON.
        """
        import re  # single local import shared by attempts 2 and 4

        content = content.strip()
        # Attempt 1: direct parse.
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            pass
        # Attempt 2: extract from a markdown code fence.
        if "```" in content:
            match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
            if match:
                try:
                    return json.loads(match.group(1))
                except json.JSONDecodeError:
                    pass
        # Attempt 3: take the outermost brace-delimited span.
        start = content.find("{")
        end = content.rfind("}")
        if start != -1 and end != -1:
            try:
                return json.loads(content[start : end + 1])
            except json.JSONDecodeError:
                pass
        # Attempt 4: strip JS-style comments and trailing junk.
        # This is risky but helpful for LLM output.
        try:
            json_str = re.sub(r"//.*", "", content)
            json_str = re.sub(r"/\*[\s\S]*?\*/", "", json_str)
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            # If all attempts fail, surface the raw content for debugging.
            snippet = content[:500] + "..." if len(content) > 500 else content
            raise ValueError(f"Failed to parse JSON response: {e}. Raw content snippet: {snippet!r}")

    @classmethod
    def from_config(cls, config: dict) -> "LLMClient":
        """Create an LLM client from a configuration dictionary.

        Args:
            config: Configuration with 'provider' key and provider-specific
                settings (see config.yml's top-level model/temperature keys).
        Returns:
            Configured LLMClient instance.
        """
        provider = config.get("provider", "openai")
        provider_config = {}
        # Map config keys to provider-specific constructor arguments.
        if provider == "openai":
            provider_config = {
                "model": config.get("model", {}).get("openai", "gpt-4o-mini"),
                "temperature": config.get("temperature", 0),
                "max_tokens": config.get("max_tokens", 16000),
            }
        elif provider == "openrouter":
            provider_config = {
                "model": config.get("model", {}).get("openrouter", "anthropic/claude-3.5-sonnet"),
                "temperature": config.get("temperature", 0),
                "max_tokens": config.get("max_tokens", 16000),
            }
        elif provider == "ollama":
            # Ollama takes no max_tokens; length is managed server-side.
            provider_config = {
                "model": config.get("model", {}).get("ollama", "codellama:13b"),
                "temperature": config.get("temperature", 0),
            }
        return cls(provider=provider, config=provider_config)

View File

@@ -0,0 +1,23 @@
def to_markdown(result: dict) -> str:
    """Render an AI review result dict as a Markdown comment body.

    Args:
        result: Review payload with keys ``summary``, ``issues`` (list of
            dicts with severity/category/file/line/description/recommendation),
            ``overall_severity``, and ``approval``.
    Returns:
        Markdown string ready to post as a review comment.
    """
    lines = []
    lines.append("## 🤖 Enterprise AI Code Review\n")
    lines.append(result.get("summary", "") + "\n")
    if not result.get("issues"):
        lines.append("✅ No issues found.\n")
    else:
        for issue in result["issues"]:
            # Separator between severity and category (was concatenated,
            # producing e.g. "HIGHSecurity").
            lines.append(f"### ❗ {issue['severity']}: {issue['category']}")
            lines.append(f"- **File:** `{issue['file']}`")
            if issue.get("line"):
                lines.append(f"- **Line:** `{issue['line']}`")
            lines.append(f"- **Issue:** {issue['description']}")
            lines.append(f"- **Recommendation:** {issue['recommendation']}\n")
    lines.append("---")
    # .get avoids a KeyError when the LLM omits overall_severity.
    lines.append(f"**Overall severity:** `{result.get('overall_severity', 'UNKNOWN')}`")
    lines.append(
        "✅ **AI Approval**" if result.get("approval") else "❌ **Changes required**"
    )
    return "\n".join(lines)

View File

@@ -0,0 +1,96 @@
provider: openai # openai | openrouter | ollama
model:
openai: gpt-4.1-mini
openrouter: anthropic/claude-3.5-sonnet
ollama: codellama:13b
temperature: 0
max_tokens: 4096
# Review settings
review:
fail_on_severity: HIGH
max_diff_lines: 800
inline_comments: true
security_scan: true
# Agent settings
agents:
issue:
enabled: true
auto_label: true
auto_triage: true
duplicate_threshold: 0.85
events:
- opened
- labeled
pr:
enabled: true
inline_comments: true
security_scan: true
events:
- opened
- synchronize
codebase:
enabled: true
schedule: "0 0 * * 0" # Weekly on Sunday
chat:
enabled: true
name: "Bartender"
max_iterations: 5 # Max tool call iterations per chat
tools:
- search_codebase
- read_file
- search_web
searxng_url: "" # Set via SEARXNG_URL env var or here
# Interaction settings
# CUSTOMIZE YOUR BOT NAME HERE!
# Change mention_prefix to your preferred bot name:
# "@ai-bot" - Default
# "@bartender" - Friendly bar theme
# "@uni" - Short and simple
# "@joey" - Personal assistant name
# "@codebot" - Code-focused name
# NOTE: Also update the workflow files (.github/workflows/ or .gitea/workflows/)
# to match this prefix in the 'if: contains(...)' condition
interaction:
respond_to_mentions: true
mention_prefix: "@ai-bot" # Change this to customize your bot's name!
commands:
- explain
- suggest
- security
- summarize
# Enterprise settings
enterprise:
audit_log: true
audit_path: "/var/log/ai-review/"
metrics_enabled: true
rate_limit:
requests_per_minute: 30
max_concurrent: 4
# Label mappings for auto-labeling
labels:
priority:
high: "priority: high"
medium: "priority: medium"
low: "priority: low"
type:
bug: "type: bug"
feature: "type: feature"
question: "type: question"
docs: "type: documentation"
status:
ai_approved: "ai-approved"
ai_changes_required: "ai-changes-required"
ai_reviewed: "ai-reviewed"
# Security scanning rules
security:
enabled: true
fail_on_high: true
rules_file: "security/security_rules.yml"

View File

@@ -0,0 +1,211 @@
"""Event Dispatcher
Routes incoming webhook events to the appropriate agent handlers.
Supports concurrent execution and queue management.
"""
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Type
import yaml
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class DispatchResult:
    """Result of dispatching an event."""
    # Webhook event type that was dispatched (e.g. "pull_request").
    event_type: str
    # Class names of the agents that handled the event, in execution order.
    agents_run: list[str]
    # One AgentResult per entry in agents_run.
    results: list[AgentResult]
    # "<AgentName>: <reason>" strings for any failed agents.
    errors: list[str]
class Dispatcher:
    """Event dispatcher that routes events to appropriate agents."""

    def __init__(
        self,
        config: dict | None = None,
        max_workers: int = 4,
    ):
        """Initialize the dispatcher.

        Args:
            config: Configuration dictionary.
            max_workers: Maximum concurrent agent executions
                (used only by dispatch_async).
        """
        self.config = config or self._load_config()
        self.max_workers = max_workers
        self.logger = logging.getLogger(__name__)
        self._agents: list[BaseAgent] = []
        self._executor = ThreadPoolExecutor(max_workers=max_workers)

    @staticmethod
    def _load_config() -> dict:
        """Load configuration from config.yml next to this module.

        Returns {} when the file is missing or empty: yaml.safe_load
        yields None for an empty document, which would break callers
        that expect a dict (e.g. self.config.get(...)).
        """
        config_path = os.path.join(os.path.dirname(__file__), "config.yml")
        if os.path.exists(config_path):
            with open(config_path) as f:
                return yaml.safe_load(f) or {}
        return {}

    def register_agent(self, agent: BaseAgent):
        """Register an agent with the dispatcher.

        Args:
            agent: Agent instance to register.
        """
        self._agents.append(agent)
        self.logger.info(f"Registered agent: {agent.__class__.__name__}")

    def register_agent_class(self, agent_class: Type[BaseAgent], **kwargs):
        """Register an agent class (will be instantiated).

        Args:
            agent_class: Agent class to instantiate and register.
            **kwargs: Arguments to pass to agent constructor.
        """
        # The dispatcher's own config is forwarded to the agent.
        agent = agent_class(config=self.config, **kwargs)
        self.register_agent(agent)

    def dispatch(
        self,
        event_type: str,
        event_data: dict,
        owner: str,
        repo: str,
    ) -> DispatchResult:
        """Dispatch an event to registered agents.

        Agents are run sequentially, in registration order; one agent's
        failure does not prevent the others from running.

        Args:
            event_type: Type of event (issue, pull_request, issue_comment, etc).
            event_data: Event payload data.
            owner: Repository owner.
            repo: Repository name.
        Returns:
            Dispatch result with all agent results.
        """
        self.logger.info(f"Dispatching event: {event_type} for {owner}/{repo}")
        # Find agents that can handle this event.
        handlers = [
            agent for agent in self._agents if agent.can_handle(event_type, event_data)
        ]
        if not handlers:
            self.logger.info(f"No agents registered for event: {event_type}")
            return DispatchResult(
                event_type=event_type,
                agents_run=[],
                results=[],
                errors=[],
            )
        self.logger.info(
            f"Found {len(handlers)} agent(s) for event: {[a.__class__.__name__ for a in handlers]}"
        )
        # Single shared context for all handlers of this event.
        context = AgentContext(
            owner=owner,
            repo=repo,
            event_type=event_type,
            event_data=event_data,
            config=self.config,
        )
        # Run all handlers, collecting per-agent results and failures.
        results = []
        errors = []
        agents_run = []
        for agent in handlers:
            agent_name = agent.__class__.__name__
            agents_run.append(agent_name)
            try:
                result = agent.run(context)
                results.append(result)
                if not result.success:
                    errors.append(f"{agent_name}: {result.error or result.message}")
            except Exception as e:
                # A crashing agent is recorded as a failed AgentResult so the
                # DispatchResult invariant (one result per agent) holds.
                self.logger.exception(f"Agent {agent_name} failed: {e}")
                errors.append(f"{agent_name}: {str(e)}")
                results.append(
                    AgentResult(
                        success=False,
                        message="Unexpected error",
                        error=str(e),
                    )
                )
        return DispatchResult(
            event_type=event_type,
            agents_run=agents_run,
            results=results,
            errors=errors,
        )

    def dispatch_async(
        self,
        event_type: str,
        event_data: dict,
        owner: str,
        repo: str,
    ):
        """Dispatch an event asynchronously on the internal thread pool.

        Args:
            event_type: Type of event.
            event_data: Event payload data.
            owner: Repository owner.
            repo: Repository name.
        Returns:
            Future that resolves to DispatchResult.
        """
        return self._executor.submit(
            self.dispatch, event_type, event_data, owner, repo
        )

    def shutdown(self):
        """Shutdown the executor, waiting for in-flight dispatches."""
        self._executor.shutdown(wait=True)
# Singleton dispatcher for easy access.
# NOTE(review): creation below is not lock-guarded; concurrent first calls
# could briefly build two Dispatchers — confirm single-threaded startup.
_dispatcher: Dispatcher | None = None
def get_dispatcher() -> Dispatcher:
    """Get the global dispatcher instance, creating it on first use."""
    global _dispatcher
    if _dispatcher is None:
        _dispatcher = Dispatcher()
    return _dispatcher
def dispatch_event(
    event_type: str,
    event_data: dict,
    owner: str,
    repo: str,
) -> DispatchResult:
    """Dispatch an event using the global dispatcher.

    Convenience wrapper around get_dispatcher().dispatch(...).

    Args:
        event_type: Type of event.
        event_data: Event payload data.
        owner: Repository owner.
        repo: Repository name.
    Returns:
        Dispatch result.
    """
    return get_dispatcher().dispatch(event_type, event_data, owner, repo)

View File

@@ -0,0 +1,10 @@
"""Enterprise Features Package
This package contains enterprise-grade features like
audit logging and metrics collection.
"""
from enterprise.audit_logger import AuditLogger
from enterprise.metrics import MetricsCollector
__all__ = ["AuditLogger", "MetricsCollector"]

View File

@@ -0,0 +1,303 @@
"""Audit Logger
Enterprise audit logging for tracking all AI agent actions,
decisions, and interactions for compliance and debugging.
"""
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
class AuditLogger:
"""Audit logger for enterprise compliance."""
def __init__(
self,
log_path: str | None = None,
enabled: bool = True,
):
"""Initialize the audit logger.
Args:
log_path: Directory to write audit logs.
enabled: Whether audit logging is enabled.
"""
self.enabled = enabled
self.log_path = Path(
log_path or os.environ.get("AI_AUDIT_PATH", "/var/log/ai-review/")
)
self.logger = logging.getLogger("audit")
if self.enabled:
self._ensure_log_dir()
def _ensure_log_dir(self):
"""Ensure the log directory exists."""
try:
self.log_path.mkdir(parents=True, exist_ok=True)
except Exception as e:
self.logger.warning(f"Could not create audit log directory: {e}")
self.enabled = False
def _get_log_file(self) -> Path:
"""Get the current log file path (daily rotation)."""
date_str = datetime.utcnow().strftime("%Y-%m-%d")
return self.log_path / f"audit-{date_str}.jsonl"
def log(
self,
action: str,
agent: str,
owner: str,
repo: str,
details: dict[str, Any] | None = None,
success: bool = True,
error: str | None = None,
):
"""Log an audit event.
Args:
action: Action performed (e.g., "review_pr", "triage_issue").
agent: Agent name that performed the action.
owner: Repository owner.
repo: Repository name.
details: Additional details about the action.
success: Whether the action succeeded.
error: Error message if failed.
"""
if not self.enabled:
return
event = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"action": action,
"agent": agent,
"repository": f"{owner}/{repo}",
"success": success,
"details": details or {},
}
if error:
event["error"] = error
try:
log_file = self._get_log_file()
with open(log_file, "a") as f:
f.write(json.dumps(event) + "\n")
except Exception as e:
self.logger.error(f"Failed to write audit log: {e}")
def log_llm_call(
self,
agent: str,
owner: str,
repo: str,
provider: str,
model: str,
tokens_used: int | None = None,
duration_ms: int | None = None,
):
"""Log an LLM API call.
Args:
agent: Agent making the call.
owner: Repository owner.
repo: Repository name.
provider: LLM provider used.
model: Model name.
tokens_used: Number of tokens consumed.
duration_ms: Call duration in milliseconds.
"""
self.log(
action="llm_call",
agent=agent,
owner=owner,
repo=repo,
details={
"provider": provider,
"model": model,
"tokens_used": tokens_used,
"duration_ms": duration_ms,
},
)
def log_comment_posted(
self,
agent: str,
owner: str,
repo: str,
issue_number: int,
comment_type: str,
):
"""Log a comment being posted.
Args:
agent: Agent posting the comment.
owner: Repository owner.
repo: Repository name.
issue_number: Issue or PR number.
comment_type: Type of comment (triage, review, response).
"""
self.log(
action="comment_posted",
agent=agent,
owner=owner,
repo=repo,
details={
"issue_number": issue_number,
"comment_type": comment_type,
},
)
def log_labels_applied(
self,
agent: str,
owner: str,
repo: str,
issue_number: int,
labels: list[str],
):
"""Log labels being applied.
Args:
agent: Agent applying labels.
owner: Repository owner.
repo: Repository name.
issue_number: Issue or PR number.
labels: Labels applied.
"""
self.log(
action="labels_applied",
agent=agent,
owner=owner,
repo=repo,
details={
"issue_number": issue_number,
"labels": labels,
},
)
def get_logs(
self,
start_date: str | None = None,
end_date: str | None = None,
action: str | None = None,
repository: str | None = None,
) -> list[dict]:
"""Retrieve audit logs with optional filtering.
Args:
start_date: Start date (YYYY-MM-DD).
end_date: End date (YYYY-MM-DD).
action: Filter by action type.
repository: Filter by repository (owner/repo).
Returns:
List of audit log entries.
"""
if not self.enabled:
return []
logs = []
log_files = sorted(self.log_path.glob("audit-*.jsonl"))
for log_file in log_files:
# Date filter on filename
file_date = log_file.stem.replace("audit-", "")
if start_date and file_date < start_date:
continue
if end_date and file_date > end_date:
continue
try:
with open(log_file) as f:
for line in f:
try:
entry = json.loads(line.strip())
# Apply filters
if action and entry.get("action") != action:
continue
if repository and entry.get("repository") != repository:
continue
logs.append(entry)
except json.JSONDecodeError:
continue
except Exception:
continue
return logs
def generate_report(
self,
start_date: str | None = None,
end_date: str | None = None,
) -> dict:
"""Generate a summary report of audit activity.
Args:
start_date: Report start date.
end_date: Report end date.
Returns:
Summary report dictionary.
"""
logs = self.get_logs(start_date=start_date, end_date=end_date)
report = {
"period": {
"start": start_date or "all",
"end": end_date or "all",
},
"total_events": len(logs),
"by_action": {},
"by_repository": {},
"by_agent": {},
"success_rate": 0.0,
"llm_usage": {
"total_calls": 0,
"total_tokens": 0,
},
}
success_count = 0
for log in logs:
action = log.get("action", "unknown")
repo = log.get("repository", "unknown")
agent = log.get("agent", "unknown")
report["by_action"][action] = report["by_action"].get(action, 0) + 1
report["by_repository"][repo] = report["by_repository"].get(repo, 0) + 1
report["by_agent"][agent] = report["by_agent"].get(agent, 0) + 1
if log.get("success"):
success_count += 1
if action == "llm_call":
report["llm_usage"]["total_calls"] += 1
tokens = log.get("details", {}).get("tokens_used")
if tokens:
report["llm_usage"]["total_tokens"] += tokens
if logs:
report["success_rate"] = success_count / len(logs)
return report
# Global instance, lazily created with default settings (env-driven path).
_audit_logger: AuditLogger | None = None
def get_audit_logger() -> AuditLogger:
    """Get the global audit logger instance, creating it on first use."""
    global _audit_logger
    if _audit_logger is None:
        _audit_logger = AuditLogger()
    return _audit_logger

View File

@@ -0,0 +1,371 @@
"""Metrics Collector
Observability metrics for AI agent performance monitoring.
Tracks request counts, latencies, errors, and LLM usage.
"""
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from threading import Lock
@dataclass
class MetricPoint:
    """A single metric data point."""
    # When the observation was recorded.
    timestamp: datetime
    # The observed value.
    value: float
    # Optional dimension labels (e.g. {"agent": "PRAgent"}).
    labels: dict = field(default_factory=dict)
class Counter:
    """Thread-safe, additive counter metric."""

    def __init__(self, name: str, description: str = ""):
        """Create a counter starting at zero."""
        self.name = name
        self.description = description
        self._lock = Lock()
        self._value = 0.0

    def inc(self, value: float = 1.0):
        """Add ``value`` (default 1) to the counter."""
        with self._lock:
            self._value = self._value + value

    @property
    def value(self) -> float:
        """Current counter value (read under the lock)."""
        with self._lock:
            return self._value
class Gauge:
    """Thread-safe gauge metric (a value that can go up and down)."""

    def __init__(self, name: str, description: str = ""):
        """Create a gauge starting at zero."""
        self.name = name
        self.description = description
        self._lock = Lock()
        self._value = 0.0

    def set(self, value: float):
        """Replace the gauge value."""
        with self._lock:
            self._value = value

    def inc(self, value: float = 1.0):
        """Raise the gauge by ``value`` (default 1)."""
        with self._lock:
            self._value = self._value + value

    def dec(self, value: float = 1.0):
        """Lower the gauge by ``value`` (default 1)."""
        with self._lock:
            self._value = self._value - value

    @property
    def value(self) -> float:
        """Current gauge value (read under the lock)."""
        with self._lock:
            return self._value
class Histogram:
"""Simple histogram for tracking distributions."""
def __init__(
self,
name: str,
description: str = "",
buckets: list[float] | None = None,
):
self.name = name
self.description = description
self.buckets = buckets or [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
self._values: list[float] = []
self._lock = Lock()
def observe(self, value: float):
"""Record an observation."""
with self._lock:
self._values.append(value)
# Keep only last 1000 observations
if len(self._values) > 1000:
self._values = self._values[-1000:]
def get_percentile(self, percentile: float) -> float:
"""Get a percentile value."""
with self._lock:
if not self._values:
return 0.0
sorted_values = sorted(self._values)
idx = int(len(sorted_values) * percentile / 100)
return sorted_values[min(idx, len(sorted_values) - 1)]
@property
def count(self) -> int:
"""Get observation count."""
with self._lock:
return len(self._values)
@property
def sum(self) -> float:
"""Get sum of observations."""
with self._lock:
return sum(self._values)
class MetricsCollector:
    """Central metrics collector for AI agents.

    Aggregates counters, gauges, and histograms for requests, LLM usage,
    and agent actions; can summarize to a dict or export a Prometheus
    text snapshot.
    """
    def __init__(self, enabled: bool = True):
        """Initialize metrics collector.
        Args:
            enabled: Whether metrics collection is enabled. When False,
                all record_* methods become no-ops.
        """
        self.enabled = enabled
        # Used to compute uptime in get_summary().
        self._start_time = time.time()
        # Counters
        self.requests_total = Counter(
            "ai_review_requests_total",
            "Total number of review requests processed",
        )
        self.requests_success = Counter(
            "ai_review_requests_success",
            "Number of successful review requests",
        )
        self.requests_failed = Counter(
            "ai_review_requests_failed",
            "Number of failed review requests",
        )
        self.llm_calls_total = Counter(
            "ai_review_llm_calls_total",
            "Total number of LLM API calls",
        )
        self.llm_tokens_total = Counter(
            "ai_review_llm_tokens_total",
            "Total LLM tokens consumed",
        )
        self.comments_posted = Counter(
            "ai_review_comments_posted_total",
            "Total comments posted",
        )
        self.labels_applied = Counter(
            "ai_review_labels_applied_total",
            "Total labels applied",
        )
        self.security_findings = Counter(
            "ai_review_security_findings_total",
            "Total security findings detected",
        )
        # Gauges
        self.active_requests = Gauge(
            "ai_review_active_requests",
            "Currently active review requests",
        )
        # Histograms
        self.request_duration = Histogram(
            "ai_review_request_duration_seconds",
            "Request processing duration",
        )
        self.llm_duration = Histogram(
            "ai_review_llm_duration_seconds",
            "LLM API call duration",
        )
        # Per-agent metrics: agent name -> {"total", "success", "failed"}.
        self._agent_metrics: dict[str, dict] = {}
    def record_request_start(self, agent: str):
        """Record the start of a request.

        Pair with record_request_end; otherwise active_requests drifts.
        Args:
            agent: Name of the agent handling the request.
        """
        if not self.enabled:
            return
        self.requests_total.inc()
        self.active_requests.inc()
        if agent not in self._agent_metrics:
            self._agent_metrics[agent] = {
                "total": 0,
                "success": 0,
                "failed": 0,
            }
        self._agent_metrics[agent]["total"] += 1
    def record_request_end(
        self,
        agent: str,
        success: bool,
        duration_seconds: float,
    ):
        """Record the end of a request.
        Args:
            agent: Name of the agent.
            success: Whether the request succeeded.
            duration_seconds: Request duration.
        """
        if not self.enabled:
            return
        self.active_requests.dec()
        self.request_duration.observe(duration_seconds)
        if success:
            self.requests_success.inc()
            # Per-agent entry only exists if record_request_start was called.
            if agent in self._agent_metrics:
                self._agent_metrics[agent]["success"] += 1
        else:
            self.requests_failed.inc()
            if agent in self._agent_metrics:
                self._agent_metrics[agent]["failed"] += 1
    def record_llm_call(
        self,
        provider: str,
        model: str,
        tokens: int | None,
        duration_seconds: float,
    ):
        """Record an LLM API call.

        Note: provider and model are accepted for interface symmetry but
        are not currently stored as labels.
        Args:
            provider: LLM provider name.
            model: Model used.
            tokens: Tokens consumed.
            duration_seconds: Call duration.
        """
        if not self.enabled:
            return
        self.llm_calls_total.inc()
        self.llm_duration.observe(duration_seconds)
        if tokens:
            self.llm_tokens_total.inc(tokens)
    def record_comment_posted(self):
        """Record a comment being posted."""
        if self.enabled:
            self.comments_posted.inc()
    def record_labels_applied(self, count: int = 1):
        """Record labels being applied."""
        if self.enabled:
            self.labels_applied.inc(count)
    def record_security_finding(self, severity: str):
        """Record a security finding.

        NOTE(review): severity is currently unused — the counter is not
        broken down by severity. Kept for interface stability.
        """
        if self.enabled:
            self.security_findings.inc()
    def get_summary(self) -> dict:
        """Get a summary of all metrics.
        Returns:
            Dictionary with metric summaries (counts, rates, latency
            percentiles in milliseconds, and per-agent breakdown).
        """
        uptime = time.time() - self._start_time
        return {
            "uptime_seconds": uptime,
            "requests": {
                "total": self.requests_total.value,
                "success": self.requests_success.value,
                "failed": self.requests_failed.value,
                "active": self.active_requests.value,
                # max(..., 1) guards the zero-requests division.
                "success_rate": (
                    self.requests_success.value / max(self.requests_total.value, 1)
                ),
            },
            "llm": {
                "calls": self.llm_calls_total.value,
                "tokens": self.llm_tokens_total.value,
                "avg_duration_ms": (
                    (self.llm_duration.sum / max(self.llm_duration.count, 1)) * 1000
                ),
                "p50_duration_ms": self.llm_duration.get_percentile(50) * 1000,
                "p95_duration_ms": self.llm_duration.get_percentile(95) * 1000,
            },
            "actions": {
                "comments_posted": self.comments_posted.value,
                "labels_applied": self.labels_applied.value,
                "security_findings": self.security_findings.value,
            },
            "latency": {
                "avg_ms": (
                    (self.request_duration.sum / max(self.request_duration.count, 1))
                    * 1000
                ),
                "p50_ms": self.request_duration.get_percentile(50) * 1000,
                "p95_ms": self.request_duration.get_percentile(95) * 1000,
                "p99_ms": self.request_duration.get_percentile(99) * 1000,
            },
            "by_agent": self._agent_metrics,
        }
    def export_prometheus(self) -> str:
        """Export metrics in Prometheus format.

        NOTE(review): only a subset of the counters is exported (no gauges
        or histograms, no # TYPE lines) — extend if scraped in production.
        Returns:
            Prometheus-formatted metrics string.
        """
        lines = []
        def add_metric(name: str, value: float, help_text: str = ""):
            # Emit an optional HELP comment followed by the sample line.
            if help_text:
                lines.append(f"# HELP {name} {help_text}")
            lines.append(f"{name} {value}")
        add_metric(
            "ai_review_requests_total",
            self.requests_total.value,
            "Total review requests",
        )
        add_metric(
            "ai_review_requests_success_total",
            self.requests_success.value,
            "Successful requests",
        )
        add_metric(
            "ai_review_requests_failed_total",
            self.requests_failed.value,
            "Failed requests",
        )
        add_metric(
            "ai_review_llm_calls_total",
            self.llm_calls_total.value,
            "Total LLM calls",
        )
        add_metric(
            "ai_review_llm_tokens_total",
            self.llm_tokens_total.value,
            "Total LLM tokens",
        )
        add_metric(
            "ai_review_comments_posted_total",
            self.comments_posted.value,
            "Comments posted",
        )
        return "\n".join(lines)
# Global instance, lazily created on first access.
_metrics: MetricsCollector | None = None
def get_metrics() -> MetricsCollector:
    """Get the global metrics collector instance."""
    global _metrics
    if _metrics is None:
        _metrics = MetricsCollector()
    return _metrics

350
tools/ai-review/main.py Normal file
View File

@@ -0,0 +1,350 @@
#!/usr/bin/env python3
"""AI Code Review Agent - Main Entry Point
This is the main CLI for running AI code review agents.
Can be invoked directly or through CI/CD workflows.
"""
import argparse
import json
import logging
import os
import sys
import yaml
# Add the package to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from agents.issue_agent import IssueAgent
from agents.pr_agent import PRAgent
from agents.codebase_agent import CodebaseAgent
from agents.chat_agent import ChatAgent
from dispatcher import Dispatcher, get_dispatcher
def setup_logging(verbose: bool = False):
    """Configure root logging: DEBUG when verbose, INFO otherwise."""
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
def load_config(config_path: str | None = None) -> dict:
    """Load YAML configuration from ``config_path`` or the bundled config.yml.

    An empty YAML file parses to None, so the result is coerced to {} to
    honor the declared dict return type (callers use config.get(...)).

    Args:
        config_path: Explicit config file path (optional).
    Returns:
        Parsed configuration dict, or {} when no config file exists.
    """
    if config_path and os.path.exists(config_path):
        with open(config_path) as f:
            return yaml.safe_load(f) or {}
    default_path = os.path.join(os.path.dirname(__file__), "config.yml")
    if os.path.exists(default_path):
        with open(default_path) as f:
            return yaml.safe_load(f) or {}
    return {}
def run_pr_review(args, config: dict):
    """Run the PR review agent for args.repo / args.pr_number.

    Prints a status line and exits the process with code 1 on failure.
    """
    from agents.base_agent import AgentContext

    agent = PRAgent(config=config)
    # Build context from environment or arguments.
    owner, repo = args.repo.split("/")
    pr_number = args.pr_number
    event_payload = {
        "action": "opened",
        "pull_request": {
            "number": pr_number,
            "title": args.title or f"PR #{pr_number}",
        },
    }
    ctx = AgentContext(
        owner=owner,
        repo=repo,
        event_type="pull_request",
        event_data=event_payload,
        config=config,
    )
    outcome = agent.run(ctx)
    if not outcome.success:
        print(f"❌ PR Review Failed: {outcome.message}")
        if outcome.error:
            print(f" Error: {outcome.error}")
        sys.exit(1)
    print(f"✅ PR Review Complete: {outcome.message}")
    print(f" Actions: {', '.join(outcome.actions_taken)}")
def run_issue_triage(args, config: dict):
    """Run the issue triage agent against a single issue."""
    from agents.base_agent import AgentContext
    from clients.gitea_client import GiteaClient

    agent = IssueAgent(config=config)
    owner, repo = args.repo.split("/")
    number = args.issue_number

    # The CLI only receives the issue number; fetch the full issue from the
    # Gitea API so the agent sees the complete body.
    try:
        issue = GiteaClient().get_issue(owner, repo, number)
    except Exception as e:
        print(f"❌ Failed to fetch issue: {e}")
        sys.exit(1)

    context = AgentContext(
        owner=owner,
        repo=repo,
        event_type="issues",
        event_data={"action": "opened", "issue": issue},
        config=config,
    )

    result = agent.run(context)
    if not result.success:
        print(f"❌ Issue Triage Failed: {result.message}")
        if result.error:
            print(f" Error: {result.error}")
        sys.exit(1)
    print(f"✅ Issue Triage Complete: {result.message}")
    print(f" Actions: {', '.join(result.actions_taken)}")
def run_issue_comment(args, config: dict):
    """Handle an @ai-bot command found in an issue comment."""
    from agents.base_agent import AgentContext

    agent = IssueAgent(config=config)
    owner, repo = args.repo.split("/")
    number = args.issue_number

    # Fetch the actual issue so the agent has full context, not just the
    # comment text passed on the command line.
    from clients.gitea_client import GiteaClient

    try:
        issue = GiteaClient().get_issue(owner, repo, number)
    except Exception as e:
        print(f"❌ Failed to fetch issue: {e}")
        sys.exit(1)

    context = AgentContext(
        owner=owner,
        repo=repo,
        event_type="issue_comment",
        event_data={
            "action": "created",
            "issue": issue,
            "comment": {"body": args.comment_body},
        },
        config=config,
    )

    result = agent.run(context)
    if not result.success:
        print(f"❌ Comment Response Failed: {result.message}")
        if result.error:
            print(f" Error: {result.error}")
        sys.exit(1)
    print(f"✅ Comment Response Complete: {result.message}")
    print(f" Actions: {', '.join(result.actions_taken)}")
def run_codebase_analysis(args, config: dict):
    """Run the whole-repository codebase analysis agent."""
    from agents.base_agent import AgentContext

    agent = CodebaseAgent(config=config)
    owner, repo = args.repo.split("/")

    result = agent.run(
        AgentContext(
            owner=owner,
            repo=repo,
            event_type="workflow_dispatch",
            event_data={},
            config=config,
        )
    )
    if not result.success:
        print(f"❌ Codebase Analysis Failed: {result.message}")
        if result.error:
            print(f" Error: {result.error}")
        sys.exit(1)
    print(f"✅ Codebase Analysis Complete: {result.message}")
    print(f" Health Score: {result.data.get('health_score', 'N/A')}")
    print(f" Actions: {', '.join(result.actions_taken)}")
def run_chat(args, config: dict):
    """Interactively chat with the Bartender bot from the CLI."""
    from agents.base_agent import AgentContext
    from clients.gitea_client import GiteaClient

    agent = ChatAgent(config=config)
    owner, repo = args.repo.split("/")

    event_data = {"message": args.message}
    # Optionally enrich the chat with issue context; a fetch failure is
    # non-fatal — the chat still runs without the issue attached.
    if args.issue_number:
        try:
            issue = GiteaClient().get_issue(owner, repo, args.issue_number)
        except Exception as e:
            print(f"Warning: Could not fetch issue #{args.issue_number}: {e}")
        else:
            event_data["issue"] = issue
            event_data["issue_number"] = args.issue_number

    result = agent.run(
        AgentContext(
            owner=owner,
            repo=repo,
            event_type="chat",
            event_data=event_data,
            config=config,
        )
    )
    if not result.success:
        print(f"❌ Chat Failed: {result.message}")
        if result.error:
            print(f" Error: {result.error}")
        sys.exit(1)
    print("\n🍸 Bartender says:\n")
    print(result.data.get("response", ""))
    print()
    if result.data.get("tools_used"):
        print(f" [Tools used: {', '.join(result.data['tools_used'])}]")
def run_webhook_dispatch(args, config: dict):
    """Dispatch a raw webhook event to every registered agent."""
    dispatcher = get_dispatcher()

    # All agents get a chance to handle the event; the dispatcher filters
    # by event type internally.
    for agent in (
        IssueAgent(config=config),
        PRAgent(config=config),
        CodebaseAgent(config=config),
        ChatAgent(config=config),
    ):
        dispatcher.register_agent(agent)

    payload = json.loads(args.event_data)
    owner, repo = args.repo.split("/")

    result = dispatcher.dispatch(
        event_type=args.event_type,
        event_data=payload,
        owner=owner,
        repo=repo,
    )

    print(f"Dispatched event: {result.event_type}")
    print(f"Agents run: {result.agents_run}")
    for name, agent_result in zip(result.agents_run, result.results):
        status = "" if agent_result.success else ""
        print(f" {status} {name}: {agent_result.message}")
    if result.errors:
        sys.exit(1)
def main():
    """Main entry point: parse CLI arguments and dispatch to a handler."""
    parser = argparse.ArgumentParser(
        description="AI Code Review Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("-c", "--config", help="Path to config file")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    pr = subparsers.add_parser("pr", help="Review a pull request")
    pr.add_argument("repo", help="Repository (owner/repo)")
    pr.add_argument("pr_number", type=int, help="PR number")
    pr.add_argument("--title", help="PR title (optional)")

    issue = subparsers.add_parser("issue", help="Triage an issue")
    issue.add_argument("repo", help="Repository (owner/repo)")
    issue.add_argument("issue_number", type=int, help="Issue number")
    issue.add_argument("--title", help="Issue title")
    issue.add_argument("--body", help="Issue body")

    comment = subparsers.add_parser("comment", help="Respond to @ai-bot command")
    comment.add_argument("repo", help="Repository (owner/repo)")
    comment.add_argument("issue_number", type=int, help="Issue number")
    comment.add_argument("comment_body", help="Comment body with @ai-bot command")

    codebase = subparsers.add_parser("codebase", help="Analyze codebase")
    codebase.add_argument("repo", help="Repository (owner/repo)")

    chat = subparsers.add_parser("chat", help="Chat with Bartender bot")
    chat.add_argument("repo", help="Repository (owner/repo)")
    chat.add_argument("message", help="Message to send to Bartender")
    chat.add_argument(
        "--issue", dest="issue_number", type=int,
        help="Optional issue number to post response to"
    )

    webhook = subparsers.add_parser("dispatch", help="Dispatch webhook event")
    webhook.add_argument("repo", help="Repository (owner/repo)")
    webhook.add_argument("event_type", help="Event type")
    webhook.add_argument("event_data", help="Event data (JSON)")

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        sys.exit(1)

    setup_logging(args.verbose)
    config = load_config(args.config)

    # Dispatch table instead of an if/elif chain; keys mirror the
    # sub-command names registered above.
    handlers = {
        "pr": run_pr_review,
        "issue": run_issue_triage,
        "comment": run_issue_comment,
        "codebase": run_codebase_analysis,
        "chat": run_chat,
        "dispatch": run_webhook_dispatch,
    }
    handlers[args.command](args, config)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,64 @@
You are an experienced senior software engineer with deep expertise in:
- Secure coding and security analysis
- System design and architecture
- Performance optimization
- Maintainable, readable code
- Test coverage and documentation
- CI/CD pipeline best practices
You are reviewing the following **pull request diff**. Your goal is to provide a **comprehensive, actionable, and clear review** as a structured JSON response.
---
## Requirements
Review the diff and identify issues in these categories:
- **Security**: Vulnerabilities, hardcoded secrets, injection risks
- **Correctness**: Logic errors, edge cases, bugs
- **Performance**: Inefficiencies, N+1 queries, memory issues
- **Maintainability**: Code complexity, duplication, unclear logic
- **Readability**: Naming, formatting, documentation
- **Testing**: Missing tests, untested paths
- **Architecture**: Design issues, coupling, separation of concerns
---
## Output Format
Return a JSON object with this structure:
```json
{{
"summary": "Brief overall assessment of the PR",
"overall_severity": "HIGH" | "MEDIUM" | "LOW",
"approval": true | false,
"issues": [
{{
"file": "path/to/file.py",
"line": 42,
"severity": "HIGH" | "MEDIUM" | "LOW",
"category": "Security" | "Correctness" | "Performance" | "Maintainability" | "Readability" | "Testing" | "Architecture",
"description": "Clear description of the issue",
"recommendation": "Specific fix or improvement",
"code_snippet": "relevant code if applicable"
}}
]
}}
```
---
## Rules
1. **Be specific**: Include file paths and line numbers when possible
2. **Be actionable**: Every issue must have a clear recommendation
3. **Prioritize**: HIGH severity for security/data-loss issues, MEDIUM for bugs, LOW for style
4. **Be honest**: If uncertain, note it in the description
5. **Stay focused**: Only report real issues, not style preferences
6. Set `approval: false` if any HIGH severity issues exist
7. Output ONLY valid JSON, no additional text
---
## Diff to Review

View File

@@ -0,0 +1,63 @@
You are a helpful AI assistant responding to a GitHub/Gitea issue. Your goal is to provide a helpful, professional response that assists the issue author.
## Context
**Issue Type:** {issue_type}
**Priority:** {priority}
**Title:** {title}
**Body:**
{body}
## Triage Analysis
{triage_analysis}
## Your Task
Generate a helpful comment response based on the issue type:
### For Bug Reports:
1. Acknowledge the issue
2. If missing info, politely request specific details needed
3. Suggest any immediate workarounds if obvious
4. Indicate next steps (investigation, need reproduction, etc.)
### For Feature Requests:
1. Thank the user for the suggestion
2. Summarize understanding of the request
3. Ask clarifying questions if needed
4. Note any related existing features
### For Questions:
1. Directly answer the question if possible
2. Link to relevant documentation
3. Provide code examples if helpful
4. Suggest alternatives if applicable
### For Documentation Issues:
1. Acknowledge the gap/issue
2. Clarify the correct information if known
3. Note what documentation updates are needed
## Response Guidelines
1. Be concise but thorough
2. Use a friendly, professional tone
3. Format with Markdown appropriately
4. Include code blocks where relevant
5. DO NOT promise timelines or fixes
6. DO NOT make up information - say "I'm not certain" if unsure
7. Always end with an offer to help further
## Output Format
Return a JSON object:
```json
{{
"comment": "Your markdown-formatted response here",
"needs_human_review": true/false,
"suggested_assignee": null or "username",
"follow_up_questions": ["question1", "question2"]
}}
```
Generate your response:

View File

@@ -0,0 +1,69 @@
You are an expert issue triage specialist. Analyze the following GitHub/Gitea issue and provide a structured classification.
## Your Task
Analyze the issue and return a JSON object with the following structure:
```json
{{
"type": "bug" | "feature" | "question" | "documentation" | "support" | "enhancement",
"priority": "high" | "medium" | "low",
"confidence": 0.0-1.0,
"summary": "Brief one-line summary of the issue",
"suggested_labels": ["label1", "label2"],
"is_duplicate": false,
"duplicate_of": null,
"needs_more_info": false,
"missing_info": [],
"components": ["component1", "component2"],
"reasoning": "Brief explanation of your classification"
}}
```
## Classification Guidelines
### Type Classification
- **bug**: Something is broken, not working as expected, error messages, crashes
- **feature**: Request for new functionality that doesn't exist
- **enhancement**: Improvement to existing functionality
- **question**: User asking how to do something, seeking clarification
- **documentation**: Issues with docs, missing docs, unclear docs
- **support**: General help request, troubleshooting
### Priority Classification
- **high**: Security issues, data loss, complete feature broken, blocking issues
- **medium**: Significant functionality impacted, workaround exists
- **low**: Minor issues, cosmetic, nice-to-have improvements
### Missing Information Indicators
Look for missing:
- Steps to reproduce (for bugs)
- Expected vs actual behavior
- Environment details (OS, version, etc.)
- Error messages or logs
- Screenshots (for UI issues)
## Important Rules
1. Be conservative with "high" priority - use it sparingly
2. If uncertain between two types, choose the more actionable one
3. Always provide reasoning for your classification
4. Set confidence lower if the issue is vague or ambiguous
5. Output ONLY valid JSON, no additional text
---
## Issue to Analyze
**Title:** {title}
**Body:**
{body}
**Author:** {author}
**Labels (if any):** {existing_labels}
---
Provide your JSON classification:

View File

@@ -0,0 +1,17 @@
# AI Code Review - Python Dependencies
# Core dependencies
requests>=2.31.0
pyyaml>=6.0
# Optional: For webhook server mode
# flask>=3.0.0
# Optional: For async operations
# aiohttp>=3.9.0
# Development dependencies (install with pip install -e .[dev])
# pytest>=7.4.0
# pytest-mock>=3.12.0
# black>=24.0.0
# mypy>=1.8.0

174
tools/ai-review/review.py Normal file
View File

@@ -0,0 +1,174 @@
import json
import os
import subprocess
import sys
import requests
import yaml
from comment import to_markdown
ROOT = os.path.dirname(__file__)

# Load the review configuration once at import time. A context manager is
# used so the file handle is closed promptly (the original bare
# `yaml.safe_load(open(...))` leaked the handle).
with open(os.path.join(ROOT, "config.yml")) as _cfg_file:
    CFG = yaml.safe_load(_cfg_file)

# Marker to identify the AI comment
AI_MARKER = "<!-- AI_CODE_REVIEW -->"

# Disclaimer text to prepend
AI_DISCLAIMER = (
    "**Note:** This review was generated by an AI assistant. "
    "While it aims to be accurate and helpful, it may contain mistakes "
    "or miss important issues. Please verify all findings before taking action."
)
# -------------------------------
# Helper functions
# -------------------------------
def get_diff() -> str:
    """Return the git diff against origin/main, truncated per config."""
    diff = subprocess.check_output(["git", "diff", "origin/main...HEAD"], text=True)
    limit = CFG["review"]["max_diff_lines"]
    lines = diff.splitlines()
    # Cap the diff size so the prompt stays within the model's context.
    if len(lines) <= limit:
        return diff
    return "\n".join(lines[:limit])
def build_prompt(diff: str) -> str:
    """Build the LLM prompt by appending the diff to the base prompt.

    Args:
        diff: Git diff text to review.

    Returns:
        The full prompt string sent to the LLM.
    """
    # Use a context manager so the file handle is closed; the original
    # `open(...).read()` leaked it.
    with open(f"{ROOT}/prompts/base.md") as f:
        base = f.read()
    return f"{base}\n\nDIFF:\n{diff}"
def call_llm(prompt: str) -> str:
    """Call the configured LLM provider and return its text completion.

    Provider and model names come from CFG; credentials and hosts come
    from environment variables. Each branch now calls
    ``raise_for_status()`` before parsing — previously a failed HTTP call
    fell through to ``r.json()[...]`` and surfaced as a confusing
    KeyError instead of an HTTP error.

    Args:
        prompt: Full prompt to send to the model.

    Returns:
        The model's text response.

    Raises:
        RuntimeError: If the configured provider is unknown.
        requests.HTTPError: If the provider returns a non-2xx status.
    """
    provider = CFG["provider"]
    if provider == "openai":
        r = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openai"],
                "temperature": CFG["temperature"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]
    if provider == "openrouter":
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openrouter"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]
    if provider == "ollama":
        r = requests.post(
            f"{os.environ['OLLAMA_HOST']}/api/generate",
            json={
                "model": CFG["model"]["ollama"],
                "prompt": prompt,
                "stream": False,
            },
            timeout=120,
        )
        r.raise_for_status()
        return r.json()["response"]
    raise RuntimeError(f"Unknown provider: {provider}")
# -------------------------------
# Gitea PR comment functions
# -------------------------------
def find_existing_comment() -> int | None:
    """Find the existing AI review comment on the PR, if any.

    Returns:
        The comment id containing AI_MARKER, or None if no AI comment
        exists yet.

    Raises:
        requests.HTTPError: If the Gitea API call fails — previously a
        failed call returned an error dict and the loop below crashed
        with a confusing TypeError/KeyError.
    """
    url = (
        f"{os.environ['AI_REVIEW_API_URL']}/repos/"
        f"{os.environ['AI_REVIEW_REPO']}/issues/"
        f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
    )
    r = requests.get(
        url,
        headers={"Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}"},
        timeout=15,
    )
    r.raise_for_status()
    for c in r.json():
        if AI_MARKER in c["body"]:
            return c["id"]
    return None
def upsert_pr_comment(markdown: str):
    """Create the AI review comment on the PR, or update the existing one.

    Args:
        markdown: Full comment body (disclaimer + marker + review).

    Raises:
        RuntimeError: If the Gitea API rejects the create/update call.
    """
    comment_id = find_existing_comment()
    headers = {
        "Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}",
        "Content-Type": "application/json",
    }
    # Both calls now carry a timeout — without one, a hung Gitea instance
    # would block the CI job indefinitely.
    if comment_id:
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/comments/{comment_id}"
        )
        r = requests.patch(url, headers=headers, json={"body": markdown}, timeout=15)
    else:
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/"
            f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
        )
        r = requests.post(url, headers=headers, json={"body": markdown}, timeout=15)
    if r.status_code not in (200, 201):
        raise RuntimeError(f"Failed to upsert PR comment: {r.text}")
# -------------------------------
# Main workflow
# -------------------------------
def _extract_json(raw: str) -> dict:
    """Parse the LLM response as JSON, tolerating markdown code fences.

    LLMs frequently wrap JSON output in ``` / ```json fences despite
    being told not to; the original bare ``json.loads(raw)`` crashed on
    such responses.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json) and a trailing fence.
        text = text.split("\n", 1)[1] if "\n" in text else ""
        stripped = text.rstrip()
        if stripped.endswith("```"):
            text = stripped[:-3]
    return json.loads(text)


def main():
    """Run the review pipeline: diff -> LLM -> parse -> upsert comment."""
    diff = get_diff()
    if not diff.strip():
        # Nothing changed relative to origin/main; nothing to review.
        sys.exit(0)
    raw = call_llm(build_prompt(diff))
    result = _extract_json(raw)
    # Convert JSON review to Markdown
    markdown = to_markdown(result)
    # Prepend AI disclaimer and marker
    full_comment = AI_DISCLAIMER + "\n\n" + AI_MARKER + "\n" + markdown
    upsert_pr_comment(full_comment)
    # Fail CI if the review hit the configured severity threshold and the
    # AI did not explicitly approve the change.
    if result["overall_severity"] == CFG["review"][
        "fail_on_severity"
    ] and not result.get("approval", False):
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,9 @@
"""Security Scanning Package
This package contains security scanning utilities for
detecting vulnerabilities in code.
"""
from security.security_scanner import SecurityScanner
__all__ = ["SecurityScanner"]

View File

@@ -0,0 +1,335 @@
"""Security Scanner
Pattern-based security vulnerability detection for code analysis.
Covers OWASP Top 10 and common security anti-patterns.
"""
import re
from dataclasses import dataclass
from typing import Iterator
import yaml
import os
@dataclass
class SecurityFinding:
    """A single security finding.

    One instance is yielded per (rule, line) match by SecurityScanner.
    """

    rule_id: str        # e.g. "SEC001"
    rule_name: str      # human-readable rule title
    severity: str  # HIGH, MEDIUM, LOW
    category: str  # OWASP category
    file: str           # file the match was found in (as reported to scan)
    line: int           # 1-based line number of the match
    code_snippet: str   # matched line, stripped and truncated to 120 chars
    description: str    # what the rule detects
    recommendation: str  # suggested remediation
    cwe: str | None = None  # CWE reference
class SecurityScanner:
    """Security scanner using pattern matching and rules.

    Each rule is a regular expression applied line-by-line to file content
    or to the added lines of a diff; matches are reported as
    SecurityFinding objects. Built-in rules loosely follow the OWASP
    Top 10 (2021) categories; extra rules can be layered on via a YAML
    rules file passed to __init__.
    """

    # Default rules covering OWASP Top 10
    DEFAULT_RULES = [
        # A01:2021 Broken Access Control
        {
            "id": "SEC001",
            "name": "Hardcoded Credentials",
            "pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
            "severity": "HIGH",
            "category": "A01:2021 Broken Access Control",
            "cwe": "CWE-798",
            "description": "Hardcoded credentials detected in source code",
            "recommendation": "Use environment variables or a secrets management system",
        },
        {
            "id": "SEC002",
            "name": "Exposed Private Key",
            "pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
            "severity": "HIGH",
            "category": "A01:2021 Broken Access Control",
            "cwe": "CWE-321",
            "description": "Private key embedded in source code",
            "recommendation": "Never commit private keys. Use secure key management",
        },
        # A02:2021 Cryptographic Failures
        {
            "id": "SEC003",
            "name": "Weak Crypto Algorithm",
            "pattern": r"(?i)\b(md5|sha1)\s*\(",
            "severity": "MEDIUM",
            "category": "A02:2021 Cryptographic Failures",
            "cwe": "CWE-328",
            "description": "Use of weak cryptographic hash function",
            "recommendation": "Use SHA-256 or stronger hashing algorithms",
        },
        {
            "id": "SEC004",
            "name": "Insecure Random",
            "pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
            "severity": "MEDIUM",
            "category": "A02:2021 Cryptographic Failures",
            "cwe": "CWE-330",
            "description": "Use of non-cryptographic random number generator for security purposes",
            "recommendation": "Use secrets module or os.urandom() for security-critical randomness",
        },
        # A03:2021 Injection
        {
            "id": "SEC005",
            "name": "SQL Injection",
            "pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-89",
            "description": "Potential SQL injection through string formatting",
            "recommendation": "Use parameterized queries with placeholders",
        },
        {
            "id": "SEC006",
            "name": "Command Injection",
            "pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-78",
            "description": "Potential command injection through string concatenation",
            "recommendation": "Use subprocess with shell=False and pass arguments as list",
        },
        {
            "id": "SEC007",
            "name": "Eval Usage",
            "pattern": r"\beval\s*\(",
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-95",
            "description": "Use of eval() can lead to code injection",
            "recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
        },
        {
            "id": "SEC008",
            "name": "XSS Risk",
            "pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
            "severity": "MEDIUM",
            "category": "A03:2021 Injection",
            "cwe": "CWE-79",
            "description": "Direct DOM manipulation may allow XSS",
            "recommendation": "Use textContent or proper sanitization libraries",
        },
        # A04:2021 Insecure Design
        {
            "id": "SEC009",
            "name": "Debug Mode",
            "pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
            "severity": "MEDIUM",
            "category": "A04:2021 Insecure Design",
            "cwe": "CWE-489",
            "description": "Debug mode enabled in code",
            "recommendation": "Ensure debug mode is disabled in production",
        },
        # A05:2021 Security Misconfiguration
        {
            "id": "SEC010",
            "name": "CORS Wildcard",
            "pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
            "severity": "MEDIUM",
            "category": "A05:2021 Security Misconfiguration",
            "cwe": "CWE-942",
            "description": "CORS configured to allow all origins",
            "recommendation": "Specify allowed origins explicitly",
        },
        {
            "id": "SEC011",
            "name": "SSL Verification Disabled",
            "pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
            "severity": "HIGH",
            "category": "A05:2021 Security Misconfiguration",
            "cwe": "CWE-295",
            "description": "SSL certificate verification disabled",
            "recommendation": "Always verify SSL certificates in production",
        },
        # A07:2021 Identification and Authentication Failures
        {
            "id": "SEC012",
            "name": "Hardcoded JWT Secret",
            "pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
            "severity": "HIGH",
            "category": "A07:2021 Authentication Failures",
            "cwe": "CWE-798",
            "description": "JWT secret hardcoded in source code",
            "recommendation": "Use environment variables for JWT secrets",
        },
        # A08:2021 Software and Data Integrity Failures
        {
            "id": "SEC013",
            "name": "Pickle Usage",
            "pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
            "severity": "MEDIUM",
            "category": "A08:2021 Integrity Failures",
            "cwe": "CWE-502",
            "description": "Pickle can execute arbitrary code during deserialization",
            "recommendation": "Use JSON or other safe serialization formats",
        },
        # A09:2021 Security Logging and Monitoring Failures
        {
            "id": "SEC014",
            "name": "Sensitive Data Logging",
            "pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
            "severity": "MEDIUM",
            "category": "A09:2021 Logging Failures",
            "cwe": "CWE-532",
            "description": "Potentially logging sensitive information",
            "recommendation": "Never log passwords, tokens, or secrets",
        },
        # A10:2021 Server-Side Request Forgery
        {
            "id": "SEC015",
            "name": "SSRF Risk",
            "pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
            "severity": "MEDIUM",
            "category": "A10:2021 SSRF",
            "cwe": "CWE-918",
            "description": "URL constructed from user input may allow SSRF",
            "recommendation": "Validate and sanitize URLs, use allowlists",
        },
        # Additional common issues
        {
            "id": "SEC016",
            "name": "Hardcoded IP Address",
            "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
            "severity": "LOW",
            "category": "Configuration",
            "cwe": "CWE-547",
            "description": "Hardcoded IP address found",
            "recommendation": "Use configuration files or environment variables for IP addresses",
        },
        {
            "id": "SEC017",
            "name": "TODO/FIXME Security",
            "pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
            "severity": "MEDIUM",
            "category": "Code Quality",
            "cwe": None,
            "description": "Security-related TODO/FIXME comment found",
            "recommendation": "Address security-related TODO items before deployment",
        },
    ]

    def __init__(self, rules_file: str | None = None):
        """Initialize scanner with rules.

        Args:
            rules_file: Optional path to custom rules YAML file. The file
                is expected to contain a top-level ``rules`` list whose
                entries use the same keys as DEFAULT_RULES. Loading is
                best-effort: any failure silently falls back to defaults.
        """
        # Copy so extending with custom rules does not mutate the class
        # attribute shared by other instances.
        self.rules = self.DEFAULT_RULES.copy()
        if rules_file and os.path.exists(rules_file):
            try:
                with open(rules_file) as f:
                    custom_rules = yaml.safe_load(f)
                    if custom_rules and "rules" in custom_rules:
                        self.rules.extend(custom_rules["rules"])
            except Exception:
                pass  # Use defaults if custom rules fail to load
        # Compile patterns for efficiency
        self._compiled_rules = []
        for rule in self.rules:
            try:
                self._compiled_rules.append(
                    {**rule, "_pattern": re.compile(rule["pattern"])}
                )
            except re.error:
                pass  # Skip invalid patterns

    def scan_content(
        self,
        content: str,
        filename: str,
    ) -> Iterator[SecurityFinding]:
        """Scan content for security issues.

        Every compiled rule is tested against every line, so a single
        line can yield multiple findings (one per matching rule).

        Args:
            content: File content to scan.
            filename: Name of the file (for reporting).

        Yields:
            SecurityFinding for each detected issue.
        """
        lines = content.splitlines()
        for line_num, line in enumerate(lines, 1):
            for rule in self._compiled_rules:
                if rule["_pattern"].search(line):
                    yield SecurityFinding(
                        rule_id=rule["id"],
                        rule_name=rule["name"],
                        severity=rule["severity"],
                        category=rule["category"],
                        file=filename,
                        line=line_num,
                        # Trim to keep report output bounded.
                        code_snippet=line.strip()[:120],
                        description=rule["description"],
                        recommendation=rule["recommendation"],
                        cwe=rule.get("cwe"),
                    )

    def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
        """Scan a git diff for security issues.

        Only scans added lines (lines starting with +). File names and
        line numbers are tracked from the ``diff --git`` and ``@@`` hunk
        headers so findings point at the post-change file.

        NOTE(review): lines that are neither added nor removed (context,
        and markers like "\\ No newline at end of file") also advance the
        line counter; verify against real diffs if exact line numbers are
        critical.

        Args:
            diff: Git diff content.

        Yields:
            SecurityFinding for each detected issue.
        """
        current_file = None
        current_line = 0
        for line in diff.splitlines():
            # Track current file
            if line.startswith("diff --git"):
                match = re.search(r"b/(.+)$", line)
                if match:
                    current_file = match.group(1)
                current_line = 0
            # Track line numbers
            elif line.startswith("@@"):
                match = re.search(r"\+(\d+)", line)
                if match:
                    current_line = int(match.group(1)) - 1
            # Check added lines
            elif line.startswith("+") and not line.startswith("+++"):
                current_line += 1
                for finding in self.scan_content(line[1:], current_file or "unknown"):
                    # scan_content always reports line 1 for a single line;
                    # overwrite with the position within the diff.
                    finding.line = current_line
                    yield finding
            elif not line.startswith("-"):
                current_line += 1

    def get_summary(self, findings: list[SecurityFinding]) -> dict:
        """Get summary statistics for findings.

        Args:
            findings: List of security findings.

        Returns:
            Summary dictionary with counts by severity and category.
        """
        summary = {
            "total": len(findings),
            "by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0},
            "by_category": {},
        }
        for finding in findings:
            summary["by_severity"][finding.severity] = (
                summary["by_severity"].get(finding.severity, 0) + 1
            )
            summary["by_category"][finding.category] = (
                summary["by_category"].get(finding.category, 0) + 1
            )
        return summary