"""Codebase Quality Agent

AI agent for analyzing overall codebase health, architecture,
technical debt, and documentation coverage.
"""
|
|
|
|
import base64
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
|
|
from agents.base_agent import AgentContext, AgentResult, BaseAgent
|
|
|
|
|
|
@dataclass
class CodebaseMetrics:
    """Metrics collected from codebase analysis."""

    # Number of source files discovered by the collector (capped).
    total_files: int = 0
    # Total line count across the files whose content was downloaded.
    total_lines: int = 0
    # Language display name -> number of files in that language.
    languages: dict[str, int] = field(default_factory=dict)
    # Lines containing "TODO" (case-insensitive) across analyzed files.
    todo_count: int = 0
    # Lines containing "FIXME" (case-insensitive).
    fixme_count: int = 0
    # Lines containing "DEPRECATED" (case-insensitive).
    deprecated_count: int = 0
    # Python files that define functions but contain no triple-quoted string.
    missing_docstrings: int = 0
|
|
@dataclass
class CodebaseReport:
    """Complete codebase analysis report."""

    # Short natural-language assessment (2-3 sentences).
    summary: str
    # Overall health score on a 0-100 scale.
    health_score: float  # 0-100
    # Raw metrics the report was derived from.
    metrics: CodebaseMetrics
    # Issue dicts with severity/category/description/recommendation keys
    # as produced by the LLM (see the prompt schema in CodebaseAgent).
    issues: list[dict]
    # Top actionable recommendations (rendered as a numbered list).
    recommendations: list[str]
    # Observations about code structure and patterns.
    architecture_notes: list[str]
|
|
|
class CodebaseAgent(BaseAgent):
    """Agent for codebase quality analysis.

    Pipeline: enumerate source files via the Gitea contents API, compute
    simple health metrics (line counts, TODO/FIXME/DEPRECATED markers,
    language breakdown), ask the LLM for a structured quality assessment,
    and publish the result as a single, continuously-updated report issue.
    """

    # HTML marker embedded in report bodies so a previously created report
    # issue can be recognized and updated instead of duplicated.
    CODEBASE_AI_MARKER = "<!-- AI_CODEBASE_REVIEW -->"

    # File extensions considered source code, mapped to a display name.
    CODE_EXTENSIONS = {
        ".py": "Python",
        ".js": "JavaScript",
        ".ts": "TypeScript",
        ".go": "Go",
        ".rs": "Rust",
        ".java": "Java",
        ".rb": "Ruby",
        ".php": "PHP",
        ".c": "C",
        ".cpp": "C++",
        ".h": "C/C++ Header",
        ".cs": "C#",
        ".swift": "Swift",
        ".kt": "Kotlin",
    }

    # Path fragments marking generated/vendored content to skip.
    IGNORE_PATTERNS = [
        "node_modules/",
        "vendor/",
        ".git/",
        "__pycache__/",
        ".venv/",
        "dist/",
        "build/",
        ".min.js",
        ".min.css",
    ]

    # Hard caps that keep API usage bounded.
    _MAX_FILES = 100          # files collected during tree traversal
    _MAX_METRIC_FILES = 50    # files whose content is downloaded for metrics

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Return True if this agent should run for the given event.

        Triggers on workflow_dispatch/schedule events, or on an issue
        comment containing "<mention_prefix> codebase".
        """
        agent_config = self.config.get("agents", {}).get("codebase", {})
        if not agent_config.get("enabled", True):
            return False

        # Manual trigger via workflow_dispatch or a scheduled run.
        if event_type in ("workflow_dispatch", "schedule"):
            return True

        # Explicit "<prefix> codebase" command in an issue comment.
        if event_type == "issue_comment":
            comment_body = event_data.get("comment", {}).get("body", "")
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            if f"{mention_prefix} codebase" in comment_body.lower():
                return True

        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Execute codebase analysis and publish the report issue."""
        self.logger.info(
            "Starting codebase analysis for %s/%s", context.owner, context.repo
        )

        actions_taken = []

        # Step 1: Collect file list from repository (bounded traversal).
        files = self._collect_files(context.owner, context.repo)
        self.logger.info("Found %d files to analyze", len(files))

        # Step 2: Analyze raw metrics (lines, markers, languages).
        metrics = self._analyze_metrics(context.owner, context.repo, files)
        actions_taken.append(f"Analyzed {metrics.total_files} files")

        # Step 3: Run AI analysis on key files.
        report = self._run_ai_analysis(context, files, metrics)
        actions_taken.append("Generated AI analysis report")

        # Step 4: Create or update the report issue.
        issue_number = self._create_report_issue(context, report)
        actions_taken.append(f"Created/updated report issue #{issue_number}")

        return AgentResult(
            success=True,
            message=f"Codebase analysis complete - Health Score: {report.health_score:.0f}/100",
            data={
                "health_score": report.health_score,
                "total_files": metrics.total_files,
                "issues_found": len(report.issues),
            },
            actions_taken=actions_taken,
        )

    def _collect_files(self, owner: str, repo: str) -> list[dict]:
        """Collect analyzable source files from the repository.

        Traversal stops as soon as ``_MAX_FILES`` entries are collected
        (the previous implementation walked the whole tree and truncated
        afterwards, which defeated the API-exhaustion cap on large repos).
        """
        files: list[dict] = []

        def traverse(path: str = "") -> None:
            # Early exit: cap already reached, stop descending.
            if len(files) >= self._MAX_FILES:
                return
            try:
                contents = self.gitea.get_file_contents(owner, repo, path or ".")
            except Exception as e:
                self.logger.warning("Failed to list %s: %s", path, e)
                return
            if not isinstance(contents, list):
                return
            for item in contents:
                if len(files) >= self._MAX_FILES:
                    return
                item_path = item.get("path", "")

                # Skip generated/vendored paths.
                if any(p in item_path for p in self.IGNORE_PATTERNS):
                    continue

                if item.get("type") == "file":
                    ext = os.path.splitext(item_path)[1]
                    if ext in self.CODE_EXTENSIONS:
                        files.append(item)
                elif item.get("type") == "dir":
                    traverse(item_path)

        traverse()
        return files[: self._MAX_FILES]

    def _analyze_metrics(
        self,
        owner: str,
        repo: str,
        files: list[dict],
    ) -> CodebaseMetrics:
        """Compute line counts, marker counts and language breakdown.

        Only the first ``_MAX_METRIC_FILES`` files have their content
        downloaded; the rest still count toward ``total_files``.
        """
        metrics = CodebaseMetrics()
        metrics.total_files = len(files)

        for file_info in files[: self._MAX_METRIC_FILES]:
            filepath = file_info.get("path", "")
            ext = os.path.splitext(filepath)[1]
            lang = self.CODE_EXTENSIONS.get(ext, "Unknown")

            metrics.languages[lang] = metrics.languages.get(lang, 0) + 1

            try:
                content_data = self.gitea.get_file_contents(owner, repo, filepath)
                # A directory path returns a list from the contents API;
                # only dicts with a "content" field are decodable.
                if not isinstance(content_data, dict) or not content_data.get(
                    "content"
                ):
                    continue
                content = base64.b64decode(content_data["content"]).decode(
                    "utf-8", errors="ignore"
                )
                lines = content.splitlines()
                metrics.total_lines += len(lines)

                # Count technical-debt markers (case-insensitive).
                for line in lines:
                    line_upper = line.upper()
                    if "TODO" in line_upper:
                        metrics.todo_count += 1
                    if "FIXME" in line_upper:
                        metrics.fixme_count += 1
                    if "DEPRECATED" in line_upper:
                        metrics.deprecated_count += 1

                # Crude docstring heuristic (Python only): a file defining
                # functions but containing no triple-quoted string at all.
                if ext == ".py":
                    if 'def ' in content and '"""' not in content:
                        metrics.missing_docstrings += 1

            except Exception as e:
                self.logger.debug("Could not analyze %s: %s", filepath, e)

        return metrics

    def _run_ai_analysis(
        self,
        context: AgentContext,
        files: list[dict],
        metrics: CodebaseMetrics,
    ) -> CodebaseReport:
        """Run AI analysis on the codebase.

        Falls back to a heuristic report (score derived from marker counts)
        when the LLM call fails or returns unparsable JSON.
        """
        # Prepare context for the AI: a file-structure sample and the
        # per-language file counts.
        file_list = "\n".join([f"- {f.get('path', '')}" for f in files[:30]])
        language_breakdown = "\n".join(
            [f"- {lang}: {count} files" for lang, count in metrics.languages.items()]
        )

        # Sample some key files (manifests, README, ...) for deeper analysis.
        key_files_content = self._get_key_files_content(
            context.owner, context.repo, files
        )

        prompt = f"""Analyze this codebase and provide a comprehensive quality assessment.

## Repository: {context.owner}/{context.repo}

## Metrics
- Total Files: {metrics.total_files}
- Total Lines: {metrics.total_lines}
- TODO Comments: {metrics.todo_count}
- FIXME Comments: {metrics.fixme_count}
- Deprecated Markers: {metrics.deprecated_count}

## Language Breakdown
{language_breakdown}

## File Structure (sample)
{file_list}

## Key Files Content
{key_files_content}

## Analysis Required

Provide your analysis as JSON with this structure:
```json
{{
  "summary": "Overall assessment in 2-3 sentences",
  "health_score": 0-100,
  "issues": [
    {{
      "severity": "HIGH|MEDIUM|LOW",
      "category": "Architecture|Code Quality|Security|Testing|Documentation",
      "description": "Issue description",
      "recommendation": "How to fix"
    }}
  ],
  "recommendations": ["Top 3-5 actionable recommendations"],
  "architecture_notes": ["Observations about code structure and patterns"]
}}
```

Be constructive and actionable. Focus on the most impactful improvements.
"""

        try:
            result = self.call_llm_json(prompt)
            return CodebaseReport(
                summary=result.get("summary", "Analysis complete"),
                health_score=float(result.get("health_score", 50)),
                metrics=metrics,
                issues=result.get("issues", []),
                recommendations=result.get("recommendations", []),
                architecture_notes=result.get("architecture_notes", []),
            )
        except Exception as e:
            self.logger.error("AI analysis failed: %s", e)
            # Heuristic fallback: start from a neutral score and penalize
            # heavy marker counts.
            health_score = 70
            if metrics.todo_count > 10:
                health_score -= 10
            if metrics.fixme_count > 5:
                health_score -= 10

            return CodebaseReport(
                summary=f"Basic analysis complete (AI unavailable: {e})",
                health_score=health_score,
                metrics=metrics,
                issues=[],
                recommendations=["Manual review recommended"],
                architecture_notes=[],
            )

    def _get_key_files_content(
        self,
        owner: str,
        repo: str,
        files: list[dict],
    ) -> str:
        """Get content of key project files (manifests/README) for AI analysis.

        Returns at most 5 files, each truncated to 2000 characters, rendered
        as fenced markdown sections.
        """
        key_file_names = [
            "README.md",
            "setup.py",
            "pyproject.toml",
            "package.json",
            "Cargo.toml",
            "go.mod",
            "Makefile",
            "Dockerfile",
        ]

        content_parts = []

        for file_info in files:
            filepath = file_info.get("path", "")
            filename = os.path.basename(filepath)

            if filename in key_file_names:
                try:
                    content_data = self.gitea.get_file_contents(owner, repo, filepath)
                    if content_data.get("content"):
                        content = base64.b64decode(content_data["content"]).decode(
                            "utf-8", errors="ignore"
                        )
                        # Truncate long files to keep the prompt bounded.
                        if len(content) > 2000:
                            content = content[:2000] + "\n... (truncated)"
                        content_parts.append(f"### {filepath}\n```\n{content}\n```")
                except Exception:
                    # Best-effort: a missing/unreadable key file is not fatal.
                    pass

        return "\n\n".join(content_parts[:5]) or "No key configuration files found."

    def _create_report_issue(
        self,
        context: AgentContext,
        report: CodebaseReport,
    ) -> int:
        """Create or update the report issue; return its number (0 on failure)."""
        body = self._generate_report_body(report)

        # Prefer updating an existing report issue, identified by the
        # hidden marker inside the body.
        try:
            issues = self.gitea.list_issues(
                context.owner, context.repo, state="open", labels=["ai-codebase-report"]
            )
            for issue in issues:
                if self.CODEBASE_AI_MARKER in issue.get("body", ""):
                    self.gitea.update_issue(
                        context.owner,
                        context.repo,
                        issue["number"],
                        body=body,
                    )
                    return issue["number"]
        except Exception as e:
            self.logger.warning("Failed to check for existing report: %s", e)

        # No existing report: create a new issue.
        try:
            # Resolve the label name to its ID if the repo defines it
            # (the create API expects label IDs).
            labels = []
            try:
                repo_labels = self.gitea.get_repo_labels(context.owner, context.repo)
                for label in repo_labels:
                    if label["name"] == "ai-codebase-report":
                        labels.append(label["id"])
                        break
            except Exception:
                pass

            issue = self.gitea.create_issue(
                context.owner,
                context.repo,
                title=f"AI Codebase Report - {context.repo}",
                body=body,
                labels=labels,
            )
            return issue["number"]
        except Exception as e:
            self.logger.error("Failed to create report issue: %s", e)
            return 0

    def _generate_report_body(self, report: CodebaseReport) -> str:
        """Generate the markdown body for the report issue.

        Fix: the health-score and per-issue severity emojis were computed
        but never rendered; they are now included in the section headers.
        """
        health_emoji = "🟢" if report.health_score >= 80 else ("🟡" if report.health_score >= 60 else "🔴")

        lines = [
            f"{self.AI_DISCLAIMER}",
            "",
            "# AI Codebase Quality Report",
            "",
            f"## Health Score: {health_emoji} {report.health_score:.0f}/100",
            "",
            report.summary,
            "",
            "---",
            "",
            "## Metrics",
            "",
            "| Metric | Value |",
            "|--------|-------|",
            f"| Total Files | {report.metrics.total_files} |",
            f"| Total Lines | {report.metrics.total_lines:,} |",
            f"| TODO Comments | {report.metrics.todo_count} |",
            f"| FIXME Comments | {report.metrics.fixme_count} |",
            f"| Deprecated | {report.metrics.deprecated_count} |",
            "",
        ]

        # Languages, most files first.
        if report.metrics.languages:
            lines.append("### Languages")
            lines.append("")
            for lang, count in sorted(
                report.metrics.languages.items(), key=lambda x: -x[1]
            ):
                lines.append(f"- **{lang}**: {count} files")
            lines.append("")

        # Issues (capped at 10).
        if report.issues:
            lines.append("## Issues Found")
            lines.append("")
            for issue in report.issues[:10]:
                severity = issue.get("severity", "MEDIUM")
                emoji = "🔴" if severity == "HIGH" else ("🟡" if severity == "MEDIUM" else "🟢")
                lines.append(f"### {emoji} [{severity}] {issue.get('category', 'General')}")
                lines.append("")
                lines.append(issue.get("description", ""))
                lines.append("")
                lines.append(f"**Recommendation:** {issue.get('recommendation', '')}")
                lines.append("")

        # Recommendations (capped at 5).
        if report.recommendations:
            lines.append("## Recommendations")
            lines.append("")
            for i, rec in enumerate(report.recommendations[:5], 1):
                lines.append(f"{i}. {rec}")
            lines.append("")

        # Architecture notes (capped at 5).
        if report.architecture_notes:
            lines.append("## Architecture Notes")
            lines.append("")
            for note in report.architecture_notes[:5]:
                lines.append(f"- {note}")
            lines.append("")

        lines.append("---")
        lines.append("*Generated by AI Codebase Agent*")

        return "\n".join(lines)