feat: Add automatic PR summary generator

Implements an automatic PR summary generation feature that analyzes pull request diffs and generates comprehensive summaries.

Features:
- Auto-generates summaries for PRs with empty descriptions
- Manual trigger via the @codebot summarize command in PR comments
- Structured output with change type, files affected, and impact assessment
- Configurable (enable/disable, comment vs. description update)

Implementation:
- Added pr_summary.md prompt template for the LLM
- Extended PRAgent with summary generation methods
- Added auto_summary configuration in config.yml
- Comprehensive test suite with 10 new tests
- Updated documentation in README.md and CLAUDE.md

Usage:
- Automatic: Opening a PR with no description → auto-generates a summary
- Manual: Comment '@codebot summarize' on any PR

Related: Issue #2 - Milestone 2 feature delivery
2025-12-29 10:15:08 +00:00
parent 1ca6ac7913
commit e21ec5f57a
6 changed files with 643 additions and 7 deletions
@@ -39,6 +39,7 @@ class PRAgent(BaseAgent):

    # Marker specific to PR reviews
    PR_AI_MARKER = "<!-- AI_PR_REVIEW -->"
+    PR_SUMMARY_MARKER = "<!-- AI_PR_SUMMARY -->"

    def _get_label_config(self, category: str, key: str) -> dict:
        """Get full label configuration from config.
@@ -83,7 +84,7 @@ class PRAgent(BaseAgent):
            allowed_events = agent_config.get("events", ["opened", "synchronize"])
            return action in allowed_events

-        # Handle issue comments on PRs (for review-again command)
+        # Handle issue comments on PRs (for review-again and summarize commands)
        if event_type == "issue_comment":
            action = event_data.get("action", "")
            if action == "created":
@@ -91,21 +92,29 @@ class PRAgent(BaseAgent):
                mention_prefix = self.config.get("interaction", {}).get(
                    "mention_prefix", "@codebot"
                )
-                # Only handle if this is a PR and contains review-again command
+                # Only handle if this is a PR
                issue = event_data.get("issue", {})
                is_pr = issue.get("pull_request") is not None
                has_review_again = (
                    f"{mention_prefix} review-again" in comment_body.lower()
                )
-                return is_pr and has_review_again
+                has_summarize = f"{mention_prefix} summarize" in comment_body.lower()
+                return is_pr and (has_review_again or has_summarize)

        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Execute the PR review agent."""
-        # Check if this is a review-again command
+        # Check if this is a comment-based command
        if context.event_type == "issue_comment":
-            return self._handle_review_again(context)
+            comment_body = context.event_data.get("comment", {}).get("body", "")
+            mention_prefix = self.config.get("interaction", {}).get(
+                "mention_prefix", "@codebot"
+            )
+            if f"{mention_prefix} summarize" in comment_body.lower():
+                return self._handle_summarize_command(context)
+            elif f"{mention_prefix} review-again" in comment_body.lower():
+                return self._handle_review_again(context)

        pr = context.event_data.get("pull_request", {})
        pr_number = pr.get("number")
@@ -114,6 +123,24 @@ class PRAgent(BaseAgent):

        actions_taken = []

+        # Check if PR has empty description and auto-summary is enabled
+        pr_body = pr.get("body", "").strip()
+        agent_config = self.config.get("agents", {}).get("pr", {})
+        auto_summary_enabled = agent_config.get("auto_summary", {}).get("enabled", True)
+
+        if (
+            not pr_body
+            and auto_summary_enabled
+            and context.event_data.get("action") == "opened"
+        ):
+            # Generate and post summary for empty PR descriptions
+            summary_result = self._generate_pr_summary(
+                context.owner, context.repo, pr_number
+            )
+            if summary_result:
+                actions_taken.append("Generated PR summary for empty description")
+                # Don't return here - continue with regular review
+
        # Step 1: Get PR diff
        diff = self._get_diff(context.owner, context.repo, pr_number)
        if not diff.strip():
@@ -791,3 +818,206 @@ class PRAgent(BaseAgent):
                self.logger.warning(f"Failed to add labels: {e}")

        return []
+
+    def _generate_pr_summary(self, owner: str, repo: str, pr_number: int) -> bool:
+        """Generate and post a summary for a PR.
+
+        Args:
+            owner: Repository owner
+            repo: Repository name
+            pr_number: PR number
+
+        Returns:
+            True if summary was generated successfully, False otherwise
+        """
+        try:
+            # Get PR diff
+            diff = self._get_diff(owner, repo, pr_number)
+            if not diff.strip():
+                self.logger.info(f"No diff to summarize for PR #{pr_number}")
+                return False
+
+            # Load summary prompt
+            prompt_template = self.load_prompt("pr_summary")
+            prompt = f"{prompt_template}\n{diff}"
+
+            # Call LLM to generate summary
+            result = self.call_llm_json(prompt)
+
+            # Format the summary comment
+            summary_comment = self._format_pr_summary(result)
+
+            # Post as first comment (or update PR description based on config)
+            agent_config = self.config.get("agents", {}).get("pr", {})
+            auto_summary_config = agent_config.get("auto_summary", {})
+            post_as_comment = auto_summary_config.get("post_as_comment", True)
+
+            if post_as_comment:
+                # Post as comment
+                self.gitea.create_issue_comment(owner, repo, pr_number, summary_comment)
+                self.logger.info(f"Posted PR summary as comment for PR #{pr_number}")
+            else:
+                # Update PR description (requires different API call)
+                # Note: Gitea API may not support updating PR description
+                # In that case, fall back to posting as comment
+                try:
+                    self.gitea.update_pull_request(
+                        owner, repo, pr_number, body=summary_comment
+                    )
+                    self.logger.info(
+                        f"Updated PR description with summary for PR #{pr_number}"
+                    )
+                except Exception as e:
+                    self.logger.warning(
+                        f"Could not update PR description, posting as comment: {e}"
+                    )
+                    self.gitea.create_issue_comment(
+                        owner, repo, pr_number, summary_comment
+                    )
+
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Failed to generate PR summary: {e}")
+            return False
+
+    def _format_pr_summary(self, summary_data: dict) -> str:
+        """Format the PR summary data into a readable comment.
+
+        Args:
+            summary_data: JSON data from LLM containing summary information
+
+        Returns:
+            Formatted markdown comment
+        """
+        lines = [
+            self.AI_DISCLAIMER,
+            "",
+            "## 📋 Pull Request Summary",
+            "",
+            summary_data.get("summary", "Summary unavailable"),
+            "",
+        ]
+
+        # Change type
+        change_type = summary_data.get("change_type", "Unknown")
+        change_type_emoji = {
+            "Feature": "✨",
+            "Bugfix": "🐛",
+            "Refactor": "♻️",
+            "Documentation": "📚",
+            "Testing": "🧪",
+            "Mixed": "🔀",
+        }
+        emoji = change_type_emoji.get(change_type, "🔀")
+        lines.append(f"**Type:** {emoji} {change_type}")
+        lines.append("")
+
+        # Key changes
+        key_changes = summary_data.get("key_changes", {})
+        if key_changes:
+            lines.append("## Changes")
+            lines.append("")
+
+            added = key_changes.get("added", [])
+            if added:
+                lines.append("**✅ Added:**")
+                for item in added:
+                    lines.append(f"- {item}")
+                lines.append("")
+
+            modified = key_changes.get("modified", [])
+            if modified:
+                lines.append("**📝 Modified:**")
+                for item in modified:
+                    lines.append(f"- {item}")
+                lines.append("")
+
+            removed = key_changes.get("removed", [])
+            if removed:
+                lines.append("**❌ Removed:**")
+                for item in removed:
+                    lines.append(f"- {item}")
+                lines.append("")
+
+        # Files affected
+        files = summary_data.get("files_affected", [])
+        if files:
+            lines.append("## Files Affected")
+            lines.append("")
+            for file_info in files[:10]:  # Limit to first 10 files
+                path = file_info.get("path", "unknown")
+                desc = file_info.get("description", "")
+                change_type = file_info.get("change_type", "modified")
+
+                type_icon = {"added": "➕", "modified": "📝", "deleted": "➖"}
+                icon = type_icon.get(change_type, "📝")
+
+                lines.append(f"- {icon} `{path}` - {desc}")
+
+            if len(files) > 10:
+                lines.append(f"- ... and {len(files) - 10} more files")
+            lines.append("")
+
+        # Impact assessment
+        impact = summary_data.get("impact", {})
+        if impact:
+            scope = impact.get("scope", "unknown")
+            description = impact.get("description", "")
+
+            scope_emoji = {"small": "🟢", "medium": "🟡", "large": "🔴"}
+            emoji = scope_emoji.get(scope, "⚪")
+
+            lines.append("## Impact")
+            lines.append(f"{emoji} **Scope:** {scope.capitalize()}")
+            lines.append(f"{description}")
+
+        return "\n".join(lines)
+
+    def _handle_summarize_command(self, context: AgentContext) -> AgentResult:
+        """Handle @codebot summarize command from PR comments.
+
+        Args:
+            context: Agent context with event data
+
+        Returns:
+            AgentResult with success status and actions taken
+        """
+        issue = context.event_data.get("issue", {})
+        pr_number = issue.get("number")
+        comment_author = (
+            context.event_data.get("comment", {}).get("user", {}).get("login", "user")
+        )
+
+        self.logger.info(f"Generating PR summary for PR #{pr_number} at user request")
+
+        # Generate and post summary
+        success = self._generate_pr_summary(context.owner, context.repo, pr_number)
+
+        if success:
+            return AgentResult(
+                success=True,
+                message=f"Generated PR summary for PR #{pr_number}",
+                actions_taken=["Posted PR summary comment"],
+            )
+        else:
+            # Post error message
+            error_msg = (
+                f"@{comment_author}\n\n"
+                f"{self.AI_DISCLAIMER}\n\n"
+                "**⚠️ Summary Generation Failed**\n\n"
+                "I was unable to generate a summary for this PR. "
+                "This could be because:\n"
+                "- The PR has no changes\n"
+                "- There was an error accessing the diff\n"
+                "- The LLM service is unavailable"
+            )
+            self.gitea.create_issue_comment(
+                context.owner, context.repo, pr_number, error_msg
+            )
+
+            return AgentResult(
+                success=False,
+                message=f"Failed to generate PR summary for PR #{pr_number}",
+                error="Summary generation failed",
+            )
@@ -32,6 +32,9 @@ agents:
        events:
            - opened
            - synchronize
+        auto_summary:
+            enabled: true # Auto-generate summary for PRs with empty descriptions
+            post_as_comment: true # true = post as comment, false = update PR description
    codebase:
        enabled: true
        schedule: "0 0 * * 0" # Weekly on Sunday
@@ -63,7 +66,7 @@ interaction:
        - explain
        - suggest
        - security
-        - summarize
+        - summarize # Generate a summary (works on both issues and PRs)
        - triage
        - review-again

@@ -0,0 +1,67 @@
+You are an experienced senior software engineer analyzing a pull request diff to generate a comprehensive, informative summary.
+
+Your goal is to create a **clear, structured summary** that helps reviewers quickly understand:
+- What changes were made
+- Why these changes matter
+- Which files and components are affected
+- The type of change (feature, bugfix, refactor, documentation, testing, or mixed)
+
+---
+
+## Requirements
+
+Analyze the PR diff and generate a summary that includes:
+
+1. **Brief Overview**: 2-3 sentence summary of the changes
+2. **Key Changes**: Bullet points of the most important modifications
+   - What was added
+   - What was modified
+   - What was removed (if applicable)
+3. **Files Affected**: List of changed files with brief descriptions
+4. **Change Type**: Classify as Feature, Bugfix, Refactor, Documentation, Testing, or Mixed
+5. **Impact Assessment**: Brief note on the scope and potential impact
+
+---
+
+## Output Format
+
+Return a JSON object with this structure:
+
+```json
+{{{{
+  "summary": "Brief 2-3 sentence overview of what this PR accomplishes",
+  "change_type": "Feature" | "Bugfix" | "Refactor" | "Documentation" | "Testing" | "Mixed",
+  "key_changes": {{{{
+    "added": ["List of new features/files/functionality added"],
+    "modified": ["List of existing components that were changed"],
+    "removed": ["List of removed features/files (if any)"]
+  }}}},
+  "files_affected": [
+    {{{{
+      "path": "path/to/file.py",
+      "description": "Brief description of changes in this file",
+      "change_type": "added" | "modified" | "deleted"
+    }}}}
+  ],
+  "impact": {{{{
+    "scope": "small" | "medium" | "large",
+    "description": "Brief assessment of the impact and scope of changes"
+  }}}}
+}}}}
+```
+
+---
+
+## Rules
+
+1. **Be concise**: Keep descriptions clear and to the point
+2. **Focus on intent**: Explain *what* and *why*, not just *how*
+3. **Identify patterns**: Group related changes together
+4. **Highlight significance**: Emphasize important architectural or behavioral changes
+5. **Be objective**: Base analysis purely on the code changes
+6. **Output only JSON**: No additional text before or after the JSON object
+
+---
+
+## Diff to Analyze
+