first commit
This commit is contained in:
42
.gitea/workflows/ai-chat.yml
Normal file
42
.gitea/workflows/ai-chat.yml
Normal file
@@ -0,0 +1,42 @@
|
||||
# AI chat bot for Gitea Actions: responds to issue comments that mention the bot.
name: AI Chat (Bartender)

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-chat:
    # Only run if comment mentions the bot
    if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name
    runs-on: ubuntu-latest
    steps:
      # Checkout the repository the comment belongs to
      - uses: actions/checkout@v4

      # Checkout the central AI tooling repo.
      # NOTE(review): the other .gitea workflows in this commit check out
      # Hiddenden/AI-code-review-workflow — confirm which repository is canonical.
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/OpenRabbit
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - run: pip install requests pyyaml

      - name: Run AI Chat
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
          # SECURITY: pass attacker-controlled comment text through the
          # environment instead of expanding ${{ ... }} inside `run:`,
          # which would allow shell command injection.
          COMMENT_BODY: ${{ gitea.event.comment.body }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py comment "${{ gitea.repository }}" "${{ gitea.event.issue.number }}" "$COMMENT_BODY"
|
||||
58
.gitea/workflows/ai-codebase-review.yml
Normal file
58
.gitea/workflows/ai-codebase-review.yml
Normal file
@@ -0,0 +1,58 @@
|
||||
# Scheduled / manually-triggered whole-codebase quality review (Gitea Actions).
name: AI Codebase Quality Review

on:
  # Weekly scheduled run
  schedule:
    - cron: "0 0 * * 0" # Every Sunday at midnight

  # Manual trigger
  workflow_dispatch:
    inputs:
      # NOTE(review): this input is declared but never passed to main.py
      # below — either forward it (e.g. --report-type) or remove it.
      report_type:
        description: "Type of report to generate"
        required: false
        default: "full"
        type: choice
        options:
          - full
          - security
          - quick

jobs:
  ai-codebase-review:
    runs-on: ubuntu-latest

    steps:
      # Checkout the repository
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Full history for analysis

      # Checkout central AI tooling
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}

      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      # Install dependencies
      - run: pip install requests pyyaml

      # Run AI codebase analysis
      - name: Run AI Codebase Analysis
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1

          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py codebase "${{ gitea.repository }}"
|
||||
41
.gitea/workflows/ai-comment-reply.yml
Normal file
41
.gitea/workflows/ai-comment-reply.yml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Responds to @ai-bot commands left as issue comments (Gitea Actions).
name: AI Comment Reply

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-reply:
    runs-on: ubuntu-latest
    if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name
    steps:
      # Checkout the repository the comment belongs to
      - uses: actions/checkout@v4

      # Checkout the central AI tooling repo
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - run: pip install requests pyyaml

      - name: Run AI Comment Response
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: untrusted comment text must reach the shell via the
          # environment, never via ${{ ... }} expansion inside `run:`.
          COMMENT_BODY: ${{ gitea.event.comment.body }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py comment "${{ gitea.repository }}" "${{ gitea.event.issue.number }}" \
            "$COMMENT_BODY"
|
||||
36
.gitea/workflows/ai-issue-triage.yml
Normal file
36
.gitea/workflows/ai-issue-triage.yml
Normal file
@@ -0,0 +1,36 @@
|
||||
# Auto-triage for newly opened / labeled issues (Gitea Actions).
name: AI Issue Triage

on:
  issues:
    types: [opened, labeled]

jobs:
  ai-triage:
    runs-on: ubuntu-latest
    steps:
      # Checkout the repository the issue belongs to
      - uses: actions/checkout@v4

      # Checkout the central AI tooling repo
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - run: pip install requests pyyaml

      - name: Run AI Issue Triage
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: issue titles are attacker-controlled; pass through the
          # environment to prevent shell injection in `run:`.
          ISSUE_TITLE: ${{ gitea.event.issue.title }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py issue "${{ gitea.repository }}" "${{ gitea.event.issue.number }}" \
            --title "$ISSUE_TITLE"
|
||||
53
.gitea/workflows/enterprise-ai-review.yml
Normal file
53
.gitea/workflows/enterprise-ai-review.yml
Normal file
@@ -0,0 +1,53 @@
|
||||
# PR review with inline comments and security scanning (Gitea Actions).
name: Enterprise AI Code Review

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  ai-review:
    runs-on: ubuntu-latest

    steps:
      # Checkout the PR repository
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Checkout the CENTRAL AI tooling repo
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}

      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      # Install dependencies
      - run: pip install requests pyyaml

      # Run the AI review
      - name: Run Enterprise AI Review
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          AI_REVIEW_PR_NUMBER: ${{ gitea.event.pull_request.number }}

          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: PR titles are attacker-controlled; expanding them with
          # ${{ ... }} inside `run:` allows shell injection. Use env instead.
          PR_TITLE: ${{ gitea.event.pull_request.title }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py pr "${{ gitea.repository }}" "${{ gitea.event.pull_request.number }}" \
            --title "$PR_TITLE"

      # Fail CI on HIGH severity (optional).
      # Runs only when a previous step failed; the explicit exit 1 keeps the
      # job marked failed and surfaces a human-readable reason in the log.
      - name: Check Review Result
        if: failure()
        run: |
          echo "AI Review found HIGH severity issues. Please address them before merging."
          exit 1
|
||||
36
.github/workflows/ai-chat.yml
vendored
Normal file
36
.github/workflows/ai-chat.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# AI chat bot for GitHub Actions: responds to issue comments that mention the bot.
name: AI Chat (Bartender)

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-chat:
    # Only run if comment mentions the bot
    if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - run: pip install requests pyyaml

      - name: Run AI Chat
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
          # SECURITY: comment bodies are attacker-controlled; pass them via
          # env to avoid script injection through ${{ ... }} in `run:`.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          cd tools/ai-review
          python main.py comment "${{ github.repository }}" "${{ github.event.issue.number }}" "$COMMENT_BODY"
|
||||
51
.github/workflows/ai-codebase-review.yml
vendored
Normal file
51
.github/workflows/ai-codebase-review.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# Scheduled / manually-triggered whole-codebase quality review (GitHub Actions).
name: AI Codebase Quality Review

on:
  # Weekly scheduled run
  schedule:
    - cron: "0 0 * * 0" # Every Sunday at midnight

  # Manual trigger
  workflow_dispatch:
    inputs:
      # NOTE(review): this input is declared but never forwarded to main.py
      # below — either pass it (e.g. --report-type) or remove it.
      report_type:
        description: "Type of report to generate"
        required: false
        default: "full"
        type: choice
        options:
          - full
          - security
          - quick

jobs:
  ai-codebase-review:
    runs-on: ubuntu-latest

    steps:
      # Checkout the repository
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Full history for analysis

      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      # Install dependencies
      - run: pip install requests pyyaml

      # Run AI codebase analysis
      - name: Run AI Codebase Analysis
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com

          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
        run: |
          cd tools/ai-review
          python main.py codebase "${{ github.repository }}"
|
||||
36
.github/workflows/ai-comment-reply.yml
vendored
Normal file
36
.github/workflows/ai-comment-reply.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# Responds to @ai-bot commands left as issue comments (GitHub Actions).
name: AI Comment Reply

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-reply:
    runs-on: ubuntu-latest
    if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - run: pip install requests pyyaml

      - name: Run AI Comment Response
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
          # SECURITY: comment bodies are attacker-controlled; pass them via
          # env to avoid script injection through ${{ ... }} in `run:`.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          cd tools/ai-review
          python main.py comment "${{ github.repository }}" "${{ github.event.issue.number }}" \
            "$COMMENT_BODY"
|
||||
30
.github/workflows/ai-issue-triage.yml
vendored
Normal file
30
.github/workflows/ai-issue-triage.yml
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
# Auto-triage for newly opened / labeled issues (GitHub Actions).
name: AI Issue Triage

on:
  issues:
    types: [opened, labeled]

jobs:
  ai-triage:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - run: pip install requests pyyaml

      - name: Run AI Issue Triage
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: issue titles are attacker-controlled; pass through env
          # to prevent script injection via ${{ ... }} in `run:`.
          ISSUE_TITLE: ${{ github.event.issue.title }}
        run: |
          cd tools/ai-review
          python main.py issue "${{ github.repository }}" "${{ github.event.issue.number }}" \
            --title "$ISSUE_TITLE"
|
||||
52
.github/workflows/ai-review.yml
vendored
Normal file
52
.github/workflows/ai-review.yml
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
# PR review with inline comments and security scanning (GitHub Actions).
name: AI Code Review

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  ai-review:
    runs-on: ubuntu-latest

    steps:
      # Checkout the PR repository
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The AI tooling lives in this repo's tools directory
      - name: Setup AI Review Tools
        run: |
          # Tools are already in this repo under tools/ai-review
          echo "AI Review tools available at tools/ai-review"

      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      # Install dependencies
      - run: pip install requests pyyaml

      # Run the AI review
      - name: Run AI Review
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          AI_REVIEW_PR_NUMBER: ${{ github.event.pull_request.number }}

          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: PR titles are attacker-controlled; expanding them with
          # ${{ ... }} inside `run:` allows script injection. Use env instead.
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          cd tools/ai-review
          python main.py pr "${{ github.repository }}" "${{ github.event.pull_request.number }}" \
            --title "$PR_TITLE"

      # Fail CI on HIGH severity (optional).
      # Runs only when a previous step failed; the explicit exit 1 keeps the
      # job marked failed and surfaces a human-readable reason in the log.
      - name: Check Review Result
        if: failure()
        run: |
          echo "AI Review found HIGH severity issues. Please address them before merging."
          exit 1
|
||||
32
.gitignore
vendored
Normal file
32
.gitignore
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
.eggs/
dist/
build/

# Virtual environments
.venv/
venv/
ENV/

# IDE
.idea/
.vscode/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Logs
*.log

# Environment
.env
.env.local
|
||||
342
README.md
342
README.md
@@ -1,2 +1,342 @@
|
||||
# openrabbit
|
||||
# OpenRabbit
|
||||
|
||||
Enterprise-grade AI code review system for **GitHub** and **Gitea** with automated PR review, issue triage, interactive chat (Bartender), and codebase analysis.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
| Feature | Description |
|
||||
|---------|-------------|
|
||||
| **PR Review** | Inline comments, security scanning, severity-based CI failure |
|
||||
| **Issue Triage** | Auto-classification, labeling, priority assignment |
|
||||
| **Chat (Bartender)** | Interactive AI chat with codebase search and web search tools |
|
||||
| **@ai-bot Commands** | `@ai-bot summarize`, `explain`, `suggest` in issue comments |
|
||||
| **Codebase Analysis** | Health scores, tech debt tracking, weekly reports |
|
||||
| **Security Scanner** | 17 OWASP-aligned rules for vulnerability detection |
|
||||
| **Enterprise Ready** | Audit logging, metrics, Prometheus export |
|
||||
| **Multi-Platform** | Works with both GitHub and Gitea |
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Set Repository/Organization Secrets
|
||||
|
||||
```
|
||||
OPENAI_API_KEY - OpenAI API key (or use OpenRouter/Ollama)
|
||||
SEARXNG_URL - (Optional) SearXNG instance URL for web search
|
||||
```
|
||||
|
||||
**For Gitea:**
|
||||
```
|
||||
AI_REVIEW_TOKEN - Bot token with repo + issue permissions
|
||||
```
|
||||
|
||||
**For GitHub:**
|
||||
The built-in `GITHUB_TOKEN` is used automatically.
|
||||
|
||||
### 2. Add Workflows to Repository
|
||||
|
||||
Workflows are provided for both platforms:
|
||||
|
||||
| Platform | Location |
|
||||
|----------|----------|
|
||||
| GitHub | `.github/workflows/` |
|
||||
| Gitea | `.gitea/workflows/` |
|
||||
|
||||
#### GitHub Example
|
||||
|
||||
```yaml
|
||||
# .github/workflows/ai-review.yml
|
||||
name: AI PR Review
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
ai-review:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Review
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ github.repository }}
|
||||
AI_REVIEW_API_URL: https://api.github.com
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd tools/ai-review
|
||||
python main.py pr ${{ github.repository }} ${{ github.event.pull_request.number }}
|
||||
```
|
||||
|
||||
#### Gitea Example
|
||||
|
||||
```yaml
|
||||
# .gitea/workflows/ai-review.yml
|
||||
name: AI PR Review
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
ai-review:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: YourOrg/OpenRabbit
|
||||
path: .ai-review
|
||||
token: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Review
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ gitea.repository }}
|
||||
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd .ai-review/tools/ai-review
|
||||
python main.py pr ${{ gitea.repository }} ${{ gitea.event.pull_request.number }}
|
||||
```
|
||||
|
||||
For full workflow examples, see [Workflows Documentation](docs/workflows.md).
|
||||
|
||||
### 3. Create Labels
|
||||
|
||||
Create these labels in your repository for auto-labeling:
|
||||
- `priority: high`, `priority: medium`, `priority: low`
|
||||
- `type: bug`, `type: feature`, `type: question`
|
||||
- `ai-approved`, `ai-changes-required`
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
tools/ai-review/
|
||||
├── agents/ # Agent implementations
|
||||
│ ├── base_agent.py # Abstract base agent
|
||||
│ ├── issue_agent.py # Issue triage & @ai-bot commands
|
||||
│ ├── pr_agent.py # PR review with security scan
|
||||
│ ├── codebase_agent.py # Codebase health analysis
|
||||
│ └── chat_agent.py # Bartender chat with tool calling
|
||||
├── clients/ # API clients
|
||||
│ ├── gitea_client.py # Gitea REST API wrapper
|
||||
│ └── llm_client.py # Multi-provider LLM client with tool support
|
||||
├── security/ # Security scanning
|
||||
│ └── security_scanner.py # 17 OWASP-aligned rules
|
||||
├── enterprise/ # Enterprise features
|
||||
│ ├── audit_logger.py # JSONL audit logging
|
||||
│ └── metrics.py # Prometheus-compatible metrics
|
||||
├── prompts/ # AI prompt templates
|
||||
├── main.py # CLI entry point
|
||||
└── config.yml # Configuration
|
||||
|
||||
.github/workflows/ # GitHub Actions workflows
|
||||
├── ai-review.yml # PR review workflow
|
||||
├── ai-issue-triage.yml # Issue triage workflow
|
||||
├── ai-codebase-review.yml # Codebase analysis
|
||||
├── ai-comment-reply.yml # @ai-bot command responses
|
||||
└── ai-chat.yml # Bartender chat
|
||||
|
||||
.gitea/workflows/ # Gitea Actions workflows
|
||||
├── enterprise-ai-review.yml
|
||||
├── ai-issue-triage.yml
|
||||
├── ai-codebase-review.yml
|
||||
├── ai-comment-reply.yml
|
||||
└── ai-chat.yml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CLI Commands
|
||||
|
||||
```bash
|
||||
# Review a pull request
|
||||
python main.py pr owner/repo 123
|
||||
|
||||
# Triage an issue
|
||||
python main.py issue owner/repo 456
|
||||
|
||||
# Respond to @ai-bot command
|
||||
python main.py comment owner/repo 456 "@ai-bot explain"
|
||||
|
||||
# Analyze codebase
|
||||
python main.py codebase owner/repo
|
||||
|
||||
# Chat with Bartender
|
||||
python main.py chat owner/repo "How does authentication work?"
|
||||
python main.py chat owner/repo "Find all API endpoints" --issue 789
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## @ai-bot Commands
|
||||
|
||||
In any issue comment:
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `@ai-bot summarize` | Summarize the issue in 2-3 sentences |
|
||||
| `@ai-bot explain` | Explain what the issue is about |
|
||||
| `@ai-bot suggest` | Suggest solutions or next steps |
|
||||
| `@ai-bot` (any question) | Chat with Bartender using codebase/web search |
|
||||
|
||||
---
|
||||
|
||||
## Bartender Chat
|
||||
|
||||
Bartender is an interactive AI assistant with tool-calling capabilities:
|
||||
|
||||
**Tools Available:**
|
||||
- `search_codebase` - Search repository files and code
|
||||
- `read_file` - Read specific files
|
||||
- `search_web` - Search the web via SearXNG
|
||||
|
||||
**Example:**
|
||||
```
|
||||
@ai-bot How do I configure rate limiting in this project?
|
||||
```
|
||||
|
||||
Bartender will search the codebase, read relevant files, and provide a comprehensive answer.
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
Edit `tools/ai-review/config.yml`:
|
||||
|
||||
```yaml
|
||||
provider: openai # openai | openrouter | ollama
|
||||
|
||||
model:
|
||||
openai: gpt-4.1-mini
|
||||
openrouter: anthropic/claude-3.5-sonnet
|
||||
ollama: codellama:13b
|
||||
|
||||
agents:
|
||||
issue:
|
||||
enabled: true
|
||||
auto_label: true
|
||||
pr:
|
||||
enabled: true
|
||||
inline_comments: true
|
||||
security_scan: true
|
||||
codebase:
|
||||
enabled: true
|
||||
chat:
|
||||
enabled: true
|
||||
name: "Bartender"
|
||||
searxng_url: "" # Or set SEARXNG_URL env var
|
||||
|
||||
interaction:
|
||||
respond_to_mentions: true
|
||||
mention_prefix: "@ai-bot" # Customize your bot name here!
|
||||
commands:
|
||||
- summarize
|
||||
- explain
|
||||
- suggest
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Customizing the Bot Name
|
||||
|
||||
You can change the bot's mention trigger from `@ai-bot` to any name you prefer:
|
||||
|
||||
**Step 1:** Edit `tools/ai-review/config.yml`:
|
||||
```yaml
|
||||
interaction:
|
||||
mention_prefix: "@bartender" # or "@uni", "@joey", "@codebot", etc.
|
||||
```
|
||||
|
||||
**Step 2:** Update the workflow files to match:
|
||||
|
||||
For GitHub (`.github/workflows/ai-comment-reply.yml` and `ai-chat.yml`):
|
||||
```yaml
|
||||
if: contains(github.event.comment.body, '@bartender')
|
||||
```
|
||||
|
||||
For Gitea (`.gitea/workflows/ai-comment-reply.yml` and `ai-chat.yml`):
|
||||
```yaml
|
||||
if: contains(github.event.comment.body, '@bartender')
|
||||
```
|
||||
|
||||
**Example bot names:**
|
||||
| Name | Use Case |
|
||||
|------|----------|
|
||||
| `@bartender` | Friendly, conversational |
|
||||
| `@uni` | Short, quick to type |
|
||||
| `@joey` | Personal assistant feel |
|
||||
| `@codebot` | Technical, code-focused |
|
||||
| `@reviewer` | Review-focused |
|
||||
|
||||
---
|
||||
|
||||
## Security Scanning
|
||||
|
||||
17 rules covering OWASP Top 10:
|
||||
|
||||
| Category | Examples |
|
||||
|----------|----------|
|
||||
| Injection | SQL injection, command injection, XSS |
|
||||
| Access Control | Hardcoded secrets, private keys |
|
||||
| Crypto Failures | Weak hashing (MD5/SHA1), insecure random |
|
||||
| Misconfiguration | Debug mode, CORS wildcard, SSL bypass |
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
| Document | Description |
|
||||
|----------|-------------|
|
||||
| [Getting Started](docs/getting-started.md) | Quick setup guide |
|
||||
| [Configuration](docs/configuration.md) | All options explained |
|
||||
| [Agents](docs/agents.md) | Agent documentation |
|
||||
| [Security](docs/security.md) | Security rules reference |
|
||||
| [Workflows](docs/workflows.md) | GitHub & Gitea workflow examples |
|
||||
| [API Reference](docs/api-reference.md) | Client and agent APIs |
|
||||
| [Enterprise](docs/enterprise.md) | Audit logging, metrics |
|
||||
| [Troubleshooting](docs/troubleshooting.md) | Common issues |
|
||||
|
||||
---
|
||||
|
||||
## LLM Providers
|
||||
|
||||
| Provider | Model | Use Case |
|
||||
|----------|-------|----------|
|
||||
| OpenAI | gpt-4.1-mini | Fast, reliable |
|
||||
| OpenRouter | claude-3.5-sonnet | Multi-provider access |
|
||||
| Ollama | codellama:13b | Self-hosted, private |
|
||||
|
||||
---
|
||||
|
||||
## Enterprise Features
|
||||
|
||||
- **Audit Logging**: JSONL logs with daily rotation
|
||||
- **Metrics**: Prometheus-compatible export
|
||||
- **Rate Limiting**: Configurable request limits
|
||||
- **Custom Security Rules**: Define your own patterns via YAML
|
||||
- **Tool Calling**: LLM function calling for interactive chat
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
|
||||
52
docs/README.md
Normal file
52
docs/README.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# AI Code Review Workflow Documentation
|
||||
|
||||
Enterprise-grade AI code review system for Gitea with automated issue triage, PR review, and codebase analysis.
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
| Document | Description |
|
||||
|----------|-------------|
|
||||
| [Getting Started](getting-started.md) | Quick setup guide |
|
||||
| [Configuration](configuration.md) | All configuration options |
|
||||
| [Agents](agents.md) | Detailed agent documentation |
|
||||
| [Security](security.md) | Security scanning features |
|
||||
| [API Reference](api-reference.md) | Client and agent APIs |
|
||||
| [Workflows](workflows.md) | Gitea workflow examples |
|
||||
| [Troubleshooting](troubleshooting.md) | Common issues and solutions |
|
||||
|
||||
## Quick Links
|
||||
|
||||
- **Setup**: See [Getting Started](getting-started.md)
|
||||
- **Configuration**: See [Configuration](configuration.md)
|
||||
- **Enterprise Features**: See [Enterprise](enterprise.md)
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Event Sources │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
||||
│ │ PR Event │ │ Issue │ │ Schedule │ │
|
||||
│ └────┬─────┘ └────┬─────┘ └────┬─────┘ │
|
||||
└───────┼─────────────┼─────────────┼─────────────────────────┘
|
||||
│ │ │
|
||||
└─────────────┼─────────────┘
|
||||
▼
|
||||
┌───────────────┐
|
||||
│ Dispatcher │
|
||||
└───────┬───────┘
|
||||
│
|
||||
┌─────────────┼─────────────┐
|
||||
▼ ▼ ▼
|
||||
┌───────────┐ ┌───────────┐ ┌───────────┐
|
||||
│ Issue │ │ PR │ │ Codebase │
|
||||
│ Agent │ │ Agent │ │ Agent │
|
||||
└─────┬─────┘ └─────┬─────┘ └─────┬─────┘
|
||||
│ │ │
|
||||
└──────────────┼──────────────┘
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Gitea API │
|
||||
│ LLM Provider │
|
||||
└─────────────────┘
|
||||
```
|
||||
298
docs/agents.md
Normal file
298
docs/agents.md
Normal file
@@ -0,0 +1,298 @@
|
||||
# Agents Documentation
|
||||
|
||||
The AI Code Review system includes four specialized agents.
|
||||
|
||||
## Issue Agent
|
||||
|
||||
Handles issue triage, classification, and interaction.
|
||||
|
||||
### Triggers
|
||||
|
||||
- `issues.opened` - New issue created (handled by `run_issue_triage`)
|
||||
- `issues.labeled` - Label added to issue
|
||||
- `issue_comment.created` - Comment with @mention (handled by `run_issue_comment`)
|
||||
|
||||
### Features
|
||||
|
||||
**Automatic Triage:**
|
||||
- Classifies issue type: bug, feature, question, documentation, support
|
||||
- Assigns priority: high, medium, low
|
||||
- Calculates confidence score
|
||||
|
||||
**Auto-Labeling:**
|
||||
- Applies type labels (`type: bug`, etc.)
|
||||
- Applies priority labels (`priority: high`, etc.)
|
||||
- Adds `ai-reviewed` status label
|
||||
|
||||
**@Mention Commands:**
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `@ai-bot summarize` | Generate concise summary |
|
||||
| `@ai-bot explain` | Detailed explanation |
|
||||
| `@ai-bot suggest` | Solution suggestions |
|
||||
|
||||
### Output
|
||||
|
||||
Posts a triage comment:
|
||||
|
||||
```markdown
|
||||
## AI Issue Triage
|
||||
|
||||
| Field | Value |
|
||||
|-------|--------|
|
||||
| **Type** | Bug |
|
||||
| **Priority** | High |
|
||||
| **Confidence** | 85% |
|
||||
|
||||
### Additional Information Needed
|
||||
|
||||
- Steps to reproduce
|
||||
- Error logs
|
||||
|
||||
---
|
||||
*Classification based on issue content*
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## PR Agent
|
||||
|
||||
Comprehensive pull request review with security scanning.
|
||||
|
||||
### Triggers
|
||||
|
||||
- `pull_request.opened` - New PR created
|
||||
- `pull_request.synchronize` - PR updated with new commits
|
||||
|
||||
### Features
|
||||
|
||||
**AI Code Review:**
|
||||
- Analyzes diff for issues
|
||||
- Categorizes: Security, Correctness, Performance, Maintainability
|
||||
- Assigns severity: HIGH, MEDIUM, LOW
|
||||
|
||||
**Inline Comments:**
|
||||
- Posts comments on specific lines
|
||||
- Links to file and line number
|
||||
- Provides recommendations
|
||||
|
||||
**Security Scanning:**
|
||||
- 17 OWASP-aligned rules
|
||||
- Detects hardcoded secrets, SQL injection, XSS
|
||||
- Fails CI on HIGH severity
|
||||
|
||||
**Label Management:**
|
||||
- `ai-approved` - No blocking issues
|
||||
- `ai-changes-required` - HIGH severity issues found
|
||||
|
||||
### Output
|
||||
|
||||
Posts summary comment:
|
||||
|
||||
```markdown
|
||||
## AI Code Review
|
||||
|
||||
Review of changes in this PR.
|
||||
|
||||
### Summary
|
||||
|
||||
| Severity | Count |
|
||||
|----------|-------|
|
||||
| HIGH | 1 |
|
||||
| MEDIUM | 2 |
|
||||
| LOW | 3 |
|
||||
|
||||
### Security Issues
|
||||
|
||||
- **[HIGH]** `src/auth.py:45` - Hardcoded API key detected
|
||||
|
||||
### Review Findings
|
||||
|
||||
- **[MEDIUM]** `src/db.py:12` - SQL query uses string formatting
|
||||
- **[LOW]** `src/utils.py:30` - Missing docstring
|
||||
|
||||
---
|
||||
**Overall Severity:** `HIGH`
|
||||
**AI Recommendation:** Changes Requested
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Codebase Agent
|
||||
|
||||
Repository-wide quality and health analysis.
|
||||
|
||||
### Triggers
|
||||
|
||||
- `schedule` - Cron schedule (default: weekly)
|
||||
- `workflow_dispatch` - Manual trigger
|
||||
- `@ai-bot codebase` - Comment command
|
||||
|
||||
### Features
|
||||
|
||||
**Metrics Collection:**
|
||||
- Total files and lines of code
|
||||
- Language distribution
|
||||
- TODO/FIXME/DEPRECATED counts
|
||||
|
||||
**AI Analysis:**
|
||||
- Overall health score (0-100)
|
||||
- Architecture observations
|
||||
- Technical debt identification
|
||||
- Improvement recommendations
|
||||
|
||||
### Output
|
||||
|
||||
Creates/updates report issue:
|
||||
|
||||
```markdown
|
||||
# AI Codebase Quality Report
|
||||
|
||||
## Health Score: 72/100
|
||||
|
||||
The codebase is in reasonable condition with some areas for improvement.
|
||||
|
||||
---
|
||||
|
||||
## Metrics
|
||||
|
||||
| Metric | Value |
|
||||
|--------|-------|
|
||||
| Total Files | 45 |
|
||||
| Total Lines | 12,500 |
|
||||
| TODO Comments | 23 |
|
||||
| FIXME Comments | 8 |
|
||||
|
||||
### Languages
|
||||
|
||||
- **Python**: 35 files
|
||||
- **JavaScript**: 10 files
|
||||
|
||||
## Issues Found
|
||||
|
||||
### [MEDIUM] Code Quality
|
||||
Missing docstrings in 15 functions.
|
||||
**Recommendation:** Add docstrings for public functions.
|
||||
|
||||
## Recommendations
|
||||
|
||||
1. Add comprehensive test coverage
|
||||
2. Document API endpoints
|
||||
3. Reduce TODO backlog
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Chat Agent (Bartender)
|
||||
|
||||
Interactive AI chat assistant with tool-calling capabilities.
|
||||
|
||||
### Triggers
|
||||
|
||||
- `issue_comment.created` - Any @ai-bot mention that isn't a specific command
|
||||
- `chat` - Direct CLI invocation
|
||||
|
||||
### Features
|
||||
|
||||
**Tool Calling:**
|
||||
The Chat Agent uses LLM function calling to gather information before responding:
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `search_codebase` | Search repository files and code patterns |
|
||||
| `read_file` | Read specific files from the repository |
|
||||
| `search_web` | Search the web via SearXNG instance |
|
||||
|
||||
**Iterative Reasoning:**
|
||||
- Makes up to 5 tool calls per request
|
||||
- Combines information from multiple sources
|
||||
- Provides comprehensive, contextual answers
|
||||
|
||||
**Web Search:**
|
||||
- Requires SearXNG instance URL (via `SEARXNG_URL` env var or config)
|
||||
- Searches for documentation, tutorials, external resources
|
||||
|
||||
### Configuration
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
chat:
|
||||
enabled: true
|
||||
name: "Bartender"
|
||||
max_iterations: 5
|
||||
tools:
|
||||
- search_codebase
|
||||
- read_file
|
||||
- search_web
|
||||
searxng_url: "" # Or set SEARXNG_URL env var
|
||||
```
|
||||
|
||||
### CLI Usage
|
||||
|
||||
```bash
|
||||
# Simple chat
|
||||
python main.py chat owner/repo "How does authentication work?"
|
||||
|
||||
# Chat and post response to issue
|
||||
python main.py chat owner/repo "Explain this bug" --issue 123
|
||||
```
|
||||
|
||||
### Issue Comment Usage
|
||||
|
||||
```
|
||||
@ai-bot How do I configure rate limiting?
|
||||
@ai-bot Find all files that handle user authentication
|
||||
@ai-bot What does the dispatcher module do?
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
Posts a response comment:
|
||||
|
||||
```markdown
|
||||
**Note:** This review was generated by an AI assistant...
|
||||
|
||||
---
|
||||
|
||||
Based on my analysis of the codebase, rate limiting is configured in
|
||||
`tools/ai-review/config.yml` under the `enterprise.rate_limit` section:
|
||||
|
||||
- `requests_per_minute`: Maximum requests per minute (default: 30)
|
||||
- `max_concurrent`: Maximum concurrent requests (default: 4)
|
||||
|
||||
The rate limiting logic is implemented in `enterprise/rate_limiter.py`...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Agent Interface
|
||||
|
||||
All agents extend `BaseAgent`:
|
||||
|
||||
```python
|
||||
from agents import BaseAgent, AgentContext, AgentResult
|
||||
|
||||
class CustomAgent(BaseAgent):
|
||||
def can_handle(self, event_type: str, event_data: dict) -> bool:
|
||||
# Return True if this agent handles the event
|
||||
return event_type == "custom_event"
|
||||
|
||||
def execute(self, context: AgentContext) -> AgentResult:
|
||||
# Perform agent logic
|
||||
return AgentResult(
|
||||
success=True,
|
||||
message="Custom action completed",
|
||||
actions_taken=["action1", "action2"],
|
||||
)
|
||||
```
|
||||
|
||||
Register with dispatcher:
|
||||
|
||||
```python
|
||||
from dispatcher import get_dispatcher
|
||||
from agents import CustomAgent
|
||||
|
||||
dispatcher = get_dispatcher()
|
||||
dispatcher.register_agent(CustomAgent())
|
||||
```
|
||||
280
docs/api-reference.md
Normal file
280
docs/api-reference.md
Normal file
@@ -0,0 +1,280 @@
|
||||
# API Reference
|
||||
|
||||
## Gitea Client
|
||||
|
||||
`clients/gitea_client.py`
|
||||
|
||||
### Initialization
|
||||
|
||||
```python
|
||||
from clients import GiteaClient
|
||||
|
||||
client = GiteaClient(
|
||||
api_url="https://gitea.example.com/api/v1",
|
||||
token="your_token",
|
||||
timeout=30,
|
||||
)
|
||||
```
|
||||
|
||||
### Issue Methods
|
||||
|
||||
```python
|
||||
# List issues
|
||||
issues = client.list_issues(
|
||||
owner="user",
|
||||
repo="repo",
|
||||
state="open", # open, closed, all
|
||||
labels=["bug"],
|
||||
page=1,
|
||||
limit=30,
|
||||
)
|
||||
|
||||
# Get single issue
|
||||
issue = client.get_issue(owner, repo, index=123)
|
||||
|
||||
# Create comment
|
||||
comment = client.create_issue_comment(owner, repo, index=123, body="Comment text")
|
||||
|
||||
# Update comment
|
||||
client.update_issue_comment(owner, repo, comment_id=456, body="Updated text")
|
||||
|
||||
# List comments
|
||||
comments = client.list_issue_comments(owner, repo, index=123)
|
||||
|
||||
# Add labels
|
||||
client.add_issue_labels(owner, repo, index=123, labels=[1, 2, 3])
|
||||
|
||||
# Get repo labels
|
||||
labels = client.get_repo_labels(owner, repo)
|
||||
```
|
||||
|
||||
### Pull Request Methods
|
||||
|
||||
```python
|
||||
# Get PR
|
||||
pr = client.get_pull_request(owner, repo, index=123)
|
||||
|
||||
# Get diff
|
||||
diff = client.get_pull_request_diff(owner, repo, index=123)
|
||||
|
||||
# List changed files
|
||||
files = client.list_pull_request_files(owner, repo, index=123)
|
||||
|
||||
# Create review with inline comments
|
||||
client.create_pull_request_review(
|
||||
owner, repo, index=123,
|
||||
body="Review summary",
|
||||
event="COMMENT", # APPROVE, REQUEST_CHANGES, COMMENT
|
||||
comments=[
|
||||
{"path": "file.py", "line": 10, "body": "Issue here"},
|
||||
],
|
||||
)
|
||||
```
|
||||
|
||||
### Repository Methods
|
||||
|
||||
```python
|
||||
# Get repository info
|
||||
repo = client.get_repository(owner, repo)
|
||||
|
||||
# Get file contents (base64 encoded)
|
||||
content = client.get_file_contents(owner, repo, "path/to/file.py", ref="main")
|
||||
|
||||
# Get branch
|
||||
branch = client.get_branch(owner, repo, "main")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## LLM Client
|
||||
|
||||
`clients/llm_client.py`
|
||||
|
||||
### Initialization
|
||||
|
||||
```python
|
||||
from clients import LLMClient
|
||||
|
||||
# Direct initialization
|
||||
client = LLMClient(
|
||||
provider="openai", # openai, openrouter, ollama
|
||||
config={"model": "gpt-4", "temperature": 0},
|
||||
)
|
||||
|
||||
# From config file
|
||||
client = LLMClient.from_config(config_dict)
|
||||
```
|
||||
|
||||
### Methods
|
||||
|
||||
```python
|
||||
# Basic call
|
||||
response = client.call("Explain this code")
|
||||
print(response.content)
|
||||
print(response.tokens_used)
|
||||
|
||||
# JSON response
|
||||
result = client.call_json("Return JSON: {\"key\": \"value\"}")
|
||||
print(result["key"])
|
||||
```
|
||||
|
||||
### Response Object
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class LLMResponse:
|
||||
content: str # Generated text
|
||||
model: str # Model used
|
||||
provider: str # Provider name
|
||||
tokens_used: int # Token count
|
||||
finish_reason: str # stop, length, etc.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Base Agent
|
||||
|
||||
`agents/base_agent.py`
|
||||
|
||||
### Creating Custom Agent
|
||||
|
||||
```python
|
||||
from agents import BaseAgent, AgentContext, AgentResult
|
||||
|
||||
class MyAgent(BaseAgent):
|
||||
def can_handle(self, event_type: str, event_data: dict) -> bool:
|
||||
return event_type == "my_event"
|
||||
|
||||
def execute(self, context: AgentContext) -> AgentResult:
|
||||
# Use built-in methods
|
||||
prompt = self.load_prompt("my_prompt")
|
||||
response = self.call_llm(prompt)
|
||||
|
||||
self.upsert_comment(
|
||||
context.owner,
|
||||
context.repo,
|
||||
issue_index=123,
|
||||
body=response.content,
|
||||
)
|
||||
|
||||
return AgentResult(
|
||||
success=True,
|
||||
message="Done",
|
||||
actions_taken=["posted comment"],
|
||||
)
|
||||
```
|
||||
|
||||
### Built-in Methods
|
||||
|
||||
```python
|
||||
# Load prompt template
|
||||
prompt = self.load_prompt("prompt_name") # From prompts/prompt_name.md
|
||||
|
||||
# LLM calls (with rate limiting)
|
||||
response = self.call_llm(prompt)
|
||||
json_result = self.call_llm_json(prompt)
|
||||
|
||||
# Comment management
|
||||
comment_id = self.find_ai_comment(owner, repo, issue_index)
|
||||
self.upsert_comment(owner, repo, issue_index, body)
|
||||
|
||||
# Format with disclaimer
|
||||
formatted = self.format_with_disclaimer(content)
|
||||
```
|
||||
|
||||
### Context Object
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class AgentContext:
|
||||
owner: str # Repository owner
|
||||
repo: str # Repository name
|
||||
event_type: str # Event type
|
||||
event_data: dict # Event payload
|
||||
config: dict # Configuration
|
||||
```
|
||||
|
||||
### Result Object
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class AgentResult:
|
||||
success: bool
|
||||
message: str
|
||||
data: dict = field(default_factory=dict)
|
||||
actions_taken: list[str] = field(default_factory=list)
|
||||
error: str | None = None
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dispatcher
|
||||
|
||||
`dispatcher.py`
|
||||
|
||||
### Usage
|
||||
|
||||
```python
|
||||
from dispatcher import Dispatcher, get_dispatcher
|
||||
|
||||
# Get global dispatcher
|
||||
dispatcher = get_dispatcher()
|
||||
|
||||
# Register agents
|
||||
dispatcher.register_agent(MyAgent())
|
||||
|
||||
# Dispatch event
|
||||
result = dispatcher.dispatch(
|
||||
event_type="pull_request",
|
||||
event_data={"action": "opened", ...},
|
||||
owner="user",
|
||||
repo="repo",
|
||||
)
|
||||
|
||||
# Async dispatch
|
||||
future = dispatcher.dispatch_async(event_type, event_data, owner, repo)
|
||||
result = future.result()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Security Scanner
|
||||
|
||||
`security/security_scanner.py`
|
||||
|
||||
### Usage
|
||||
|
||||
```python
|
||||
from security import SecurityScanner
|
||||
|
||||
scanner = SecurityScanner()
|
||||
|
||||
# Scan content
|
||||
for finding in scanner.scan_content(code, "file.py"):
|
||||
print(finding.rule_id, finding.severity, finding.line)
|
||||
|
||||
# Scan diff (only added lines)
|
||||
for finding in scanner.scan_diff(diff):
|
||||
print(finding.file, finding.line, finding.code_snippet)
|
||||
|
||||
# Summary
|
||||
findings = list(scanner.scan_diff(diff))
|
||||
summary = scanner.get_summary(findings)
|
||||
```
|
||||
|
||||
### Finding Object
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class SecurityFinding:
|
||||
rule_id: str # SEC001, SEC002, etc.
|
||||
rule_name: str # Human-readable name
|
||||
severity: str # HIGH, MEDIUM, LOW
|
||||
category: str # OWASP category
|
||||
file: str # File path
|
||||
line: int # Line number
|
||||
code_snippet: str # Matched code
|
||||
description: str # Issue description
|
||||
recommendation: str # How to fix
|
||||
cwe: str | None # CWE reference
|
||||
```
|
||||
196
docs/configuration.md
Normal file
196
docs/configuration.md
Normal file
@@ -0,0 +1,196 @@
|
||||
# Configuration Reference
|
||||
|
||||
All configuration is managed in `tools/ai-review/config.yml`.
|
||||
|
||||
## Provider Settings
|
||||
|
||||
```yaml
|
||||
# LLM Provider: openai | openrouter | ollama
|
||||
provider: openai
|
||||
|
||||
# Model per provider
|
||||
model:
|
||||
openai: gpt-4.1-mini
|
||||
openrouter: anthropic/claude-3.5-sonnet
|
||||
ollama: codellama:13b
|
||||
|
||||
# Generation settings
|
||||
temperature: 0 # 0 = deterministic
|
||||
max_tokens: 4096 # Max response tokens
|
||||
```
|
||||
|
||||
## Review Settings
|
||||
|
||||
```yaml
|
||||
review:
|
||||
fail_on_severity: HIGH # Fail CI on this severity
|
||||
max_diff_lines: 800 # Truncate large diffs
|
||||
inline_comments: true # Post inline PR comments
|
||||
security_scan: true # Run security scanner
|
||||
```
|
||||
|
||||
## Agent Configuration
|
||||
|
||||
### Issue Agent
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
issue:
|
||||
enabled: true
|
||||
auto_label: true # Apply labels automatically
|
||||
auto_triage: true # Run triage on new issues
|
||||
duplicate_threshold: 0.85 # Similarity threshold
|
||||
events:
|
||||
- opened
|
||||
- labeled
|
||||
```
|
||||
|
||||
### PR Agent
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
pr:
|
||||
enabled: true
|
||||
inline_comments: true # Post inline comments
|
||||
security_scan: true # Run security scanner
|
||||
events:
|
||||
- opened
|
||||
- synchronize
|
||||
```
|
||||
|
||||
### Codebase Agent
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
codebase:
|
||||
enabled: true
|
||||
schedule: "0 0 * * 0" # Cron schedule (weekly)
|
||||
```
|
||||
|
||||
### Chat Agent (Bartender)
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
chat:
|
||||
enabled: true
|
||||
name: "Bartender" # Display name for the bot
|
||||
max_iterations: 5 # Max tool calls per chat
|
||||
tools:
|
||||
- search_codebase # Search repository files
|
||||
- read_file # Read file contents
|
||||
- search_web # Web search via SearXNG
|
||||
searxng_url: "" # SearXNG instance URL (or use SEARXNG_URL env var)
|
||||
```
|
||||
|
||||
## Interaction Settings
|
||||
|
||||
### Customizing the Bot Name
|
||||
|
||||
The `mention_prefix` controls which trigger the bot responds to. You can change it to any name you prefer:
|
||||
|
||||
```yaml
|
||||
interaction:
|
||||
mention_prefix: "@bartender" # Users will type @bartender to invoke the bot
|
||||
```
|
||||
|
||||
**Important:** When changing the bot name, you must also update the workflow files:
|
||||
|
||||
1. Edit `.github/workflows/ai-comment-reply.yml` and `ai-chat.yml` (for GitHub)
|
||||
2. Edit `.gitea/workflows/ai-comment-reply.yml` and `ai-chat.yml` (for Gitea)
|
||||
3. Change the `if:` condition to match your new prefix:
|
||||
```yaml
|
||||
if: contains(github.event.comment.body, '@bartender')
|
||||
```
|
||||
|
||||
**Example bot names:**
|
||||
- `@ai-bot` - Default, generic
|
||||
- `@bartender` - Friendly, conversational
|
||||
- `@uni` - Short, quick to type
|
||||
- `@joey` - Personal assistant
|
||||
- `@codebot` - Technical focus
|
||||
|
||||
```yaml
|
||||
interaction:
|
||||
respond_to_mentions: true
|
||||
mention_prefix: "@ai-bot"
|
||||
commands:
|
||||
- explain # Explain code/issue
|
||||
- suggest # Suggest solutions
|
||||
- security # Run security check
|
||||
- summarize # Summarize content
|
||||
```
|
||||
|
||||
## Label Mappings
|
||||
|
||||
```yaml
|
||||
labels:
|
||||
priority:
|
||||
high: "priority: high"
|
||||
medium: "priority: medium"
|
||||
low: "priority: low"
|
||||
type:
|
||||
bug: "type: bug"
|
||||
feature: "type: feature"
|
||||
question: "type: question"
|
||||
docs: "type: documentation"
|
||||
status:
|
||||
ai_approved: "ai-approved"
|
||||
ai_changes_required: "ai-changes-required"
|
||||
ai_reviewed: "ai-reviewed"
|
||||
```
|
||||
|
||||
## Enterprise Settings
|
||||
|
||||
```yaml
|
||||
enterprise:
|
||||
audit_log: true
|
||||
audit_path: "/var/log/ai-review/"
|
||||
metrics_enabled: true
|
||||
rate_limit:
|
||||
requests_per_minute: 30
|
||||
max_concurrent: 4
|
||||
```
|
||||
|
||||
## Security Configuration
|
||||
|
||||
```yaml
|
||||
security:
|
||||
enabled: true
|
||||
fail_on_high: true
|
||||
rules_file: "security/security_rules.yml" # Custom rules
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
These override config file settings:
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `AI_REVIEW_TOKEN` | Gitea/GitHub API token |
|
||||
| `AI_REVIEW_API_URL` | API base URL (`https://api.github.com` or Gitea URL) |
|
||||
| `AI_REVIEW_REPO` | Target repository (owner/repo) |
|
||||
| `OPENAI_API_KEY` | OpenAI API key |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API key |
|
||||
| `OLLAMA_HOST` | Ollama server URL |
|
||||
| `SEARXNG_URL` | SearXNG instance URL for web search |
|
||||
| `AI_AUDIT_PATH` | Audit log directory |
|
||||
|
||||
## Per-Repository Overrides
|
||||
|
||||
Create `.ai-review.yml` in repository root:
|
||||
|
||||
```yaml
|
||||
# Override global config for this repo
|
||||
agents:
|
||||
pr:
|
||||
security_scan: false # Disable security scan
|
||||
issue:
|
||||
auto_label: false # Disable auto-labeling
|
||||
|
||||
# Custom labels
|
||||
labels:
|
||||
priority:
|
||||
high: "P0"
|
||||
medium: "P1"
|
||||
low: "P2"
|
||||
```
|
||||
223
docs/enterprise.md
Normal file
223
docs/enterprise.md
Normal file
@@ -0,0 +1,223 @@
|
||||
# Enterprise Features
|
||||
|
||||
Advanced features for enterprise deployments.
|
||||
|
||||
## Audit Logging
|
||||
|
||||
All AI actions are logged for compliance and debugging.
|
||||
|
||||
### Configuration
|
||||
|
||||
```yaml
|
||||
enterprise:
|
||||
audit_log: true
|
||||
audit_path: "/var/log/ai-review/"
|
||||
```
|
||||
|
||||
### Log Format
|
||||
|
||||
Logs are stored as JSONL (JSON Lines) with daily rotation:
|
||||
|
||||
```
|
||||
/var/log/ai-review/audit-2024-01-15.jsonl
|
||||
```
|
||||
|
||||
Each line is a JSON object:
|
||||
|
||||
```json
|
||||
{
|
||||
"timestamp": "2024-01-15T10:30:45.123Z",
|
||||
"action": "review_pr",
|
||||
"agent": "PRAgent",
|
||||
"repository": "org/repo",
|
||||
"success": true,
|
||||
"details": {
|
||||
"pr_number": 123,
|
||||
"severity": "MEDIUM",
|
||||
"issues_found": 3
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Actions Logged
|
||||
|
||||
| Action | Description |
|
||||
|--------|-------------|
|
||||
| `review_pr` | PR review completed |
|
||||
| `triage_issue` | Issue triaged |
|
||||
| `llm_call` | LLM API call made |
|
||||
| `comment_posted` | Comment created/updated |
|
||||
| `labels_applied` | Labels added |
|
||||
| `security_scan` | Security scan completed |
|
||||
|
||||
### Querying Logs
|
||||
|
||||
```python
|
||||
from enterprise import get_audit_logger
|
||||
|
||||
logger = get_audit_logger()
|
||||
|
||||
# Get all logs for a date range
|
||||
logs = logger.get_logs(
|
||||
start_date="2024-01-01",
|
||||
end_date="2024-01-31",
|
||||
action="review_pr",
|
||||
repository="org/repo",
|
||||
)
|
||||
|
||||
# Generate summary report
|
||||
report = logger.generate_report(
|
||||
start_date="2024-01-01",
|
||||
end_date="2024-01-31",
|
||||
)
|
||||
print(f"Total events: {report['total_events']}")
|
||||
print(f"Success rate: {report['success_rate']:.1%}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Metrics & Observability
|
||||
|
||||
Track performance and usage metrics.
|
||||
|
||||
### Configuration
|
||||
|
||||
```yaml
|
||||
enterprise:
|
||||
metrics_enabled: true
|
||||
```
|
||||
|
||||
### Available Metrics
|
||||
|
||||
**Counters:**
|
||||
- `ai_review_requests_total` - Total requests processed
|
||||
- `ai_review_requests_success` - Successful requests
|
||||
- `ai_review_requests_failed` - Failed requests
|
||||
- `ai_review_llm_calls_total` - Total LLM API calls
|
||||
- `ai_review_llm_tokens_total` - Total tokens consumed
|
||||
- `ai_review_comments_posted` - Comments posted
|
||||
- `ai_review_security_findings` - Security issues found
|
||||
|
||||
**Gauges:**
|
||||
- `ai_review_active_requests` - Currently processing
|
||||
|
||||
**Histograms:**
|
||||
- `ai_review_request_duration_seconds` - Request latency
|
||||
- `ai_review_llm_duration_seconds` - LLM call latency
|
||||
|
||||
### Getting Metrics
|
||||
|
||||
```python
|
||||
from enterprise import get_metrics
|
||||
|
||||
metrics = get_metrics()
|
||||
|
||||
# Get summary
|
||||
summary = metrics.get_summary()
|
||||
print(f"Total requests: {summary['requests']['total']}")
|
||||
print(f"Success rate: {summary['requests']['success_rate']:.1%}")
|
||||
print(f"Avg latency: {summary['latency']['avg_ms']:.0f}ms")
|
||||
print(f"P95 latency: {summary['latency']['p95_ms']:.0f}ms")
|
||||
print(f"LLM tokens used: {summary['llm']['tokens']}")
|
||||
|
||||
# Export Prometheus format
|
||||
prometheus_output = metrics.export_prometheus()
|
||||
```
|
||||
|
||||
### Prometheus Integration
|
||||
|
||||
Expose metrics endpoint:
|
||||
|
||||
```python
|
||||
from flask import Flask
|
||||
from enterprise import get_metrics
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/metrics")
|
||||
def metrics():
|
||||
return get_metrics().export_prometheus()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rate Limiting
|
||||
|
||||
Prevent API overload and manage costs.
|
||||
|
||||
### Configuration
|
||||
|
||||
```yaml
|
||||
enterprise:
|
||||
rate_limit:
|
||||
requests_per_minute: 30
|
||||
max_concurrent: 4
|
||||
```
|
||||
|
||||
### Built-in Rate Limiting
|
||||
|
||||
The `BaseAgent` class includes automatic rate limiting:
|
||||
|
||||
```python
|
||||
class BaseAgent:
|
||||
def __init__(self):
|
||||
self._last_request_time = 0.0
self._min_request_interval = 1.0 # seconds
|
||||
|
||||
def _rate_limit(self):
|
||||
elapsed = time.time() - self._last_request_time
|
||||
if elapsed < self._min_request_interval:
|
||||
time.sleep(self._min_request_interval - elapsed)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Queue Management
|
||||
|
||||
The dispatcher handles concurrent execution:
|
||||
|
||||
```python
|
||||
dispatcher = Dispatcher(max_workers=4)
|
||||
```
|
||||
|
||||
For high-volume environments, use async dispatch:
|
||||
|
||||
```python
|
||||
future = dispatcher.dispatch_async(event_type, event_data, owner, repo)
|
||||
# Continue with other work
|
||||
result = future.result() # Block when needed
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### Token Permissions
|
||||
|
||||
Minimum required permissions for `AI_REVIEW_TOKEN`:
|
||||
- `repo:read` - Read repository contents
|
||||
- `repo:write` - Create branches (if needed)
|
||||
- `issue:read` - Read issues and PRs
|
||||
- `issue:write` - Create comments, labels
|
||||
|
||||
### Network Isolation
|
||||
|
||||
For air-gapped environments, use Ollama:
|
||||
|
||||
```yaml
|
||||
provider: ollama
|
||||
|
||||
# Internal network address
|
||||
# Set via environment: OLLAMA_HOST=http://ollama.internal:11434
|
||||
```
|
||||
|
||||
### Data Privacy
|
||||
|
||||
By default:
|
||||
- Code is sent to LLM provider for analysis
|
||||
- Review comments are stored in Gitea
|
||||
- Audit logs are stored locally
|
||||
|
||||
For sensitive codebases:
|
||||
1. Use self-hosted Ollama
|
||||
2. Disable external LLM providers
|
||||
3. Review audit log retention policies
|
||||
82
docs/future_roadmap.md
Normal file
82
docs/future_roadmap.md
Normal file
@@ -0,0 +1,82 @@
|
||||
# Future Features Roadmap
|
||||
|
||||
This document outlines the strategic plan for evolving the AI Code Review system. These features are proposed for future implementation to enhance security coverage, context awareness, and user interaction.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Advanced Security Scanning
|
||||
|
||||
Expand the current 17-rule regex scanner with dedicated industry-standard tools for **Static Application Security Testing (SAST)** and **Software Composition Analysis (SCA)**.
|
||||
|
||||
### Proposed Integrations
|
||||
|
||||
| Tool | Type | Purpose | Implementation Plan |
|
||||
|------|------|---------|---------------------|
|
||||
| **Bandit** | SAST | Analyze Python code for common vulnerability patterns (e.g., `exec`, weak crypto). | Run `bandit -r . -f json` and parse results into the review report. |
|
||||
| **Semgrep** | SAST | Polyglot scanning with custom rule support. | Integrate `semgrep --config=p/security-audit` for broader language support (JS, Go, Java). |
|
||||
| **Safety** | SCA | Check installed dependencies against known vulnerability databases. | Run `safety check --json` during CI to flag vulnerable packages in `requirements.txt`. |
|
||||
| **Trivy** | SCA/Container | Scan container images (Dockerfiles) and filesystem. | Add a workflow step to run Trivy for container-based projects. |
|
||||
|
||||
**Impact:** Significantly reduces false negatives and covers dependency-chain risks (supply-chain security).
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: "Chat with Codebase" (RAG)
|
||||
|
||||
Move beyond single-file context by implementing **Retrieval-Augmented Generation (RAG)**. This allows the AI to answer questions like *"Where is authentication handled?"* by searching the entire codebase semantically.
|
||||
|
||||
### Architecture
|
||||
|
||||
1. **Vector Database:**
|
||||
* **ChromaDB** or **Qdrant**: Lightweight, open-source choices for storing code embeddings.
|
||||
2. **Embeddings Model:**
|
||||
* **OpenAI `text-embedding-3-small`** or **FastEmbed**: To convert code chunks (functions/classes) into vectors.
|
||||
3. **Workflow:**
|
||||
* **Index:** Run a nightly job to parse the codebase -> chunk it -> embed it -> store in Vector DB.
|
||||
* **Query:** When `@ai-bot` receives a question, convert the question to a vector -> search Vector DB -> inject relevant snippets into the LLM prompt.
|
||||
|
||||
**Impact:** Enables high-accuracy architectural advice and deep-dive explanations spanning multiple files.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Interactive Code Repair
|
||||
|
||||
Transform the bot from a passive reviewer into an active collaborator.
|
||||
|
||||
### Features
|
||||
|
||||
* **`@ai-bot apply <suggestion_id>`**:
|
||||
* The bot generates a secure `git patch` for a specific recommendation.
|
||||
* The system commits the patch directly to the PR branch.
|
||||
* **Refactoring Assistance**:
|
||||
* Command: `@ai-bot refactor this function to use dependency injection`.
|
||||
* Bot proposes the changed code block and offers to commit it.
|
||||
|
||||
**Risk Mitigation:**
|
||||
* Require human approval (comment reply) before any commit is pushed.
|
||||
* Run tests automatically after bot commits.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: Enterprise Dashboard
|
||||
|
||||
Provide a high-level view of engineering health across the organization.
|
||||
|
||||
### Metrics to Visualize
|
||||
|
||||
* **Security Health:** Trend of High/Critical issues over time.
|
||||
* **Code Quality:** Technical debt accumulation vs. reduction rate.
|
||||
* **Review Velocity:** Average time to AI review vs. Human review.
|
||||
* **Bot Usage:** Most frequent commands and value-add interactions.
|
||||
|
||||
### Tech Stack
|
||||
* **Prometheus** (already implemented) + **Grafana**: For time-series tracking.
|
||||
* **Streamlit** / **Next.js**: For a custom management console to configure rules and view logs.
|
||||
|
||||
---
|
||||
|
||||
## Strategic Recommendations
|
||||
|
||||
1. **Immediate Win:** Implement **Bandit** integration. It is low-effort (Python library) and high-value (detects real vulnerabilities).
|
||||
2. **High Impact:** **Safety** dependency scanning. Vulnerable dependencies are the #1 attack vector for modern apps.
|
||||
3. **Long Term:** Work on **Vector DB** integration only after the core review logic is flawless, as it introduces significant infrastructure complexity.
|
||||
142
docs/getting-started.md
Normal file
142
docs/getting-started.md
Normal file
@@ -0,0 +1,142 @@
|
||||
# Getting Started
|
||||
|
||||
This guide will help you set up the AI Code Review system for your Gitea repositories.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Gitea instance (self-hosted or managed)
|
||||
- Python 3.11+
|
||||
- LLM API access (OpenAI, OpenRouter, or Ollama)
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Create a Bot Account
|
||||
|
||||
1. Create a new Gitea user account for the bot (e.g., `ai-reviewer`)
|
||||
2. Generate an access token with these permissions:
|
||||
- `repo` - Full repository access
|
||||
- `issue` - Issue read/write access
|
||||
3. Save the token securely
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Configure Organization Secrets
|
||||
|
||||
In your Gitea organization or repository settings, add these secrets:
|
||||
|
||||
| Secret | Description |
|
||||
|--------|-------------|
|
||||
| `AI_REVIEW_TOKEN` | Bot's Gitea access token |
|
||||
| `OPENAI_API_KEY` | OpenAI API key (if using OpenAI) |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter key (if using OpenRouter) |
|
||||
| `OLLAMA_HOST` | Ollama URL (if using Ollama, e.g., `http://localhost:11434`) |
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Add Workflows to Your Repository
|
||||
|
||||
Copy the workflow files from this repository to your target repo:
|
||||
|
||||
```bash
|
||||
# Create workflows directory
|
||||
mkdir -p .gitea/workflows
|
||||
|
||||
# Copy workflow files
|
||||
# Option 1: Copy manually from this repo's .gitea/workflows/
|
||||
# Option 2: Reference this repo in your workflows (see README)
|
||||
```
|
||||
|
||||
### Workflow Files:
|
||||
|
||||
| File | Trigger | Purpose |
|
||||
|------|---------|---------|
|
||||
| `enterprise-ai-review.yml` | PR opened/updated | Run AI code review |
|
||||
| `ai-issue-review.yml` | Issue opened, @ai-bot | Triage issues & respond to commands |
|
||||
| `ai-codebase-review.yml` | Weekly/manual | Analyze codebase health |
|
||||
|
||||
---
|
||||
|
||||
## Step 4: Create Labels
|
||||
|
||||
Create these labels in your repository for auto-labeling:
|
||||
|
||||
**Priority Labels:**
|
||||
- `priority: high` (red)
|
||||
- `priority: medium` (yellow)
|
||||
- `priority: low` (green)
|
||||
|
||||
**Type Labels:**
|
||||
- `type: bug`
|
||||
- `type: feature`
|
||||
- `type: question`
|
||||
- `type: documentation`
|
||||
|
||||
**AI Status Labels:**
|
||||
- `ai-approved`
|
||||
- `ai-changes-required`
|
||||
- `ai-reviewed`
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Test the Setup
|
||||
|
||||
### Test PR Review:
|
||||
1. Create a new pull request
|
||||
2. Wait for the AI review workflow to run
|
||||
3. Check for the AI review comment
|
||||
|
||||
### Test Issue Triage:
|
||||
1. Create a new issue
|
||||
2. The AI should automatically triage and comment
|
||||
|
||||
### Test @ai-bot Commands:
|
||||
1. On any issue, comment: `@ai-bot summarize`
|
||||
2. The AI should respond with a summary
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues:
|
||||
|
||||
**"Missing token" error:**
|
||||
- Verify `AI_REVIEW_TOKEN` is set in secrets
|
||||
- Ensure the token has correct permissions
|
||||
|
||||
**"LLM call failed" error:**
|
||||
- Verify your LLM API key is set
|
||||
- Check the `provider` setting in `config.yml`
|
||||
|
||||
**Workflow not triggering:**
|
||||
- Verify workflow files are in `.gitea/workflows/`
|
||||
- Check that Actions are enabled for your repository
|
||||
|
||||
See [Troubleshooting Guide](troubleshooting.md) for more.
|
||||
|
||||
---
|
||||
|
||||
## Helper: CLI Usage
|
||||
|
||||
If you need to run the agents manually (e.g. for debugging or local testing), you can use the CLI:
|
||||
|
||||
```bash
|
||||
# Review a pull request
|
||||
python main.py pr owner/repo 123
|
||||
|
||||
# Triage a new issue
|
||||
python main.py issue owner/repo 456
|
||||
|
||||
# Handle @ai-bot command in comment
|
||||
python main.py comment owner/repo 456 "@ai-bot summarize"
|
||||
|
||||
# Analyze codebase
|
||||
python main.py codebase owner/repo
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Configuration Reference](configuration.md) - Customize behavior
|
||||
- [Agents Documentation](agents.md) - Learn about each agent
|
||||
- [Security Scanning](security.md) - Understand security rules
|
||||
163
docs/security.md
Normal file
163
docs/security.md
Normal file
@@ -0,0 +1,163 @@
|
||||
# Security Scanning
|
||||
|
||||
The security scanner detects vulnerabilities aligned with OWASP Top 10.
|
||||
|
||||
## Supported Rules
|
||||
|
||||
### A01:2021 – Broken Access Control
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC001 | HIGH | Hardcoded credentials (passwords, API keys) |
|
||||
| SEC002 | HIGH | Exposed private keys |
|
||||
|
||||
### A02:2021 – Cryptographic Failures
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC003 | MEDIUM | Weak hash algorithms (MD5, SHA1) |
|
||||
| SEC004 | MEDIUM | Non-cryptographic random for security |
|
||||
|
||||
### A03:2021 – Injection
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC005 | HIGH | SQL injection via string formatting |
|
||||
| SEC006 | HIGH | Command injection in subprocess |
|
||||
| SEC007 | HIGH | eval() usage |
|
||||
| SEC008 | MEDIUM | XSS via innerHTML |
|
||||
|
||||
### A04:2021 – Insecure Design
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC009 | MEDIUM | Debug mode enabled |
|
||||
|
||||
### A05:2021 – Security Misconfiguration
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC010 | MEDIUM | CORS wildcard (*) |
|
||||
| SEC011 | HIGH | SSL verification disabled |
|
||||
|
||||
### A07:2021 – Authentication Failures
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC012 | HIGH | Hardcoded JWT secrets |
|
||||
|
||||
### A08:2021 – Integrity Failures
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC013 | MEDIUM | Pickle deserialization |
|
||||
|
||||
### A09:2021 – Logging Failures
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC014 | MEDIUM | Logging sensitive data |
|
||||
|
||||
### A10:2021 – Server-Side Request Forgery
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC015 | MEDIUM | SSRF via dynamic URLs |
|
||||
|
||||
### Additional Rules
|
||||
|
||||
| Rule | Severity | Description |
|
||||
|------|----------|-------------|
|
||||
| SEC016 | LOW | Hardcoded IP addresses |
|
||||
| SEC017 | MEDIUM | Security-related TODO/FIXME |
|
||||
|
||||
## Usage
|
||||
|
||||
### In PR Reviews
|
||||
|
||||
Security scanning runs automatically during PR review:
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
pr:
|
||||
security_scan: true
|
||||
```
|
||||
|
||||
### Standalone
|
||||
|
||||
```python
|
||||
from security import SecurityScanner
|
||||
|
||||
scanner = SecurityScanner()
|
||||
|
||||
# Scan file content
|
||||
for finding in scanner.scan_content(code, "file.py"):
|
||||
print(f"[{finding.severity}] {finding.rule_name}")
|
||||
print(f" Line {finding.line}: {finding.code_snippet}")
|
||||
print(f" {finding.description}")
|
||||
|
||||
# Scan git diff
|
||||
for finding in scanner.scan_diff(diff):
|
||||
print(f"{finding.file}:{finding.line} - {finding.rule_name}")
|
||||
```
|
||||
|
||||
### Get Summary
|
||||
|
||||
```python
|
||||
findings = list(scanner.scan_content(code, "file.py"))
|
||||
summary = scanner.get_summary(findings)
|
||||
|
||||
print(f"Total: {summary['total']}")
|
||||
print(f"HIGH: {summary['by_severity']['HIGH']}")
|
||||
print(f"Categories: {summary['by_category']}")
|
||||
```
|
||||
|
||||
## Custom Rules
|
||||
|
||||
Create `security/security_rules.yml`:
|
||||
|
||||
```yaml
|
||||
rules:
|
||||
- id: "CUSTOM001"
|
||||
name: "Custom Pattern"
|
||||
pattern: "dangerous_function\\s*\\("
|
||||
severity: "HIGH"
|
||||
category: "Custom"
|
||||
cwe: "CWE-xxx"
|
||||
description: "Usage of dangerous function detected"
|
||||
recommendation: "Use safe_function() instead"
|
||||
```
|
||||
|
||||
Load custom rules:
|
||||
|
||||
```python
|
||||
scanner = SecurityScanner(rules_file="security/custom_rules.yml")
|
||||
```
|
||||
|
||||
## CI Integration
|
||||
|
||||
Fail CI on HIGH severity findings:
|
||||
|
||||
```yaml
|
||||
security:
|
||||
fail_on_high: true
|
||||
```
|
||||
|
||||
Or in code:
|
||||
|
||||
```python
|
||||
findings = list(scanner.scan_diff(diff))
|
||||
high_count = sum(1 for f in findings if f.severity == "HIGH")
|
||||
|
||||
if high_count > 0:
|
||||
sys.exit(1)
|
||||
```
|
||||
|
||||
## CWE References
|
||||
|
||||
All rules include CWE (Common Weakness Enumeration) references:
|
||||
|
||||
- [CWE-78](https://cwe.mitre.org/data/definitions/78.html): OS Command Injection
|
||||
- [CWE-79](https://cwe.mitre.org/data/definitions/79.html): XSS
|
||||
- [CWE-89](https://cwe.mitre.org/data/definitions/89.html): SQL Injection
|
||||
- [CWE-798](https://cwe.mitre.org/data/definitions/798.html): Hardcoded Credentials
|
||||
263
docs/troubleshooting.md
Normal file
263
docs/troubleshooting.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# Troubleshooting
|
||||
|
||||
Common issues and solutions for the AI Code Review system.
|
||||
|
||||
## Installation Issues
|
||||
|
||||
### `ModuleNotFoundError: No module named 'requests'`
|
||||
|
||||
Install dependencies:
|
||||
|
||||
```bash
|
||||
pip install requests pyyaml
|
||||
```
|
||||
|
||||
### `ImportError: cannot import name 'BaseAgent'`
|
||||
|
||||
Ensure you're running from the correct directory:
|
||||
|
||||
```bash
|
||||
cd tools/ai-review
|
||||
python main.py pr owner/repo 123
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Authentication Issues
|
||||
|
||||
### `repository not found`
|
||||
|
||||
**Causes:**
|
||||
- Bot token lacks access to the repository
|
||||
- Repository path is incorrect
|
||||
|
||||
**Solutions:**
|
||||
1. Verify token has `repo` permissions
|
||||
2. Check repository path format: `owner/repo`
|
||||
3. Ensure token can access both the target repo and the AI tooling repo
|
||||
|
||||
### `401 Unauthorized`
|
||||
|
||||
**Causes:**
|
||||
- Invalid or expired token
|
||||
- Missing token in environment
|
||||
|
||||
**Solutions:**
|
||||
1. Regenerate the bot token
|
||||
2. Verify `AI_REVIEW_TOKEN` is set correctly
|
||||
3. Check organization secret scope is "All Repositories"
|
||||
|
||||
### `403 Forbidden`
|
||||
|
||||
**Causes:**
|
||||
- Token lacks write permissions
|
||||
- Repository is private and token doesn't have access
|
||||
|
||||
**Solutions:**
|
||||
1. Ensure token has `issue:write` permission
|
||||
2. Add bot account as collaborator to private repos
|
||||
|
||||
---
|
||||
|
||||
## LLM Issues
|
||||
|
||||
### `OPENAI_API_KEY not set`
|
||||
|
||||
Set the environment variable:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="sk-..."
|
||||
```
|
||||
|
||||
Or in workflow:
|
||||
|
||||
```yaml
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
```
|
||||
|
||||
### `Rate limit exceeded`
|
||||
|
||||
**Causes:**
|
||||
- Too many requests to LLM provider
|
||||
- API quota exhausted
|
||||
|
||||
**Solutions:**
|
||||
1. Increase rate limit interval in config
|
||||
2. Switch to a different provider temporarily
|
||||
3. Check your API plan limits
|
||||
|
||||
### `JSON decode error` from LLM
|
||||
|
||||
**Causes:**
|
||||
- LLM returned non-JSON response
|
||||
- Response was truncated
|
||||
|
||||
**Solutions:**
|
||||
1. Increase `max_tokens` in config
|
||||
2. Check LLM response in logs
|
||||
3. Improve prompt to enforce JSON output
|
||||
|
||||
---
|
||||
|
||||
## Workflow Issues
|
||||
|
||||
### Workflow doesn't trigger
|
||||
|
||||
**Causes:**
|
||||
- Workflow file not in correct location
|
||||
- Event type not configured
|
||||
|
||||
**Solutions:**
|
||||
1. Ensure workflow is in `.gitea/workflows/`
|
||||
2. Check event types match your needs:
|
||||
```yaml
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize]
|
||||
```
|
||||
3. Verify Gitea Actions is enabled for the repository
|
||||
|
||||
### `review.py not found`
|
||||
|
||||
**Causes:**
|
||||
- Central repo checkout failed
|
||||
- Path is incorrect
|
||||
|
||||
**Solutions:**
|
||||
1. Verify the checkout step has correct repository and path
|
||||
2. Check bot token has access to the AI tooling repo
|
||||
3. Ensure path matches: `.ai-review/tools/ai-review/main.py`
|
||||
|
||||
### PR comments not appearing
|
||||
|
||||
**Causes:**
|
||||
- Token lacks issue write permission
|
||||
- API URL is incorrect
|
||||
|
||||
**Solutions:**
|
||||
1. Check `AI_REVIEW_API_URL` is correct
|
||||
2. Verify token has `issue:write` permission
|
||||
3. Check workflow logs for API errors
|
||||
|
||||
### @ai-bot edits the issue instead of replying
|
||||
|
||||
**Causes:**
|
||||
- Workflow is using the wrong CLI command for comments
|
||||
- `event_type` is incorrectly set to "issues"
|
||||
|
||||
**Solutions:**
|
||||
1. Ensure your workflow uses the `comment` command for mentions:
|
||||
```yaml
|
||||
python main.py comment owner/repo 123 "@ai-bot ..."
|
||||
```
|
||||
2. Verify you have separate jobs for `issues` vs `issue_comment` events (see [Workflows](workflows.md))
|
||||
|
||||
---
|
||||
|
||||
## Label Issues
|
||||
|
||||
### Labels not being applied
|
||||
|
||||
**Causes:**
|
||||
- Labels don't exist in repository
|
||||
- Label names don't match config
|
||||
|
||||
**Solutions:**
|
||||
1. Create labels matching your config:
|
||||
- `priority: high`
|
||||
- `type: bug`
|
||||
- `ai-approved`
|
||||
2. Or update config to match existing labels:
|
||||
```yaml
|
||||
labels:
|
||||
priority:
|
||||
high: "P0" # Your label name
|
||||
```
|
||||
|
||||
### `label not found` error
|
||||
|
||||
The agent gracefully handles missing labels. Create labels manually or disable auto-labeling:
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
issue:
|
||||
auto_label: false
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Issues
|
||||
|
||||
### Reviews are slow
|
||||
|
||||
**Causes:**
|
||||
- Large diffs taking long to process
|
||||
- LLM response time
|
||||
|
||||
**Solutions:**
|
||||
1. Reduce max diff lines:
|
||||
```yaml
|
||||
review:
|
||||
max_diff_lines: 500
|
||||
```
|
||||
2. Use a faster model:
|
||||
```yaml
|
||||
model:
|
||||
openai: gpt-4.1-mini # Faster than gpt-4
|
||||
```
|
||||
3. Consider Ollama for local, faster inference
|
||||
|
||||
### Timeout errors
|
||||
|
||||
Increase timeout in API calls or use async processing:
|
||||
|
||||
```python
|
||||
client = GiteaClient(timeout=60) # Increase from default 30
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
### Enable verbose logging
|
||||
|
||||
```bash
|
||||
python main.py -v pr owner/repo 123
|
||||
```
|
||||
|
||||
### Check workflow logs
|
||||
|
||||
1. Go to repository -> Actions
|
||||
2. Click on the failed workflow run
|
||||
3. Expand job steps to see output
|
||||
|
||||
### Test locally
|
||||
|
||||
```bash
|
||||
# Set environment variables
|
||||
export AI_REVIEW_TOKEN="your_token"
|
||||
export AI_REVIEW_API_URL="https://your-gitea/api/v1"
|
||||
export OPENAI_API_KEY="sk-..."
|
||||
|
||||
# Run locally
|
||||
cd tools/ai-review
|
||||
python main.py pr owner/repo 123
|
||||
```
|
||||
|
||||
### Validate Python syntax
|
||||
|
||||
```bash
|
||||
python -m py_compile main.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Getting Help
|
||||
|
||||
1. Check the [documentation](README.md)
|
||||
2. Search existing issues in the repository
|
||||
3. Create a new issue with:
|
||||
- Steps to reproduce
|
||||
- Error messages
|
||||
- Environment details (Gitea version, Python version)
|
||||
389
docs/workflows.md
Normal file
389
docs/workflows.md
Normal file
@@ -0,0 +1,389 @@
|
||||
# Workflows
|
||||
|
||||
This document provides ready-to-use workflow files for integrating AI code review into your repositories. Workflows are provided for both **GitHub Actions** and **Gitea Actions**.
|
||||
|
||||
---
|
||||
|
||||
## Platform Comparison
|
||||
|
||||
| Feature | GitHub | Gitea |
|
||||
|---------|--------|-------|
|
||||
| Context variable | `github.*` | `gitea.*` |
|
||||
| Default token | `GITHUB_TOKEN` | `AI_REVIEW_TOKEN` (custom) |
|
||||
| API URL | `https://api.github.com` | Your Gitea instance URL |
|
||||
| Tools location | Same repo (`tools/ai-review`) | Checkout from central repo |
|
||||
|
||||
---
|
||||
|
||||
## GitHub Workflows
|
||||
|
||||
### PR Review Workflow
|
||||
|
||||
```yaml
|
||||
# .github/workflows/ai-review.yml
|
||||
name: AI Code Review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize]
|
||||
|
||||
jobs:
|
||||
ai-review:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Review
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ github.repository }}
|
||||
AI_REVIEW_API_URL: https://api.github.com
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd tools/ai-review
|
||||
python main.py pr ${{ github.repository }} ${{ github.event.pull_request.number }}
|
||||
```
|
||||
|
||||
### Issue Triage Workflow
|
||||
|
||||
```yaml
|
||||
# .github/workflows/ai-issue-triage.yml
|
||||
name: AI Issue Triage
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened, labeled]
|
||||
|
||||
jobs:
|
||||
ai-triage:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Issue Triage
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ github.repository }}
|
||||
AI_REVIEW_API_URL: https://api.github.com
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd tools/ai-review
|
||||
python main.py issue ${{ github.repository }} ${{ github.event.issue.number }} \
|
||||
--title "${{ github.event.issue.title }}"
|
||||
```
|
||||
|
||||
### Comment Reply Workflow (includes Bartender Chat)
|
||||
|
||||
```yaml
|
||||
# .github/workflows/ai-comment-reply.yml
|
||||
name: AI Comment Reply
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
ai-reply:
|
||||
runs-on: ubuntu-latest
|
||||
if: contains(github.event.comment.body, '@ai-bot')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Comment Response
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ github.repository }}
|
||||
AI_REVIEW_API_URL: https://api.github.com
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
|
||||
run: |
|
||||
cd tools/ai-review
|
||||
python main.py comment ${{ github.repository }} ${{ github.event.issue.number }} \
|
||||
"${{ github.event.comment.body }}"
|
||||
```
|
||||
|
||||
### Codebase Analysis Workflow
|
||||
|
||||
```yaml
|
||||
# .github/workflows/ai-codebase-review.yml
|
||||
name: AI Codebase Analysis
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 0 * * 0" # Weekly on Sunday
|
||||
workflow_dispatch: # Manual trigger
|
||||
|
||||
jobs:
|
||||
ai-codebase:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run Codebase Analysis
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ github.repository }}
|
||||
AI_REVIEW_API_URL: https://api.github.com
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd tools/ai-review
|
||||
python main.py codebase ${{ github.repository }}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gitea Workflows
|
||||
|
||||
### PR Review Workflow
|
||||
|
||||
```yaml
|
||||
# .gitea/workflows/enterprise-ai-review.yml
|
||||
name: AI Code Review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize]
|
||||
|
||||
jobs:
|
||||
ai-review:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: YourOrg/OpenRabbit
|
||||
path: .ai-review
|
||||
token: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Review
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ gitea.repository }}
|
||||
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd .ai-review/tools/ai-review
|
||||
python main.py pr ${{ gitea.repository }} ${{ gitea.event.pull_request.number }}
|
||||
```
|
||||
|
||||
### Issue Triage Workflow
|
||||
|
||||
```yaml
|
||||
# .gitea/workflows/ai-issue-triage.yml
|
||||
name: AI Issue Triage
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened, labeled]
|
||||
|
||||
jobs:
|
||||
ai-triage:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: YourOrg/OpenRabbit
|
||||
path: .ai-review
|
||||
token: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Issue Triage
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ gitea.repository }}
|
||||
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd .ai-review/tools/ai-review
|
||||
python main.py issue ${{ gitea.repository }} ${{ gitea.event.issue.number }} \
|
||||
--title "${{ gitea.event.issue.title }}"
|
||||
```
|
||||
|
||||
### Comment Reply Workflow (includes Bartender Chat)
|
||||
|
||||
```yaml
|
||||
# .gitea/workflows/ai-comment-reply.yml
|
||||
name: AI Comment Reply
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
ai-reply:
|
||||
runs-on: ubuntu-latest
|
||||
if: contains(github.event.comment.body, '@ai-bot')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: YourOrg/OpenRabbit
|
||||
path: .ai-review
|
||||
token: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run AI Comment Response
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ gitea.repository }}
|
||||
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
|
||||
run: |
|
||||
cd .ai-review/tools/ai-review
|
||||
python main.py comment ${{ gitea.repository }} ${{ gitea.event.issue.number }} \
|
||||
"${{ gitea.event.comment.body }}"
|
||||
```
|
||||
|
||||
### Codebase Analysis Workflow
|
||||
|
||||
```yaml
|
||||
# .gitea/workflows/ai-codebase-review.yml
|
||||
name: AI Codebase Analysis
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 0 * * 0" # Weekly on Sunday
|
||||
workflow_dispatch: # Manual trigger
|
||||
|
||||
jobs:
|
||||
ai-codebase:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: YourOrg/OpenRabbit
|
||||
path: .ai-review
|
||||
token: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- run: pip install requests pyyaml
|
||||
|
||||
- name: Run Codebase Analysis
|
||||
env:
|
||||
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
|
||||
AI_REVIEW_REPO: ${{ gitea.repository }}
|
||||
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
cd .ai-review/tools/ai-review
|
||||
python main.py codebase ${{ gitea.repository }}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Required Secrets
|
||||
|
||||
### GitHub
|
||||
|
||||
| Secret | Required | Description |
|
||||
|--------|----------|-------------|
|
||||
| `GITHUB_TOKEN` | Auto | Built-in token (automatic) |
|
||||
| `OPENAI_API_KEY` | Choose one | OpenAI API key |
|
||||
| `OPENROUTER_API_KEY` | Choose one | OpenRouter API key |
|
||||
| `OLLAMA_HOST` | Choose one | Ollama server URL |
|
||||
| `SEARXNG_URL` | Optional | SearXNG instance for web search |
|
||||
|
||||
### Gitea
|
||||
|
||||
| Secret | Required | Description |
|
||||
|--------|----------|-------------|
|
||||
| `AI_REVIEW_TOKEN` | Yes | Gitea bot access token |
|
||||
| `OPENAI_API_KEY` | Choose one | OpenAI API key |
|
||||
| `OPENROUTER_API_KEY` | Choose one | OpenRouter API key |
|
||||
| `OLLAMA_HOST` | Choose one | Ollama server URL |
|
||||
| `SEARXNG_URL` | Optional | SearXNG instance for web search |
|
||||
|
||||
---
|
||||
|
||||
## Customization
|
||||
|
||||
### For GitHub
|
||||
|
||||
The tools are included in the same repository under `tools/ai-review`, so no additional checkout is needed.
|
||||
|
||||
### For Gitea
|
||||
|
||||
Replace the repository reference with your OpenRabbit fork:
|
||||
|
||||
```yaml
|
||||
repository: YourOrg/OpenRabbit
|
||||
```
|
||||
|
||||
Replace the API URL with your Gitea instance:
|
||||
|
||||
```yaml
|
||||
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Chat/Bartender Workflow
|
||||
|
||||
Both platforms support the Bartender chat agent through the comment reply workflow. When `@ai-bot` is mentioned with a question (not a specific command like `summarize`), the Chat Agent handles it with tool calling capabilities.
|
||||
|
||||
To enable web search, set the `SEARXNG_URL` secret to your SearXNG instance URL.
|
||||
|
||||
**Example usage:**
|
||||
```
|
||||
@ai-bot How do I configure rate limiting?
|
||||
@ai-bot Find all authentication-related files
|
||||
@ai-bot What does the dispatcher module do?
|
||||
```
|
||||
4
pytest.ini
Normal file
4
pytest.ini
Normal file
@@ -0,0 +1,4 @@
|
||||
[pytest]
|
||||
testpaths = tests
|
||||
pythonpath = tools/ai-review
|
||||
addopts = -v --tb=short
|
||||
257
tests/test_ai_review.py
Normal file
257
tests/test_ai_review.py
Normal file
@@ -0,0 +1,257 @@
|
||||
"""Test Suite for AI Code Review Workflow
|
||||
|
||||
Tests for verifying prompt formatting, agent logic, and core functionality.
|
||||
Run with: pytest tests/ -v
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add the tools directory to path
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "ai-review"))
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestPromptFormatting:
|
||||
"""Test that all prompts can be formatted without errors."""
|
||||
|
||||
def get_prompt_path(self, name: str) -> str:
|
||||
"""Get the full path to a prompt file."""
|
||||
return os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"..", "tools", "ai-review", "prompts", f"{name}.md"
|
||||
)
|
||||
|
||||
def load_prompt(self, name: str) -> str:
|
||||
"""Load a prompt file."""
|
||||
path = self.get_prompt_path(name)
|
||||
with open(path) as f:
|
||||
return f.read()
|
||||
|
||||
def test_issue_triage_prompt_formatting(self):
|
||||
"""Test that issue_triage.md can be formatted with placeholders."""
|
||||
prompt = self.load_prompt("issue_triage")
|
||||
|
||||
# This should NOT raise a KeyError
|
||||
formatted = prompt.format(
|
||||
title="Test Issue Title",
|
||||
body="This is the issue body content",
|
||||
author="testuser",
|
||||
existing_labels="bug, urgent"
|
||||
)
|
||||
|
||||
assert "Test Issue Title" in formatted
|
||||
assert "This is the issue body content" in formatted
|
||||
assert "testuser" in formatted
|
||||
assert "bug, urgent" in formatted
|
||||
# JSON example should still be present (curly braces escaped)
|
||||
assert '"type"' in formatted
|
||||
assert '"priority"' in formatted
|
||||
|
||||
def test_issue_response_prompt_formatting(self):
|
||||
"""Test that issue_response.md can be formatted with placeholders."""
|
||||
prompt = self.load_prompt("issue_response")
|
||||
|
||||
formatted = prompt.format(
|
||||
issue_type="bug",
|
||||
priority="high",
|
||||
title="Bug Report",
|
||||
body="Description of the bug",
|
||||
triage_analysis="This is a high priority bug"
|
||||
)
|
||||
|
||||
assert "bug" in formatted
|
||||
assert "high" in formatted
|
||||
assert "Bug Report" in formatted
|
||||
# JSON example should still be present
|
||||
assert '"comment"' in formatted
|
||||
|
||||
def test_base_prompt_no_placeholders(self):
|
||||
"""Test that base.md loads correctly (no placeholders needed)."""
|
||||
prompt = self.load_prompt("base")
|
||||
|
||||
# Should contain key elements
|
||||
assert "security" in prompt.lower()
|
||||
assert "JSON" in prompt
|
||||
assert "severity" in prompt.lower()
|
||||
|
||||
def test_prompts_have_escaped_json(self):
|
||||
"""Verify JSON examples use double curly braces."""
|
||||
for prompt_name in ["issue_triage", "issue_response"]:
|
||||
prompt = self.load_prompt(prompt_name)
|
||||
|
||||
# Check that format() doesn't fail
|
||||
try:
|
||||
# Try with minimal placeholders
|
||||
if prompt_name == "issue_triage":
|
||||
prompt.format(title="t", body="b", author="a", existing_labels="l")
|
||||
elif prompt_name == "issue_response":
|
||||
prompt.format(issue_type="t", priority="p", title="t", body="b", triage_analysis="a")
|
||||
except KeyError as e:
|
||||
pytest.fail(f"Prompt {prompt_name} has unescaped curly braces: {e}")
|
||||
|
||||
|
||||
class TestImports:
    """Smoke tests: every package in the tool must be importable."""

    def test_import_agents(self):
        """All agent classes resolve from the agents package."""
        from agents.base_agent import BaseAgent, AgentContext, AgentResult
        from agents.issue_agent import IssueAgent
        from agents.pr_agent import PRAgent
        from agents.codebase_agent import CodebaseAgent

        for agent_cls in (BaseAgent, IssueAgent, PRAgent, CodebaseAgent):
            assert agent_cls is not None

    def test_import_clients(self):
        """The Gitea and LLM client classes resolve from the clients package."""
        from clients.gitea_client import GiteaClient
        from clients.llm_client import LLMClient

        for client_cls in (GiteaClient, LLMClient):
            assert client_cls is not None

    def test_import_security(self):
        """The security scanner resolves from the security package."""
        from security.security_scanner import SecurityScanner

        assert SecurityScanner is not None

    def test_import_enterprise(self):
        """Audit logging and metrics resolve from the enterprise package."""
        from enterprise.audit_logger import AuditLogger
        from enterprise.metrics import MetricsCollector

        for feature_cls in (AuditLogger, MetricsCollector):
            assert feature_cls is not None

    def test_import_dispatcher(self):
        """The top-level dispatcher module resolves."""
        from dispatcher import Dispatcher

        assert Dispatcher is not None
|
||||
|
||||
|
||||
class TestSecurityScanner:
    """Exercise the regex-based security scanner end to end."""

    def test_detects_hardcoded_secret(self):
        """A literal API-key assignment must be flagged at HIGH severity."""
        from security.security_scanner import SecurityScanner

        scanner = SecurityScanner()
        code = '''
API_KEY = "sk-1234567890abcdef"
'''
        results = list(scanner.scan_content(code, "test.py"))
        assert results
        assert any(hit.severity == "HIGH" for hit in results)

    def test_detects_eval(self):
        """eval() on arbitrary input must be reported by an eval-related rule."""
        from security.security_scanner import SecurityScanner

        scanner = SecurityScanner()
        code = '''
result = eval(user_input)
'''
        results = list(scanner.scan_content(code, "test.py"))
        assert results
        assert any("eval" in hit.rule_name.lower() for hit in results)

    def test_no_false_positives_on_clean_code(self):
        """Harmless code must not produce any HIGH-severity findings."""
        from security.security_scanner import SecurityScanner

        scanner = SecurityScanner()
        code = '''
def hello():
    print("Hello, world!")
    return 42
'''
        results = list(scanner.scan_content(code, "test.py"))
        # Lower-severity informational hits are tolerated; HIGH ones are not.
        assert all(hit.severity != "HIGH" for hit in results)
|
||||
|
||||
|
||||
class TestAgentContext:
    """Construction tests for the agent context/result dataclasses."""

    def test_agent_context_creation(self):
        """AgentContext stores the repository coordinates it was given."""
        from agents.base_agent import AgentContext

        ctx = AgentContext(
            owner="testowner",
            repo="testrepo",
            event_type="issues",
            event_data={"action": "opened"},
            config={},
        )

        assert (ctx.owner, ctx.repo, ctx.event_type) == (
            "testowner",
            "testrepo",
            "issues",
        )

    def test_agent_result_creation(self):
        """AgentResult stores the success flag, message, and action list."""
        from agents.base_agent import AgentResult

        outcome = AgentResult(
            success=True,
            message="Test passed",
            data={"key": "value"},
            actions_taken=["action1", "action2"],
        )

        assert outcome.success is True
        assert outcome.message == "Test passed"
        assert len(outcome.actions_taken) == 2
|
||||
|
||||
|
||||
class TestMetrics:
    """Behavioral tests for the enterprise metrics primitives."""

    def test_counter_increment(self):
        """A Counter starts at zero and accumulates increments."""
        from enterprise.metrics import Counter

        hits = Counter("test_counter")
        assert hits.value == 0
        hits.inc()
        assert hits.value == 1
        hits.inc(5)
        assert hits.value == 6

    def test_histogram_observation(self):
        """A Histogram tracks both the observation count and the running sum."""
        from enterprise.metrics import Histogram

        latency = Histogram("test_histogram")
        for sample in (0.1, 0.5, 1.0):
            latency.observe(sample)

        assert latency.count == 3
        assert latency.sum == 1.6

    def test_metrics_collector_summary(self):
        """A completed request shows up in the collector's summary."""
        from enterprise.metrics import MetricsCollector

        collector = MetricsCollector()
        collector.record_request_start("TestAgent")
        collector.record_request_end("TestAgent", success=True, duration_seconds=0.5)

        summary = collector.get_summary()
        assert summary["requests"]["total"] == 1
        assert summary["requests"]["success"] == 1
|
||||
|
||||
|
||||
# Allow running this file directly (e.g. `python tests/test_ai_review.py`)
# outside of a normal `pytest tests/` invocation.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
||||
19
tools/ai-review/agents/__init__.py
Normal file
19
tools/ai-review/agents/__init__.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""AI Review Agents Package
|
||||
|
||||
This package contains the modular agent implementations for the
|
||||
enterprise AI code review system.
|
||||
"""
|
||||
|
||||
from agents.base_agent import BaseAgent, AgentContext, AgentResult
|
||||
from agents.issue_agent import IssueAgent
|
||||
from agents.pr_agent import PRAgent
|
||||
from agents.codebase_agent import CodebaseAgent
|
||||
|
||||
__all__ = [
|
||||
"BaseAgent",
|
||||
"AgentContext",
|
||||
"AgentResult",
|
||||
"IssueAgent",
|
||||
"PRAgent",
|
||||
"CodebaseAgent",
|
||||
]
|
||||
257
tools/ai-review/agents/base_agent.py
Normal file
257
tools/ai-review/agents/base_agent.py
Normal file
@@ -0,0 +1,257 @@
|
||||
"""Base Agent
|
||||
|
||||
Abstract base class for all AI agents. Provides common functionality
|
||||
for Gitea API interaction, LLM calls, logging, and rate limiting.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from clients.gitea_client import GiteaClient
|
||||
from clients.llm_client import LLMClient, LLMResponse
|
||||
|
||||
|
||||
@dataclass
class AgentContext:
    """Context passed to agent during execution."""

    owner: str  # repository owner (user or organization)
    repo: str  # repository name
    event_type: str  # event kind, e.g. "issues" (see tests passing event_type="issues")
    event_data: dict  # event payload, e.g. {"action": "opened"} — presumably the raw forge webhook body; confirm against dispatcher
    config: dict = field(default_factory=dict)  # per-run configuration overrides
|
||||
|
||||
|
||||
@dataclass
class AgentResult:
    """Result from agent execution."""

    success: bool  # True when the agent finished without a fatal error
    message: str  # human-readable summary of the outcome
    data: dict = field(default_factory=dict)  # structured, agent-specific output
    actions_taken: list[str] = field(default_factory=list)  # short descriptions of side effects performed
    error: str | None = None  # error detail when success is False, else None
|
||||
|
||||
|
||||
class BaseAgent(ABC):
|
||||
"""Abstract base class for AI agents."""
|
||||
|
||||
# Marker to identify AI-generated comments
|
||||
AI_MARKER = "<!-- AI_CODE_REVIEW -->"
|
||||
|
||||
# Disclaimer text
|
||||
AI_DISCLAIMER = (
|
||||
"**Note:** This review was generated by an AI assistant. "
|
||||
"While it aims to be accurate and helpful, it may contain mistakes "
|
||||
"or miss important issues. Please verify all findings before taking action."
|
||||
)
|
||||
|
||||
    def __init__(
        self,
        config: dict | None = None,
        gitea_client: GiteaClient | None = None,
        llm_client: LLMClient | None = None,
    ):
        """Initialize the base agent.

        Args:
            config: Agent configuration dictionary. When omitted, it is
                loaded from the package's config.yml via _load_config().
            gitea_client: Optional pre-configured Gitea client; a default
                one is constructed otherwise.
            llm_client: Optional pre-configured LLM client; otherwise built
                from the effective configuration.
        """
        # NOTE(review): an explicitly-passed empty dict ({}) is falsy and so
        # still falls back to config.yml — confirm this is intended.
        self.config = config or self._load_config()
        self.gitea = gitea_client or GiteaClient()
        self.llm = llm_client or LLMClient.from_config(self.config)
        # Name the logger after the concrete subclass so log lines are
        # attributable to the specific agent.
        self.logger = logging.getLogger(self.__class__.__name__)

        # Rate limiting state used by _rate_limit(): timestamp of the last
        # LLM request and the minimum spacing between consecutive requests.
        self._last_request_time = 0.0
        self._min_request_interval = 1.0  # seconds
|
||||
|
||||
@staticmethod
|
||||
def _load_config() -> dict:
|
||||
"""Load configuration from config.yml."""
|
||||
config_path = os.path.join(os.path.dirname(__file__), "..", "config.yml")
|
||||
if os.path.exists(config_path):
|
||||
with open(config_path) as f:
|
||||
return yaml.safe_load(f)
|
||||
return {}
|
||||
|
||||
def _rate_limit(self):
|
||||
"""Apply rate limiting between requests."""
|
||||
elapsed = time.time() - self._last_request_time
|
||||
if elapsed < self._min_request_interval:
|
||||
time.sleep(self._min_request_interval - elapsed)
|
||||
self._last_request_time = time.time()
|
||||
|
||||
def load_prompt(self, prompt_name: str) -> str:
|
||||
"""Load a prompt template from the prompts directory.
|
||||
|
||||
Args:
|
||||
prompt_name: Name of the prompt file (without extension).
|
||||
|
||||
Returns:
|
||||
Prompt template content.
|
||||
"""
|
||||
prompt_path = os.path.join(
|
||||
os.path.dirname(__file__), "..", "prompts", f"{prompt_name}.md"
|
||||
)
|
||||
if not os.path.exists(prompt_path):
|
||||
raise FileNotFoundError(f"Prompt not found: {prompt_path}")
|
||||
with open(prompt_path) as f:
|
||||
return f.read()
|
||||
|
||||
def call_llm(self, prompt: str, **kwargs) -> LLMResponse:
|
||||
"""Make a rate-limited call to the LLM.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send.
|
||||
**kwargs: Additional LLM options.
|
||||
|
||||
Returns:
|
||||
LLM response.
|
||||
"""
|
||||
self._rate_limit()
|
||||
return self.llm.call(prompt, **kwargs)
|
||||
|
||||
def call_llm_json(self, prompt: str, **kwargs) -> dict:
|
||||
"""Make a rate-limited call and parse JSON response.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send.
|
||||
**kwargs: Additional LLM options.
|
||||
|
||||
Returns:
|
||||
Parsed JSON response.
|
||||
"""
|
||||
self._rate_limit()
|
||||
return self.llm.call_json(prompt, **kwargs)
|
||||
|
||||
def find_ai_comment(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
issue_index: int,
|
||||
marker: str | None = None,
|
||||
) -> int | None:
|
||||
"""Find an existing AI comment by marker.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
issue_index: Issue or PR number.
|
||||
marker: Custom marker to search for. Defaults to AI_MARKER.
|
||||
|
||||
Returns:
|
||||
Comment ID if found, None otherwise.
|
||||
"""
|
||||
marker = marker or self.AI_MARKER
|
||||
comments = self.gitea.list_issue_comments(owner, repo, issue_index)
|
||||
|
||||
for comment in comments:
|
||||
if marker in comment.get("body", ""):
|
||||
return comment["id"]
|
||||
|
||||
return None
|
||||
|
||||
def upsert_comment(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
issue_index: int,
|
||||
body: str,
|
||||
marker: str | None = None,
|
||||
) -> dict:
|
||||
"""Create or update an AI comment.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
issue_index: Issue or PR number.
|
||||
body: Comment body (marker will be prepended if not present).
|
||||
marker: Custom marker. Defaults to AI_MARKER.
|
||||
|
||||
Returns:
|
||||
Created or updated comment.
|
||||
"""
|
||||
marker = marker or self.AI_MARKER
|
||||
|
||||
# Ensure marker is in the body
|
||||
if marker not in body:
|
||||
body = f"{marker}\n{body}"
|
||||
|
||||
# Check for existing comment
|
||||
existing_id = self.find_ai_comment(owner, repo, issue_index, marker)
|
||||
|
||||
if existing_id:
|
||||
return self.gitea.update_issue_comment(owner, repo, existing_id, body)
|
||||
else:
|
||||
return self.gitea.create_issue_comment(owner, repo, issue_index, body)
|
||||
|
||||
def format_with_disclaimer(self, content: str) -> str:
|
||||
"""Add AI disclaimer to content.
|
||||
|
||||
Args:
|
||||
content: The main content.
|
||||
|
||||
Returns:
|
||||
Content with disclaimer prepended.
|
||||
"""
|
||||
return f"{self.AI_DISCLAIMER}\n\n{self.AI_MARKER}\n{content}"
|
||||
|
||||
@abstractmethod
|
||||
def execute(self, context: AgentContext) -> AgentResult:
|
||||
"""Execute the agent's main task.
|
||||
|
||||
Args:
|
||||
context: Execution context with event data.
|
||||
|
||||
Returns:
|
||||
Result of the agent execution.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def can_handle(self, event_type: str, event_data: dict) -> bool:
|
||||
"""Check if this agent can handle the given event.
|
||||
|
||||
Args:
|
||||
event_type: Type of event (issue, pull_request, etc).
|
||||
event_data: Event payload data.
|
||||
|
||||
Returns:
|
||||
True if this agent can handle the event.
|
||||
"""
|
||||
pass
|
||||
|
||||
def run(self, context: AgentContext) -> AgentResult:
|
||||
"""Run the agent with error handling.
|
||||
|
||||
Args:
|
||||
context: Execution context.
|
||||
|
||||
Returns:
|
||||
Agent result, including any errors.
|
||||
"""
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Running {self.__class__.__name__} for {context.owner}/{context.repo}"
|
||||
)
|
||||
result = self.execute(context)
|
||||
self.logger.info(
|
||||
f"Completed with success={result.success}: {result.message}"
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
self.logger.exception(f"Agent execution failed: {e}")
|
||||
return AgentResult(
|
||||
success=False,
|
||||
message="Agent execution failed",
|
||||
error=str(e),
|
||||
)
|
||||
470
tools/ai-review/agents/chat_agent.py
Normal file
470
tools/ai-review/agents/chat_agent.py
Normal file
@@ -0,0 +1,470 @@
|
||||
"""Chat Agent (Bartender)
|
||||
|
||||
Interactive AI chat agent with tool use capabilities.
|
||||
Can search the codebase and web to answer user questions.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
import requests
|
||||
|
||||
from agents.base_agent import AgentContext, AgentResult, BaseAgent
|
||||
from clients.llm_client import ToolCall
|
||||
|
||||
|
||||
@dataclass
class ChatMessage:
    """A message in the chat conversation."""

    role: str  # 'user', 'assistant', or 'tool'
    content: str  # message text, or the tool's output when role == 'tool'
    tool_call_id: str | None = None  # links a tool result back to its originating call
    name: str | None = None  # Tool name for tool responses
||||
class ChatAgent(BaseAgent):
    """Interactive chat agent with tool capabilities.

    Answers free-form questions posted as issue comments (or via a direct
    "chat" event). Runs a bounded tool-use loop in which the LLM may
    request codebase searches, file reads, or web searches; tool results
    are fed back until the model produces a final answer.
    """

    # Marker for chat responses. Distinct from the review marker so chat
    # replies upsert their own comment, not review comments.
    CHAT_AI_MARKER = "<!-- AI_CHAT_RESPONSE -->"

    # Tool definitions in OpenAI function-calling format.
    TOOLS = [
        {
            "type": "function",
            "function": {
                "name": "search_codebase",
                "description": "Search the repository codebase for files, functions, classes, or patterns. Use this to find relevant code.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query - can be a filename, function name, class name, or code pattern",
                        },
                        "file_pattern": {
                            "type": "string",
                            "description": "Optional file pattern to filter results (e.g., '*.py', 'src/*.js')",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "read_file",
                "description": "Read the contents of a specific file from the repository.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "filepath": {
                            "type": "string",
                            "description": "Path to the file to read",
                        },
                    },
                    "required": ["filepath"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the web for information using SearXNG. Use this for external documentation, tutorials, or general knowledge.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query",
                        },
                        "categories": {
                            "type": "string",
                            "description": "Optional: comma-separated categories (general, images, videos, news, science, it)",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
    ]

    # System prompt for the chat agent ({owner}/{repo} filled in at runtime).
    SYSTEM_PROMPT = """You are Bartender, a helpful AI assistant for code review and development tasks.

You have access to tools to help answer questions:
- search_codebase: Search the repository for code, files, functions, or patterns
- read_file: Read specific files from the repository
- search_web: Search the web for documentation, tutorials, or external information

When helping users:
1. Use tools to gather information before answering questions about code
2. Be concise but thorough in your explanations
3. Provide code examples when helpful
4. If you're unsure, say so and suggest alternatives

Repository context: {owner}/{repo}
"""

    def __init__(self, *args, **kwargs):
        """Initialize the chat agent.

        Resolves the SearXNG endpoint from agents.chat.searxng_url in the
        config, falling back to the SEARXNG_URL environment variable.
        """
        super().__init__(*args, **kwargs)
        self._searxng_url = self.config.get("agents", {}).get("chat", {}).get(
            "searxng_url", os.environ.get("SEARXNG_URL", "")
        )

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent handles the given event.

        Acts as the catch-all for bot mentions: any mention that is NOT one
        of the specific commands (summarize/explain/suggest/security/codebase)
        is routed to chat.
        """
        agent_config = self.config.get("agents", {}).get("chat", {})
        if not agent_config.get("enabled", True):
            return False

        # Handle issue comment with @ai-bot chat or just @ai-bot
        if event_type == "issue_comment":
            comment_body = event_data.get("comment", {}).get("body", "")
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            # Check if this is a chat request (any @ai-bot mention that isn't a specific command)
            if mention_prefix in comment_body:
                # Check it's not another specific command
                specific_commands = ["summarize", "explain", "suggest", "security", "codebase"]
                body_lower = comment_body.lower()
                for cmd in specific_commands:
                    if f"{mention_prefix} {cmd}" in body_lower:
                        return False
                return True

        # Handle direct chat command
        if event_type == "chat":
            return True

        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Execute the chat agent.

        Extracts the user message from the event, runs the tool-use loop,
        and (for issue comments) posts the answer back via upsert.
        """
        self.logger.info(f"Starting chat for {context.owner}/{context.repo}")

        # Extract user message from whichever event shape we received.
        if context.event_type == "issue_comment":
            user_message = context.event_data.get("comment", {}).get("body", "")
            issue_index = context.event_data.get("issue", {}).get("number")
            # Remove the @ai-bot prefix so the model only sees the question.
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            user_message = user_message.replace(mention_prefix, "").strip()
        else:
            user_message = context.event_data.get("message", "")
            issue_index = context.event_data.get("issue_number")

        if not user_message:
            return AgentResult(
                success=False,
                message="No message provided",
            )

        # Build the initial conversation.
        system_prompt = self.SYSTEM_PROMPT.format(
            owner=context.owner,
            repo=context.repo,
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        # Run the chat loop with tool execution.
        response_content, tools_used = self._run_chat_loop(
            context, messages, max_iterations=5
        )

        actions_taken = []
        if tools_used:
            actions_taken.append(f"Used tools: {', '.join(tools_used)}")

        # Post response if this is an issue comment.
        if issue_index:
            comment_body = self._format_response(response_content)
            self.upsert_comment(
                context.owner,
                context.repo,
                issue_index,
                comment_body,
                marker=self.CHAT_AI_MARKER,
            )
            actions_taken.append("Posted chat response")

        return AgentResult(
            success=True,
            message="Chat completed",
            data={"response": response_content, "tools_used": tools_used},
            actions_taken=actions_taken,
        )

    def _run_chat_loop(
        self,
        context: AgentContext,
        messages: list[dict],
        max_iterations: int = 5,
    ) -> tuple[str, list[str]]:
        """Run the chat loop with tool execution.

        Args:
            context: Execution context (forwarded to tool handlers).
            messages: Conversation so far; mutated in place as tool calls
                and results are appended.
            max_iterations: Upper bound on LLM round-trips before forcing
                a final, tool-free answer.

        Returns:
            Tuple of (final response content, list of tools used)
        """
        import json  # local import, mirroring _match_pattern's fnmatch

        tools_used = []

        for _ in range(max_iterations):
            self._rate_limit()
            response = self.llm.call_with_tools(messages, tools=self.TOOLS)

            # If no tool calls, we're done.
            if not response.tool_calls:
                return response.content, tools_used

            # Echo the assistant turn (with its tool calls) back into the
            # conversation. The OpenAI message format requires
            # function.arguments to be a JSON-encoded string; str() would
            # produce a Python repr with single quotes, which is not JSON.
            messages.append({
                "role": "assistant",
                "content": response.content or "",
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.name,
                            "arguments": json.dumps(tc.arguments),
                        },
                    }
                    for tc in response.tool_calls
                ],
            })

            # Execute each tool call and append its result.
            for tool_call in response.tool_calls:
                tool_result = self._execute_tool(context, tool_call)
                tools_used.append(tool_call.name)

                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_result,
                })

        # If we hit max iterations, make one final call without tools so
        # the model must produce an answer.
        self._rate_limit()
        final_response = self.llm.call_with_tools(
            messages, tools=None, tool_choice="none"
        )
        return final_response.content, tools_used

    def _execute_tool(self, context: AgentContext, tool_call: ToolCall) -> str:
        """Execute a tool call and return the result.

        Errors are returned as text (not raised) so the model can see and
        recover from a failed tool invocation.
        """
        self.logger.info(f"Executing tool: {tool_call.name}")

        try:
            if tool_call.name == "search_codebase":
                return self._tool_search_codebase(
                    context,
                    tool_call.arguments.get("query", ""),
                    tool_call.arguments.get("file_pattern"),
                )
            elif tool_call.name == "read_file":
                return self._tool_read_file(
                    context,
                    tool_call.arguments.get("filepath", ""),
                )
            elif tool_call.name == "search_web":
                return self._tool_search_web(
                    tool_call.arguments.get("query", ""),
                    tool_call.arguments.get("categories"),
                )
            else:
                return f"Unknown tool: {tool_call.name}"
        except Exception as e:
            self.logger.error(f"Tool execution failed: {e}")
            return f"Error executing tool: {e}"

    def _tool_search_codebase(
        self,
        context: AgentContext,
        query: str,
        file_pattern: str | None = None,
    ) -> str:
        """Search the codebase for files matching a query.

        Matches the query (case-insensitively) against file paths first,
        then against file contents, reporting matching lines.
        """
        results = []

        # Get repository file list.
        try:
            files = self._collect_files(context.owner, context.repo, file_pattern)
        except Exception as e:
            return f"Error listing files: {e}"

        query_lower = query.lower()

        # Search through files.
        for file_info in files[:50]:  # Limit to prevent API exhaustion
            filepath = file_info.get("path", "")

            # Check filename match first (cheap; skips the content fetch).
            if query_lower in filepath.lower():
                results.append(f"File: {filepath}")
                continue

            # Check content for code patterns.
            try:
                content_data = self.gitea.get_file_contents(
                    context.owner, context.repo, filepath
                )
                if content_data.get("content"):
                    # Contents API returns base64-encoded file bodies.
                    content = base64.b64decode(content_data["content"]).decode(
                        "utf-8", errors="ignore"
                    )

                    # Search for query in content.
                    lines = content.splitlines()
                    matching_lines = []
                    for i, line in enumerate(lines, 1):
                        if query_lower in line.lower():
                            matching_lines.append(f"  L{i}: {line.strip()[:100]}")

                    if matching_lines:
                        results.append(f"File: {filepath}")
                        results.extend(matching_lines[:5])  # Max 5 matches per file
            except Exception:
                # Best-effort: unreadable/binary files are simply skipped.
                pass

        if not results:
            return f"No results found for '{query}'"

        return "\n".join(results[:30])  # Limit total results

    def _collect_files(
        self,
        owner: str,
        repo: str,
        file_pattern: str | None = None,
    ) -> list[dict]:
        """Collect files from the repository.

        Recursively walks the contents API, keeping only code-like
        extensions, skipping dependency/build directories, and optionally
        filtering by a glob pattern.
        """
        files = []

        # Code extensions to search.
        code_extensions = {
            ".py", ".js", ".ts", ".go", ".rs", ".java", ".rb",
            ".php", ".c", ".cpp", ".h", ".cs", ".swift", ".kt",
            ".md", ".yml", ".yaml", ".json", ".toml",
        }

        # Patterns to ignore.
        ignore_patterns = [
            "node_modules/", "vendor/", ".git/", "__pycache__/",
            ".venv/", "dist/", "build/", ".min.js", ".min.css",
        ]

        def traverse(path: str = ""):
            try:
                contents = self.gitea.get_file_contents(owner, repo, path or ".")
                if isinstance(contents, list):
                    for item in contents:
                        item_path = item.get("path", "")

                        if any(p in item_path for p in ignore_patterns):
                            continue

                        if item.get("type") == "file":
                            ext = os.path.splitext(item_path)[1]
                            if ext in code_extensions:
                                # Check file pattern if provided.
                                if file_pattern:
                                    if not self._match_pattern(item_path, file_pattern):
                                        continue
                                files.append(item)
                        elif item.get("type") == "dir":
                            traverse(item_path)
            except Exception as e:
                self.logger.warning(f"Failed to list {path}: {e}")

        traverse()
        return files[:100]  # Limit to prevent API exhaustion

    def _match_pattern(self, filepath: str, pattern: str) -> bool:
        """Check if filepath matches a simple glob pattern."""
        import fnmatch
        return fnmatch.fnmatch(filepath, pattern)

    def _tool_read_file(self, context: AgentContext, filepath: str) -> str:
        """Read a file from the repository, truncated to 8000 chars."""
        try:
            content_data = self.gitea.get_file_contents(
                context.owner, context.repo, filepath
            )
            if content_data.get("content"):
                content = base64.b64decode(content_data["content"]).decode(
                    "utf-8", errors="ignore"
                )
                # Truncate if too long to keep the prompt budget in check.
                if len(content) > 8000:
                    content = content[:8000] + "\n... (truncated)"
                return f"File: {filepath}\n\n```\n{content}\n```"
            return f"File not found: {filepath}"
        except Exception as e:
            return f"Error reading file: {e}"

    def _tool_search_web(
        self,
        query: str,
        categories: str | None = None,
    ) -> str:
        """Search the web using SearXNG; returns the top 5 results."""
        if not self._searxng_url:
            return "Web search is not configured. Set SEARXNG_URL environment variable."

        try:
            params = {
                "q": query,
                "format": "json",
            }
            if categories:
                params["categories"] = categories

            response = requests.get(
                f"{self._searxng_url}/search",
                params=params,
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()

            results = data.get("results", [])
            if not results:
                return f"No web results found for '{query}'"

            # Format results.
            output = []
            for i, result in enumerate(results[:5], 1):  # Top 5 results
                title = result.get("title", "No title")
                url = result.get("url", "")
                content = result.get("content", "")[:200]
                output.append(f"{i}. **{title}**\n   {url}\n   {content}")

            return "\n\n".join(output)
        except requests.exceptions.RequestException as e:
            return f"Web search failed: {e}"

    def _format_response(self, content: str) -> str:
        """Format the chat response with disclaimer."""
        lines = [
            f"{self.AI_DISCLAIMER}",
            "",
            "---",
            "",
            content,
        ]
        return "\n".join(lines)
457
tools/ai-review/agents/codebase_agent.py
Normal file
457
tools/ai-review/agents/codebase_agent.py
Normal file
@@ -0,0 +1,457 @@
|
||||
"""Codebase Quality Agent
|
||||
|
||||
AI agent for analyzing overall codebase health, architecture,
|
||||
technical debt, and documentation coverage.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from agents.base_agent import AgentContext, AgentResult, BaseAgent
|
||||
|
||||
|
||||
@dataclass
class CodebaseMetrics:
    """Metrics collected from codebase analysis."""

    total_files: int = 0  # number of code files discovered
    total_lines: int = 0  # summed line count of analyzed files
    languages: dict = field(default_factory=dict)  # language name -> file count
    todo_count: int = 0  # lines containing "TODO" (case-insensitive)
    fixme_count: int = 0  # lines containing "FIXME" (case-insensitive)
    deprecated_count: int = 0  # lines containing "DEPRECATED" (case-insensitive)
    missing_docstrings: int = 0  # Python files with defs but no docstrings at all
||||
@dataclass
class CodebaseReport:
    """Complete codebase analysis report."""

    summary: str  # 2-3 sentence overall assessment
    health_score: float  # 0-100
    metrics: CodebaseMetrics  # raw metrics the assessment was based on
    issues: list[dict]  # each with severity/category/description/recommendation
    recommendations: list[str]  # top actionable recommendations
    architecture_notes: list[str]  # observations about structure and patterns
||||
class CodebaseAgent(BaseAgent):
    """Agent for codebase quality analysis."""

    # Marker for codebase reports; embedded in the report issue body so an
    # existing report can be found and updated instead of duplicated.
    CODEBASE_AI_MARKER = "<!-- AI_CODEBASE_REVIEW -->"

    # File extensions to analyze, mapped to a human-readable language name.
    CODE_EXTENSIONS = {
        ".py": "Python",
        ".js": "JavaScript",
        ".ts": "TypeScript",
        ".go": "Go",
        ".rs": "Rust",
        ".java": "Java",
        ".rb": "Ruby",
        ".php": "PHP",
        ".c": "C",
        ".cpp": "C++",
        ".h": "C/C++ Header",
        ".cs": "C#",
        ".swift": "Swift",
        ".kt": "Kotlin",
    }

    # Path fragments that exclude a file from analysis (dependency dirs,
    # VCS internals, build output, minified assets).
    IGNORE_PATTERNS = [
        "node_modules/",
        "vendor/",
        ".git/",
        "__pycache__/",
        ".venv/",
        "dist/",
        "build/",
        ".min.js",
        ".min.css",
    ]
||||
def can_handle(self, event_type: str, event_data: dict) -> bool:
|
||||
"""Check if this agent handles the given event."""
|
||||
agent_config = self.config.get("agents", {}).get("codebase", {})
|
||||
if not agent_config.get("enabled", True):
|
||||
return False
|
||||
|
||||
# Handle manual trigger via workflow_dispatch or schedule
|
||||
if event_type in ("workflow_dispatch", "schedule"):
|
||||
return True
|
||||
|
||||
# Handle special issue command
|
||||
if event_type == "issue_comment":
|
||||
comment_body = event_data.get("comment", {}).get("body", "")
|
||||
mention_prefix = self.config.get("interaction", {}).get(
|
||||
"mention_prefix", "@ai-bot"
|
||||
)
|
||||
if f"{mention_prefix} codebase" in comment_body.lower():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
    def execute(self, context: AgentContext) -> AgentResult:
        """Execute codebase analysis.

        Pipeline: enumerate code files, compute raw metrics, ask the LLM
        for a quality assessment, then publish (or refresh) the report as
        a repository issue.

        Args:
            context: Execution context identifying the repository.

        Returns:
            AgentResult with health score, file count, and issue count.
        """
        self.logger.info(f"Starting codebase analysis for {context.owner}/{context.repo}")

        actions_taken = []

        # Step 1: Collect file list from repository
        files = self._collect_files(context.owner, context.repo)
        self.logger.info(f"Found {len(files)} files to analyze")

        # Step 2: Analyze metrics
        metrics = self._analyze_metrics(context.owner, context.repo, files)
        actions_taken.append(f"Analyzed {metrics.total_files} files")

        # Step 3: Run AI analysis on key files
        report = self._run_ai_analysis(context, files, metrics)
        actions_taken.append("Generated AI analysis report")

        # Step 4: Create or update report issue
        issue_number = self._create_report_issue(context, report)
        actions_taken.append(f"Created/updated report issue #{issue_number}")

        return AgentResult(
            success=True,
            message=f"Codebase analysis complete - Health Score: {report.health_score:.0f}/100",
            data={
                "health_score": report.health_score,
                "total_files": metrics.total_files,
                "issues_found": len(report.issues),
            },
            actions_taken=actions_taken,
        )
||||
def _collect_files(self, owner: str, repo: str) -> list[dict]:
|
||||
"""Collect list of files from the repository."""
|
||||
files = []
|
||||
|
||||
def traverse(path: str = ""):
|
||||
try:
|
||||
contents = self.gitea.get_file_contents(owner, repo, path or ".")
|
||||
if isinstance(contents, list):
|
||||
for item in contents:
|
||||
item_path = item.get("path", "")
|
||||
|
||||
# Skip ignored patterns
|
||||
if any(p in item_path for p in self.IGNORE_PATTERNS):
|
||||
continue
|
||||
|
||||
if item.get("type") == "file":
|
||||
ext = os.path.splitext(item_path)[1]
|
||||
if ext in self.CODE_EXTENSIONS:
|
||||
files.append(item)
|
||||
elif item.get("type") == "dir":
|
||||
traverse(item_path)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Failed to list {path}: {e}")
|
||||
|
||||
traverse()
|
||||
return files[:100] # Limit to prevent API exhaustion
|
||||
|
||||
    def _analyze_metrics(
        self,
        owner: str,
        repo: str,
        files: list[dict],
    ) -> CodebaseMetrics:
        """Analyze metrics from files.

        Tallies language distribution over all given files, then fetches
        up to 50 file bodies to count lines and TODO/FIXME/DEPRECATED
        markers. Files that cannot be fetched or decoded are skipped.

        Args:
            owner: Repository owner.
            repo: Repository name.
            files: Content entries from _collect_files.

        Returns:
            Populated CodebaseMetrics.
        """
        metrics = CodebaseMetrics()
        metrics.total_files = len(files)

        for file_info in files[:50]:  # Analyze top 50 files
            filepath = file_info.get("path", "")
            ext = os.path.splitext(filepath)[1]
            lang = self.CODE_EXTENSIONS.get(ext, "Unknown")

            metrics.languages[lang] = metrics.languages.get(lang, 0) + 1

            try:
                content_data = self.gitea.get_file_contents(owner, repo, filepath)
                if content_data.get("content"):
                    # Contents API returns base64-encoded file bodies.
                    content = base64.b64decode(content_data["content"]).decode(
                        "utf-8", errors="ignore"
                    )
                    lines = content.splitlines()
                    metrics.total_lines += len(lines)

                    # Count markers (substring match, so e.g. "TODOS" also
                    # counts — deliberately loose heuristic).
                    for line in lines:
                        line_upper = line.upper()
                        if "TODO" in line_upper:
                            metrics.todo_count += 1
                        if "FIXME" in line_upper:
                            metrics.fixme_count += 1
                        if "DEPRECATED" in line_upper:
                            metrics.deprecated_count += 1

                    # Check for docstrings (Python): counts whole files that
                    # define functions but contain no triple-quoted strings.
                    if ext == ".py":
                        if 'def ' in content and '"""' not in content:
                            metrics.missing_docstrings += 1

            except Exception as e:
                self.logger.debug(f"Could not analyze {filepath}: {e}")

        return metrics
||||
    def _run_ai_analysis(
        self,
        context: AgentContext,
        files: list[dict],
        metrics: CodebaseMetrics,
    ) -> CodebaseReport:
        """Run AI analysis on the codebase.

        Builds a single prompt from the collected metrics, a file-list
        sample, and key config-file contents, then asks the LLM for a JSON
        assessment. Falls back to a metrics-only heuristic report when the
        LLM call (or JSON parsing) fails.

        Args:
            context: Execution context identifying the repository.
            files: Content entries from _collect_files.
            metrics: Pre-computed metrics from _analyze_metrics.

        Returns:
            CodebaseReport (AI-generated, or heuristic fallback).
        """
        # Prepare context for AI
        file_list = "\n".join([f"- {f.get('path', '')}" for f in files[:30]])
        language_breakdown = "\n".join(
            [f"- {lang}: {count} files" for lang, count in metrics.languages.items()]
        )

        # Sample some key files for deeper analysis
        key_files_content = self._get_key_files_content(
            context.owner, context.repo, files
        )

        prompt = f"""Analyze this codebase and provide a comprehensive quality assessment.

## Repository: {context.owner}/{context.repo}

## Metrics
- Total Files: {metrics.total_files}
- Total Lines: {metrics.total_lines}
- TODO Comments: {metrics.todo_count}
- FIXME Comments: {metrics.fixme_count}
- Deprecated Markers: {metrics.deprecated_count}

## Language Breakdown
{language_breakdown}

## File Structure (sample)
{file_list}

## Key Files Content
{key_files_content}

## Analysis Required

Provide your analysis as JSON with this structure:
```json
{{
    "summary": "Overall assessment in 2-3 sentences",
    "health_score": 0-100,
    "issues": [
        {{
            "severity": "HIGH|MEDIUM|LOW",
            "category": "Architecture|Code Quality|Security|Testing|Documentation",
            "description": "Issue description",
            "recommendation": "How to fix"
        }}
    ],
    "recommendations": ["Top 3-5 actionable recommendations"],
    "architecture_notes": ["Observations about code structure and patterns"]
}}
```

Be constructive and actionable. Focus on the most impactful improvements.
"""

        try:
            result = self.call_llm_json(prompt)
            return CodebaseReport(
                summary=result.get("summary", "Analysis complete"),
                health_score=float(result.get("health_score", 50)),
                metrics=metrics,
                issues=result.get("issues", []),
                recommendations=result.get("recommendations", []),
                architecture_notes=result.get("architecture_notes", []),
            )
        except Exception as e:
            self.logger.error(f"AI analysis failed: {e}")
            # Try to log the raw response if possible (requires accessing the last response)
            # Since we don't have direct access here, we rely on having good logging in LLMClient if needed.
            # But let's add a note to the summary.
            # Calculate basic health score from metrics
            health_score = 70
            if metrics.todo_count > 10:
                health_score -= 10
            if metrics.fixme_count > 5:
                health_score -= 10

            return CodebaseReport(
                summary=f"Basic analysis complete (AI unavailable: {e})",
                health_score=health_score,
                metrics=metrics,
                issues=[],
                recommendations=["Manual review recommended"],
                architecture_notes=[],
            )
||||
    def _get_key_files_content(
        self,
        owner: str,
        repo: str,
        files: list[dict],
    ) -> str:
        """Get content of key files for AI analysis.

        Pulls well-known project-definition files (README, build manifests,
        Dockerfile, ...) from the collected file list, truncating each to
        2000 characters and returning at most five of them as markdown
        sections.

        Args:
            owner: Repository owner.
            repo: Repository name.
            files: Content entries from _collect_files.

        Returns:
            Markdown-formatted sections, or a placeholder string when no
            key files were found.
        """
        key_file_names = [
            "README.md",
            "setup.py",
            "pyproject.toml",
            "package.json",
            "Cargo.toml",
            "go.mod",
            "Makefile",
            "Dockerfile",
        ]

        content_parts = []

        for file_info in files:
            filepath = file_info.get("path", "")
            filename = os.path.basename(filepath)

            if filename in key_file_names:
                try:
                    content_data = self.gitea.get_file_contents(owner, repo, filepath)
                    if content_data.get("content"):
                        # Contents API returns base64-encoded file bodies.
                        content = base64.b64decode(content_data["content"]).decode(
                            "utf-8", errors="ignore"
                        )
                        # Truncate long files
                        if len(content) > 2000:
                            content = content[:2000] + "\n... (truncated)"
                        content_parts.append(f"### {filepath}\n```\n{content}\n```")
                except Exception:
                    # Best-effort: unreadable key files are simply omitted.
                    pass

        return "\n\n".join(content_parts[:5]) or "No key configuration files found."
||||
    def _create_report_issue(
        self,
        context: AgentContext,
        report: CodebaseReport,
    ) -> int:
        """Create or update a report issue.

        Looks for an open issue labeled "ai-codebase-report" that carries
        the CODEBASE_AI_MARKER and updates its body in place; otherwise
        creates a fresh issue (attaching the label when it exists in the
        repository).

        Args:
            context: Execution context identifying the repository.
            report: Report to publish.

        Returns:
            The issue number, or 0 when creation failed.
        """
        # Generate issue body
        body = self._generate_report_body(report)

        # Look for existing report issue
        try:
            issues = self.gitea.list_issues(
                context.owner, context.repo, state="open", labels=["ai-codebase-report"]
            )
            for issue in issues:
                if self.CODEBASE_AI_MARKER in issue.get("body", ""):
                    # Update existing issue body
                    self.gitea.update_issue(
                        context.owner,
                        context.repo,
                        issue["number"],
                        body=body,
                    )
                    return issue["number"]
        except Exception as e:
            self.logger.warning(f"Failed to check for existing report: {e}")

        # Create new issue
        try:
            # Check for label ID (the create API wants label IDs, not names)
            labels = []
            try:
                repo_labels = self.gitea.get_repo_labels(context.owner, context.repo)
                for label in repo_labels:
                    if label["name"] == "ai-codebase-report":
                        labels.append(label["id"])
                        break
            except Exception:
                # Label lookup is optional; create the issue unlabeled.
                pass

            issue = self.gitea.create_issue(
                context.owner,
                context.repo,
                title=f"AI Codebase Report - {context.repo}",
                body=body,
                labels=labels,
            )
            return issue["number"]
        except Exception as e:
            self.logger.error(f"Failed to create report issue: {e}")
            return 0
||||
def _generate_report_body(self, report: CodebaseReport) -> str:
    """Generate the Markdown body for the codebase report issue.

    Args:
        report: The completed codebase analysis.

    Returns:
        A Markdown document containing the health score, metrics table,
        top issues, recommendations, and architecture notes.
    """
    # Traffic-light indicator for the overall score.
    # BUG FIX: this was computed but never rendered; include it in the heading.
    health_emoji = "🟢" if report.health_score >= 80 else ("🟡" if report.health_score >= 60 else "🔴")

    lines = [
        self.AI_DISCLAIMER,
        "",
        "# AI Codebase Quality Report",
        "",
        f"## Health Score: {health_emoji} {report.health_score:.0f}/100",
        "",
        report.summary,
        "",
        "---",
        "",
        "## Metrics",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Total Files | {report.metrics.total_files} |",
        f"| Total Lines | {report.metrics.total_lines:,} |",
        f"| TODO Comments | {report.metrics.todo_count} |",
        f"| FIXME Comments | {report.metrics.fixme_count} |",
        f"| Deprecated | {report.metrics.deprecated_count} |",
        "",
    ]

    # Languages, most files first.
    if report.metrics.languages:
        lines.append("### Languages")
        lines.append("")
        for lang, count in sorted(
            report.metrics.languages.items(), key=lambda x: -x[1]
        ):
            lines.append(f"- **{lang}**: {count} files")
        lines.append("")

    # Top issues (capped at 10 to keep the issue body readable).
    if report.issues:
        lines.append("## Issues Found")
        lines.append("")
        for issue in report.issues[:10]:
            severity = issue.get("severity", "MEDIUM")
            # BUG FIX: the severity emoji was computed but never used;
            # render it in the heading alongside the severity tag.
            emoji = "🔴" if severity == "HIGH" else ("🟡" if severity == "MEDIUM" else "🟢")
            lines.append(f"### {emoji} [{severity}] {issue.get('category', 'General')}")
            lines.append("")
            lines.append(issue.get("description", ""))
            lines.append("")
            lines.append(f"**Recommendation:** {issue.get('recommendation', '')}")
            lines.append("")

    # Actionable recommendations (top 5).
    if report.recommendations:
        lines.append("## Recommendations")
        lines.append("")
        for i, rec in enumerate(report.recommendations[:5], 1):
            lines.append(f"{i}. {rec}")
        lines.append("")

    # Architecture notes (top 5).
    if report.architecture_notes:
        lines.append("## Architecture Notes")
        lines.append("")
        for note in report.architecture_notes[:5]:
            lines.append(f"- {note}")
        lines.append("")

    lines.append("---")
    lines.append("*Generated by AI Codebase Agent*")

    return "\n".join(lines)
|
||||
392
tools/ai-review/agents/issue_agent.py
Normal file
392
tools/ai-review/agents/issue_agent.py
Normal file
@@ -0,0 +1,392 @@
|
||||
"""Issue Review Agent
|
||||
|
||||
AI agent for triaging, labeling, and responding to issues.
|
||||
Handles issue.opened, issue.labeled, and issue_comment events.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
from agents.base_agent import AgentContext, AgentResult, BaseAgent
|
||||
|
||||
|
||||
@dataclass
|
||||
class TriageResult:
|
||||
"""Result of issue triage analysis."""
|
||||
|
||||
issue_type: str
|
||||
priority: str
|
||||
confidence: float
|
||||
summary: str
|
||||
suggested_labels: list[str]
|
||||
is_duplicate: bool
|
||||
duplicate_of: int | None
|
||||
needs_more_info: bool
|
||||
missing_info: list[str]
|
||||
components: list[str]
|
||||
reasoning: str
|
||||
|
||||
|
||||
class IssueAgent(BaseAgent):
    """Agent for handling issue events.

    Triages newly opened issues with the LLM (type, priority, labels),
    acknowledges label additions, and answers @mention commands posted
    in issue comments.
    """

    # Hidden marker embedded in our triage comment so it can be upserted.
    ISSUE_AI_MARKER = "<!-- AI_ISSUE_TRIAGE -->"

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent handles the given event."""
        # Respect the per-agent enable switch.
        agent_config = self.config.get("agents", {}).get("issue", {})
        if not agent_config.get("enabled", True):
            return False

        # Issue lifecycle events (opened / labeled by default).
        if event_type == "issues":
            action = event_data.get("action", "")
            allowed_events = agent_config.get("events", ["opened", "labeled"])
            if action not in allowed_events:
                return False

            # Ignore our own codebase reports to prevent double-commenting.
            issue = event_data.get("issue", {})
            title = issue.get("title", "")
            labels = [lbl.get("name") for lbl in issue.get("labels", [])]
            if "AI Codebase Report" in title or "ai-codebase-report" in labels:
                return False

            return True

        # Issue comment events: only new comments that @mention the bot.
        if event_type == "issue_comment":
            action = event_data.get("action", "")
            if action == "created":
                comment_body = event_data.get("comment", {}).get("body", "")
                mention_prefix = self.config.get("interaction", {}).get(
                    "mention_prefix", "@ai-bot"
                )
                return mention_prefix in comment_body

        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Dispatch the event to the matching handler."""
        event_data = context.event_data
        action = event_data.get("action", "")

        if context.event_type == "issues":
            if action == "opened":
                return self._handle_issue_opened(context)
            elif action == "labeled":
                return self._handle_issue_labeled(context)

        if context.event_type == "issue_comment":
            return self._handle_issue_comment(context)

        return AgentResult(
            success=False,
            message=f"Unknown action: {action}",
        )

    def _handle_issue_opened(self, context: AgentContext) -> AgentResult:
        """Handle a newly opened issue: triage, label, and comment."""
        issue = context.event_data.get("issue", {})
        issue_index = issue.get("number")
        title = issue.get("title", "")
        body = issue.get("body", "")
        author = issue.get("user", {}).get("login", "unknown")
        existing_labels = [lbl.get("name", "") for lbl in issue.get("labels", [])]

        self.logger.info(f"Triaging issue #{issue_index}: {title}")

        # Step 1: Triage the issue with the LLM.
        triage = self._triage_issue(title, body, author, existing_labels)

        actions_taken = []

        # Step 2: Apply labels if auto-label is enabled.
        agent_config = self.config.get("agents", {}).get("issue", {})
        if agent_config.get("auto_label", True):
            labels_applied = self._apply_labels(
                context.owner, context.repo, issue_index, triage
            )
            if labels_applied:
                actions_taken.append(f"Applied labels: {labels_applied}")

        # Step 3: Post (or refresh) the triage comment.
        comment = self._generate_triage_comment(triage, issue)
        self.upsert_comment(
            context.owner,
            context.repo,
            issue_index,
            comment,
            marker=self.ISSUE_AI_MARKER,
        )
        actions_taken.append("Posted triage comment")

        return AgentResult(
            success=True,
            message=f"Triaged issue #{issue_index} as {triage.issue_type} ({triage.priority} priority)",
            data={
                "triage": {
                    "type": triage.issue_type,
                    "priority": triage.priority,
                    "confidence": triage.confidence,
                }
            },
            actions_taken=actions_taken,
        )

    def _handle_issue_labeled(self, context: AgentContext) -> AgentResult:
        """Handle a label addition to an issue.

        Currently a no-op acknowledgement; the hook exists for
        label-triggered actions to be added later.
        """
        issue = context.event_data.get("issue", {})
        label = context.event_data.get("label", {})

        return AgentResult(
            success=True,
            message=f"Noted label '{label.get('name')}' added to issue #{issue.get('number')}",
        )

    def _handle_issue_comment(self, context: AgentContext) -> AgentResult:
        """Handle an @mention in an issue comment by running its command."""
        issue = context.event_data.get("issue", {})
        comment = context.event_data.get("comment", {})
        issue_index = issue.get("number")
        comment_body = comment.get("body", "")

        # Parse a known command out of the mention text.
        command = self._parse_command(comment_body)

        if command:
            response = self._handle_command(context, issue, command)
            self.gitea.create_issue_comment(
                context.owner, context.repo, issue_index, response
            )
            return AgentResult(
                success=True,
                message=f"Responded to command: {command}",
                actions_taken=["Posted command response"],
            )

        return AgentResult(
            success=True,
            message="No actionable command found in mention",
        )

    def _triage_issue(
        self,
        title: str,
        body: str,
        author: str,
        existing_labels: list[str],
    ) -> TriageResult:
        """Use the LLM to triage the issue; fall back to a safe default."""
        prompt_template = self.load_prompt("issue_triage")
        prompt = prompt_template.format(
            title=title,
            body=body or "(no description provided)",
            author=author,
            existing_labels=", ".join(existing_labels) if existing_labels else "none",
        )

        try:
            result = self.call_llm_json(prompt)
            return TriageResult(
                issue_type=result.get("type", "question"),
                priority=result.get("priority", "medium"),
                confidence=result.get("confidence", 0.5),
                summary=result.get("summary", title),
                suggested_labels=result.get("suggested_labels", []),
                is_duplicate=result.get("is_duplicate", False),
                duplicate_of=result.get("duplicate_of"),
                needs_more_info=result.get("needs_more_info", False),
                missing_info=result.get("missing_info", []),
                components=result.get("components", []),
                reasoning=result.get("reasoning", ""),
            )
        except Exception as e:
            self.logger.warning(f"LLM triage failed: {e}")
            # Conservative default: flag for human review rather than guess.
            return TriageResult(
                issue_type="question",
                priority="medium",
                confidence=0.3,
                summary=title,
                suggested_labels=[],
                is_duplicate=False,
                duplicate_of=None,
                needs_more_info=True,
                missing_info=["Unable to parse issue automatically"],
                components=[],
                reasoning="Automatic triage failed, needs human review",
            )

    def _apply_labels(
        self,
        owner: str,
        repo: str,
        issue_index: int,
        triage: TriageResult,
    ) -> list[str]:
        """Apply priority/type/status labels based on the triage result.

        Returns the names of the labels that were applied (possibly empty).
        """
        labels_config = self.config.get("labels", {})

        # Resolve repo label names to IDs; without them we cannot label.
        try:
            repo_labels = self.gitea.get_repo_labels(owner, repo)
            label_map = {lbl["name"]: lbl["id"] for lbl in repo_labels}
        except Exception as e:
            self.logger.warning(f"Failed to get repo labels: {e}")
            return []

        labels_to_add = []

        # Map triage priority -> configured label name.
        priority_labels = labels_config.get("priority", {})
        priority_label = priority_labels.get(triage.priority)
        if priority_label and priority_label in label_map:
            labels_to_add.append(label_map[priority_label])

        # Map triage type -> configured label name.
        type_labels = labels_config.get("type", {})
        type_label = type_labels.get(triage.issue_type)
        if type_label and type_label in label_map:
            labels_to_add.append(label_map[type_label])

        # Mark the issue as AI-reviewed if that label exists.
        status_labels = labels_config.get("status", {})
        reviewed_label = status_labels.get("ai_reviewed")
        if reviewed_label and reviewed_label in label_map:
            labels_to_add.append(label_map[reviewed_label])

        if labels_to_add:
            try:
                self.gitea.add_issue_labels(owner, repo, issue_index, labels_to_add)
                # Map applied IDs back to names (avoid shadowing the `id` builtin).
                return [
                    name
                    for name, label_id in label_map.items()
                    if label_id in labels_to_add
                ]
            except Exception as e:
                self.logger.warning(f"Failed to add labels: {e}")

        return []

    def _generate_triage_comment(self, triage: TriageResult, issue: dict) -> str:
        """Generate the Markdown triage summary comment."""
        lines = [
            self.AI_DISCLAIMER,
            "",
            "## AI Issue Triage",
            "",
            "| Field | Value |",
            "|-------|--------|",
            f"| **Type** | {triage.issue_type.capitalize()} |",
            f"| **Priority** | {triage.priority.capitalize()} |",
            f"| **Confidence** | {triage.confidence:.0%} |",
            "",
        ]

        # Only show the summary when it adds something beyond the title.
        if triage.summary != issue.get("title"):
            lines.append(f"**Summary:** {triage.summary}")
            lines.append("")

        if triage.components:
            lines.append(f"**Components:** {', '.join(triage.components)}")
            lines.append("")

        if triage.needs_more_info and triage.missing_info:
            lines.append("### Additional Information Needed")
            lines.append("")
            for info in triage.missing_info:
                lines.append(f"- {info}")
            lines.append("")

        if triage.is_duplicate and triage.duplicate_of:
            lines.append("### Possible Duplicate")
            lines.append(f"This issue may be a duplicate of #{triage.duplicate_of}")
            lines.append("")

        lines.append("---")
        lines.append(f"*{triage.reasoning}*")

        return "\n".join(lines)

    def _parse_command(self, body: str) -> str | None:
        """Parse a known command out of a comment body, or return None."""
        interaction = self.config.get("interaction", {})
        mention_prefix = interaction.get("mention_prefix", "@ai-bot")
        commands = interaction.get(
            "commands", ["explain", "suggest", "security", "summarize"]
        )

        lowered = body.lower()
        for command in commands:
            # BUG FIX: lowercase the needle as well as the body; previously a
            # configured prefix or command containing uppercase (e.g. "@AI-Bot")
            # could never match the lowercased comment text.
            if f"{mention_prefix} {command}".lower() in lowered:
                return command

        return None

    def _handle_command(self, context: AgentContext, issue: dict, command: str) -> str:
        """Dispatch an @mention command to its handler and return the reply."""
        title = issue.get("title", "")
        body = issue.get("body", "")

        if command == "summarize":
            return self._command_summarize(title, body)
        elif command == "explain":
            return self._command_explain(title, body)
        elif command == "suggest":
            return self._command_suggest(title, body)
        elif command == "security":
            # BUG FIX: "security" is in the default command list but previously
            # had no handler, so a documented command always hit the fallback.
            return self._command_security(title, body)

        return f"{self.AI_DISCLAIMER}\n\nSorry, I don't understand the command `{command}`."

    def _command_summarize(self, title: str, body: str) -> str:
        """Generate a short summary of the issue."""
        prompt = f"""Summarize the following issue in 2-3 concise sentences:

Title: {title}
Body: {body}

Provide only the summary, no additional formatting."""

        try:
            response = self.call_llm(prompt)
            return f"{self.AI_DISCLAIMER}\n\n**Summary:**\n{response.content}"
        except Exception as e:
            return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate a summary. Error: {e}"

    def _command_explain(self, title: str, body: str) -> str:
        """Explain the issue in more detail."""
        prompt = f"""Analyze this issue and provide a clear explanation of what the user is asking for or reporting:

Title: {title}
Body: {body}

Provide:
1. What the issue is about
2. What the user expects
3. Any technical context that might be relevant

Be concise and helpful."""

        try:
            response = self.call_llm(prompt)
            return f"{self.AI_DISCLAIMER}\n\n**Explanation:**\n{response.content}"
        except Exception as e:
            return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to explain this issue. Error: {e}"

    def _command_suggest(self, title: str, body: str) -> str:
        """Suggest solutions or next steps for the issue."""
        prompt = f"""Based on this issue, suggest potential solutions or next steps:

Title: {title}
Body: {body}

Provide 2-3 actionable suggestions. If this is a bug, suggest debugging steps. If this is a feature request, suggest implementation approaches.

Be practical and concise."""

        try:
            response = self.call_llm(prompt)
            return f"{self.AI_DISCLAIMER}\n\n**Suggestions:**\n{response.content}"
        except Exception as e:
            return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate suggestions. Error: {e}"

    def _command_security(self, title: str, body: str) -> str:
        """Analyze the issue from a security perspective."""
        prompt = f"""Review this issue for potential security implications:

Title: {title}
Body: {body}

Identify any security risks, affected components, and suggested mitigations. If there are no security implications, say so briefly.

Be practical and concise."""

        try:
            response = self.call_llm(prompt)
            return f"{self.AI_DISCLAIMER}\n\n**Security Analysis:**\n{response.content}"
        except Exception as e:
            return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to analyze this issue for security concerns. Error: {e}"
|
||||
436
tools/ai-review/agents/pr_agent.py
Normal file
436
tools/ai-review/agents/pr_agent.py
Normal file
@@ -0,0 +1,436 @@
|
||||
"""Pull Request Review Agent
|
||||
|
||||
Enhanced AI agent for comprehensive PR reviews with inline comments,
|
||||
security scanning, and automatic label management.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from agents.base_agent import AgentContext, AgentResult, BaseAgent
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReviewIssue:
|
||||
"""A single issue found in the PR."""
|
||||
|
||||
file: str
|
||||
line: int | None
|
||||
severity: str # HIGH, MEDIUM, LOW
|
||||
category: str # Security, Correctness, Performance, etc.
|
||||
description: str
|
||||
recommendation: str
|
||||
code_snippet: str | None = None
|
||||
|
||||
|
||||
@dataclass
class PRReviewResult:
    """Aggregate result of reviewing one pull request.

    Field order is part of the positional-construction contract and
    must not be changed.
    """

    summary: str             # human-readable overview of the review
    issues: list[ReviewIssue]        # findings from the AI review
    overall_severity: str    # worst severity across all findings
    approval: bool           # whether the AI recommends approving
    # Findings from the regex security scan; kept separate so they can
    # be highlighted in their own section of the summary comment.
    security_issues: list[ReviewIssue] = field(default_factory=list)
|
||||
|
||||
|
||||
class PRAgent(BaseAgent):
    """Agent for handling pull request reviews.

    Combines a regex-based security scan of the diff with an LLM review,
    posts inline and summary comments, and applies approval labels.
    """

    # Hidden marker embedded in our summary comment so it can be upserted.
    PR_AI_MARKER = "<!-- AI_PR_REVIEW -->"

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent handles the given event."""
        # Respect the per-agent enable switch.
        agent_config = self.config.get("agents", {}).get("pr", {})
        if not agent_config.get("enabled", True):
            return False

        if event_type == "pull_request":
            action = event_data.get("action", "")
            allowed_events = agent_config.get("events", ["opened", "synchronize"])
            return action in allowed_events

        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Run the full review pipeline for a pull request event."""
        pr = context.event_data.get("pull_request", {})
        pr_number = pr.get("number")

        self.logger.info(f"Reviewing PR #{pr_number}: {pr.get('title')}")

        actions_taken = []

        # Step 1: Get PR diff
        diff = self._get_diff(context.owner, context.repo, pr_number)
        if not diff.strip():
            return AgentResult(
                success=True,
                message="PR has no changes to review",
            )

        # Step 2: Parse changed files
        changed_files = self._parse_diff_files(diff)

        # Step 3: Run security scan if enabled
        security_issues = []
        agent_config = self.config.get("agents", {}).get("pr", {})
        if agent_config.get("security_scan", True):
            security_issues = self._run_security_scan(changed_files, diff)
            if security_issues:
                actions_taken.append(f"Found {len(security_issues)} security issues")

        # Step 4: Run AI review
        review_result = self._run_ai_review(diff, context, security_issues)

        # Step 5: Post inline comments if enabled
        if agent_config.get("inline_comments", True) and review_result.issues:
            inline_count = self._post_inline_comments(
                context.owner, context.repo, pr_number, review_result
            )
            actions_taken.append(f"Posted {inline_count} inline comments")

        # Step 6: Post summary comment
        summary_comment = self._generate_summary_comment(review_result)
        self.upsert_comment(
            context.owner,
            context.repo,
            pr_number,
            summary_comment,
            marker=self.PR_AI_MARKER,
        )
        actions_taken.append("Posted summary comment")

        # Step 7: Apply labels
        labels_applied = self._apply_review_labels(
            context.owner, context.repo, pr_number, review_result
        )
        if labels_applied:
            actions_taken.append(f"Applied labels: {labels_applied}")

        return AgentResult(
            success=True,
            message=f"Reviewed PR #{pr_number}: {review_result.overall_severity} severity",
            data={
                "severity": review_result.overall_severity,
                "approval": review_result.approval,
                "issues_count": len(review_result.issues),
                "security_issues_count": len(review_result.security_issues),
            },
            actions_taken=actions_taken,
        )

    def _get_diff(self, owner: str, repo: str, pr_number: int) -> str:
        """Get the PR diff, truncated to the configured maximum line count.

        Returns an empty string when the diff cannot be fetched.
        """
        max_lines = self.config.get("review", {}).get("max_diff_lines", 800)

        try:
            diff = self.gitea.get_pull_request_diff(owner, repo, pr_number)
            lines = diff.splitlines()
            if len(lines) > max_lines:
                return "\n".join(lines[:max_lines])
            return diff
        except Exception as e:
            self.logger.error(f"Failed to get diff: {e}")
            return ""

    def _parse_diff_files(self, diff: str) -> dict[str, str]:
        """Parse a unified diff into a {file path: hunk text} mapping."""
        files = {}
        current_file = None
        current_content = []

        for line in diff.splitlines():
            if line.startswith("diff --git"):
                if current_file:
                    files[current_file] = "\n".join(current_content)
                # Extract file path from "diff --git a/path b/path".
                # BUG FIX: anchor on the separating space. The previous
                # pattern r"b/(.+)$" matched the first "b/" anywhere in the
                # line, mis-capturing paths that themselves contain "b/"
                # (e.g. "diff --git a/ab/c b/ab/c").
                match = re.search(r" b/(.+)$", line)
                if match:
                    current_file = match.group(1)
                    current_content = []
            elif current_file:
                current_content.append(line)

        if current_file:
            files[current_file] = "\n".join(current_content)

        return files

    def _run_security_scan(
        self, changed_files: dict[str, str], diff: str
    ) -> list[ReviewIssue]:
        """Run regex-based security pattern scanning over added lines only."""
        issues = []

        # Security patterns to detect.
        patterns = [
            {
                "name": "Hardcoded Secrets",
                "pattern": r'(?i)(api_key|apikey|secret|password|token|auth)\s*[=:]\s*["\'][^"\']{8,}["\']',
                "severity": "HIGH",
                "category": "Security",
                "description": "Potential hardcoded secret or API key detected",
                "recommendation": "Move secrets to environment variables or a secrets manager",
            },
            {
                "name": "SQL Injection",
                "pattern": r'(?i)(execute|query)\s*\([^)]*\+[^)]*\)|f["\'].*\{.*\}.*(?:SELECT|INSERT|UPDATE|DELETE)',
                "severity": "HIGH",
                "category": "Security",
                "description": "Potential SQL injection vulnerability - string concatenation in query",
                "recommendation": "Use parameterized queries or prepared statements",
            },
            {
                "name": "Hardcoded IP",
                "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
                "severity": "LOW",
                "category": "Security",
                "description": "Hardcoded IP address detected",
                "recommendation": "Consider using configuration or DNS names instead",
            },
            {
                "name": "Eval Usage",
                "pattern": r'\beval\s*\(',
                "severity": "HIGH",
                "category": "Security",
                "description": "Use of eval() detected - potential code injection risk",
                "recommendation": "Avoid eval() - use safer alternatives like ast.literal_eval() for Python",
            },
            {
                "name": "Shell Injection",
                "pattern": r'(?i)(?:subprocess\.call|os\.system|shell\s*=\s*True)',
                "severity": "MEDIUM",
                "category": "Security",
                "description": "Potential shell command execution - verify input is sanitized",
                "recommendation": "Use subprocess with shell=False and pass arguments as a list",
            },
        ]

        for filename, content in changed_files.items():
            # Only check added lines (starting with +), tracking new-file
            # line numbers from the @@ hunk headers.
            added_lines = []
            current_line = 0

            for line in content.splitlines():
                if line.startswith("@@"):
                    # Parse the new-file start line from "@@ -x,y +a,b @@".
                    match = re.search(r"\+(\d+)", line)
                    if match:
                        current_line = int(match.group(1)) - 1
                elif line.startswith("\\"):
                    # BUG FIX: "\ No newline at end of file" is a diff
                    # marker, not a source line; counting it skewed the
                    # reported line numbers by one.
                    continue
                elif line.startswith("+") and not line.startswith("+++"):
                    current_line += 1
                    added_lines.append((current_line, line[1:]))
                elif not line.startswith("-"):
                    # Context line: advances the new-file line counter.
                    current_line += 1

            # Check every pattern against every added line.
            for line_num, line_content in added_lines:
                for pattern_def in patterns:
                    if re.search(pattern_def["pattern"], line_content):
                        issues.append(
                            ReviewIssue(
                                file=filename,
                                line=line_num,
                                severity=pattern_def["severity"],
                                category=pattern_def["category"],
                                description=pattern_def["description"],
                                recommendation=pattern_def["recommendation"],
                                code_snippet=line_content.strip()[:100],
                            )
                        )

        return issues

    def _run_ai_review(
        self,
        diff: str,
        context: AgentContext,
        security_issues: list[ReviewIssue],
    ) -> PRReviewResult:
        """Run the LLM-based code review over the diff.

        Security scan findings are passed in as context so the model does
        not duplicate them. On failure, returns a non-approving result.
        """
        prompt_template = self.load_prompt("base")

        # Add security context if issues were found.
        security_context = ""
        if security_issues:
            security_context = "\n\nSECURITY SCAN RESULTS (already detected):\n"
            for issue in security_issues[:5]:  # Limit to first 5
                security_context += f"- [{issue.severity}] {issue.file}:{issue.line} - {issue.description}\n"

        prompt = f"{prompt_template}\n{security_context}\nDIFF:\n{diff}"

        try:
            result = self.call_llm_json(prompt)

            issues = []
            for issue_data in result.get("issues", []):
                issues.append(
                    ReviewIssue(
                        file=issue_data.get("file", "unknown"),
                        line=issue_data.get("line"),
                        severity=issue_data.get("severity", "MEDIUM"),
                        category=issue_data.get("category", "General"),
                        description=issue_data.get("description", ""),
                        recommendation=issue_data.get("recommendation", ""),
                        code_snippet=issue_data.get("code_snippet"),
                    )
                )

            return PRReviewResult(
                summary=result.get("summary", "Review completed"),
                issues=issues,
                overall_severity=result.get("overall_severity", "LOW"),
                approval=result.get("approval", True),
                security_issues=security_issues,
            )

        except Exception as e:
            self.logger.error(f"AI review failed: {e}")
            return PRReviewResult(
                summary=f"AI review encountered an error: {e}",
                issues=[],
                overall_severity="UNKNOWN",
                approval=False,
                security_issues=security_issues,
            )

    def _post_inline_comments(
        self,
        owner: str,
        repo: str,
        pr_number: int,
        review: PRReviewResult,
    ) -> int:
        """Post inline review comments for issues that carry a line number.

        Returns the number of comments actually submitted (capped at 10).
        """
        comments = []

        all_issues = review.issues + review.security_issues
        for issue in all_issues:
            if issue.line and issue.file:
                comment_body = (
                    f"**[{issue.severity}] {issue.category}**\n\n"
                    f"{issue.description}\n\n"
                    f"**Recommendation:** {issue.recommendation}"
                )
                comments.append(
                    {
                        "path": issue.file,
                        "line": issue.line,
                        "body": comment_body,
                    }
                )

        if not comments:
            return 0

        try:
            # Use Gitea's pull request review API for inline comments.
            self.gitea.create_pull_request_review(
                owner=owner,
                repo=repo,
                index=pr_number,
                body="AI Code Review - Inline Comments",
                event="COMMENT",
                comments=comments[:10],  # Limit to 10 inline comments
            )
            return min(len(comments), 10)
        except Exception as e:
            self.logger.warning(f"Failed to post inline comments: {e}")
            return 0

    def _generate_summary_comment(self, review: PRReviewResult) -> str:
        """Generate the Markdown summary comment for the PR."""
        lines = [
            self.AI_DISCLAIMER,
            "",
            "## AI Code Review",
            "",
            review.summary,
            "",
        ]

        # Severity statistics across AI and security findings.
        all_issues = review.issues + review.security_issues
        high = sum(1 for i in all_issues if i.severity == "HIGH")
        medium = sum(1 for i in all_issues if i.severity == "MEDIUM")
        low = sum(1 for i in all_issues if i.severity == "LOW")

        lines.append("### Summary")
        lines.append("")
        lines.append("| Severity | Count |")
        lines.append("|----------|-------|")
        lines.append(f"| HIGH | {high} |")
        lines.append(f"| MEDIUM | {medium} |")
        lines.append(f"| LOW | {low} |")
        lines.append("")

        # Security issues get their own section (top 5).
        if review.security_issues:
            lines.append("### Security Issues")
            lines.append("")
            for issue in review.security_issues[:5]:
                lines.append(f"- **[{issue.severity}]** `{issue.file}:{issue.line}` - {issue.description}")
            lines.append("")

        # Remaining AI findings (top 10, with an overflow note).
        other_issues = [i for i in review.issues if i not in review.security_issues]
        if other_issues:
            lines.append("### Review Findings")
            lines.append("")
            for issue in other_issues[:10]:
                loc = f"`{issue.file}:{issue.line}`" if issue.line else f"`{issue.file}`"
                lines.append(f"- **[{issue.severity}]** {loc} - {issue.description}")
            if len(other_issues) > 10:
                lines.append(f"- ...and {len(other_issues) - 10} more issues")
            lines.append("")

        # Final verdict.
        lines.append("---")
        lines.append(f"**Overall Severity:** `{review.overall_severity}`")
        if review.approval:
            lines.append("**AI Recommendation:** Approve")
        else:
            lines.append("**AI Recommendation:** Changes Requested")

        return "\n".join(lines)

    def _apply_review_labels(
        self,
        owner: str,
        repo: str,
        pr_number: int,
        review: PRReviewResult,
    ) -> list[str]:
        """Apply the approval/changes-required label based on the review.

        Returns the names of the labels that were applied (possibly empty).
        """
        labels_config = self.config.get("labels", {}).get("status", {})

        try:
            repo_labels = self.gitea.get_repo_labels(owner, repo)
            label_map = {lbl["name"]: lbl["id"] for lbl in repo_labels}
        except Exception as e:
            self.logger.warning(f"Failed to get repo labels: {e}")
            return []

        labels_to_add = []

        # Add approval/changes-required label according to the verdict.
        if review.approval:
            label_name = labels_config.get("ai_approved", "ai-approved")
        else:
            label_name = labels_config.get("ai_changes_required", "ai-changes-required")

        if label_name in label_map:
            labels_to_add.append(label_map[label_name])

        if labels_to_add:
            try:
                self.gitea.add_issue_labels(owner, repo, pr_number, labels_to_add)
                # Map applied IDs back to names (avoid shadowing the `id` builtin).
                return [
                    name
                    for name, label_id in label_map.items()
                    if label_id in labels_to_add
                ]
            except Exception as e:
                self.logger.warning(f"Failed to add labels: {e}")

        return []
|
||||
10
tools/ai-review/clients/__init__.py
Normal file
10
tools/ai-review/clients/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""API Clients Package
|
||||
|
||||
This package contains client wrappers for external services
|
||||
like Gitea API and LLM providers.
|
||||
"""
|
||||
|
||||
from clients.gitea_client import GiteaClient
|
||||
from clients.llm_client import LLMClient
|
||||
|
||||
__all__ = ["GiteaClient", "LLMClient"]
|
||||
447
tools/ai-review/clients/gitea_client.py
Normal file
447
tools/ai-review/clients/gitea_client.py
Normal file
@@ -0,0 +1,447 @@
|
||||
"""Gitea API Client
|
||||
|
||||
A unified client for interacting with the Gitea REST API.
|
||||
Provides methods for issues, pull requests, comments, and repository operations.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class GiteaClient:
|
||||
"""Client for Gitea API operations."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_url: str | None = None,
|
||||
token: str | None = None,
|
||||
timeout: int = 30,
|
||||
):
|
||||
"""Initialize the Gitea client.
|
||||
|
||||
Args:
|
||||
api_url: Gitea API base URL. Defaults to AI_REVIEW_API_URL env var.
|
||||
token: API token. Defaults to AI_REVIEW_TOKEN env var.
|
||||
timeout: Request timeout in seconds.
|
||||
"""
|
||||
self.api_url = api_url or os.environ.get("AI_REVIEW_API_URL", "")
|
||||
self.token = token or os.environ.get("AI_REVIEW_TOKEN", "")
|
||||
self.timeout = timeout
|
||||
|
||||
if not self.api_url:
|
||||
raise ValueError("Gitea API URL is required")
|
||||
if not self.token:
|
||||
raise ValueError("Gitea API token is required")
|
||||
|
||||
self.headers = {
|
||||
"Authorization": f"token {self.token}",
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
|
||||
def _request(
|
||||
self,
|
||||
method: str,
|
||||
endpoint: str,
|
||||
json: dict | None = None,
|
||||
params: dict | None = None,
|
||||
) -> dict | list:
|
||||
"""Make an API request.
|
||||
|
||||
Args:
|
||||
method: HTTP method (GET, POST, PATCH, DELETE).
|
||||
endpoint: API endpoint (without base URL).
|
||||
json: Request body for POST/PATCH.
|
||||
params: Query parameters.
|
||||
|
||||
Returns:
|
||||
Response JSON data.
|
||||
|
||||
Raises:
|
||||
requests.HTTPError: If the request fails.
|
||||
"""
|
||||
url = f"{self.api_url}{endpoint}"
|
||||
response = requests.request(
|
||||
method=method,
|
||||
url=url,
|
||||
headers=self.headers,
|
||||
json=json,
|
||||
params=params,
|
||||
timeout=self.timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
if response.status_code == 204:
|
||||
return {}
|
||||
return response.json()
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Issue Operations
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def create_issue(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
title: str,
|
||||
body: str,
|
||||
labels: list[int] | None = None,
|
||||
) -> dict:
|
||||
"""Create a new issue.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
title: Issue title.
|
||||
body: Issue body.
|
||||
labels: Optional list of label IDs.
|
||||
|
||||
Returns:
|
||||
Created issue object.
|
||||
"""
|
||||
payload = {
|
||||
"title": title,
|
||||
"body": body,
|
||||
}
|
||||
if labels:
|
||||
payload["labels"] = labels
|
||||
|
||||
return self._request(
|
||||
"POST",
|
||||
f"/repos/{owner}/{repo}/issues",
|
||||
json=payload,
|
||||
)
|
||||
|
||||
def update_issue(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
index: int,
|
||||
title: str | None = None,
|
||||
body: str | None = None,
|
||||
state: str | None = None,
|
||||
) -> dict:
|
||||
"""Update an existing issue.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: Issue number.
|
||||
title: New title.
|
||||
body: New body.
|
||||
state: New state (open, closed).
|
||||
|
||||
Returns:
|
||||
Updated issue object.
|
||||
"""
|
||||
payload = {}
|
||||
if title:
|
||||
payload["title"] = title
|
||||
if body:
|
||||
payload["body"] = body
|
||||
if state:
|
||||
payload["state"] = state
|
||||
|
||||
return self._request(
|
||||
"PATCH",
|
||||
f"/repos/{owner}/{repo}/issues/{index}",
|
||||
json=payload,
|
||||
)
|
||||
|
||||
def list_issues(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
state: str = "open",
|
||||
labels: list[str] | None = None,
|
||||
page: int = 1,
|
||||
limit: int = 30,
|
||||
) -> list[dict]:
|
||||
"""List issues in a repository.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
state: Issue state (open, closed, all).
|
||||
labels: Filter by labels.
|
||||
page: Page number.
|
||||
limit: Items per page.
|
||||
|
||||
Returns:
|
||||
List of issue objects.
|
||||
"""
|
||||
params = {
|
||||
"state": state,
|
||||
"page": page,
|
||||
"limit": limit,
|
||||
}
|
||||
if labels:
|
||||
params["labels"] = ",".join(labels)
|
||||
|
||||
return self._request("GET", f"/repos/{owner}/{repo}/issues", params=params)
|
||||
|
||||
def get_issue(self, owner: str, repo: str, index: int) -> dict:
|
||||
"""Get a single issue.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: Issue number.
|
||||
|
||||
Returns:
|
||||
Issue object.
|
||||
"""
|
||||
return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}")
|
||||
|
||||
def create_issue_comment(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
index: int,
|
||||
body: str,
|
||||
) -> dict:
|
||||
"""Create a comment on an issue.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: Issue number.
|
||||
body: Comment body.
|
||||
|
||||
Returns:
|
||||
Created comment object.
|
||||
"""
|
||||
return self._request(
|
||||
"POST",
|
||||
f"/repos/{owner}/{repo}/issues/{index}/comments",
|
||||
json={"body": body},
|
||||
)
|
||||
|
||||
def update_issue_comment(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
comment_id: int,
|
||||
body: str,
|
||||
) -> dict:
|
||||
"""Update an existing comment.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
comment_id: Comment ID.
|
||||
body: Updated comment body.
|
||||
|
||||
Returns:
|
||||
Updated comment object.
|
||||
"""
|
||||
return self._request(
|
||||
"PATCH",
|
||||
f"/repos/{owner}/{repo}/issues/comments/{comment_id}",
|
||||
json={"body": body},
|
||||
)
|
||||
|
||||
def list_issue_comments(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
index: int,
|
||||
) -> list[dict]:
|
||||
"""List comments on an issue.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: Issue number.
|
||||
|
||||
Returns:
|
||||
List of comment objects.
|
||||
"""
|
||||
return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}/comments")
|
||||
|
||||
def add_issue_labels(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
index: int,
|
||||
labels: list[int],
|
||||
) -> list[dict]:
|
||||
"""Add labels to an issue.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: Issue number.
|
||||
labels: List of label IDs to add.
|
||||
|
||||
Returns:
|
||||
List of label objects.
|
||||
"""
|
||||
return self._request(
|
||||
"POST",
|
||||
f"/repos/{owner}/{repo}/issues/{index}/labels",
|
||||
json={"labels": labels},
|
||||
)
|
||||
|
||||
def get_repo_labels(self, owner: str, repo: str) -> list[dict]:
|
||||
"""Get all labels for a repository.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
|
||||
Returns:
|
||||
List of label objects.
|
||||
"""
|
||||
return self._request("GET", f"/repos/{owner}/{repo}/labels")
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Pull Request Operations
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def get_pull_request(self, owner: str, repo: str, index: int) -> dict:
|
||||
"""Get a pull request.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: PR number.
|
||||
|
||||
Returns:
|
||||
Pull request object.
|
||||
"""
|
||||
return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}")
|
||||
|
||||
def get_pull_request_diff(self, owner: str, repo: str, index: int) -> str:
|
||||
"""Get the diff for a pull request.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: PR number.
|
||||
|
||||
Returns:
|
||||
Diff text.
|
||||
"""
|
||||
url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{index}.diff"
|
||||
response = requests.get(
|
||||
url,
|
||||
headers={
|
||||
"Authorization": f"token {self.token}",
|
||||
"Accept": "text/plain",
|
||||
},
|
||||
timeout=self.timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.text
|
||||
|
||||
def list_pull_request_files(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
index: int,
|
||||
) -> list[dict]:
|
||||
"""List files changed in a pull request.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: PR number.
|
||||
|
||||
Returns:
|
||||
List of changed file objects.
|
||||
"""
|
||||
return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}/files")
|
||||
|
||||
def create_pull_request_review(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
index: int,
|
||||
body: str,
|
||||
event: str = "COMMENT",
|
||||
comments: list[dict] | None = None,
|
||||
) -> dict:
|
||||
"""Create a review on a pull request.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
index: PR number.
|
||||
body: Review body.
|
||||
event: Review event (APPROVE, REQUEST_CHANGES, COMMENT).
|
||||
comments: List of inline comments.
|
||||
|
||||
Returns:
|
||||
Created review object.
|
||||
"""
|
||||
payload: dict[str, Any] = {
|
||||
"body": body,
|
||||
"event": event,
|
||||
}
|
||||
if comments:
|
||||
payload["comments"] = comments
|
||||
|
||||
return self._request(
|
||||
"POST",
|
||||
f"/repos/{owner}/{repo}/pulls/{index}/reviews",
|
||||
json=payload,
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Repository Operations
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def get_repository(self, owner: str, repo: str) -> dict:
|
||||
"""Get repository information.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
|
||||
Returns:
|
||||
Repository object.
|
||||
"""
|
||||
return self._request("GET", f"/repos/{owner}/{repo}")
|
||||
|
||||
def get_file_contents(
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
filepath: str,
|
||||
ref: str | None = None,
|
||||
) -> dict:
|
||||
"""Get file contents from a repository.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
filepath: Path to file.
|
||||
ref: Git ref (branch, tag, commit).
|
||||
|
||||
Returns:
|
||||
File content object with base64-encoded content.
|
||||
"""
|
||||
params = {}
|
||||
if ref:
|
||||
params["ref"] = ref
|
||||
return self._request(
|
||||
"GET",
|
||||
f"/repos/{owner}/{repo}/contents/{filepath}",
|
||||
params=params,
|
||||
)
|
||||
|
||||
def get_branch(self, owner: str, repo: str, branch: str) -> dict:
|
||||
"""Get branch information.
|
||||
|
||||
Args:
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
branch: Branch name.
|
||||
|
||||
Returns:
|
||||
Branch object.
|
||||
"""
|
||||
return self._request("GET", f"/repos/{owner}/{repo}/branches/{branch}")
|
||||
482
tools/ai-review/clients/llm_client.py
Normal file
482
tools/ai-review/clients/llm_client.py
Normal file
@@ -0,0 +1,482 @@
|
||||
"""LLM Client
|
||||
|
||||
A unified client for interacting with multiple LLM providers.
|
||||
Supports OpenAI, OpenRouter, Ollama, and extensible for more providers.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolCall:
|
||||
"""Represents a tool call from the LLM."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
arguments: dict
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMResponse:
|
||||
"""Response from an LLM call."""
|
||||
|
||||
content: str
|
||||
model: str
|
||||
provider: str
|
||||
tokens_used: int | None = None
|
||||
finish_reason: str | None = None
|
||||
tool_calls: list[ToolCall] | None = None
|
||||
|
||||
|
||||
class BaseLLMProvider(ABC):
|
||||
"""Abstract base class for LLM providers."""
|
||||
|
||||
@abstractmethod
|
||||
def call(self, prompt: str, **kwargs) -> LLMResponse:
|
||||
"""Make a call to the LLM.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send.
|
||||
**kwargs: Provider-specific options.
|
||||
|
||||
Returns:
|
||||
LLMResponse with the generated content.
|
||||
"""
|
||||
pass
|
||||
|
||||
def call_with_tools(
|
||||
self,
|
||||
messages: list[dict],
|
||||
tools: list[dict] | None = None,
|
||||
**kwargs,
|
||||
) -> LLMResponse:
|
||||
"""Make a call to the LLM with tool/function calling support.
|
||||
|
||||
Args:
|
||||
messages: List of message dicts with 'role' and 'content'.
|
||||
tools: List of tool definitions in OpenAI format.
|
||||
**kwargs: Provider-specific options.
|
||||
|
||||
Returns:
|
||||
LLMResponse with content and/or tool_calls.
|
||||
"""
|
||||
raise NotImplementedError("Tool calling not supported by this provider")
|
||||
|
||||
|
||||
class OpenAIProvider(BaseLLMProvider):
|
||||
"""OpenAI API provider."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None = None,
|
||||
model: str = "gpt-4o-mini",
|
||||
temperature: float = 0,
|
||||
max_tokens: int = 4096,
|
||||
):
|
||||
self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
|
||||
self.model = model
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.api_url = "https://api.openai.com/v1/chat/completions"
|
||||
|
||||
def call(self, prompt: str, **kwargs) -> LLMResponse:
|
||||
"""Call OpenAI API."""
|
||||
if not self.api_key:
|
||||
raise ValueError("OpenAI API key is required")
|
||||
|
||||
response = requests.post(
|
||||
self.api_url,
|
||||
headers={
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": kwargs.get("model", self.model),
|
||||
"temperature": kwargs.get("temperature", self.temperature),
|
||||
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
choice = data["choices"][0]
|
||||
usage = data.get("usage", {})
|
||||
|
||||
return LLMResponse(
|
||||
content=choice["message"]["content"],
|
||||
model=data["model"],
|
||||
provider="openai",
|
||||
tokens_used=usage.get("total_tokens"),
|
||||
finish_reason=choice.get("finish_reason"),
|
||||
)
|
||||
|
||||
def call_with_tools(
|
||||
self,
|
||||
messages: list[dict],
|
||||
tools: list[dict] | None = None,
|
||||
**kwargs,
|
||||
) -> LLMResponse:
|
||||
"""Call OpenAI API with tool support."""
|
||||
if not self.api_key:
|
||||
raise ValueError("OpenAI API key is required")
|
||||
|
||||
request_body = {
|
||||
"model": kwargs.get("model", self.model),
|
||||
"temperature": kwargs.get("temperature", self.temperature),
|
||||
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
|
||||
"messages": messages,
|
||||
}
|
||||
|
||||
if tools:
|
||||
request_body["tools"] = tools
|
||||
request_body["tool_choice"] = kwargs.get("tool_choice", "auto")
|
||||
|
||||
response = requests.post(
|
||||
self.api_url,
|
||||
headers={
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=request_body,
|
||||
timeout=120,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
choice = data["choices"][0]
|
||||
usage = data.get("usage", {})
|
||||
message = choice["message"]
|
||||
|
||||
# Parse tool calls if present
|
||||
tool_calls = None
|
||||
if message.get("tool_calls"):
|
||||
tool_calls = []
|
||||
for tc in message["tool_calls"]:
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=tc["id"],
|
||||
name=tc["function"]["name"],
|
||||
arguments=json.loads(tc["function"]["arguments"]),
|
||||
)
|
||||
)
|
||||
|
||||
return LLMResponse(
|
||||
content=message.get("content") or "",
|
||||
model=data["model"],
|
||||
provider="openai",
|
||||
tokens_used=usage.get("total_tokens"),
|
||||
finish_reason=choice.get("finish_reason"),
|
||||
tool_calls=tool_calls,
|
||||
)
|
||||
|
||||
|
||||
class OpenRouterProvider(BaseLLMProvider):
|
||||
"""OpenRouter API provider."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None = None,
|
||||
model: str = "anthropic/claude-3.5-sonnet",
|
||||
temperature: float = 0,
|
||||
max_tokens: int = 4096,
|
||||
):
|
||||
self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY", "")
|
||||
self.model = model
|
||||
self.temperature = temperature
|
||||
self.max_tokens = max_tokens
|
||||
self.api_url = "https://openrouter.ai/api/v1/chat/completions"
|
||||
|
||||
def call(self, prompt: str, **kwargs) -> LLMResponse:
|
||||
"""Call OpenRouter API."""
|
||||
if not self.api_key:
|
||||
raise ValueError("OpenRouter API key is required")
|
||||
|
||||
response = requests.post(
|
||||
self.api_url,
|
||||
headers={
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": kwargs.get("model", self.model),
|
||||
"temperature": kwargs.get("temperature", self.temperature),
|
||||
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
choice = data["choices"][0]
|
||||
usage = data.get("usage", {})
|
||||
|
||||
return LLMResponse(
|
||||
content=choice["message"]["content"],
|
||||
model=data.get("model", self.model),
|
||||
provider="openrouter",
|
||||
tokens_used=usage.get("total_tokens"),
|
||||
finish_reason=choice.get("finish_reason"),
|
||||
)
|
||||
|
||||
def call_with_tools(
|
||||
self,
|
||||
messages: list[dict],
|
||||
tools: list[dict] | None = None,
|
||||
**kwargs,
|
||||
) -> LLMResponse:
|
||||
"""Call OpenRouter API with tool support."""
|
||||
if not self.api_key:
|
||||
raise ValueError("OpenRouter API key is required")
|
||||
|
||||
request_body = {
|
||||
"model": kwargs.get("model", self.model),
|
||||
"temperature": kwargs.get("temperature", self.temperature),
|
||||
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
|
||||
"messages": messages,
|
||||
}
|
||||
|
||||
if tools:
|
||||
request_body["tools"] = tools
|
||||
request_body["tool_choice"] = kwargs.get("tool_choice", "auto")
|
||||
|
||||
response = requests.post(
|
||||
self.api_url,
|
||||
headers={
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json=request_body,
|
||||
timeout=120,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
choice = data["choices"][0]
|
||||
usage = data.get("usage", {})
|
||||
message = choice["message"]
|
||||
|
||||
# Parse tool calls if present
|
||||
tool_calls = None
|
||||
if message.get("tool_calls"):
|
||||
tool_calls = []
|
||||
for tc in message["tool_calls"]:
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=tc["id"],
|
||||
name=tc["function"]["name"],
|
||||
arguments=json.loads(tc["function"]["arguments"]),
|
||||
)
|
||||
)
|
||||
|
||||
return LLMResponse(
|
||||
content=message.get("content") or "",
|
||||
model=data.get("model", self.model),
|
||||
provider="openrouter",
|
||||
tokens_used=usage.get("total_tokens"),
|
||||
finish_reason=choice.get("finish_reason"),
|
||||
tool_calls=tool_calls,
|
||||
)
|
||||
|
||||
|
||||
class OllamaProvider(BaseLLMProvider):
|
||||
"""Ollama (self-hosted) provider."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
host: str | None = None,
|
||||
model: str = "codellama:13b",
|
||||
temperature: float = 0,
|
||||
):
|
||||
self.host = host or os.environ.get("OLLAMA_HOST", "http://localhost:11434")
|
||||
self.model = model
|
||||
self.temperature = temperature
|
||||
|
||||
def call(self, prompt: str, **kwargs) -> LLMResponse:
|
||||
"""Call Ollama API."""
|
||||
response = requests.post(
|
||||
f"{self.host}/api/generate",
|
||||
json={
|
||||
"model": kwargs.get("model", self.model),
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"temperature": kwargs.get("temperature", self.temperature),
|
||||
},
|
||||
},
|
||||
timeout=300, # Longer timeout for local models
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
return LLMResponse(
|
||||
content=data["response"],
|
||||
model=data.get("model", self.model),
|
||||
provider="ollama",
|
||||
tokens_used=data.get("eval_count"),
|
||||
finish_reason="stop" if data.get("done") else None,
|
||||
)
|
||||
|
||||
|
||||
class LLMClient:
|
||||
"""Unified LLM client supporting multiple providers."""
|
||||
|
||||
PROVIDERS = {
|
||||
"openai": OpenAIProvider,
|
||||
"openrouter": OpenRouterProvider,
|
||||
"ollama": OllamaProvider,
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
provider: str = "openai",
|
||||
config: dict | None = None,
|
||||
):
|
||||
"""Initialize the LLM client.
|
||||
|
||||
Args:
|
||||
provider: Provider name (openai, openrouter, ollama).
|
||||
config: Provider-specific configuration.
|
||||
"""
|
||||
if provider not in self.PROVIDERS:
|
||||
raise ValueError(f"Unknown provider: {provider}. Available: {list(self.PROVIDERS.keys())}")
|
||||
|
||||
self.provider_name = provider
|
||||
self.config = config or {}
|
||||
self._provider = self.PROVIDERS[provider](**self.config)
|
||||
|
||||
def call(self, prompt: str, **kwargs) -> LLMResponse:
|
||||
"""Make a call to the configured LLM provider.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send.
|
||||
**kwargs: Provider-specific options.
|
||||
|
||||
Returns:
|
||||
LLMResponse with the generated content.
|
||||
"""
|
||||
return self._provider.call(prompt, **kwargs)
|
||||
|
||||
def call_with_tools(
|
||||
self,
|
||||
messages: list[dict],
|
||||
tools: list[dict] | None = None,
|
||||
**kwargs,
|
||||
) -> LLMResponse:
|
||||
"""Make a call with tool/function calling support.
|
||||
|
||||
Args:
|
||||
messages: List of message dicts with 'role' and 'content'.
|
||||
tools: List of tool definitions in OpenAI format.
|
||||
**kwargs: Provider-specific options.
|
||||
|
||||
Returns:
|
||||
LLMResponse with content and/or tool_calls.
|
||||
"""
|
||||
return self._provider.call_with_tools(messages, tools, **kwargs)
|
||||
|
||||
def call_json(self, prompt: str, **kwargs) -> dict:
|
||||
"""Make a call and parse the response as JSON.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send (should request JSON output).
|
||||
**kwargs: Provider-specific options.
|
||||
|
||||
Returns:
|
||||
Parsed JSON response.
|
||||
|
||||
Raises:
|
||||
json.JSONDecodeError: If response is not valid JSON.
|
||||
"""
|
||||
response = self.call(prompt, **kwargs)
|
||||
content = response.content.strip()
|
||||
|
||||
return self._extract_json(content)
|
||||
|
||||
def _extract_json(self, content: str) -> dict:
|
||||
"""Extract and parse JSON from content string.
|
||||
|
||||
Handles markdown code blocks and preamble text.
|
||||
"""
|
||||
content = content.strip()
|
||||
|
||||
# Attempt 1: direct parse
|
||||
try:
|
||||
return json.loads(content)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Attempt 2: Extract from markdown code blocks
|
||||
if "```" in content:
|
||||
# Find the JSON block
|
||||
import re
|
||||
match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
|
||||
if match:
|
||||
try:
|
||||
return json.loads(match.group(1))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Attempt 3: Find first { and last }
|
||||
try:
|
||||
start = content.find("{")
|
||||
end = content.rfind("}")
|
||||
if start != -1 and end != -1:
|
||||
json_str = content[start : end + 1]
|
||||
return json.loads(json_str)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Attempt 4: Fix common JSON errors (comments, trailing commas)
|
||||
# This is risky but helpful for LLM output
|
||||
try:
|
||||
# Remove comments
|
||||
import re
|
||||
json_str = re.sub(r"//.*", "", content)
|
||||
json_str = re.sub(r"/\*[\s\S]*?\*/", "", json_str)
|
||||
return json.loads(json_str)
|
||||
except json.JSONDecodeError as e:
|
||||
# If all attempts fail, raise an error with the content for debugging
|
||||
snippet = content[:500] + "..." if len(content) > 500 else content
|
||||
raise ValueError(f"Failed to parse JSON response: {e}. Raw content snippet: {snippet!r}")
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config: dict) -> "LLMClient":
|
||||
"""Create an LLM client from a configuration dictionary.
|
||||
|
||||
Args:
|
||||
config: Configuration with 'provider' key and provider-specific settings.
|
||||
|
||||
Returns:
|
||||
Configured LLMClient instance.
|
||||
"""
|
||||
provider = config.get("provider", "openai")
|
||||
provider_config = {}
|
||||
|
||||
# Map config keys to provider-specific settings
|
||||
if provider == "openai":
|
||||
provider_config = {
|
||||
"model": config.get("model", {}).get("openai", "gpt-4o-mini"),
|
||||
"temperature": config.get("temperature", 0),
|
||||
"max_tokens": config.get("max_tokens", 16000),
|
||||
}
|
||||
elif provider == "openrouter":
|
||||
provider_config = {
|
||||
"model": config.get("model", {}).get("openrouter", "anthropic/claude-3.5-sonnet"),
|
||||
"temperature": config.get("temperature", 0),
|
||||
"max_tokens": config.get("max_tokens", 16000),
|
||||
}
|
||||
elif provider == "ollama":
|
||||
provider_config = {
|
||||
"model": config.get("model", {}).get("ollama", "codellama:13b"),
|
||||
"temperature": config.get("temperature", 0),
|
||||
}
|
||||
|
||||
return cls(provider=provider, config=provider_config)
|
||||
23
tools/ai-review/comment.py
Normal file
23
tools/ai-review/comment.py
Normal file
@@ -0,0 +1,23 @@
|
||||
def to_markdown(result: dict) -> str:
    """Render an AI review result dict as a Markdown comment.

    Tolerates partially-filled results (LLM output may omit keys): missing
    fields fall back to neutral defaults instead of raising KeyError.

    Args:
        result: Review result with optional keys ``summary``, ``issues``,
            ``overall_severity`` and ``approval``.

    Returns:
        Markdown text ready to post as an issue/PR comment.
    """
    lines = []
    lines.append("## 🤖 Enterprise AI Code Review\n")
    lines.append(result.get("summary", "") + "\n")

    issues = result.get("issues")
    if not issues:
        lines.append("✅ No issues found.\n")
    else:
        for issue in issues:
            lines.append(
                f"### ❗ {issue.get('severity', 'UNKNOWN')} — {issue.get('category', 'General')}"
            )
            lines.append(f"- **File:** `{issue.get('file', '?')}`")
            if issue.get("line"):
                lines.append(f"- **Line:** `{issue['line']}`")
            lines.append(f"- **Issue:** {issue.get('description', '')}")
            lines.append(f"- **Recommendation:** {issue.get('recommendation', '')}\n")

    lines.append("---")
    lines.append(f"**Overall severity:** `{result.get('overall_severity', 'UNKNOWN')}`")
    lines.append(
        "✅ **AI Approval**" if result.get("approval") else "❌ **Changes required**"
    )

    return "\n".join(lines)
|
||||
96
tools/ai-review/config.yml
Normal file
96
tools/ai-review/config.yml
Normal file
@@ -0,0 +1,96 @@
|
||||
provider: openai # openai | openrouter | ollama
|
||||
|
||||
model:
|
||||
openai: gpt-4.1-mini
|
||||
openrouter: anthropic/claude-3.5-sonnet
|
||||
ollama: codellama:13b
|
||||
|
||||
temperature: 0
|
||||
max_tokens: 4096
|
||||
|
||||
# Review settings
|
||||
review:
|
||||
fail_on_severity: HIGH
|
||||
max_diff_lines: 800
|
||||
inline_comments: true
|
||||
security_scan: true
|
||||
|
||||
# Agent settings
|
||||
agents:
|
||||
issue:
|
||||
enabled: true
|
||||
auto_label: true
|
||||
auto_triage: true
|
||||
duplicate_threshold: 0.85
|
||||
events:
|
||||
- opened
|
||||
- labeled
|
||||
pr:
|
||||
enabled: true
|
||||
inline_comments: true
|
||||
security_scan: true
|
||||
events:
|
||||
- opened
|
||||
- synchronize
|
||||
codebase:
|
||||
enabled: true
|
||||
schedule: "0 0 * * 0" # Weekly on Sunday
|
||||
chat:
|
||||
enabled: true
|
||||
name: "Bartender"
|
||||
max_iterations: 5 # Max tool call iterations per chat
|
||||
tools:
|
||||
- search_codebase
|
||||
- read_file
|
||||
- search_web
|
||||
searxng_url: "" # Set via SEARXNG_URL env var or here
|
||||
|
||||
# Interaction settings
|
||||
# CUSTOMIZE YOUR BOT NAME HERE!
|
||||
# Change mention_prefix to your preferred bot name:
|
||||
# "@ai-bot" - Default
|
||||
# "@bartender" - Friendly bar theme
|
||||
# "@uni" - Short and simple
|
||||
# "@joey" - Personal assistant name
|
||||
# "@codebot" - Code-focused name
|
||||
# NOTE: Also update the workflow files (.github/workflows/ or .gitea/workflows/)
|
||||
# to match this prefix in the 'if: contains(...)' condition
|
||||
interaction:
|
||||
respond_to_mentions: true
|
||||
mention_prefix: "@ai-bot" # Change this to customize your bot's name!
|
||||
commands:
|
||||
- explain
|
||||
- suggest
|
||||
- security
|
||||
- summarize
|
||||
|
||||
# Enterprise settings
|
||||
enterprise:
|
||||
audit_log: true
|
||||
audit_path: "/var/log/ai-review/"
|
||||
metrics_enabled: true
|
||||
rate_limit:
|
||||
requests_per_minute: 30
|
||||
max_concurrent: 4
|
||||
|
||||
# Label mappings for auto-labeling
|
||||
labels:
|
||||
priority:
|
||||
high: "priority: high"
|
||||
medium: "priority: medium"
|
||||
low: "priority: low"
|
||||
type:
|
||||
bug: "type: bug"
|
||||
feature: "type: feature"
|
||||
question: "type: question"
|
||||
docs: "type: documentation"
|
||||
status:
|
||||
ai_approved: "ai-approved"
|
||||
ai_changes_required: "ai-changes-required"
|
||||
ai_reviewed: "ai-reviewed"
|
||||
|
||||
# Security scanning rules
|
||||
security:
|
||||
enabled: true
|
||||
fail_on_high: true
|
||||
rules_file: "security/security_rules.yml"
|
||||
211
tools/ai-review/dispatcher.py
Normal file
211
tools/ai-review/dispatcher.py
Normal file
@@ -0,0 +1,211 @@
|
||||
"""Event Dispatcher
|
||||
|
||||
Routes incoming webhook events to the appropriate agent handlers.
|
||||
Supports concurrent execution and queue management.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass
|
||||
from typing import Type
|
||||
|
||||
import yaml
|
||||
|
||||
from agents.base_agent import AgentContext, AgentResult, BaseAgent
|
||||
|
||||
|
||||
@dataclass
class DispatchResult:
    """Result of dispatching a single event to the registered agents."""

    # Type of the event that was dispatched (e.g. "issue", "pull_request").
    event_type: str
    # Class names of the agents that were invoked for this event.
    agents_run: list[str]
    # One AgentResult per agent that ran, in invocation order.
    results: list[AgentResult]
    # Human-readable error messages collected during dispatch.
    errors: list[str]
|
||||
|
||||
|
||||
class Dispatcher:
|
||||
"""Event dispatcher that routes events to appropriate agents."""
|
||||
|
||||
    def __init__(
        self,
        config: dict | None = None,
        max_workers: int = 4,
    ):
        """Initialize the dispatcher.

        Args:
            config: Configuration dictionary. Falls back to loading
                config.yml from this module's directory when omitted.
            max_workers: Maximum concurrent agent executions.
        """
        self.config = config or self._load_config()
        self.max_workers = max_workers
        self.logger = logging.getLogger(__name__)
        # Agents are added explicitly via register_agent / register_agent_class.
        self._agents: list[BaseAgent] = []
        # NOTE(review): the executor is created here, but the visible
        # dispatch() path runs handlers sequentially — confirm it is used.
        self._executor = ThreadPoolExecutor(max_workers=max_workers)
|
||||
|
||||
@staticmethod
|
||||
def _load_config() -> dict:
|
||||
"""Load configuration from config.yml."""
|
||||
config_path = os.path.join(os.path.dirname(__file__), "config.yml")
|
||||
if os.path.exists(config_path):
|
||||
with open(config_path) as f:
|
||||
return yaml.safe_load(f)
|
||||
return {}
|
||||
|
||||
def register_agent(self, agent: BaseAgent):
|
||||
"""Register an agent with the dispatcher.
|
||||
|
||||
Args:
|
||||
agent: Agent instance to register.
|
||||
"""
|
||||
self._agents.append(agent)
|
||||
self.logger.info(f"Registered agent: {agent.__class__.__name__}")
|
||||
|
||||
def register_agent_class(self, agent_class: Type[BaseAgent], **kwargs):
|
||||
"""Register an agent class (will be instantiated).
|
||||
|
||||
Args:
|
||||
agent_class: Agent class to instantiate and register.
|
||||
**kwargs: Arguments to pass to agent constructor.
|
||||
"""
|
||||
agent = agent_class(config=self.config, **kwargs)
|
||||
self.register_agent(agent)
|
||||
|
||||
def dispatch(
|
||||
self,
|
||||
event_type: str,
|
||||
event_data: dict,
|
||||
owner: str,
|
||||
repo: str,
|
||||
) -> DispatchResult:
|
||||
"""Dispatch an event to registered agents.
|
||||
|
||||
Args:
|
||||
event_type: Type of event (issue, pull_request, issue_comment, etc).
|
||||
event_data: Event payload data.
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
|
||||
Returns:
|
||||
Dispatch result with all agent results.
|
||||
"""
|
||||
self.logger.info(f"Dispatching event: {event_type} for {owner}/{repo}")
|
||||
|
||||
# Find agents that can handle this event
|
||||
handlers = [
|
||||
agent for agent in self._agents if agent.can_handle(event_type, event_data)
|
||||
]
|
||||
|
||||
if not handlers:
|
||||
self.logger.info(f"No agents registered for event: {event_type}")
|
||||
return DispatchResult(
|
||||
event_type=event_type,
|
||||
agents_run=[],
|
||||
results=[],
|
||||
errors=[],
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
f"Found {len(handlers)} agent(s) for event: {[a.__class__.__name__ for a in handlers]}"
|
||||
)
|
||||
|
||||
# Create context for agents
|
||||
context = AgentContext(
|
||||
owner=owner,
|
||||
repo=repo,
|
||||
event_type=event_type,
|
||||
event_data=event_data,
|
||||
config=self.config,
|
||||
)
|
||||
|
||||
# Run all handlers
|
||||
results = []
|
||||
errors = []
|
||||
agents_run = []
|
||||
|
||||
for agent in handlers:
|
||||
agent_name = agent.__class__.__name__
|
||||
agents_run.append(agent_name)
|
||||
|
||||
try:
|
||||
result = agent.run(context)
|
||||
results.append(result)
|
||||
if not result.success:
|
||||
errors.append(f"{agent_name}: {result.error or result.message}")
|
||||
except Exception as e:
|
||||
self.logger.exception(f"Agent {agent_name} failed: {e}")
|
||||
errors.append(f"{agent_name}: {str(e)}")
|
||||
results.append(
|
||||
AgentResult(
|
||||
success=False,
|
||||
message="Unexpected error",
|
||||
error=str(e),
|
||||
)
|
||||
)
|
||||
|
||||
return DispatchResult(
|
||||
event_type=event_type,
|
||||
agents_run=agents_run,
|
||||
results=results,
|
||||
errors=errors,
|
||||
)
|
||||
|
||||
def dispatch_async(
|
||||
self,
|
||||
event_type: str,
|
||||
event_data: dict,
|
||||
owner: str,
|
||||
repo: str,
|
||||
):
|
||||
"""Dispatch an event asynchronously.
|
||||
|
||||
Args:
|
||||
event_type: Type of event.
|
||||
event_data: Event payload data.
|
||||
owner: Repository owner.
|
||||
repo: Repository name.
|
||||
|
||||
Returns:
|
||||
Future that resolves to DispatchResult.
|
||||
"""
|
||||
return self._executor.submit(
|
||||
self.dispatch, event_type, event_data, owner, repo
|
||||
)
|
||||
|
||||
def shutdown(self):
|
||||
"""Shutdown the executor."""
|
||||
self._executor.shutdown(wait=True)
|
||||
|
||||
|
||||
# Singleton dispatcher for easy access
_dispatcher: Dispatcher | None = None


def get_dispatcher() -> Dispatcher:
    """Return the process-wide Dispatcher, creating it on first use."""
    global _dispatcher
    if _dispatcher is not None:
        return _dispatcher
    _dispatcher = Dispatcher()
    return _dispatcher
|
||||
|
||||
|
||||
def dispatch_event(
    event_type: str,
    event_data: dict,
    owner: str,
    repo: str,
) -> DispatchResult:
    """Dispatch an event through the global dispatcher singleton.

    Args:
        event_type: Type of event.
        event_data: Event payload data.
        owner: Repository owner.
        repo: Repository name.

    Returns:
        Dispatch result.
    """
    dispatcher = get_dispatcher()
    return dispatcher.dispatch(event_type, event_data, owner, repo)
|
||||
10
tools/ai-review/enterprise/__init__.py
Normal file
10
tools/ai-review/enterprise/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""Enterprise Features Package
|
||||
|
||||
This package contains enterprise-grade features like
|
||||
audit logging and metrics collection.
|
||||
"""
|
||||
|
||||
from enterprise.audit_logger import AuditLogger
|
||||
from enterprise.metrics import MetricsCollector
|
||||
|
||||
__all__ = ["AuditLogger", "MetricsCollector"]
|
||||
303
tools/ai-review/enterprise/audit_logger.py
Normal file
303
tools/ai-review/enterprise/audit_logger.py
Normal file
@@ -0,0 +1,303 @@
|
||||
"""Audit Logger
|
||||
|
||||
Enterprise audit logging for tracking all AI agent actions,
|
||||
decisions, and interactions for compliance and debugging.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
class AuditLogger:
    """Audit logger for enterprise compliance."""

    def __init__(
        self,
        log_path: str | None = None,
        enabled: bool = True,
    ):
        """Initialize the audit logger.

        Args:
            log_path: Directory to write audit logs.
            enabled: Whether audit logging is enabled.
        """
        self.enabled = enabled
        # Fallback order: explicit arg -> AI_AUDIT_PATH env var -> /var/log/ai-review/
        self.log_path = Path(
            log_path or os.environ.get("AI_AUDIT_PATH", "/var/log/ai-review/")
        )
        self.logger = logging.getLogger("audit")

        if self.enabled:
            self._ensure_log_dir()

    def _ensure_log_dir(self):
        """Ensure the log directory exists."""
        try:
            self.log_path.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            # Degrade gracefully: disable auditing rather than failing
            # every subsequent agent action.
            self.logger.warning(f"Could not create audit log directory: {e}")
            self.enabled = False

    def _get_log_file(self) -> Path:
        """Get the current log file path (daily rotation)."""
        # One JSON-lines file per UTC day: audit-YYYY-MM-DD.jsonl
        date_str = datetime.utcnow().strftime("%Y-%m-%d")
        return self.log_path / f"audit-{date_str}.jsonl"

    def log(
        self,
        action: str,
        agent: str,
        owner: str,
        repo: str,
        details: dict[str, Any] | None = None,
        success: bool = True,
        error: str | None = None,
    ):
        """Log an audit event.

        Args:
            action: Action performed (e.g., "review_pr", "triage_issue").
            agent: Agent name that performed the action.
            owner: Repository owner.
            repo: Repository name.
            details: Additional details about the action.
            success: Whether the action succeeded.
            error: Error message if failed.
        """
        if not self.enabled:
            return

        event = {
            # Naive utcnow with a "Z" suffix — treated as UTC downstream.
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "action": action,
            "agent": agent,
            "repository": f"{owner}/{repo}",
            "success": success,
            "details": details or {},
        }

        if error:
            event["error"] = error

        try:
            # Append one JSON object per line (JSONL).
            log_file = self._get_log_file()
            with open(log_file, "a") as f:
                f.write(json.dumps(event) + "\n")
        except Exception as e:
            # Auditing is best-effort; never crash the caller over it.
            self.logger.error(f"Failed to write audit log: {e}")

    def log_llm_call(
        self,
        agent: str,
        owner: str,
        repo: str,
        provider: str,
        model: str,
        tokens_used: int | None = None,
        duration_ms: int | None = None,
    ):
        """Log an LLM API call.

        Args:
            agent: Agent making the call.
            owner: Repository owner.
            repo: Repository name.
            provider: LLM provider used.
            model: Model name.
            tokens_used: Number of tokens consumed.
            duration_ms: Call duration in milliseconds.
        """
        self.log(
            action="llm_call",
            agent=agent,
            owner=owner,
            repo=repo,
            details={
                "provider": provider,
                "model": model,
                "tokens_used": tokens_used,
                "duration_ms": duration_ms,
            },
        )

    def log_comment_posted(
        self,
        agent: str,
        owner: str,
        repo: str,
        issue_number: int,
        comment_type: str,
    ):
        """Log a comment being posted.

        Args:
            agent: Agent posting the comment.
            owner: Repository owner.
            repo: Repository name.
            issue_number: Issue or PR number.
            comment_type: Type of comment (triage, review, response).
        """
        self.log(
            action="comment_posted",
            agent=agent,
            owner=owner,
            repo=repo,
            details={
                "issue_number": issue_number,
                "comment_type": comment_type,
            },
        )

    def log_labels_applied(
        self,
        agent: str,
        owner: str,
        repo: str,
        issue_number: int,
        labels: list[str],
    ):
        """Log labels being applied.

        Args:
            agent: Agent applying labels.
            owner: Repository owner.
            repo: Repository name.
            issue_number: Issue or PR number.
            labels: Labels applied.
        """
        self.log(
            action="labels_applied",
            agent=agent,
            owner=owner,
            repo=repo,
            details={
                "issue_number": issue_number,
                "labels": labels,
            },
        )

    def get_logs(
        self,
        start_date: str | None = None,
        end_date: str | None = None,
        action: str | None = None,
        repository: str | None = None,
    ) -> list[dict]:
        """Retrieve audit logs with optional filtering.

        Args:
            start_date: Start date (YYYY-MM-DD).
            end_date: End date (YYYY-MM-DD).
            action: Filter by action type.
            repository: Filter by repository (owner/repo).

        Returns:
            List of audit log entries.
        """
        if not self.enabled:
            return []

        logs = []
        log_files = sorted(self.log_path.glob("audit-*.jsonl"))

        for log_file in log_files:
            # Date filter on filename
            # (filenames are audit-YYYY-MM-DD.jsonl, so ISO dates compare
            # correctly as plain strings).
            file_date = log_file.stem.replace("audit-", "")
            if start_date and file_date < start_date:
                continue
            if end_date and file_date > end_date:
                continue

            try:
                with open(log_file) as f:
                    for line in f:
                        try:
                            entry = json.loads(line.strip())

                            # Apply filters
                            if action and entry.get("action") != action:
                                continue
                            if repository and entry.get("repository") != repository:
                                continue

                            logs.append(entry)
                        except json.JSONDecodeError:
                            # Skip corrupt/partial lines instead of failing the query.
                            continue
            except Exception:
                # Unreadable file: skip it; retrieval is best-effort.
                continue

        return logs

    def generate_report(
        self,
        start_date: str | None = None,
        end_date: str | None = None,
    ) -> dict:
        """Generate a summary report of audit activity.

        Args:
            start_date: Report start date.
            end_date: Report end date.

        Returns:
            Summary report dictionary.
        """
        logs = self.get_logs(start_date=start_date, end_date=end_date)

        report = {
            "period": {
                "start": start_date or "all",
                "end": end_date or "all",
            },
            "total_events": len(logs),
            "by_action": {},
            "by_repository": {},
            "by_agent": {},
            "success_rate": 0.0,
            "llm_usage": {
                "total_calls": 0,
                "total_tokens": 0,
            },
        }

        success_count = 0

        for log in logs:
            action = log.get("action", "unknown")
            repo = log.get("repository", "unknown")
            agent = log.get("agent", "unknown")

            # Tally per-dimension counts.
            report["by_action"][action] = report["by_action"].get(action, 0) + 1
            report["by_repository"][repo] = report["by_repository"].get(repo, 0) + 1
            report["by_agent"][agent] = report["by_agent"].get(agent, 0) + 1

            if log.get("success"):
                success_count += 1

            if action == "llm_call":
                report["llm_usage"]["total_calls"] += 1
                tokens = log.get("details", {}).get("tokens_used")
                if tokens:
                    report["llm_usage"]["total_tokens"] += tokens

        if logs:
            report["success_rate"] = success_count / len(logs)

        return report
|
||||
|
||||
|
||||
# Global instance
_audit_logger: AuditLogger | None = None


def get_audit_logger() -> AuditLogger:
    """Return the process-wide AuditLogger, creating it lazily."""
    global _audit_logger
    if _audit_logger is not None:
        return _audit_logger
    _audit_logger = AuditLogger()
    return _audit_logger
|
||||
371
tools/ai-review/enterprise/metrics.py
Normal file
371
tools/ai-review/enterprise/metrics.py
Normal file
@@ -0,0 +1,371 @@
|
||||
"""Metrics Collector
|
||||
|
||||
Observability metrics for AI agent performance monitoring.
|
||||
Tracks request counts, latencies, errors, and LLM usage.
|
||||
"""
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from threading import Lock
|
||||
|
||||
|
||||
@dataclass
class MetricPoint:
    """A single metric data point."""

    timestamp: datetime  # when the sample was taken
    value: float  # sampled measurement
    labels: dict = field(default_factory=dict)  # optional metric labels, e.g. {"agent": "pr"}
|
||||
|
||||
|
||||
class Counter:
    """Thread-safe, monotonically increasing counter metric."""

    def __init__(self, name: str, description: str = ""):
        self.name = name
        self.description = description
        self._lock = Lock()
        self._value = 0.0

    def inc(self, value: float = 1.0):
        """Add *value* (default 1) to the counter."""
        with self._lock:
            self._value = self._value + value

    @property
    def value(self) -> float:
        """Current counter total."""
        with self._lock:
            return self._value
|
||||
|
||||
|
||||
class Gauge:
    """Thread-safe gauge metric (a value that can move up and down)."""

    def __init__(self, name: str, description: str = ""):
        self.name = name
        self.description = description
        self._lock = Lock()
        self._value = 0.0

    def set(self, value: float):
        """Replace the gauge value."""
        with self._lock:
            self._value = value

    def inc(self, value: float = 1.0):
        """Raise the gauge by *value* (default 1)."""
        with self._lock:
            self._value += value

    def dec(self, value: float = 1.0):
        """Lower the gauge by *value* (default 1)."""
        # Delegates to inc with a negated delta; same locking behavior.
        self.inc(-value)

    @property
    def value(self) -> float:
        """Current gauge reading."""
        with self._lock:
            return self._value
|
||||
|
||||
|
||||
class Histogram:
|
||||
"""Simple histogram for tracking distributions."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
description: str = "",
|
||||
buckets: list[float] | None = None,
|
||||
):
|
||||
self.name = name
|
||||
self.description = description
|
||||
self.buckets = buckets or [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
|
||||
self._values: list[float] = []
|
||||
self._lock = Lock()
|
||||
|
||||
def observe(self, value: float):
|
||||
"""Record an observation."""
|
||||
with self._lock:
|
||||
self._values.append(value)
|
||||
# Keep only last 1000 observations
|
||||
if len(self._values) > 1000:
|
||||
self._values = self._values[-1000:]
|
||||
|
||||
def get_percentile(self, percentile: float) -> float:
|
||||
"""Get a percentile value."""
|
||||
with self._lock:
|
||||
if not self._values:
|
||||
return 0.0
|
||||
sorted_values = sorted(self._values)
|
||||
idx = int(len(sorted_values) * percentile / 100)
|
||||
return sorted_values[min(idx, len(sorted_values) - 1)]
|
||||
|
||||
@property
|
||||
def count(self) -> int:
|
||||
"""Get observation count."""
|
||||
with self._lock:
|
||||
return len(self._values)
|
||||
|
||||
@property
|
||||
def sum(self) -> float:
|
||||
"""Get sum of observations."""
|
||||
with self._lock:
|
||||
return sum(self._values)
|
||||
|
||||
|
||||
class MetricsCollector:
    """Central metrics collector for AI agents.

    Aggregates counters, gauges, and histograms for request handling,
    LLM usage, and agent actions; exposes a dict summary and a
    Prometheus text exposition.
    """

    def __init__(self, enabled: bool = True):
        """Initialize metrics collector.

        Args:
            enabled: Whether metrics collection is enabled.
        """
        self.enabled = enabled
        self._start_time = time.time()

        # Counters
        self.requests_total = Counter(
            "ai_review_requests_total",
            "Total number of review requests processed",
        )
        self.requests_success = Counter(
            "ai_review_requests_success",
            "Number of successful review requests",
        )
        self.requests_failed = Counter(
            "ai_review_requests_failed",
            "Number of failed review requests",
        )
        self.llm_calls_total = Counter(
            "ai_review_llm_calls_total",
            "Total number of LLM API calls",
        )
        self.llm_tokens_total = Counter(
            "ai_review_llm_tokens_total",
            "Total LLM tokens consumed",
        )
        self.comments_posted = Counter(
            "ai_review_comments_posted_total",
            "Total comments posted",
        )
        self.labels_applied = Counter(
            "ai_review_labels_applied_total",
            "Total labels applied",
        )
        self.security_findings = Counter(
            "ai_review_security_findings_total",
            "Total security findings detected",
        )

        # Gauges
        self.active_requests = Gauge(
            "ai_review_active_requests",
            "Currently active review requests",
        )

        # Histograms
        self.request_duration = Histogram(
            "ai_review_request_duration_seconds",
            "Request processing duration",
        )
        self.llm_duration = Histogram(
            "ai_review_llm_duration_seconds",
            "LLM API call duration",
        )

        # Per-agent metrics: {agent_name: {"total": n, "success": n, "failed": n}}
        self._agent_metrics: dict[str, dict] = {}

    def record_request_start(self, agent: str):
        """Record the start of a request.

        Args:
            agent: Name of the agent handling the request.
        """
        if not self.enabled:
            return

        self.requests_total.inc()
        self.active_requests.inc()

        if agent not in self._agent_metrics:
            self._agent_metrics[agent] = {
                "total": 0,
                "success": 0,
                "failed": 0,
            }
        self._agent_metrics[agent]["total"] += 1

    def record_request_end(
        self,
        agent: str,
        success: bool,
        duration_seconds: float,
    ):
        """Record the end of a request.

        Args:
            agent: Name of the agent.
            success: Whether the request succeeded.
            duration_seconds: Request duration.
        """
        if not self.enabled:
            return

        self.active_requests.dec()
        self.request_duration.observe(duration_seconds)

        if success:
            self.requests_success.inc()
            if agent in self._agent_metrics:
                self._agent_metrics[agent]["success"] += 1
        else:
            self.requests_failed.inc()
            if agent in self._agent_metrics:
                self._agent_metrics[agent]["failed"] += 1

    def record_llm_call(
        self,
        provider: str,
        model: str,
        tokens: int | None,
        duration_seconds: float,
    ):
        """Record an LLM API call.

        Args:
            provider: LLM provider name.
            model: Model used.
            tokens: Tokens consumed.
            duration_seconds: Call duration.
        """
        if not self.enabled:
            return

        self.llm_calls_total.inc()
        self.llm_duration.observe(duration_seconds)
        if tokens:
            self.llm_tokens_total.inc(tokens)

    def record_comment_posted(self):
        """Record a comment being posted."""
        if self.enabled:
            self.comments_posted.inc()

    def record_labels_applied(self, count: int = 1):
        """Record labels being applied."""
        if self.enabled:
            self.labels_applied.inc(count)

    def record_security_finding(self, severity: str):
        """Record a security finding.

        Args:
            severity: Finding severity. Currently accepted for interface
                stability but not broken out per-severity — only the total
                counter is incremented.
        """
        if self.enabled:
            self.security_findings.inc()

    def get_summary(self) -> dict:
        """Get a summary of all metrics.

        Returns:
            Dictionary with metric summaries.
        """
        uptime = time.time() - self._start_time

        return {
            "uptime_seconds": uptime,
            "requests": {
                "total": self.requests_total.value,
                "success": self.requests_success.value,
                "failed": self.requests_failed.value,
                "active": self.active_requests.value,
                # max(..., 1) avoids division by zero before any traffic.
                "success_rate": (
                    self.requests_success.value / max(self.requests_total.value, 1)
                ),
            },
            "llm": {
                "calls": self.llm_calls_total.value,
                "tokens": self.llm_tokens_total.value,
                "avg_duration_ms": (
                    (self.llm_duration.sum / max(self.llm_duration.count, 1)) * 1000
                ),
                "p50_duration_ms": self.llm_duration.get_percentile(50) * 1000,
                "p95_duration_ms": self.llm_duration.get_percentile(95) * 1000,
            },
            "actions": {
                "comments_posted": self.comments_posted.value,
                "labels_applied": self.labels_applied.value,
                "security_findings": self.security_findings.value,
            },
            "latency": {
                "avg_ms": (
                    (self.request_duration.sum / max(self.request_duration.count, 1))
                    * 1000
                ),
                "p50_ms": self.request_duration.get_percentile(50) * 1000,
                "p95_ms": self.request_duration.get_percentile(95) * 1000,
                "p99_ms": self.request_duration.get_percentile(99) * 1000,
            },
            "by_agent": self._agent_metrics,
        }

    def export_prometheus(self) -> str:
        """Export metrics in Prometheus text exposition format.

        Returns:
            Prometheus-formatted metrics string covering every counter
            and gauge this collector maintains (previously labels,
            security findings, and active requests were collected but
            never exported).
        """
        lines = []

        def add_metric(name: str, value: float, help_text: str = ""):
            if help_text:
                lines.append(f"# HELP {name} {help_text}")
            lines.append(f"{name} {value}")

        add_metric(
            "ai_review_requests_total",
            self.requests_total.value,
            "Total review requests",
        )
        add_metric(
            "ai_review_requests_success_total",
            self.requests_success.value,
            "Successful requests",
        )
        add_metric(
            "ai_review_requests_failed_total",
            self.requests_failed.value,
            "Failed requests",
        )
        add_metric(
            "ai_review_llm_calls_total",
            self.llm_calls_total.value,
            "Total LLM calls",
        )
        add_metric(
            "ai_review_llm_tokens_total",
            self.llm_tokens_total.value,
            "Total LLM tokens",
        )
        add_metric(
            "ai_review_comments_posted_total",
            self.comments_posted.value,
            "Comments posted",
        )
        add_metric(
            "ai_review_labels_applied_total",
            self.labels_applied.value,
            "Labels applied",
        )
        add_metric(
            "ai_review_security_findings_total",
            self.security_findings.value,
            "Security findings detected",
        )
        add_metric(
            "ai_review_active_requests",
            self.active_requests.value,
            "Currently active review requests",
        )

        return "\n".join(lines)
|
||||
|
||||
|
||||
# Global instance
_metrics: MetricsCollector | None = None


def get_metrics() -> MetricsCollector:
    """Return the process-wide MetricsCollector, creating it lazily."""
    global _metrics
    if _metrics is not None:
        return _metrics
    _metrics = MetricsCollector()
    return _metrics
|
||||
350
tools/ai-review/main.py
Normal file
350
tools/ai-review/main.py
Normal file
@@ -0,0 +1,350 @@
|
||||
#!/usr/bin/env python3
|
||||
"""AI Code Review Agent - Main Entry Point
|
||||
|
||||
This is the main CLI for running AI code review agents.
|
||||
Can be invoked directly or through CI/CD workflows.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import yaml
|
||||
|
||||
# Add the package to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from agents.issue_agent import IssueAgent
|
||||
from agents.pr_agent import PRAgent
|
||||
from agents.codebase_agent import CodebaseAgent
|
||||
from agents.chat_agent import ChatAgent
|
||||
from dispatcher import Dispatcher, get_dispatcher
|
||||
|
||||
|
||||
def setup_logging(verbose: bool = False):
    """Configure root logging: DEBUG when verbose, INFO otherwise."""
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.DEBUG if verbose else logging.INFO,
    )
|
||||
|
||||
|
||||
def load_config(config_path: str | None = None) -> dict:
|
||||
"""Load configuration from file."""
|
||||
if config_path and os.path.exists(config_path):
|
||||
with open(config_path) as f:
|
||||
return yaml.safe_load(f)
|
||||
|
||||
default_path = os.path.join(os.path.dirname(__file__), "config.yml")
|
||||
if os.path.exists(default_path):
|
||||
with open(default_path) as f:
|
||||
return yaml.safe_load(f)
|
||||
|
||||
return {}
|
||||
|
||||
|
||||
def run_pr_review(args, config: dict):
    """Review one pull request with the PR agent and print the outcome.

    Exits with status 1 when the agent reports failure.
    """
    from agents.base_agent import AgentContext

    agent = PRAgent(config=config)

    repo_owner, repo_name = args.repo.split("/")
    number = args.pr_number

    # Minimal synthetic "opened" event — just enough context for the
    # agent to locate and review the PR.
    ctx = AgentContext(
        owner=repo_owner,
        repo=repo_name,
        event_type="pull_request",
        event_data={
            "action": "opened",
            "pull_request": {
                "number": number,
                "title": args.title or f"PR #{number}",
            },
        },
        config=config,
    )

    outcome = agent.run(ctx)

    if not outcome.success:
        print(f"❌ PR Review Failed: {outcome.message}")
        if outcome.error:
            print(f" Error: {outcome.error}")
        sys.exit(1)

    print(f"✅ PR Review Complete: {outcome.message}")
    print(f" Actions: {', '.join(outcome.actions_taken)}")
|
||||
|
||||
|
||||
def run_issue_triage(args, config: dict):
    """Triage one issue with the issue agent and print the outcome.

    Fetches the full issue from the Gitea API first so the agent sees the
    complete body; exits with status 1 on fetch or triage failure.
    """
    from agents.base_agent import AgentContext
    from clients.gitea_client import GiteaClient

    agent = IssueAgent(config=config)

    repo_owner, repo_name = args.repo.split("/")
    number = args.issue_number

    # Fetch the complete issue payload (CLI args carry only the number).
    gitea = GiteaClient()

    try:
        issue_payload = gitea.get_issue(repo_owner, repo_name, number)
    except Exception as e:
        print(f"❌ Failed to fetch issue: {e}")
        sys.exit(1)

    ctx = AgentContext(
        owner=repo_owner,
        repo=repo_name,
        event_type="issues",
        event_data={
            "action": "opened",
            "issue": issue_payload,
        },
        config=config,
    )

    outcome = agent.run(ctx)

    if not outcome.success:
        print(f"❌ Issue Triage Failed: {outcome.message}")
        if outcome.error:
            print(f" Error: {outcome.error}")
        sys.exit(1)

    print(f"✅ Issue Triage Complete: {outcome.message}")
    print(f" Actions: {', '.join(outcome.actions_taken)}")
|
||||
|
||||
|
||||
def run_issue_comment(args, config: dict):
    """Respond to an @ai-bot command left in an issue comment.

    Fetches the issue for context, wraps the comment body in a synthetic
    "created" event, and exits with status 1 on any failure.
    """
    from agents.base_agent import AgentContext
    from clients.gitea_client import GiteaClient

    agent = IssueAgent(config=config)

    repo_owner, repo_name = args.repo.split("/")
    number = args.issue_number

    # Fetch the full issue so the agent has thread context for its reply.
    gitea = GiteaClient()

    try:
        issue_payload = gitea.get_issue(repo_owner, repo_name, number)
    except Exception as e:
        print(f"❌ Failed to fetch issue: {e}")
        sys.exit(1)

    ctx = AgentContext(
        owner=repo_owner,
        repo=repo_name,
        event_type="issue_comment",
        event_data={
            "action": "created",
            "issue": issue_payload,
            "comment": {
                "body": args.comment_body,
            },
        },
        config=config,
    )

    outcome = agent.run(ctx)

    if not outcome.success:
        print(f"❌ Comment Response Failed: {outcome.message}")
        if outcome.error:
            print(f" Error: {outcome.error}")
        sys.exit(1)

    print(f"✅ Comment Response Complete: {outcome.message}")
    print(f" Actions: {', '.join(outcome.actions_taken)}")
|
||||
|
||||
|
||||
def run_codebase_analysis(args, config: dict):
    """Analyze a whole repository with the codebase agent and print results.

    Exits with status 1 when the analysis fails.
    """
    from agents.base_agent import AgentContext

    agent = CodebaseAgent(config=config)

    repo_owner, repo_name = args.repo.split("/")

    # workflow_dispatch carries no payload — the agent scans the repo itself.
    ctx = AgentContext(
        owner=repo_owner,
        repo=repo_name,
        event_type="workflow_dispatch",
        event_data={},
        config=config,
    )

    outcome = agent.run(ctx)

    if not outcome.success:
        print(f"❌ Codebase Analysis Failed: {outcome.message}")
        if outcome.error:
            print(f" Error: {outcome.error}")
        sys.exit(1)

    print(f"✅ Codebase Analysis Complete: {outcome.message}")
    print(f" Health Score: {outcome.data.get('health_score', 'N/A')}")
    print(f" Actions: {', '.join(outcome.actions_taken)}")
|
||||
|
||||
|
||||
def run_chat(args, config: dict):
    """Send one message to the Bartender chat agent and print its reply.

    When an issue number is supplied, the issue is fetched and attached to
    the chat context (best effort — a fetch failure only prints a warning).
    """
    from agents.base_agent import AgentContext
    from clients.gitea_client import GiteaClient

    agent = ChatAgent(config=config)

    repo_owner, repo_name = args.repo.split("/")

    payload = {"message": args.message}

    if args.issue_number:
        gitea = GiteaClient()
        try:
            payload["issue"] = gitea.get_issue(repo_owner, repo_name, args.issue_number)
            payload["issue_number"] = args.issue_number
        except Exception as e:
            print(f"Warning: Could not fetch issue #{args.issue_number}: {e}")

    ctx = AgentContext(
        owner=repo_owner,
        repo=repo_name,
        event_type="chat",
        event_data=payload,
        config=config,
    )

    outcome = agent.run(ctx)

    if not outcome.success:
        print(f"❌ Chat Failed: {outcome.message}")
        if outcome.error:
            print(f" Error: {outcome.error}")
        sys.exit(1)

    print("\n🍸 Bartender says:\n")
    print(outcome.data.get("response", ""))
    print()
    if outcome.data.get("tools_used"):
        print(f" [Tools used: {', '.join(outcome.data['tools_used'])}]")
|
||||
|
||||
|
||||
def run_webhook_dispatch(args, config: dict):
    """Route a raw webhook event through the global dispatcher.

    Registers every known agent, parses the JSON event payload, dispatches
    it, prints a per-agent summary, and exits 1 if any agent errored.
    """
    dispatcher = get_dispatcher()

    # Register the full agent roster (registration order preserved).
    for agent in (
        IssueAgent(config=config),
        PRAgent(config=config),
        CodebaseAgent(config=config),
        ChatAgent(config=config),
    ):
        dispatcher.register_agent(agent)

    # Parse event data
    payload = json.loads(args.event_data)
    repo_owner, repo_name = args.repo.split("/")

    result = dispatcher.dispatch(
        event_type=args.event_type,
        event_data=payload,
        owner=repo_owner,
        repo=repo_name,
    )

    print(f"Dispatched event: {result.event_type}")
    print(f"Agents run: {result.agents_run}")
    for agent_name, agent_result in zip(result.agents_run, result.results):
        status = "✅" if agent_result.success else "❌"
        print(f" {status} {agent_name}: {agent_result.message}")

    if result.errors:
        sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and route to the chosen command."""
    parser = argparse.ArgumentParser(
        description="AI Code Review Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("-c", "--config", help="Path to config file")

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # PR review command
    sub = subparsers.add_parser("pr", help="Review a pull request")
    sub.add_argument("repo", help="Repository (owner/repo)")
    sub.add_argument("pr_number", type=int, help="PR number")
    sub.add_argument("--title", help="PR title (optional)")

    # Issue triage command
    sub = subparsers.add_parser("issue", help="Triage an issue")
    sub.add_argument("repo", help="Repository (owner/repo)")
    sub.add_argument("issue_number", type=int, help="Issue number")
    sub.add_argument("--title", help="Issue title")
    sub.add_argument("--body", help="Issue body")

    # Issue comment command (for @ai-bot mentions)
    sub = subparsers.add_parser("comment", help="Respond to @ai-bot command")
    sub.add_argument("repo", help="Repository (owner/repo)")
    sub.add_argument("issue_number", type=int, help="Issue number")
    sub.add_argument("comment_body", help="Comment body with @ai-bot command")

    # Codebase analysis command
    sub = subparsers.add_parser("codebase", help="Analyze codebase")
    sub.add_argument("repo", help="Repository (owner/repo)")

    # Chat command (Bartender)
    sub = subparsers.add_parser("chat", help="Chat with Bartender bot")
    sub.add_argument("repo", help="Repository (owner/repo)")
    sub.add_argument("message", help="Message to send to Bartender")
    sub.add_argument(
        "--issue", dest="issue_number", type=int,
        help="Optional issue number to post response to"
    )

    # Webhook dispatch command
    sub = subparsers.add_parser("dispatch", help="Dispatch webhook event")
    sub.add_argument("repo", help="Repository (owner/repo)")
    sub.add_argument("event_type", help="Event type")
    sub.add_argument("event_data", help="Event data (JSON)")

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    setup_logging(args.verbose)
    config = load_config(args.config)

    # Route to the handler for the selected sub-command. argparse only
    # accepts registered command names, so the lookup cannot miss.
    handlers = {
        "pr": run_pr_review,
        "issue": run_issue_triage,
        "comment": run_issue_comment,
        "codebase": run_codebase_analysis,
        "chat": run_chat,
        "dispatch": run_webhook_dispatch,
    }
    handler = handlers.get(args.command)
    if handler:
        handler(args, config)
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||
64
tools/ai-review/prompts/base.md
Normal file
64
tools/ai-review/prompts/base.md
Normal file
@@ -0,0 +1,64 @@
|
||||
You are an experienced senior software engineer with deep expertise in:
|
||||
- Secure coding and security analysis
|
||||
- System design and architecture
|
||||
- Performance optimization
|
||||
- Maintainable, readable code
|
||||
- Test coverage and documentation
|
||||
- CI/CD pipeline best practices
|
||||
|
||||
You are reviewing the following **pull request diff**. Your goal is to provide a **comprehensive, actionable, and clear review** as a structured JSON response.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
Review the diff and identify issues in these categories:
|
||||
- **Security**: Vulnerabilities, hardcoded secrets, injection risks
|
||||
- **Correctness**: Logic errors, edge cases, bugs
|
||||
- **Performance**: Inefficiencies, N+1 queries, memory issues
|
||||
- **Maintainability**: Code complexity, duplication, unclear logic
|
||||
- **Readability**: Naming, formatting, documentation
|
||||
- **Testing**: Missing tests, untested paths
|
||||
- **Architecture**: Design issues, coupling, separation of concerns
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
Return a JSON object with this structure:
|
||||
|
||||
```json
|
||||
{{
|
||||
"summary": "Brief overall assessment of the PR",
|
||||
"overall_severity": "HIGH" | "MEDIUM" | "LOW",
|
||||
"approval": true | false,
|
||||
"issues": [
|
||||
{{
|
||||
"file": "path/to/file.py",
|
||||
"line": 42,
|
||||
"severity": "HIGH" | "MEDIUM" | "LOW",
|
||||
"category": "Security" | "Correctness" | "Performance" | "Maintainability" | "Readability" | "Testing" | "Architecture",
|
||||
"description": "Clear description of the issue",
|
||||
"recommendation": "Specific fix or improvement",
|
||||
"code_snippet": "relevant code if applicable"
|
||||
}}
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rules
|
||||
|
||||
1. **Be specific**: Include file paths and line numbers when possible
|
||||
2. **Be actionable**: Every issue must have a clear recommendation
|
||||
3. **Prioritize**: HIGH severity for security/data-loss issues, MEDIUM for bugs, LOW for style
|
||||
4. **Be honest**: If uncertain, note it in the description
|
||||
5. **Stay focused**: Only report real issues, not style preferences
|
||||
6. Set `approval: false` if any HIGH severity issues exist
|
||||
7. Output ONLY valid JSON, no additional text
|
||||
|
||||
---
|
||||
|
||||
## Diff to Review
|
||||
|
||||
63
tools/ai-review/prompts/issue_response.md
Normal file
63
tools/ai-review/prompts/issue_response.md
Normal file
@@ -0,0 +1,63 @@
|
||||
You are a helpful AI assistant responding to a GitHub/Gitea issue. Your goal is to provide a helpful, professional response that assists the issue author.
|
||||
|
||||
## Context
|
||||
|
||||
**Issue Type:** {issue_type}
|
||||
**Priority:** {priority}
|
||||
**Title:** {title}
|
||||
**Body:**
|
||||
{body}
|
||||
|
||||
## Triage Analysis
|
||||
{triage_analysis}
|
||||
|
||||
## Your Task
|
||||
|
||||
Generate a helpful comment response based on the issue type:
|
||||
|
||||
### For Bug Reports:
|
||||
1. Acknowledge the issue
|
||||
2. If missing info, politely request specific details needed
|
||||
3. Suggest any immediate workarounds if obvious
|
||||
4. Indicate next steps (investigation, need reproduction, etc.)
|
||||
|
||||
### For Feature Requests:
|
||||
1. Thank the user for the suggestion
|
||||
2. Summarize understanding of the request
|
||||
3. Ask clarifying questions if needed
|
||||
4. Note any related existing features
|
||||
|
||||
### For Questions:
|
||||
1. Directly answer the question if possible
|
||||
2. Link to relevant documentation
|
||||
3. Provide code examples if helpful
|
||||
4. Suggest alternatives if applicable
|
||||
|
||||
### For Documentation Issues:
|
||||
1. Acknowledge the gap/issue
|
||||
2. Clarify the correct information if known
|
||||
3. Note what documentation updates are needed
|
||||
|
||||
## Response Guidelines
|
||||
|
||||
1. Be concise but thorough
|
||||
2. Use a friendly, professional tone
|
||||
3. Format with Markdown appropriately
|
||||
4. Include code blocks where relevant
|
||||
5. DO NOT promise timelines or fixes
|
||||
6. DO NOT make up information - say "I'm not certain" if unsure
|
||||
7. Always end with an offer to help further
|
||||
|
||||
## Output Format
|
||||
|
||||
Return a JSON object:
|
||||
```json
|
||||
{{
|
||||
"comment": "Your markdown-formatted response here",
|
||||
"needs_human_review": true/false,
|
||||
"suggested_assignee": null or "username",
|
||||
"follow_up_questions": ["question1", "question2"]
|
||||
}}
|
||||
```
|
||||
|
||||
Generate your response:
|
||||
69
tools/ai-review/prompts/issue_triage.md
Normal file
69
tools/ai-review/prompts/issue_triage.md
Normal file
@@ -0,0 +1,69 @@
|
||||
You are an expert issue triage specialist. Analyze the following GitHub/Gitea issue and provide a structured classification.
|
||||
|
||||
## Your Task
|
||||
|
||||
Analyze the issue and return a JSON object with the following structure:
|
||||
|
||||
```json
|
||||
{{
|
||||
"type": "bug" | "feature" | "question" | "documentation" | "support" | "enhancement",
|
||||
"priority": "high" | "medium" | "low",
|
||||
"confidence": 0.0-1.0,
|
||||
"summary": "Brief one-line summary of the issue",
|
||||
"suggested_labels": ["label1", "label2"],
|
||||
"is_duplicate": false,
|
||||
"duplicate_of": null,
|
||||
"needs_more_info": false,
|
||||
"missing_info": [],
|
||||
"components": ["component1", "component2"],
|
||||
"reasoning": "Brief explanation of your classification"
|
||||
}}
|
||||
```
|
||||
|
||||
## Classification Guidelines
|
||||
|
||||
### Type Classification
|
||||
- **bug**: Something is broken, not working as expected, error messages, crashes
|
||||
- **feature**: Request for new functionality that doesn't exist
|
||||
- **enhancement**: Improvement to existing functionality
|
||||
- **question**: User asking how to do something, seeking clarification
|
||||
- **documentation**: Issues with docs, missing docs, unclear docs
|
||||
- **support**: General help request, troubleshooting
|
||||
|
||||
### Priority Classification
|
||||
- **high**: Security issues, data loss, complete feature broken, blocking issues
|
||||
- **medium**: Significant functionality impacted, workaround exists
|
||||
- **low**: Minor issues, cosmetic, nice-to-have improvements
|
||||
|
||||
### Missing Information Indicators
|
||||
Look for missing:
|
||||
- Steps to reproduce (for bugs)
|
||||
- Expected vs actual behavior
|
||||
- Environment details (OS, version, etc.)
|
||||
- Error messages or logs
|
||||
- Screenshots (for UI issues)
|
||||
|
||||
## Important Rules
|
||||
|
||||
1. Be conservative with "high" priority - use it sparingly
|
||||
2. If uncertain between two types, choose the more actionable one
|
||||
3. Always provide reasoning for your classification
|
||||
4. Set confidence lower if the issue is vague or ambiguous
|
||||
5. Output ONLY valid JSON, no additional text
|
||||
|
||||
---
|
||||
|
||||
## Issue to Analyze
|
||||
|
||||
**Title:** {title}
|
||||
|
||||
**Body:**
|
||||
{body}
|
||||
|
||||
**Author:** {author}
|
||||
|
||||
**Labels (if any):** {existing_labels}
|
||||
|
||||
---
|
||||
|
||||
Provide your JSON classification:
|
||||
17
tools/ai-review/requirements.txt
Normal file
17
tools/ai-review/requirements.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
# AI Code Review - Python Dependencies
|
||||
|
||||
# Core dependencies
|
||||
requests>=2.31.0
|
||||
pyyaml>=6.0
|
||||
|
||||
# Optional: For webhook server mode
|
||||
# flask>=3.0.0
|
||||
|
||||
# Optional: For async operations
|
||||
# aiohttp>=3.9.0
|
||||
|
||||
# Development dependencies (install with pip install -e .[dev])
|
||||
# pytest>=7.4.0
|
||||
# pytest-mock>=3.12.0
|
||||
# black>=24.0.0
|
||||
# mypy>=1.8.0
|
||||
174
tools/ai-review/review.py
Normal file
174
tools/ai-review/review.py
Normal file
@@ -0,0 +1,174 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
from comment import to_markdown
|
||||
|
||||
# Directory containing this script; config.yml and prompts/ live beside it.
ROOT = os.path.dirname(__file__)

# Load the review configuration once at import time. Use a context manager
# so the file handle is closed promptly (the original left it open).
with open(f"{ROOT}/config.yml") as _cfg_file:
    CFG = yaml.safe_load(_cfg_file)

# Marker to identify the AI comment (used to find/update our own comment).
AI_MARKER = "<!-- AI_CODE_REVIEW -->"

# Disclaimer text to prepend to every generated review comment.
AI_DISCLAIMER = (
    "**Note:** This review was generated by an AI assistant. "
    "While it aims to be accurate and helpful, it may contain mistakes "
    "or miss important issues. Please verify all findings before taking action."
)
|
||||
|
||||
# -------------------------------
|
||||
# Helper functions
|
||||
# -------------------------------
|
||||
|
||||
|
||||
def get_diff() -> str:
    """Return the git diff of HEAD against origin/main, truncated per config."""
    diff_text = subprocess.check_output(
        ["git", "diff", "origin/main...HEAD"], text=True
    )
    # Cap the number of diff lines so the LLM prompt stays bounded.
    limit = CFG["review"]["max_diff_lines"]
    diff_lines = diff_text.splitlines()
    if len(diff_lines) > limit:
        return "\n".join(diff_lines[:limit])
    return diff_text
|
||||
|
||||
|
||||
def build_prompt(diff: str) -> str:
    """Prepare the AI prompt by appending the diff to the base prompt.

    Args:
        diff: Git diff text to review.

    Returns:
        Full prompt string to send to the LLM.
    """
    # Context manager so the prompt-template handle is closed promptly
    # (the original used a bare open().read()).
    with open(f"{ROOT}/prompts/base.md") as f:
        base = f.read()
    return f"{base}\n\nDIFF:\n{diff}"
|
||||
|
||||
|
||||
def call_llm(prompt: str) -> str:
    """Call the configured LLM provider and return the raw completion text.

    The provider is selected by ``CFG["provider"]`` and credentials come
    from environment variables.

    Args:
        prompt: Full prompt to send.

    Returns:
        The model's text response.

    Raises:
        requests.HTTPError: If the provider returns an error status
            (previously this surfaced as an opaque KeyError on the JSON body).
        RuntimeError: If the configured provider is unknown.
    """
    provider = CFG["provider"]

    if provider == "openai":
        r = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openai"],
                "temperature": CFG["temperature"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        # Fail loudly on HTTP errors instead of crashing on a missing JSON key.
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    if provider == "openrouter":
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openrouter"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    if provider == "ollama":
        # Ollama is typically self-hosted and slower; allow a longer timeout.
        r = requests.post(
            f"{os.environ['OLLAMA_HOST']}/api/generate",
            json={
                "model": CFG["model"]["ollama"],
                "prompt": prompt,
                "stream": False,
            },
            timeout=120,
        )
        r.raise_for_status()
        return r.json()["response"]

    raise RuntimeError(f"Unknown provider: {provider}")
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Gitea PR comment functions
|
||||
# -------------------------------
|
||||
|
||||
|
||||
def find_existing_comment() -> int | None:
    """Find the existing AI review comment in the PR, if any.

    Scans the PR's issue comments for the hidden AI_MARKER.

    Returns:
        The comment id carrying AI_MARKER, or None if no such comment exists.

    Raises:
        requests.HTTPError: If the Gitea API request fails.
    """
    url = (
        f"{os.environ['AI_REVIEW_API_URL']}/repos/"
        f"{os.environ['AI_REVIEW_REPO']}/issues/"
        f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
    )

    r = requests.get(
        url,
        headers={"Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}"},
        timeout=15,
    )
    # Surface API failures instead of iterating over an error payload.
    r.raise_for_status()

    for c in r.json():
        if AI_MARKER in c["body"]:
            return c["id"]

    return None
|
||||
|
||||
|
||||
def upsert_pr_comment(markdown: str):
    """Create or update the AI review comment on the PR.

    Args:
        markdown: Full comment body (disclaimer + marker + review markdown).

    Raises:
        RuntimeError: If the Gitea API rejects the create/update request.
    """
    comment_id = find_existing_comment()
    headers = {
        "Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}",
        "Content-Type": "application/json",
    }

    # Timeouts added: the original patch/post had none and could hang CI
    # indefinitely (sibling find_existing_comment already uses timeout=15).
    if comment_id:
        # Update the existing review comment in place.
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/comments/{comment_id}"
        )
        r = requests.patch(url, headers=headers, json={"body": markdown}, timeout=15)
    else:
        # No previous AI comment: create a new one on the PR issue thread.
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/"
            f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
        )
        r = requests.post(url, headers=headers, json={"body": markdown}, timeout=15)

    if r.status_code not in (200, 201):
        raise RuntimeError(f"Failed to upsert PR comment: {r.text}")
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Main workflow
|
||||
# -------------------------------
|
||||
|
||||
|
||||
def main():
    """Run the review: diff -> LLM -> PR comment, failing CI on severe findings."""
    diff = get_diff()
    if not diff.strip():
        # Nothing changed relative to origin/main; nothing to review.
        sys.exit(0)

    raw = call_llm(build_prompt(diff))
    # NOTE(review): assumes the model returns bare JSON per the prompt's
    # "Output ONLY valid JSON" rule; fenced output would raise here.
    result = json.loads(raw)

    # Convert JSON review to Markdown
    markdown = to_markdown(result)

    # Prepend AI disclaimer and marker
    full_comment = AI_DISCLAIMER + "\n\n" + AI_MARKER + "\n" + markdown

    upsert_pr_comment(full_comment)

    # Fail CI when severity reaches the configured threshold and the PR was
    # not approved. The original compared with ==, so a HIGH review slipped
    # through when fail_on_severity was set to MEDIUM; rank-compare instead.
    rank = {"LOW": 0, "MEDIUM": 1, "HIGH": 2}
    threshold = CFG["review"]["fail_on_severity"]
    severe = rank.get(result["overall_severity"], 0) >= rank.get(threshold, 2)
    if severe and not result.get("approval", False):
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the review workflow only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||
9
tools/ai-review/security/__init__.py
Normal file
9
tools/ai-review/security/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Security Scanning Package
|
||||
|
||||
This package contains security scanning utilities for
|
||||
detecting vulnerabilities in code.
|
||||
"""
|
||||
|
||||
from security.security_scanner import SecurityScanner
|
||||
|
||||
__all__ = ["SecurityScanner"]
|
||||
335
tools/ai-review/security/security_scanner.py
Normal file
335
tools/ai-review/security/security_scanner.py
Normal file
@@ -0,0 +1,335 @@
|
||||
"""Security Scanner
|
||||
|
||||
Pattern-based security vulnerability detection for code analysis.
|
||||
Covers OWASP Top 10 and common security anti-patterns.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterator
|
||||
|
||||
import yaml
|
||||
import os
|
||||
|
||||
|
||||
@dataclass
class SecurityFinding:
    """A single security finding.

    One instance is produced per (rule, line) match by SecurityScanner;
    ``line`` is the 1-based line number within ``file``.
    """

    rule_id: str  # stable rule identifier, e.g. "SEC001"
    rule_name: str  # human-readable rule name
    severity: str  # HIGH, MEDIUM, LOW
    category: str  # OWASP category
    file: str  # file the match was found in (as reported by the caller)
    line: int  # 1-based line number of the match
    code_snippet: str  # matched line, stripped and truncated for display
    description: str  # what the rule detects
    recommendation: str  # suggested remediation
    cwe: str | None = None  # CWE reference
|
||||
|
||||
|
||||
class SecurityScanner:
    """Security scanner using pattern matching and rules.

    Line-by-line regex matching against a rule set: the built-in
    DEFAULT_RULES (OWASP Top 10 oriented) optionally extended with
    custom rules loaded from a YAML file.
    """

    # Default rules covering OWASP Top 10. Each rule is a dict with an
    # id, name, regex pattern, severity, OWASP category, optional CWE,
    # description and recommendation; patterns are compiled in __init__.
    DEFAULT_RULES = [
        # A01:2021 – Broken Access Control
        {
            "id": "SEC001",
            "name": "Hardcoded Credentials",
            "pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
            "severity": "HIGH",
            "category": "A01:2021 Broken Access Control",
            "cwe": "CWE-798",
            "description": "Hardcoded credentials detected in source code",
            "recommendation": "Use environment variables or a secrets management system",
        },
        {
            "id": "SEC002",
            "name": "Exposed Private Key",
            "pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
            "severity": "HIGH",
            "category": "A01:2021 Broken Access Control",
            "cwe": "CWE-321",
            "description": "Private key embedded in source code",
            "recommendation": "Never commit private keys. Use secure key management",
        },
        # A02:2021 – Cryptographic Failures
        {
            "id": "SEC003",
            "name": "Weak Crypto Algorithm",
            "pattern": r"(?i)\b(md5|sha1)\s*\(",
            "severity": "MEDIUM",
            "category": "A02:2021 Cryptographic Failures",
            "cwe": "CWE-328",
            "description": "Use of weak cryptographic hash function",
            "recommendation": "Use SHA-256 or stronger hashing algorithms",
        },
        {
            "id": "SEC004",
            "name": "Insecure Random",
            "pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
            "severity": "MEDIUM",
            "category": "A02:2021 Cryptographic Failures",
            "cwe": "CWE-330",
            "description": "Use of non-cryptographic random number generator for security purposes",
            "recommendation": "Use secrets module or os.urandom() for security-critical randomness",
        },
        # A03:2021 – Injection
        {
            "id": "SEC005",
            "name": "SQL Injection",
            "pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-89",
            "description": "Potential SQL injection through string formatting",
            "recommendation": "Use parameterized queries with placeholders",
        },
        {
            "id": "SEC006",
            "name": "Command Injection",
            "pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-78",
            "description": "Potential command injection through string concatenation",
            "recommendation": "Use subprocess with shell=False and pass arguments as list",
        },
        {
            "id": "SEC007",
            "name": "Eval Usage",
            "pattern": r"\beval\s*\(",
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-95",
            "description": "Use of eval() can lead to code injection",
            "recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
        },
        {
            "id": "SEC008",
            "name": "XSS Risk",
            "pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
            "severity": "MEDIUM",
            "category": "A03:2021 Injection",
            "cwe": "CWE-79",
            "description": "Direct DOM manipulation may allow XSS",
            "recommendation": "Use textContent or proper sanitization libraries",
        },
        # A04:2021 – Insecure Design
        {
            "id": "SEC009",
            "name": "Debug Mode",
            "pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
            "severity": "MEDIUM",
            "category": "A04:2021 Insecure Design",
            "cwe": "CWE-489",
            "description": "Debug mode enabled in code",
            "recommendation": "Ensure debug mode is disabled in production",
        },
        # A05:2021 – Security Misconfiguration
        {
            "id": "SEC010",
            "name": "CORS Wildcard",
            "pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
            "severity": "MEDIUM",
            "category": "A05:2021 Security Misconfiguration",
            "cwe": "CWE-942",
            "description": "CORS configured to allow all origins",
            "recommendation": "Specify allowed origins explicitly",
        },
        {
            "id": "SEC011",
            "name": "SSL Verification Disabled",
            "pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
            "severity": "HIGH",
            "category": "A05:2021 Security Misconfiguration",
            "cwe": "CWE-295",
            "description": "SSL certificate verification disabled",
            "recommendation": "Always verify SSL certificates in production",
        },
        # A07:2021 – Identification and Authentication Failures
        {
            "id": "SEC012",
            "name": "Hardcoded JWT Secret",
            "pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
            "severity": "HIGH",
            "category": "A07:2021 Authentication Failures",
            "cwe": "CWE-798",
            "description": "JWT secret hardcoded in source code",
            "recommendation": "Use environment variables for JWT secrets",
        },
        # A08:2021 – Software and Data Integrity Failures
        {
            "id": "SEC013",
            "name": "Pickle Usage",
            "pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
            "severity": "MEDIUM",
            "category": "A08:2021 Integrity Failures",
            "cwe": "CWE-502",
            "description": "Pickle can execute arbitrary code during deserialization",
            "recommendation": "Use JSON or other safe serialization formats",
        },
        # A09:2021 – Security Logging and Monitoring Failures
        {
            "id": "SEC014",
            "name": "Sensitive Data Logging",
            "pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
            "severity": "MEDIUM",
            "category": "A09:2021 Logging Failures",
            "cwe": "CWE-532",
            "description": "Potentially logging sensitive information",
            "recommendation": "Never log passwords, tokens, or secrets",
        },
        # A10:2021 – Server-Side Request Forgery
        {
            "id": "SEC015",
            "name": "SSRF Risk",
            "pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
            "severity": "MEDIUM",
            "category": "A10:2021 SSRF",
            "cwe": "CWE-918",
            "description": "URL constructed from user input may allow SSRF",
            "recommendation": "Validate and sanitize URLs, use allowlists",
        },
        # Additional common issues
        {
            "id": "SEC016",
            "name": "Hardcoded IP Address",
            "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
            "severity": "LOW",
            "category": "Configuration",
            "cwe": "CWE-547",
            "description": "Hardcoded IP address found",
            "recommendation": "Use configuration files or environment variables for IP addresses",
        },
        {
            "id": "SEC017",
            "name": "TODO/FIXME Security",
            "pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
            "severity": "MEDIUM",
            "category": "Code Quality",
            "cwe": None,
            "description": "Security-related TODO/FIXME comment found",
            "recommendation": "Address security-related TODO items before deployment",
        },
    ]

    def __init__(self, rules_file: str | None = None):
        """Initialize scanner with rules.

        Starts from DEFAULT_RULES; custom rules (if the file exists and
        parses) are appended, then all patterns are pre-compiled.

        Args:
            rules_file: Optional path to custom rules YAML file with a
                top-level ``rules`` list of rule dicts.
        """
        # Shallow copy so appending custom rules never mutates the class-level
        # DEFAULT_RULES shared by other instances.
        self.rules = self.DEFAULT_RULES.copy()

        if rules_file and os.path.exists(rules_file):
            try:
                with open(rules_file) as f:
                    custom_rules = yaml.safe_load(f)
                    if custom_rules and "rules" in custom_rules:
                        self.rules.extend(custom_rules["rules"])
            except Exception:
                pass  # Use defaults if custom rules fail to load

        # Compile patterns for efficiency; rules with invalid regexes are
        # silently dropped from the compiled set (best-effort loading).
        self._compiled_rules = []
        for rule in self.rules:
            try:
                self._compiled_rules.append(
                    {**rule, "_pattern": re.compile(rule["pattern"])}
                )
            except re.error:
                pass  # Skip invalid patterns

    def scan_content(
        self,
        content: str,
        filename: str,
    ) -> Iterator[SecurityFinding]:
        """Scan content for security issues.

        Each line is tested against every compiled rule, so a single line
        can yield multiple findings.

        Args:
            content: File content to scan.
            filename: Name of the file (for reporting).

        Yields:
            SecurityFinding for each detected issue.
        """
        lines = content.splitlines()

        for line_num, line in enumerate(lines, 1):
            for rule in self._compiled_rules:
                if rule["_pattern"].search(line):
                    yield SecurityFinding(
                        rule_id=rule["id"],
                        rule_name=rule["name"],
                        severity=rule["severity"],
                        category=rule["category"],
                        file=filename,
                        line=line_num,
                        # Strip and truncate the matched line for display.
                        code_snippet=line.strip()[:120],
                        description=rule["description"],
                        recommendation=rule["recommendation"],
                        cwe=rule.get("cwe"),
                    )

    def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
        """Scan a git diff for security issues.

        Only scans added lines (lines starting with +). Line numbers are
        tracked in the post-image: each ``@@`` hunk header resets the
        counter, and added/context lines advance it.

        Args:
            diff: Git diff content.

        Yields:
            SecurityFinding for each detected issue.
        """
        current_file = None
        current_line = 0

        for line in diff.splitlines():
            # Track current file (taken from the "b/" side of the header).
            if line.startswith("diff --git"):
                match = re.search(r"b/(.+)$", line)
                if match:
                    current_file = match.group(1)
                current_line = 0
            # Track line numbers: "+start" in the hunk header is the first
            # post-image line of the hunk.
            elif line.startswith("@@"):
                match = re.search(r"\+(\d+)", line)
                if match:
                    current_line = int(match.group(1)) - 1
            # Check added lines (but not the "+++" file header).
            elif line.startswith("+") and not line.startswith("+++"):
                current_line += 1
                # Re-use the single-line scanner, then fix up the line number
                # to the diff position (scan_content always reports line 1).
                for finding in self.scan_content(line[1:], current_file or "unknown"):
                    finding.line = current_line
                    yield finding
            # Context (and header) lines also advance the post-image counter;
            # removed lines ("-") do not exist in the post-image.
            elif not line.startswith("-"):
                current_line += 1

    def get_summary(self, findings: list[SecurityFinding]) -> dict:
        """Get summary statistics for findings.

        Args:
            findings: List of security findings.

        Returns:
            Summary dictionary with counts by severity and category.
        """
        summary = {
            "total": len(findings),
            "by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0},
            "by_category": {},
        }

        for finding in findings:
            # .get() tolerates severities outside HIGH/MEDIUM/LOW coming
            # from custom rules.
            summary["by_severity"][finding.severity] = (
                summary["by_severity"].get(finding.severity, 0) + 1
            )
            summary["by_category"][finding.category] = (
                summary["by_category"].get(finding.category, 0) + 1
            )

        return summary
|
||||
Reference in New Issue
Block a user