first commit

This commit is contained in:
2025-12-21 13:42:30 +01:00
parent 823b825acb
commit f9b24fe248
47 changed files with 8222 additions and 1 deletions

View File

@@ -0,0 +1,42 @@
name: AI Chat (Bartender)

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-chat:
    # Only run if comment mentions the bot
    if: contains(github.event.comment.body, '@ai-bot')  # <-- Change this to your bot name
    runs-on: ubuntu-latest
    steps:
      # Checkout the repository the comment belongs to
      - uses: actions/checkout@v4
      # Checkout the central AI tooling repo into .ai-review
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/OpenRabbit
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install requests pyyaml
      - name: Run AI Chat
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
          # SECURITY: comment bodies are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          COMMENT_BODY: ${{ gitea.event.comment.body }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py comment "${{ gitea.repository }}" "${{ gitea.event.issue.number }}" "$COMMENT_BODY"

View File

@@ -0,0 +1,58 @@
name: AI Codebase Quality Review

on:
  # Weekly scheduled run
  schedule:
    - cron: "0 0 * * 0"  # Every Sunday at midnight
  # Manual trigger
  workflow_dispatch:
    inputs:
      report_type:
        description: "Type of report to generate"
        required: false
        default: "full"
        type: choice
        options:
          - full
          - security
          - quick

jobs:
  ai-codebase-review:
    runs-on: ubuntu-latest
    steps:
      # Checkout the repository under analysis (full history for metrics)
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history for analysis
      # Checkout central AI tooling
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}
      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      # Install dependencies
      - run: pip install requests pyyaml
      # Run AI codebase analysis
      - name: Run AI Codebase Analysis
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py codebase ${{ gitea.repository }}

View File

@@ -0,0 +1,41 @@
name: AI Comment Reply

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-reply:
    runs-on: ubuntu-latest
    if: contains(github.event.comment.body, '@ai-bot')  # <-- Change this to your bot name
    steps:
      - uses: actions/checkout@v4
      # Checkout the central AI tooling repo into .ai-review
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install requests pyyaml
      - name: Run AI Comment Response
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: comment bodies are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          COMMENT_BODY: ${{ gitea.event.comment.body }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py comment "${{ gitea.repository }}" "${{ gitea.event.issue.number }}" \
            "$COMMENT_BODY"

View File

@@ -0,0 +1,36 @@
name: AI Issue Triage

on:
  issues:
    types: [opened, labeled]

jobs:
  ai-triage:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Checkout the central AI tooling repo into .ai-review
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install requests pyyaml
      - name: Run AI Issue Triage
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: issue titles are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          ISSUE_TITLE: ${{ gitea.event.issue.title }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py issue "${{ gitea.repository }}" "${{ gitea.event.issue.number }}" \
            --title "$ISSUE_TITLE"

View File

@@ -0,0 +1,53 @@
name: Enterprise AI Code Review

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  ai-review:
    runs-on: ubuntu-latest
    steps:
      # Checkout the PR repository (full history for diff analysis)
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Checkout the CENTRAL AI tooling repo
      - uses: actions/checkout@v4
        with:
          repository: Hiddenden/AI-code-review-workflow
          path: .ai-review
          token: ${{ secrets.AI_REVIEW_TOKEN }}
      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      # Install dependencies
      - run: pip install requests pyyaml
      # Run the AI review
      - name: Run Enterprise AI Review
        env:
          AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
          AI_REVIEW_REPO: ${{ gitea.repository }}
          AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1
          AI_REVIEW_PR_NUMBER: ${{ gitea.event.pull_request.number }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: PR titles are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          PR_TITLE: ${{ gitea.event.pull_request.title }}
        run: |
          cd .ai-review/tools/ai-review
          python main.py pr "${{ gitea.repository }}" "${{ gitea.event.pull_request.number }}" \
            --title "$PR_TITLE"
      # Fail CI on HIGH severity (optional)
      - name: Check Review Result
        if: failure()
        run: |
          echo "AI Review found HIGH severity issues. Please address them before merging."
          exit 1

36
.github/workflows/ai-chat.yml vendored Normal file
View File

@@ -0,0 +1,36 @@
name: AI Chat (Bartender)

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-chat:
    # Only run if comment mentions the bot
    if: contains(github.event.comment.body, '@ai-bot')  # <-- Change this to your bot name
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install requests pyyaml
      - name: Run AI Chat
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
          # SECURITY: comment bodies are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          cd tools/ai-review
          python main.py comment "${{ github.repository }}" "${{ github.event.issue.number }}" "$COMMENT_BODY"

View File

@@ -0,0 +1,51 @@
name: AI Codebase Quality Review

on:
  # Weekly scheduled run
  schedule:
    - cron: "0 0 * * 0"  # Every Sunday at midnight
  # Manual trigger
  workflow_dispatch:
    inputs:
      report_type:
        description: "Type of report to generate"
        required: false
        default: "full"
        type: choice
        options:
          - full
          - security
          - quick

jobs:
  ai-codebase-review:
    runs-on: ubuntu-latest
    steps:
      # Checkout the repository under analysis (full history for metrics)
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history for analysis
      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      # Install dependencies
      - run: pip install requests pyyaml
      # Run AI codebase analysis
      - name: Run AI Codebase Analysis
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
        run: |
          cd tools/ai-review
          python main.py codebase ${{ github.repository }}

36
.github/workflows/ai-comment-reply.yml vendored Normal file
View File

@@ -0,0 +1,36 @@
name: AI Comment Reply

on:
  issue_comment:
    types: [created]

# CUSTOMIZE YOUR BOT NAME:
# Change '@ai-bot' below to match your config.yml mention_prefix
# Examples: '@bartender', '@uni', '@joey', '@codebot'

jobs:
  ai-reply:
    runs-on: ubuntu-latest
    if: contains(github.event.comment.body, '@ai-bot')  # <-- Change this to your bot name
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install requests pyyaml
      - name: Run AI Comment Response
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
          # SECURITY: comment bodies are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          cd tools/ai-review
          python main.py comment "${{ github.repository }}" "${{ github.event.issue.number }}" \
            "$COMMENT_BODY"

30
.github/workflows/ai-issue-triage.yml vendored Normal file
View File

@@ -0,0 +1,30 @@
name: AI Issue Triage

on:
  issues:
    types: [opened, labeled]

jobs:
  ai-triage:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install requests pyyaml
      - name: Run AI Issue Triage
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: issue titles are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          ISSUE_TITLE: ${{ github.event.issue.title }}
        run: |
          cd tools/ai-review
          python main.py issue "${{ github.repository }}" "${{ github.event.issue.number }}" \
            --title "$ISSUE_TITLE"

52
.github/workflows/ai-review.yml vendored Normal file
View File

@@ -0,0 +1,52 @@
name: AI Code Review

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  ai-review:
    runs-on: ubuntu-latest
    steps:
      # Checkout the PR repository (full history for diff analysis)
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Checkout the AI tooling from this repo's tools directory
      - name: Setup AI Review Tools
        run: |
          # Tools are already in this repo under tools/ai-review
          echo "AI Review tools available at tools/ai-review"
      # Setup Python
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      # Install dependencies
      - run: pip install requests pyyaml
      # Run the AI review
      - name: Run AI Review
        env:
          AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AI_REVIEW_REPO: ${{ github.repository }}
          AI_REVIEW_API_URL: https://api.github.com
          AI_REVIEW_PR_NUMBER: ${{ github.event.pull_request.number }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }}
          # SECURITY: PR titles are attacker-controlled. Pass them to the
          # shell via env so expression expansion cannot inject shell commands.
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          cd tools/ai-review
          python main.py pr "${{ github.repository }}" "${{ github.event.pull_request.number }}" \
            --title "$PR_TITLE"
      # Fail CI on HIGH severity (optional)
      - name: Check Review Result
        if: failure()
        run: |
          echo "AI Review found HIGH severity issues. Please address them before merging."
          exit 1

32
.gitignore vendored Normal file
View File

@@ -0,0 +1,32 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
.eggs/
dist/
build/
# Virtual environments
.venv/
venv/
ENV/
# IDE
.idea/
.vscode/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Logs
*.log
# Environment
.env
.env.local

342
README.md
View File

@@ -1,2 +1,342 @@
# OpenRabbit
Enterprise-grade AI code review system for **GitHub** and **Gitea** with automated PR review, issue triage, interactive chat (Bartender), and codebase analysis.
---
## Features
| Feature | Description |
|---------|-------------|
| **PR Review** | Inline comments, security scanning, severity-based CI failure |
| **Issue Triage** | Auto-classification, labeling, priority assignment |
| **Chat (Bartender)** | Interactive AI chat with codebase search and web search tools |
| **@ai-bot Commands** | `@ai-bot summarize`, `explain`, `suggest` in issue comments |
| **Codebase Analysis** | Health scores, tech debt tracking, weekly reports |
| **Security Scanner** | 17 OWASP-aligned rules for vulnerability detection |
| **Enterprise Ready** | Audit logging, metrics, Prometheus export |
| **Multi-Platform** | Works with both GitHub and Gitea |
---
## Quick Start
### 1. Set Repository/Organization Secrets
```
OPENAI_API_KEY - OpenAI API key (or use OpenRouter/Ollama)
SEARXNG_URL - (Optional) SearXNG instance URL for web search
```
**For Gitea:**
```
AI_REVIEW_TOKEN - Bot token with repo + issue permissions
```
**For GitHub:**
The built-in `GITHUB_TOKEN` is used automatically.
### 2. Add Workflows to Repository
Workflows are provided for both platforms:
| Platform | Location |
|----------|----------|
| GitHub | `.github/workflows/` |
| Gitea | `.gitea/workflows/` |
#### GitHub Example
```yaml
# .github/workflows/ai-review.yml
name: AI PR Review
on: [pull_request]
jobs:
ai-review:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Review
env:
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AI_REVIEW_REPO: ${{ github.repository }}
AI_REVIEW_API_URL: https://api.github.com
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd tools/ai-review
python main.py pr ${{ github.repository }} ${{ github.event.pull_request.number }}
```
#### Gitea Example
```yaml
# .gitea/workflows/ai-review.yml
name: AI PR Review
on: [pull_request]
jobs:
ai-review:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/checkout@v4
with:
repository: YourOrg/OpenRabbit
path: .ai-review
token: ${{ secrets.AI_REVIEW_TOKEN }}
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Review
env:
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
AI_REVIEW_REPO: ${{ gitea.repository }}
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd .ai-review/tools/ai-review
python main.py pr ${{ gitea.repository }} ${{ gitea.event.pull_request.number }}
```
For full workflow examples, see [Workflows Documentation](docs/workflows.md).
### 3. Create Labels
Create these labels in your repository for auto-labeling:
- `priority: high`, `priority: medium`, `priority: low`
- `type: bug`, `type: feature`, `type: question`
- `ai-approved`, `ai-changes-required`
---
## Project Structure
```
tools/ai-review/
├── agents/ # Agent implementations
│ ├── base_agent.py # Abstract base agent
│ ├── issue_agent.py # Issue triage & @ai-bot commands
│ ├── pr_agent.py # PR review with security scan
│ ├── codebase_agent.py # Codebase health analysis
│ └── chat_agent.py # Bartender chat with tool calling
├── clients/ # API clients
│ ├── gitea_client.py # Gitea REST API wrapper
│ └── llm_client.py # Multi-provider LLM client with tool support
├── security/ # Security scanning
│ └── security_scanner.py # 17 OWASP-aligned rules
├── enterprise/ # Enterprise features
│ ├── audit_logger.py # JSONL audit logging
│ └── metrics.py # Prometheus-compatible metrics
├── prompts/ # AI prompt templates
├── main.py # CLI entry point
└── config.yml # Configuration
.github/workflows/ # GitHub Actions workflows
├── ai-review.yml # PR review workflow
├── ai-issue-triage.yml # Issue triage workflow
├── ai-codebase-review.yml # Codebase analysis
├── ai-comment-reply.yml # @ai-bot command responses
└── ai-chat.yml # Bartender chat
.gitea/workflows/ # Gitea Actions workflows
├── enterprise-ai-review.yml
├── ai-issue-triage.yml
├── ai-codebase-review.yml
├── ai-comment-reply.yml
└── ai-chat.yml
```
---
## CLI Commands
```bash
# Review a pull request
python main.py pr owner/repo 123
# Triage an issue
python main.py issue owner/repo 456
# Respond to @ai-bot command
python main.py comment owner/repo 456 "@ai-bot explain"
# Analyze codebase
python main.py codebase owner/repo
# Chat with Bartender
python main.py chat owner/repo "How does authentication work?"
python main.py chat owner/repo "Find all API endpoints" --issue 789
```
---
## @ai-bot Commands
In any issue comment:
| Command | Description |
|---------|-------------|
| `@ai-bot summarize` | Summarize the issue in 2-3 sentences |
| `@ai-bot explain` | Explain what the issue is about |
| `@ai-bot suggest` | Suggest solutions or next steps |
| `@ai-bot` (any question) | Chat with Bartender using codebase/web search |
---
## Bartender Chat
Bartender is an interactive AI assistant with tool-calling capabilities:
**Tools Available:**
- `search_codebase` - Search repository files and code
- `read_file` - Read specific files
- `search_web` - Search the web via SearXNG
**Example:**
```
@ai-bot How do I configure rate limiting in this project?
```
Bartender will search the codebase, read relevant files, and provide a comprehensive answer.
---
## Configuration
Edit `tools/ai-review/config.yml`:
```yaml
provider: openai # openai | openrouter | ollama
model:
openai: gpt-4.1-mini
openrouter: anthropic/claude-3.5-sonnet
ollama: codellama:13b
agents:
issue:
enabled: true
auto_label: true
pr:
enabled: true
inline_comments: true
security_scan: true
codebase:
enabled: true
chat:
enabled: true
name: "Bartender"
searxng_url: "" # Or set SEARXNG_URL env var
interaction:
respond_to_mentions: true
mention_prefix: "@ai-bot" # Customize your bot name here!
commands:
- summarize
- explain
- suggest
```
---
## Customizing the Bot Name
You can change the bot's mention trigger from `@ai-bot` to any name you prefer:
**Step 1:** Edit `tools/ai-review/config.yml`:
```yaml
interaction:
mention_prefix: "@bartender" # or "@uni", "@joey", "@codebot", etc.
```
**Step 2:** Update the workflow files to match:
For GitHub (`.github/workflows/ai-comment-reply.yml` and `ai-chat.yml`):
```yaml
if: contains(github.event.comment.body, '@bartender')
```
For Gitea (`.gitea/workflows/ai-comment-reply.yml` and `ai-chat.yml`):
```yaml
if: contains(github.event.comment.body, '@bartender')
```
**Example bot names:**
| Name | Use Case |
|------|----------|
| `@bartender` | Friendly, conversational |
| `@uni` | Short, quick to type |
| `@joey` | Personal assistant feel |
| `@codebot` | Technical, code-focused |
| `@reviewer` | Review-focused |
---
## Security Scanning
17 rules covering OWASP Top 10:
| Category | Examples |
|----------|----------|
| Injection | SQL injection, command injection, XSS |
| Access Control | Hardcoded secrets, private keys |
| Crypto Failures | Weak hashing (MD5/SHA1), insecure random |
| Misconfiguration | Debug mode, CORS wildcard, SSL bypass |
---
## Documentation
| Document | Description |
|----------|-------------|
| [Getting Started](docs/getting-started.md) | Quick setup guide |
| [Configuration](docs/configuration.md) | All options explained |
| [Agents](docs/agents.md) | Agent documentation |
| [Security](docs/security.md) | Security rules reference |
| [Workflows](docs/workflows.md) | GitHub & Gitea workflow examples |
| [API Reference](docs/api-reference.md) | Client and agent APIs |
| [Enterprise](docs/enterprise.md) | Audit logging, metrics |
| [Troubleshooting](docs/troubleshooting.md) | Common issues |
---
## LLM Providers
| Provider | Model | Use Case |
|----------|-------|----------|
| OpenAI | gpt-4.1-mini | Fast, reliable |
| OpenRouter | claude-3.5-sonnet | Multi-provider access |
| Ollama | codellama:13b | Self-hosted, private |
---
## Enterprise Features
- **Audit Logging**: JSONL logs with daily rotation
- **Metrics**: Prometheus-compatible export
- **Rate Limiting**: Configurable request limits
- **Custom Security Rules**: Define your own patterns via YAML
- **Tool Calling**: LLM function calling for interactive chat
---
## License
MIT

52
docs/README.md Normal file
View File

@@ -0,0 +1,52 @@
# AI Code Review Workflow Documentation
Enterprise-grade AI code review system for Gitea with automated issue triage, PR review, and codebase analysis.
## 📚 Documentation
| Document | Description |
|----------|-------------|
| [Getting Started](getting-started.md) | Quick setup guide |
| [Configuration](configuration.md) | All configuration options |
| [Agents](agents.md) | Detailed agent documentation |
| [Security](security.md) | Security scanning features |
| [API Reference](api-reference.md) | Client and agent APIs |
| [Workflows](workflows.md) | Gitea workflow examples |
| [Troubleshooting](troubleshooting.md) | Common issues and solutions |
## Quick Links
- **Setup**: See [Getting Started](getting-started.md)
- **Configuration**: See [Configuration](configuration.md)
- **Enterprise Features**: See [Enterprise](enterprise.md)
## Architecture Overview
```
┌─────────────────────────────────────────────────────────────┐
│ Event Sources │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ PR Event │ │ Issue │ │ Schedule │ │
│ └────┬─────┘ └────┬─────┘ └────┬─────┘ │
└───────┼─────────────┼─────────────┼─────────────────────────┘
│ │ │
└─────────────┼─────────────┘
┌───────────────┐
│ Dispatcher │
└───────┬───────┘
┌─────────────┼─────────────┐
▼ ▼ ▼
┌───────────┐ ┌───────────┐ ┌───────────┐
│ Issue │ │ PR │ │ Codebase │
│ Agent │ │ Agent │ │ Agent │
└─────┬─────┘ └─────┬─────┘ └─────┬─────┘
│ │ │
└──────────────┼──────────────┘
┌─────────────────┐
│ Gitea API │
│ LLM Provider │
└─────────────────┘
```

298
docs/agents.md Normal file
View File

@@ -0,0 +1,298 @@
# Agents Documentation
The AI Code Review system includes four specialized agents.
## Issue Agent
Handles issue triage, classification, and interaction.
### Triggers
- `issues.opened` - New issue created (handled by `run_issue_triage`)
- `issues.labeled` - Label added to issue
- `issue_comment.created` - Comment with @mention (handled by `run_issue_comment`)
### Features
**Automatic Triage:**
- Classifies issue type: bug, feature, question, documentation, support
- Assigns priority: high, medium, low
- Calculates confidence score
**Auto-Labeling:**
- Applies type labels (`type: bug`, etc.)
- Applies priority labels (`priority: high`, etc.)
- Adds `ai-reviewed` status label
**@Mention Commands:**
| Command | Description |
|---------|-------------|
| `@ai-bot summarize` | Generate concise summary |
| `@ai-bot explain` | Detailed explanation |
| `@ai-bot suggest` | Solution suggestions |
### Output
Posts a triage comment:
```markdown
## AI Issue Triage
| Field | Value |
|-------|--------|
| **Type** | Bug |
| **Priority** | High |
| **Confidence** | 85% |
### Additional Information Needed
- Steps to reproduce
- Error logs
---
*Classification based on issue content*
```
---
## PR Agent
Comprehensive pull request review with security scanning.
### Triggers
- `pull_request.opened` - New PR created
- `pull_request.synchronize` - PR updated with new commits
### Features
**AI Code Review:**
- Analyzes diff for issues
- Categorizes: Security, Correctness, Performance, Maintainability
- Assigns severity: HIGH, MEDIUM, LOW
**Inline Comments:**
- Posts comments on specific lines
- Links to file and line number
- Provides recommendations
**Security Scanning:**
- 17 OWASP-aligned rules
- Detects hardcoded secrets, SQL injection, XSS
- Fails CI on HIGH severity
**Label Management:**
- `ai-approved` - No blocking issues
- `ai-changes-required` - HIGH severity issues found
### Output
Posts summary comment:
```markdown
## AI Code Review
Review of changes in this PR.
### Summary
| Severity | Count |
|----------|-------|
| HIGH | 1 |
| MEDIUM | 2 |
| LOW | 3 |
### Security Issues
- **[HIGH]** `src/auth.py:45` - Hardcoded API key detected
### Review Findings
- **[MEDIUM]** `src/db.py:12` - SQL query uses string formatting
- **[LOW]** `src/utils.py:30` - Missing docstring
---
**Overall Severity:** `HIGH`
**AI Recommendation:** Changes Requested
```
---
## Codebase Agent
Repository-wide quality and health analysis.
### Triggers
- `schedule` - Cron schedule (default: weekly)
- `workflow_dispatch` - Manual trigger
- `@ai-bot codebase` - Comment command
### Features
**Metrics Collection:**
- Total files and lines of code
- Language distribution
- TODO/FIXME/DEPRECATED counts
**AI Analysis:**
- Overall health score (0-100)
- Architecture observations
- Technical debt identification
- Improvement recommendations
### Output
Creates/updates report issue:
```markdown
# AI Codebase Quality Report
## Health Score: 72/100
The codebase is in reasonable condition with some areas for improvement.
---
## Metrics
| Metric | Value |
|--------|-------|
| Total Files | 45 |
| Total Lines | 12,500 |
| TODO Comments | 23 |
| FIXME Comments | 8 |
### Languages
- **Python**: 35 files
- **JavaScript**: 10 files
## Issues Found
### [MEDIUM] Code Quality
Missing docstrings in 15 functions.
**Recommendation:** Add docstrings for public functions.
## Recommendations
1. Add comprehensive test coverage
2. Document API endpoints
3. Reduce TODO backlog
```
---
## Chat Agent (Bartender)
Interactive AI chat assistant with tool-calling capabilities.
### Triggers
- `issue_comment.created` - Any @ai-bot mention that isn't a specific command
- `chat` - Direct CLI invocation
### Features
**Tool Calling:**
The Chat Agent uses LLM function calling to gather information before responding:
| Tool | Description |
|------|-------------|
| `search_codebase` | Search repository files and code patterns |
| `read_file` | Read specific files from the repository |
| `search_web` | Search the web via SearXNG instance |
**Iterative Reasoning:**
- Makes up to 5 tool calls per request
- Combines information from multiple sources
- Provides comprehensive, contextual answers
**Web Search:**
- Requires SearXNG instance URL (via `SEARXNG_URL` env var or config)
- Searches for documentation, tutorials, external resources
### Configuration
```yaml
agents:
chat:
enabled: true
name: "Bartender"
max_iterations: 5
tools:
- search_codebase
- read_file
- search_web
searxng_url: "" # Or set SEARXNG_URL env var
```
### CLI Usage
```bash
# Simple chat
python main.py chat owner/repo "How does authentication work?"
# Chat and post response to issue
python main.py chat owner/repo "Explain this bug" --issue 123
```
### Issue Comment Usage
```
@ai-bot How do I configure rate limiting?
@ai-bot Find all files that handle user authentication
@ai-bot What does the dispatcher module do?
```
### Output
Posts a response comment:
```markdown
**Note:** This review was generated by an AI assistant...
---
Based on my analysis of the codebase, rate limiting is configured in
`tools/ai-review/config.yml` under the `enterprise.rate_limit` section:
- `requests_per_minute`: Maximum requests per minute (default: 30)
- `max_concurrent`: Maximum concurrent requests (default: 4)
The rate limiting logic is implemented in `enterprise/rate_limiter.py`...
```
---
## Agent Interface
All agents extend `BaseAgent`:
```python
from agents import BaseAgent, AgentContext, AgentResult
class CustomAgent(BaseAgent):
def can_handle(self, event_type: str, event_data: dict) -> bool:
# Return True if this agent handles the event
return event_type == "custom_event"
def execute(self, context: AgentContext) -> AgentResult:
# Perform agent logic
return AgentResult(
success=True,
message="Custom action completed",
actions_taken=["action1", "action2"],
)
```
Register with dispatcher:
```python
from dispatcher import get_dispatcher
from agents import CustomAgent
dispatcher = get_dispatcher()
dispatcher.register_agent(CustomAgent())
```

280
docs/api-reference.md Normal file
View File

@@ -0,0 +1,280 @@
# API Reference
## Gitea Client
`clients/gitea_client.py`
### Initialization
```python
from clients import GiteaClient
client = GiteaClient(
api_url="https://gitea.example.com/api/v1",
token="your_token",
timeout=30,
)
```
### Issue Methods
```python
# List issues
issues = client.list_issues(
owner="user",
repo="repo",
state="open", # open, closed, all
labels=["bug"],
page=1,
limit=30,
)
# Get single issue
issue = client.get_issue(owner, repo, index=123)
# Create comment
comment = client.create_issue_comment(owner, repo, index=123, body="Comment text")
# Update comment
client.update_issue_comment(owner, repo, comment_id=456, body="Updated text")
# List comments
comments = client.list_issue_comments(owner, repo, index=123)
# Add labels
client.add_issue_labels(owner, repo, index=123, labels=[1, 2, 3])
# Get repo labels
labels = client.get_repo_labels(owner, repo)
```
### Pull Request Methods
```python
# Get PR
pr = client.get_pull_request(owner, repo, index=123)
# Get diff
diff = client.get_pull_request_diff(owner, repo, index=123)
# List changed files
files = client.list_pull_request_files(owner, repo, index=123)
# Create review with inline comments
client.create_pull_request_review(
owner, repo, index=123,
body="Review summary",
event="COMMENT", # APPROVE, REQUEST_CHANGES, COMMENT
comments=[
{"path": "file.py", "line": 10, "body": "Issue here"},
],
)
```
### Repository Methods
```python
# Get repository info
repo = client.get_repository(owner, repo)
# Get file contents (base64 encoded)
content = client.get_file_contents(owner, repo, "path/to/file.py", ref="main")
# Get branch
branch = client.get_branch(owner, repo, "main")
```
---
## LLM Client
`clients/llm_client.py`
### Initialization
```python
from clients import LLMClient
# Direct initialization
client = LLMClient(
provider="openai", # openai, openrouter, ollama
config={"model": "gpt-4", "temperature": 0},
)
# From config file
client = LLMClient.from_config(config_dict)
```
### Methods
```python
# Basic call
response = client.call("Explain this code")
print(response.content)
print(response.tokens_used)
# JSON response
result = client.call_json("Return JSON: {\"key\": \"value\"}")
print(result["key"])
```
### Response Object
```python
@dataclass
class LLMResponse:
content: str # Generated text
model: str # Model used
provider: str # Provider name
tokens_used: int # Token count
finish_reason: str # stop, length, etc.
```
---
## Base Agent
`agents/base_agent.py`
### Creating Custom Agent
```python
from agents import BaseAgent, AgentContext, AgentResult
class MyAgent(BaseAgent):
def can_handle(self, event_type: str, event_data: dict) -> bool:
return event_type == "my_event"
def execute(self, context: AgentContext) -> AgentResult:
# Use built-in methods
prompt = self.load_prompt("my_prompt")
response = self.call_llm(prompt)
self.upsert_comment(
context.owner,
context.repo,
issue_index=123,
body=response.content,
)
return AgentResult(
success=True,
message="Done",
actions_taken=["posted comment"],
)
```
### Built-in Methods
```python
# Load prompt template
prompt = self.load_prompt("prompt_name") # From prompts/prompt_name.md
# LLM calls (with rate limiting)
response = self.call_llm(prompt)
json_result = self.call_llm_json(prompt)
# Comment management
comment_id = self.find_ai_comment(owner, repo, issue_index)
self.upsert_comment(owner, repo, issue_index, body)
# Format with disclaimer
formatted = self.format_with_disclaimer(content)
```
### Context Object
```python
@dataclass
class AgentContext:
owner: str # Repository owner
repo: str # Repository name
event_type: str # Event type
event_data: dict # Event payload
config: dict # Configuration
```
### Result Object
```python
@dataclass
class AgentResult:
success: bool
message: str
data: dict = {}
actions_taken: list[str] = []
error: str | None = None
```
---
## Dispatcher
`dispatcher.py`
### Usage
```python
from dispatcher import Dispatcher, get_dispatcher
# Get global dispatcher
dispatcher = get_dispatcher()
# Register agents
dispatcher.register_agent(MyAgent())
# Dispatch event
result = dispatcher.dispatch(
event_type="pull_request",
event_data={"action": "opened", ...},
owner="user",
repo="repo",
)
# Async dispatch
future = dispatcher.dispatch_async(event_type, event_data, owner, repo)
result = future.result()
```
---
## Security Scanner
`security/security_scanner.py`
### Usage
```python
from security import SecurityScanner
scanner = SecurityScanner()
# Scan content
for finding in scanner.scan_content(code, "file.py"):
print(finding.rule_id, finding.severity, finding.line)
# Scan diff (only added lines)
for finding in scanner.scan_diff(diff):
print(finding.file, finding.line, finding.code_snippet)
# Summary
findings = list(scanner.scan_diff(diff))
summary = scanner.get_summary(findings)
```
### Finding Object
```python
@dataclass
class SecurityFinding:
rule_id: str # SEC001, SEC002, etc.
rule_name: str # Human-readable name
severity: str # HIGH, MEDIUM, LOW
category: str # OWASP category
file: str # File path
line: int # Line number
code_snippet: str # Matched code
description: str # Issue description
recommendation: str # How to fix
cwe: str | None # CWE reference
```

196
docs/configuration.md Normal file
View File

@@ -0,0 +1,196 @@
# Configuration Reference
All configuration is managed in `tools/ai-review/config.yml`.
## Provider Settings
```yaml
# LLM Provider: openai | openrouter | ollama
provider: openai
# Model per provider
model:
openai: gpt-4.1-mini
openrouter: anthropic/claude-3.5-sonnet
ollama: codellama:13b
# Generation settings
temperature: 0 # 0 = deterministic
max_tokens: 4096 # Max response tokens
```
## Review Settings
```yaml
review:
fail_on_severity: HIGH # Fail CI on this severity
max_diff_lines: 800 # Truncate large diffs
inline_comments: true # Post inline PR comments
security_scan: true # Run security scanner
```
## Agent Configuration
### Issue Agent
```yaml
agents:
issue:
enabled: true
auto_label: true # Apply labels automatically
auto_triage: true # Run triage on new issues
duplicate_threshold: 0.85 # Similarity threshold
events:
- opened
- labeled
```
### PR Agent
```yaml
agents:
pr:
enabled: true
inline_comments: true # Post inline comments
security_scan: true # Run security scanner
events:
- opened
- synchronize
```
### Codebase Agent
```yaml
agents:
codebase:
enabled: true
schedule: "0 0 * * 0" # Cron schedule (weekly)
```
### Chat Agent (Bartender)
```yaml
agents:
chat:
enabled: true
name: "Bartender" # Display name for the bot
max_iterations: 5 # Max tool calls per chat
tools:
- search_codebase # Search repository files
- read_file # Read file contents
- search_web # Web search via SearXNG
searxng_url: "" # SearXNG instance URL (or use SEARXNG_URL env var)
```
## Interaction Settings
### Customizing the Bot Name
The `mention_prefix` controls which trigger the bot responds to. You can change it to any name you prefer:
```yaml
interaction:
mention_prefix: "@bartender" # Users will type @bartender to invoke the bot
```
**Important:** When changing the bot name, you must also update the workflow files:
1. Edit `.github/workflows/ai-comment-reply.yml` and `ai-chat.yml` (for GitHub)
2. Edit `.gitea/workflows/ai-comment-reply.yml` and `ai-chat.yml` (for Gitea)
3. Change the `if:` condition to match your new prefix:
```yaml
if: contains(github.event.comment.body, '@bartender')
```
**Example bot names:**
- `@ai-bot` - Default, generic
- `@bartender` - Friendly, conversational
- `@uni` - Short, quick to type
- `@joey` - Personal assistant
- `@codebot` - Technical focus
```yaml
interaction:
respond_to_mentions: true
mention_prefix: "@ai-bot"
commands:
- explain # Explain code/issue
- suggest # Suggest solutions
- security # Run security check
- summarize # Summarize content
```
## Label Mappings
```yaml
labels:
priority:
high: "priority: high"
medium: "priority: medium"
low: "priority: low"
type:
bug: "type: bug"
feature: "type: feature"
question: "type: question"
docs: "type: documentation"
status:
ai_approved: "ai-approved"
ai_changes_required: "ai-changes-required"
ai_reviewed: "ai-reviewed"
```
## Enterprise Settings
```yaml
enterprise:
audit_log: true
audit_path: "/var/log/ai-review/"
metrics_enabled: true
rate_limit:
requests_per_minute: 30
max_concurrent: 4
```
## Security Configuration
```yaml
security:
enabled: true
fail_on_high: true
rules_file: "security/security_rules.yml" # Custom rules
```
## Environment Variables
These override config file settings:
| Variable | Description |
|----------|-------------|
| `AI_REVIEW_TOKEN` | Gitea/GitHub API token |
| `AI_REVIEW_API_URL` | API base URL (`https://api.github.com` or Gitea URL) |
| `AI_REVIEW_REPO` | Target repository (owner/repo) |
| `OPENAI_API_KEY` | OpenAI API key |
| `OPENROUTER_API_KEY` | OpenRouter API key |
| `OLLAMA_HOST` | Ollama server URL |
| `SEARXNG_URL` | SearXNG instance URL for web search |
| `AI_AUDIT_PATH` | Audit log directory |
## Per-Repository Overrides
Create `.ai-review.yml` in repository root:
```yaml
# Override global config for this repo
agents:
pr:
security_scan: false # Disable security scan
issue:
auto_label: false # Disable auto-labeling
# Custom labels
labels:
priority:
high: "P0"
medium: "P1"
low: "P2"
```

223
docs/enterprise.md Normal file
View File

@@ -0,0 +1,223 @@
# Enterprise Features
Advanced features for enterprise deployments.
## Audit Logging
All AI actions are logged for compliance and debugging.
### Configuration
```yaml
enterprise:
audit_log: true
audit_path: "/var/log/ai-review/"
```
### Log Format
Logs are stored as JSONL (JSON Lines) with daily rotation:
```
/var/log/ai-review/audit-2024-01-15.jsonl
```
Each line is a JSON object:
```json
{
"timestamp": "2024-01-15T10:30:45.123Z",
"action": "review_pr",
"agent": "PRAgent",
"repository": "org/repo",
"success": true,
"details": {
"pr_number": 123,
"severity": "MEDIUM",
"issues_found": 3
}
}
```
### Actions Logged
| Action | Description |
|--------|-------------|
| `review_pr` | PR review completed |
| `triage_issue` | Issue triaged |
| `llm_call` | LLM API call made |
| `comment_posted` | Comment created/updated |
| `labels_applied` | Labels added |
| `security_scan` | Security scan completed |
### Querying Logs
```python
from enterprise import get_audit_logger
logger = get_audit_logger()
# Get all logs for a date range
logs = logger.get_logs(
start_date="2024-01-01",
end_date="2024-01-31",
action="review_pr",
repository="org/repo",
)
# Generate summary report
report = logger.generate_report(
start_date="2024-01-01",
end_date="2024-01-31",
)
print(f"Total events: {report['total_events']}")
print(f"Success rate: {report['success_rate']:.1%}")
```
---
## Metrics & Observability
Track performance and usage metrics.
### Configuration
```yaml
enterprise:
metrics_enabled: true
```
### Available Metrics
**Counters:**
- `ai_review_requests_total` - Total requests processed
- `ai_review_requests_success` - Successful requests
- `ai_review_requests_failed` - Failed requests
- `ai_review_llm_calls_total` - Total LLM API calls
- `ai_review_llm_tokens_total` - Total tokens consumed
- `ai_review_comments_posted` - Comments posted
- `ai_review_security_findings` - Security issues found
**Gauges:**
- `ai_review_active_requests` - Currently processing
**Histograms:**
- `ai_review_request_duration_seconds` - Request latency
- `ai_review_llm_duration_seconds` - LLM call latency
### Getting Metrics
```python
from enterprise import get_metrics
metrics = get_metrics()
# Get summary
summary = metrics.get_summary()
print(f"Total requests: {summary['requests']['total']}")
print(f"Success rate: {summary['requests']['success_rate']:.1%}")
print(f"Avg latency: {summary['latency']['avg_ms']:.0f}ms")
print(f"P95 latency: {summary['latency']['p95_ms']:.0f}ms")
print(f"LLM tokens used: {summary['llm']['tokens']}")
# Export Prometheus format
prometheus_output = metrics.export_prometheus()
```
### Prometheus Integration
Expose metrics endpoint:
```python
from flask import Flask
from enterprise import get_metrics
app = Flask(__name__)
@app.route("/metrics")
def metrics():
return get_metrics().export_prometheus()
```
---
## Rate Limiting
Prevent API overload and manage costs.
### Configuration
```yaml
enterprise:
rate_limit:
requests_per_minute: 30
max_concurrent: 4
```
### Built-in Rate Limiting
The `BaseAgent` class includes automatic rate limiting:
```python
class BaseAgent:
def __init__(self):
self._min_request_interval = 1.0 # seconds
def _rate_limit(self):
elapsed = time.time() - self._last_request_time
if elapsed < self._min_request_interval:
time.sleep(self._min_request_interval - elapsed)
```
---
## Queue Management
The dispatcher handles concurrent execution:
```python
dispatcher = Dispatcher(max_workers=4)
```
For high-volume environments, use async dispatch:
```python
future = dispatcher.dispatch_async(event_type, event_data, owner, repo)
# Continue with other work
result = future.result() # Block when needed
```
---
## Security Considerations
### Token Permissions
Minimum required permissions for `AI_REVIEW_TOKEN`:
- `repo:read` - Read repository contents
- `repo:write` - Create branches (if needed)
- `issue:read` - Read issues and PRs
- `issue:write` - Create comments, labels
### Network Isolation
For air-gapped environments, use Ollama:
```yaml
provider: ollama
# Internal network address
# Set via environment: OLLAMA_HOST=http://ollama.internal:11434
```
### Data Privacy
By default:
- Code is sent to LLM provider for analysis
- Review comments are stored in Gitea
- Audit logs are stored locally
For sensitive codebases:
1. Use self-hosted Ollama
2. Disable external LLM providers
3. Review audit log retention policies

82
docs/future_roadmap.md Normal file
View File

@@ -0,0 +1,82 @@
# Future Features Roadmap
This document outlines the strategic plan for evolving the AI Code Review system. These features are proposed for future implementation to enhance security coverage, context awareness, and user interaction.
---
## Phase 1: Advanced Security Scanning
Expand the current 17-rule regex scanner with dedicated industry-standard tools for **Static Application Security Testing (SAST)** and **Software Composition Analysis (SCA)**.
### Proposed Integrations
| Tool | Type | Purpose | Implementation Plan |
|------|------|---------|---------------------|
| **Bandit** | SAST | Analyze Python code for common vulnerability patterns (e.g., `exec`, weak crypto). | Run `bandit -r . -f json` and parse results into the review report. |
| **Semgrep** | SAST | Polyglot scanning with custom rule support. | Integrate `semgrep --config=p/security-audit` for broader language support (JS, Go, Java). |
| **Safety** | SCA | Check installed dependencies against known vulnerability databases. | Run `safety check --json` during CI to flag vulnerable packages in `requirements.txt`. |
| **Trivy** | SCA/Container | Scan container images (Dockerfiles) and filesystem. | Add a workflow step to run Trivy for container-based projects. |
**Impact:** Significantly reduces false negatives and covers dependency chain risks (Supply Chain Security).
---
## Phase 2: "Chat with Codebase" (RAG)
Move beyond single-file context by implementing **Retrieval-Augmented Generation (RAG)**. This allows the AI to answer questions like *"Where is authentication handled?"* by searching the entire codebase semantically.
### Architecture
1. **Vector Database:**
* **ChromaDB** or **Qdrant**: Lightweight, open-source choices for storing code embeddings.
2. **Embeddings Model:**
* **OpenAI `text-embedding-3-small`** or **FastEmbed**: To convert code chunks (functions/classes) into vectors.
3. **Workflow:**
* **Index:** Run a nightly job to parse the codebase -> chunk it -> embed it -> store in Vector DB.
* **Query:** When `@ai-bot` receives a question, convert the question to a vector -> search Vector DB -> inject relevant snippets into the LLM prompt.
**Impact:** Enables high-accuracy architectural advice and deep-dive explanations spanning multiple files.
---
## Phase 3: Interactive Code Repair
Transform the bot from a passive reviewer into an active collaborator.
### Features
* **`@ai-bot apply <suggestion_id>`**:
* The bot generates a secure `git patch` for a specific recommendation.
* The system commits the patch directly to the PR branch.
* **Refactoring Assistance**:
* Command: `@ai-bot refactor this function to use dependency injection`.
* Bot proposes the changed code block and offers to commit it.
**Risk Mitigation:**
* Require human approval (comment reply) before any commit is pushed.
* Run tests automatically after bot commits.
---
## Phase 4: Enterprise Dashboard
Provide a high-level view of engineering health across the organization.
### Metrics to Visualize
* **Security Health:** Trend of High/Critical issues over time.
* **Code Quality:** Technical debt accumulation vs. reduction rate.
* **Review Velocity:** Average time to AI review vs. Human review.
* **Bot Usage:** Most frequent commands and value-add interactions.
### Tech Stack
* **Prometheus** (already implemented) + **Grafana**: For time-series tracking.
* **Streamlit** / **Next.js**: For a custom management console to configure rules and view logs.
---
## Strategic Recommendations
1. **Immediate Win:** Implement **Bandit** integration. It is low-effort (Python library) and high-value (detects real vulnerabilities).
2. **High Impact:** **Safety** dependency scanning. Vulnerable dependencies are the #1 attack vector for modern apps.
3. **Long Term:** Work on **Vector DB** integration only after the core review logic is flawless, as it introduces significant infrastructure complexity.

142
docs/getting-started.md Normal file
View File

@@ -0,0 +1,142 @@
# Getting Started
This guide will help you set up the AI Code Review system for your Gitea repositories.
## Prerequisites
- Gitea instance (self-hosted or managed)
- Python 3.11+
- LLM API access (OpenAI, OpenRouter, or Ollama)
---
## Step 1: Create a Bot Account
1. Create a new Gitea user account for the bot (e.g., `ai-reviewer`)
2. Generate an access token with these permissions:
- `repo` - Full repository access
- `issue` - Issue read/write access
3. Save the token securely
---
## Step 2: Configure Organization Secrets
In your Gitea organization or repository settings, add these secrets:
| Secret | Description |
|--------|-------------|
| `AI_REVIEW_TOKEN` | Bot's Gitea access token |
| `OPENAI_API_KEY` | OpenAI API key (if using OpenAI) |
| `OPENROUTER_API_KEY` | OpenRouter key (if using OpenRouter) |
| `OLLAMA_HOST` | Ollama URL (if using Ollama, e.g., `http://localhost:11434`) |
---
## Step 3: Add Workflows to Your Repository
Copy the workflow files from this repository to your target repo:
```bash
# Create workflows directory
mkdir -p .gitea/workflows
# Copy workflow files
# Option 1: Copy manually from this repo's .gitea/workflows/
# Option 2: Reference this repo in your workflows (see README)
```
### Workflow Files:
| File | Trigger | Purpose |
|------|---------|---------|
| `enterprise-ai-review.yml` | PR opened/updated | Run AI code review |
| `ai-issue-review.yml` | Issue opened, @ai-bot | Triage issues & respond to commands |
| `ai-codebase-review.yml` | Weekly/manual | Analyze codebase health |
---
## Step 4: Create Labels
Create these labels in your repository for auto-labeling:
**Priority Labels:**
- `priority: high` (red)
- `priority: medium` (yellow)
- `priority: low` (green)
**Type Labels:**
- `type: bug`
- `type: feature`
- `type: question`
- `type: documentation`
**AI Status Labels:**
- `ai-approved`
- `ai-changes-required`
- `ai-reviewed`
---
## Step 5: Test the Setup
### Test PR Review:
1. Create a new pull request
2. Wait for the AI review workflow to run
3. Check for the AI review comment
### Test Issue Triage:
1. Create a new issue
2. The AI should automatically triage and comment
### Test @ai-bot Commands:
1. On any issue, comment: `@ai-bot summarize`
2. The AI should respond with a summary
---
## Troubleshooting
### Common Issues:
**"Missing token" error:**
- Verify `AI_REVIEW_TOKEN` is set in secrets
- Ensure the token has correct permissions
**"LLM call failed" error:**
- Verify your LLM API key is set
- Check the `provider` setting in `config.yml`
**Workflow not triggering:**
- Verify workflow files are in `.gitea/workflows/`
- Check that Actions are enabled for your repository
See [Troubleshooting Guide](troubleshooting.md) for more.
---
## Helper: CLI Usage
If you need to run the agents manually (e.g. for debugging or local testing), you can use the CLI:
```bash
# Review a pull request
python main.py pr owner/repo 123
# Triage a new issue
python main.py issue owner/repo 456
# Handle @ai-bot command in comment
python main.py comment owner/repo 456 "@ai-bot summarize"
# Analyze codebase
python main.py codebase owner/repo
```
---
## Next Steps
- [Configuration Reference](configuration.md) - Customize behavior
- [Agents Documentation](agents.md) - Learn about each agent
- [Security Scanning](security.md) - Understand security rules

163
docs/security.md Normal file
View File

@@ -0,0 +1,163 @@
# Security Scanning
The security scanner detects vulnerabilities aligned with OWASP Top 10.
## Supported Rules
### A01:2021 Broken Access Control
| Rule | Severity | Description |
|------|----------|-------------|
| SEC001 | HIGH | Hardcoded credentials (passwords, API keys) |
| SEC002 | HIGH | Exposed private keys |
### A02:2021 Cryptographic Failures
| Rule | Severity | Description |
|------|----------|-------------|
| SEC003 | MEDIUM | Weak hash algorithms (MD5, SHA1) |
| SEC004 | MEDIUM | Non-cryptographic random for security |
### A03:2021 Injection
| Rule | Severity | Description |
|------|----------|-------------|
| SEC005 | HIGH | SQL injection via string formatting |
| SEC006 | HIGH | Command injection in subprocess |
| SEC007 | HIGH | eval() usage |
| SEC008 | MEDIUM | XSS via innerHTML |
### A04:2021 Insecure Design
| Rule | Severity | Description |
|------|----------|-------------|
| SEC009 | MEDIUM | Debug mode enabled |
### A05:2021 Security Misconfiguration
| Rule | Severity | Description |
|------|----------|-------------|
| SEC010 | MEDIUM | CORS wildcard (*) |
| SEC011 | HIGH | SSL verification disabled |
### A07:2021 Authentication Failures
| Rule | Severity | Description |
|------|----------|-------------|
| SEC012 | HIGH | Hardcoded JWT secrets |
### A08:2021 Integrity Failures
| Rule | Severity | Description |
|------|----------|-------------|
| SEC013 | MEDIUM | Pickle deserialization |
### A09:2021 Logging Failures
| Rule | Severity | Description |
|------|----------|-------------|
| SEC014 | MEDIUM | Logging sensitive data |
### A10:2021 Server-Side Request Forgery
| Rule | Severity | Description |
|------|----------|-------------|
| SEC015 | MEDIUM | SSRF via dynamic URLs |
### Additional Rules
| Rule | Severity | Description |
|------|----------|-------------|
| SEC016 | LOW | Hardcoded IP addresses |
| SEC017 | MEDIUM | Security-related TODO/FIXME |
## Usage
### In PR Reviews
Security scanning runs automatically during PR review:
```yaml
agents:
pr:
security_scan: true
```
### Standalone
```python
from security import SecurityScanner
scanner = SecurityScanner()
# Scan file content
for finding in scanner.scan_content(code, "file.py"):
print(f"[{finding.severity}] {finding.rule_name}")
print(f" Line {finding.line}: {finding.code_snippet}")
print(f" {finding.description}")
# Scan git diff
for finding in scanner.scan_diff(diff):
print(f"{finding.file}:{finding.line} - {finding.rule_name}")
```
### Get Summary
```python
findings = list(scanner.scan_content(code, "file.py"))
summary = scanner.get_summary(findings)
print(f"Total: {summary['total']}")
print(f"HIGH: {summary['by_severity']['HIGH']}")
print(f"Categories: {summary['by_category']}")
```
## Custom Rules
Create `security/security_rules.yml`:
```yaml
rules:
- id: "CUSTOM001"
name: "Custom Pattern"
pattern: "dangerous_function\\s*\\("
severity: "HIGH"
category: "Custom"
cwe: "CWE-xxx"
description: "Usage of dangerous function detected"
recommendation: "Use safe_function() instead"
```
Load custom rules:
```python
scanner = SecurityScanner(rules_file="security/custom_rules.yml")
```
## CI Integration
Fail CI on HIGH severity findings:
```yaml
security:
fail_on_high: true
```
Or in code:
```python
findings = list(scanner.scan_diff(diff))
high_count = sum(1 for f in findings if f.severity == "HIGH")
if high_count > 0:
sys.exit(1)
```
## CWE References
All rules include CWE (Common Weakness Enumeration) references:
- [CWE-78](https://cwe.mitre.org/data/definitions/78.html): OS Command Injection
- [CWE-79](https://cwe.mitre.org/data/definitions/79.html): XSS
- [CWE-89](https://cwe.mitre.org/data/definitions/89.html): SQL Injection
- [CWE-798](https://cwe.mitre.org/data/definitions/798.html): Hardcoded Credentials

263
docs/troubleshooting.md Normal file
View File

@@ -0,0 +1,263 @@
# Troubleshooting
Common issues and solutions for the AI Code Review system.
## Installation Issues
### `ModuleNotFoundError: No module named 'requests'`
Install dependencies:
```bash
pip install requests pyyaml
```
### `ImportError: cannot import name 'BaseAgent'`
Ensure you're running from the correct directory:
```bash
cd tools/ai-review
python main.py pr owner/repo 123
```
---
## Authentication Issues
### `repository not found`
**Causes:**
- Bot token lacks access to the repository
- Repository path is incorrect
**Solutions:**
1. Verify token has `repo` permissions
2. Check repository path format: `owner/repo`
3. Ensure token can access both the target repo and the AI tooling repo
### `401 Unauthorized`
**Causes:**
- Invalid or expired token
- Missing token in environment
**Solutions:**
1. Regenerate the bot token
2. Verify `AI_REVIEW_TOKEN` is set correctly
3. Check organization secret scope is "All Repositories"
### `403 Forbidden`
**Causes:**
- Token lacks write permissions
- Repository is private and token doesn't have access
**Solutions:**
1. Ensure token has `issue:write` permission
2. Add bot account as collaborator to private repos
---
## LLM Issues
### `OPENAI_API_KEY not set`
Set the environment variable:
```bash
export OPENAI_API_KEY="sk-..."
```
Or in workflow:
```yaml
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
```
### `Rate limit exceeded`
**Causes:**
- Too many requests to LLM provider
- API quota exhausted
**Solutions:**
1. Increase rate limit interval in config
2. Switch to a different provider temporarily
3. Check your API plan limits
### `JSON decode error` from LLM
**Causes:**
- LLM returned non-JSON response
- Response was truncated
**Solutions:**
1. Increase `max_tokens` in config
2. Check LLM response in logs
3. Improve prompt to enforce JSON output
---
## Workflow Issues
### Workflow doesn't trigger
**Causes:**
- Workflow file not in correct location
- Event type not configured
**Solutions:**
1. Ensure workflow is in `.gitea/workflows/`
2. Check event types match your needs:
```yaml
on:
pull_request:
types: [opened, synchronize]
```
3. Verify Gitea Actions is enabled for the repository
### `review.py not found`
**Causes:**
- Central repo checkout failed
- Path is incorrect
**Solutions:**
1. Verify the checkout step has correct repository and path
2. Check bot token has access to the AI tooling repo
3. Ensure path matches: `.ai-review/tools/ai-review/main.py`
### PR comments not appearing
**Causes:**
- Token lacks issue write permission
- API URL is incorrect
**Solutions:**
1. Check `AI_REVIEW_API_URL` is correct
2. Verify token has `issue:write` permission
3. Check workflow logs for API errors
### @ai-bot edits the issue instead of replying
**Causes:**
- Workflow is using the wrong CLI command for comments
- `event_type` is incorrectly set to "issues"
**Solutions:**
1. Ensure your workflow uses the `comment` command for mentions:
```yaml
python main.py comment owner/repo 123 "@ai-bot ..."
```
2. Verify you have separate jobs for `issues` vs `issue_comment` events (see [Workflows](workflows.md))
---
## Label Issues
### Labels not being applied
**Causes:**
- Labels don't exist in repository
- Label names don't match config
**Solutions:**
1. Create labels matching your config:
- `priority: high`
- `type: bug`
- `ai-approved`
2. Or update config to match existing labels:
```yaml
labels:
priority:
high: "P0" # Your label name
```
### `label not found` error
The agent gracefully handles missing labels. Create labels manually or disable auto-labeling:
```yaml
agents:
issue:
auto_label: false
```
---
## Performance Issues
### Reviews are slow
**Causes:**
- Large diffs taking long to process
- LLM response time
**Solutions:**
1. Reduce max diff lines:
```yaml
review:
max_diff_lines: 500
```
2. Use a faster model:
```yaml
model:
openai: gpt-4.1-mini # Faster than gpt-4
```
3. Consider Ollama for local, faster inference
### Timeout errors
Increase timeout in API calls or use async processing:
```python
client = GiteaClient(timeout=60) # Increase from default 30
```
---
## Debugging
### Enable verbose logging
```bash
python main.py -v pr owner/repo 123
```
### Check workflow logs
1. Go to repository -> Actions
2. Click on the failed workflow run
3. Expand job steps to see output
### Test locally
```bash
# Set environment variables
export AI_REVIEW_TOKEN="your_token"
export AI_REVIEW_API_URL="https://your-gitea/api/v1"
export OPENAI_API_KEY="sk-..."
# Run locally
cd tools/ai-review
python main.py pr owner/repo 123
```
### Validate Python syntax
```bash
python -m py_compile main.py
```
---
## Getting Help
1. Check the [documentation](README.md)
2. Search existing issues in the repository
3. Create a new issue with:
- Steps to reproduce
- Error messages
- Environment details (Gitea version, Python version)

389
docs/workflows.md Normal file
View File

@@ -0,0 +1,389 @@
# Workflows
This document provides ready-to-use workflow files for integrating AI code review into your repositories. Workflows are provided for both **GitHub Actions** and **Gitea Actions**.
---
## Platform Comparison
| Feature | GitHub | Gitea |
|---------|--------|-------|
| Context variable | `github.*` | `gitea.*` |
| Default token | `GITHUB_TOKEN` | `AI_REVIEW_TOKEN` (custom) |
| API URL | `https://api.github.com` | Your Gitea instance URL |
| Tools location | Same repo (`tools/ai-review`) | Checkout from central repo |
---
## GitHub Workflows
### PR Review Workflow
```yaml
# .github/workflows/ai-review.yml
name: AI Code Review
on:
pull_request:
types: [opened, synchronize]
jobs:
ai-review:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Review
env:
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AI_REVIEW_REPO: ${{ github.repository }}
AI_REVIEW_API_URL: https://api.github.com
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd tools/ai-review
python main.py pr ${{ github.repository }} ${{ github.event.pull_request.number }}
```
### Issue Triage Workflow
```yaml
# .github/workflows/ai-issue-triage.yml
name: AI Issue Triage
on:
issues:
types: [opened, labeled]
jobs:
ai-triage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Issue Triage
env:
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AI_REVIEW_REPO: ${{ github.repository }}
AI_REVIEW_API_URL: https://api.github.com
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd tools/ai-review
python main.py issue ${{ github.repository }} ${{ github.event.issue.number }} \
--title "${{ github.event.issue.title }}"
```
### Comment Reply Workflow (includes Bartender Chat)
```yaml
# .github/workflows/ai-comment-reply.yml
name: AI Comment Reply
on:
issue_comment:
types: [created]
jobs:
ai-reply:
runs-on: ubuntu-latest
if: contains(github.event.comment.body, '@ai-bot')
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Comment Response
env:
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AI_REVIEW_REPO: ${{ github.repository }}
AI_REVIEW_API_URL: https://api.github.com
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
# Pass untrusted comment text via an env var to prevent shell script injection
COMMENT_BODY: ${{ github.event.comment.body }}
run: |
cd tools/ai-review
python main.py comment ${{ github.repository }} ${{ github.event.issue.number }} \
"$COMMENT_BODY"
```
### Codebase Analysis Workflow
```yaml
# .github/workflows/ai-codebase-review.yml
name: AI Codebase Analysis
on:
schedule:
- cron: "0 0 * * 0" # Weekly on Sunday
workflow_dispatch: # Manual trigger
jobs:
ai-codebase:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run Codebase Analysis
env:
AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AI_REVIEW_REPO: ${{ github.repository }}
AI_REVIEW_API_URL: https://api.github.com
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd tools/ai-review
python main.py codebase ${{ github.repository }}
```
---
## Gitea Workflows
### PR Review Workflow
```yaml
# .gitea/workflows/enterprise-ai-review.yml
name: AI Code Review
on:
pull_request:
types: [opened, synchronize]
jobs:
ai-review:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/checkout@v4
with:
repository: YourOrg/OpenRabbit
path: .ai-review
token: ${{ secrets.AI_REVIEW_TOKEN }}
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Review
env:
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
AI_REVIEW_REPO: ${{ gitea.repository }}
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd .ai-review/tools/ai-review
python main.py pr ${{ gitea.repository }} ${{ gitea.event.pull_request.number }}
```
### Issue Triage Workflow
```yaml
# .gitea/workflows/ai-issue-triage.yml
name: AI Issue Triage
on:
issues:
types: [opened, labeled]
jobs:
ai-triage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v4
with:
repository: YourOrg/OpenRabbit
path: .ai-review
token: ${{ secrets.AI_REVIEW_TOKEN }}
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Issue Triage
env:
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
AI_REVIEW_REPO: ${{ gitea.repository }}
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd .ai-review/tools/ai-review
python main.py issue ${{ gitea.repository }} ${{ gitea.event.issue.number }} \
--title "${{ gitea.event.issue.title }}"
```
### Comment Reply Workflow (includes Bartender Chat)
```yaml
# .gitea/workflows/ai-comment-reply.yml
name: AI Comment Reply
on:
issue_comment:
types: [created]
jobs:
ai-reply:
runs-on: ubuntu-latest
if: contains(gitea.event.comment.body, '@ai-bot')
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v4
with:
repository: YourOrg/OpenRabbit
path: .ai-review
token: ${{ secrets.AI_REVIEW_TOKEN }}
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run AI Comment Response
env:
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
AI_REVIEW_REPO: ${{ gitea.repository }}
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SEARXNG_URL: ${{ secrets.SEARXNG_URL }}
# Pass untrusted comment text via an env var to prevent shell script injection
COMMENT_BODY: ${{ gitea.event.comment.body }}
run: |
cd .ai-review/tools/ai-review
python main.py comment ${{ gitea.repository }} ${{ gitea.event.issue.number }} \
"$COMMENT_BODY"
```
### Codebase Analysis Workflow
```yaml
# .gitea/workflows/ai-codebase-review.yml
name: AI Codebase Analysis
on:
schedule:
- cron: "0 0 * * 0" # Weekly on Sunday
workflow_dispatch: # Manual trigger
jobs:
ai-codebase:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/checkout@v4
with:
repository: YourOrg/OpenRabbit
path: .ai-review
token: ${{ secrets.AI_REVIEW_TOKEN }}
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install requests pyyaml
- name: Run Codebase Analysis
env:
AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }}
AI_REVIEW_REPO: ${{ gitea.repository }}
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
cd .ai-review/tools/ai-review
python main.py codebase ${{ gitea.repository }}
```
---
## Required Secrets
### GitHub
| Secret | Required | Description |
|--------|----------|-------------|
| `GITHUB_TOKEN` | Auto | Built-in token (automatic) |
| `OPENAI_API_KEY` | Choose one | OpenAI API key |
| `OPENROUTER_API_KEY` | Choose one | OpenRouter API key |
| `OLLAMA_HOST` | Choose one | Ollama server URL |
| `SEARXNG_URL` | Optional | SearXNG instance for web search |
### Gitea
| Secret | Required | Description |
|--------|----------|-------------|
| `AI_REVIEW_TOKEN` | Yes | Gitea bot access token |
| `OPENAI_API_KEY` | Choose one | OpenAI API key |
| `OPENROUTER_API_KEY` | Choose one | OpenRouter API key |
| `OLLAMA_HOST` | Choose one | Ollama server URL |
| `SEARXNG_URL` | Optional | SearXNG instance for web search |
---
## Customization
### For GitHub
The tools are included in the same repository under `tools/ai-review`, so no additional checkout is needed.
### For Gitea
Replace the repository reference with your OpenRabbit fork:
```yaml
repository: YourOrg/OpenRabbit
```
Replace the API URL with your Gitea instance:
```yaml
AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
```
---
## Chat/Bartender Workflow
Both platforms support the Bartender chat agent through the comment reply workflow. When `@ai-bot` is mentioned with a question (not a specific command like `summarize`), the Chat Agent handles it with tool calling capabilities.
To enable web search, set the `SEARXNG_URL` secret to your SearXNG instance URL.
**Example usage:**
```
@ai-bot How do I configure rate limiting?
@ai-bot Find all authentication-related files
@ai-bot What does the dispatcher module do?
```

4
pytest.ini Normal file
View File

@@ -0,0 +1,4 @@
[pytest]
testpaths = tests
python_paths = tools/ai-review
addopts = -v --tb=short

257
tests/test_ai_review.py Normal file
View File

@@ -0,0 +1,257 @@
"""Test Suite for AI Code Review Workflow
Tests for verifying prompt formatting, agent logic, and core functionality.
Run with: pytest tests/ -v
"""
import os
import sys
# Add the tools directory to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "ai-review"))
import pytest
class TestPromptFormatting:
"""Test that all prompts can be formatted without errors."""
def get_prompt_path(self, name: str) -> str:
"""Get the full path to a prompt file."""
return os.path.join(
os.path.dirname(__file__),
"..", "tools", "ai-review", "prompts", f"{name}.md"
)
def load_prompt(self, name: str) -> str:
"""Load a prompt file."""
path = self.get_prompt_path(name)
with open(path) as f:
return f.read()
def test_issue_triage_prompt_formatting(self):
"""Test that issue_triage.md can be formatted with placeholders."""
prompt = self.load_prompt("issue_triage")
# This should NOT raise a KeyError
formatted = prompt.format(
title="Test Issue Title",
body="This is the issue body content",
author="testuser",
existing_labels="bug, urgent"
)
assert "Test Issue Title" in formatted
assert "This is the issue body content" in formatted
assert "testuser" in formatted
assert "bug, urgent" in formatted
# JSON example should still be present (curly braces escaped)
assert '"type"' in formatted
assert '"priority"' in formatted
def test_issue_response_prompt_formatting(self):
"""Test that issue_response.md can be formatted with placeholders."""
prompt = self.load_prompt("issue_response")
formatted = prompt.format(
issue_type="bug",
priority="high",
title="Bug Report",
body="Description of the bug",
triage_analysis="This is a high priority bug"
)
assert "bug" in formatted
assert "high" in formatted
assert "Bug Report" in formatted
# JSON example should still be present
assert '"comment"' in formatted
def test_base_prompt_no_placeholders(self):
"""Test that base.md loads correctly (no placeholders needed)."""
prompt = self.load_prompt("base")
# Should contain key elements
assert "security" in prompt.lower()
assert "JSON" in prompt
assert "severity" in prompt.lower()
def test_prompts_have_escaped_json(self):
"""Verify JSON examples use double curly braces."""
for prompt_name in ["issue_triage", "issue_response"]:
prompt = self.load_prompt(prompt_name)
# Check that format() doesn't fail
try:
# Try with minimal placeholders
if prompt_name == "issue_triage":
prompt.format(title="t", body="b", author="a", existing_labels="l")
elif prompt_name == "issue_response":
prompt.format(issue_type="t", priority="p", title="t", body="b", triage_analysis="a")
except KeyError as e:
pytest.fail(f"Prompt {prompt_name} has unescaped curly braces: {e}")
class TestImports:
    """Test that all modules can be imported correctly."""

    def test_import_agents(self):
        """Agent classes are importable from the agents package."""
        from agents.base_agent import BaseAgent, AgentContext, AgentResult
        from agents.issue_agent import IssueAgent
        from agents.pr_agent import PRAgent
        from agents.codebase_agent import CodebaseAgent

        for cls in (BaseAgent, IssueAgent, PRAgent, CodebaseAgent):
            assert cls is not None

    def test_import_clients(self):
        """Gitea and LLM client classes are importable."""
        from clients.gitea_client import GiteaClient
        from clients.llm_client import LLMClient

        for cls in (GiteaClient, LLMClient):
            assert cls is not None

    def test_import_security(self):
        """The security scanner is importable."""
        from security.security_scanner import SecurityScanner

        assert SecurityScanner is not None

    def test_import_enterprise(self):
        """Enterprise feature modules are importable."""
        from enterprise.audit_logger import AuditLogger
        from enterprise.metrics import MetricsCollector

        for cls in (AuditLogger, MetricsCollector):
            assert cls is not None

    def test_import_dispatcher(self):
        """The event dispatcher is importable."""
        from dispatcher import Dispatcher

        assert Dispatcher is not None
class TestSecurityScanner:
    """Test security scanner pattern detection."""

    @staticmethod
    def _scan(code: str) -> list:
        """Scan a snippet as 'test.py' and return all findings as a list."""
        from security.security_scanner import SecurityScanner

        return list(SecurityScanner().scan_content(code, "test.py"))

    def test_detects_hardcoded_secret(self):
        """A hardcoded API key must be reported, with HIGH severity."""
        findings = self._scan('\nAPI_KEY = "sk-1234567890abcdef"\n')
        assert len(findings) >= 1
        assert any(f.severity == "HIGH" for f in findings)

    def test_detects_eval(self):
        """Use of eval() on user input must be flagged by an eval rule."""
        findings = self._scan("\nresult = eval(user_input)\n")
        assert len(findings) >= 1
        assert any("eval" in f.rule_name.lower() for f in findings)

    def test_no_false_positives_on_clean_code(self):
        """Benign code must produce no HIGH severity findings."""
        snippet = '\ndef hello():\n    print("Hello, world!")\n    return 42\n'
        findings = self._scan(snippet)
        # Should have no HIGH severity issues for clean code.
        assert [f for f in findings if f.severity == "HIGH"] == []
class TestAgentContext:
    """Test agent context and result dataclasses."""

    def test_agent_context_creation(self):
        """AgentContext stores the fields it is constructed with."""
        from agents.base_agent import AgentContext

        ctx = AgentContext(
            owner="testowner",
            repo="testrepo",
            event_type="issues",
            event_data={"action": "opened"},
            config={},
        )
        assert (ctx.owner, ctx.repo, ctx.event_type) == (
            "testowner",
            "testrepo",
            "issues",
        )

    def test_agent_result_creation(self):
        """AgentResult stores success flag, message, data, and actions."""
        from agents.base_agent import AgentResult

        outcome = AgentResult(
            success=True,
            message="Test passed",
            data={"key": "value"},
            actions_taken=["action1", "action2"],
        )
        assert outcome.success is True
        assert outcome.message == "Test passed"
        assert len(outcome.actions_taken) == 2
class TestMetrics:
    """Test metrics collection."""

    def test_counter_increment(self):
        """Counter starts at zero and increments by 1 or a given step."""
        from enterprise.metrics import Counter

        counter = Counter("test_counter")
        assert counter.value == 0
        counter.inc()
        assert counter.value == 1
        counter.inc(5)
        assert counter.value == 6

    def test_histogram_observation(self):
        """Histogram tracks observation count and running sum."""
        from enterprise.metrics import Histogram

        hist = Histogram("test_histogram")
        # Observe in the same order as before so float summation matches.
        for sample in (0.1, 0.5, 1.0):
            hist.observe(sample)
        assert hist.count == 3
        assert hist.sum == 1.6

    def test_metrics_collector_summary(self):
        """Collector summary reflects one successful request."""
        from enterprise.metrics import MetricsCollector

        collector = MetricsCollector()
        collector.record_request_start("TestAgent")
        collector.record_request_end("TestAgent", success=True, duration_seconds=0.5)
        summary = collector.get_summary()
        assert summary["requests"]["total"] == 1
        assert summary["requests"]["success"] == 1
# Allow running this file directly (python tests/test_ai_review.py) in
# addition to the usual `pytest tests/ -v` invocation.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])

View File

@@ -0,0 +1,19 @@
"""AI Review Agents Package
This package contains the modular agent implementations for the
enterprise AI code review system.
"""
from agents.base_agent import BaseAgent, AgentContext, AgentResult
from agents.issue_agent import IssueAgent
from agents.pr_agent import PRAgent
from agents.codebase_agent import CodebaseAgent
# Public re-export surface for `from agents import ...`.
__all__ = [
    "BaseAgent",
    "AgentContext",
    "AgentResult",
    "IssueAgent",
    "PRAgent",
    "CodebaseAgent",
]

View File

@@ -0,0 +1,257 @@
"""Base Agent
Abstract base class for all AI agents. Provides common functionality
for Gitea API interaction, LLM calls, logging, and rate limiting.
"""
import logging
import os
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any
import yaml
from clients.gitea_client import GiteaClient
from clients.llm_client import LLMClient, LLMResponse
@dataclass
class AgentContext:
    """Context passed to agent during execution."""

    owner: str  # Repository owner (user or organization name).
    repo: str  # Repository name.
    event_type: str  # Event kind, e.g. "issues", "issue_comment", "schedule".
    event_data: dict  # Raw event payload as delivered by the platform.
    config: dict = field(default_factory=dict)  # Agent configuration mapping.
@dataclass
class AgentResult:
    """Result from agent execution."""

    success: bool  # True when the agent completed without a fatal error.
    message: str  # Human-readable summary of the outcome.
    data: dict = field(default_factory=dict)  # Structured result payload.
    actions_taken: list[str] = field(default_factory=list)  # Audit trail of actions.
    error: str | None = None  # Error description when success is False.
class BaseAgent(ABC):
    """Abstract base class for AI agents.

    Provides shared plumbing for concrete agents: Gitea and LLM clients,
    config loading, simple inter-request rate limiting, prompt loading,
    and helpers for creating/updating marker-tagged AI comments.
    """

    # Marker to identify AI-generated comments
    AI_MARKER = "<!-- AI_CODE_REVIEW -->"

    # Disclaimer text
    AI_DISCLAIMER = (
        "**Note:** This review was generated by an AI assistant. "
        "While it aims to be accurate and helpful, it may contain mistakes "
        "or miss important issues. Please verify all findings before taking action."
    )

    def __init__(
        self,
        config: dict | None = None,
        gitea_client: GiteaClient | None = None,
        llm_client: LLMClient | None = None,
    ):
        """Initialize the base agent.

        Args:
            config: Agent configuration dictionary.
            gitea_client: Optional pre-configured Gitea client.
            llm_client: Optional pre-configured LLM client.
        """
        self.config = config or self._load_config()
        self.gitea = gitea_client or GiteaClient()
        self.llm = llm_client or LLMClient.from_config(self.config)
        self.logger = logging.getLogger(self.__class__.__name__)
        # Rate limiting
        self._last_request_time = 0.0
        self._min_request_interval = 1.0  # seconds

    @staticmethod
    def _load_config() -> dict:
        """Load configuration from config.yml.

        Returns:
            Parsed configuration mapping; an empty dict when the file is
            missing or empty.
        """
        config_path = os.path.join(os.path.dirname(__file__), "..", "config.yml")
        if os.path.exists(config_path):
            with open(config_path) as f:
                # BUGFIX: yaml.safe_load returns None for an empty document,
                # which would make self.config None and crash every later
                # `self.config.get(...)` call. Fall back to {} instead.
                return yaml.safe_load(f) or {}
        return {}

    def _rate_limit(self):
        """Apply rate limiting between requests.

        Sleeps just long enough to keep at least `_min_request_interval`
        seconds between consecutive calls.
        """
        elapsed = time.time() - self._last_request_time
        if elapsed < self._min_request_interval:
            time.sleep(self._min_request_interval - elapsed)
        self._last_request_time = time.time()

    def load_prompt(self, prompt_name: str) -> str:
        """Load a prompt template from the prompts directory.

        Args:
            prompt_name: Name of the prompt file (without extension).

        Returns:
            Prompt template content.

        Raises:
            FileNotFoundError: If no such prompt file exists.
        """
        prompt_path = os.path.join(
            os.path.dirname(__file__), "..", "prompts", f"{prompt_name}.md"
        )
        if not os.path.exists(prompt_path):
            raise FileNotFoundError(f"Prompt not found: {prompt_path}")
        with open(prompt_path) as f:
            return f.read()

    def call_llm(self, prompt: str, **kwargs) -> LLMResponse:
        """Make a rate-limited call to the LLM.

        Args:
            prompt: The prompt to send.
            **kwargs: Additional LLM options.

        Returns:
            LLM response.
        """
        self._rate_limit()
        return self.llm.call(prompt, **kwargs)

    def call_llm_json(self, prompt: str, **kwargs) -> dict:
        """Make a rate-limited call and parse JSON response.

        Args:
            prompt: The prompt to send.
            **kwargs: Additional LLM options.

        Returns:
            Parsed JSON response.
        """
        self._rate_limit()
        return self.llm.call_json(prompt, **kwargs)

    def find_ai_comment(
        self,
        owner: str,
        repo: str,
        issue_index: int,
        marker: str | None = None,
    ) -> int | None:
        """Find an existing AI comment by marker.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_index: Issue or PR number.
            marker: Custom marker to search for. Defaults to AI_MARKER.

        Returns:
            Comment ID if found, None otherwise.
        """
        marker = marker or self.AI_MARKER
        comments = self.gitea.list_issue_comments(owner, repo, issue_index)
        # First match wins; agents only ever write one comment per marker.
        for comment in comments:
            if marker in comment.get("body", ""):
                return comment["id"]
        return None

    def upsert_comment(
        self,
        owner: str,
        repo: str,
        issue_index: int,
        body: str,
        marker: str | None = None,
    ) -> dict:
        """Create or update an AI comment.

        Args:
            owner: Repository owner.
            repo: Repository name.
            issue_index: Issue or PR number.
            body: Comment body (marker will be prepended if not present).
            marker: Custom marker. Defaults to AI_MARKER.

        Returns:
            Created or updated comment.
        """
        marker = marker or self.AI_MARKER
        # Ensure marker is in the body so find_ai_comment can locate it later.
        if marker not in body:
            body = f"{marker}\n{body}"
        # Check for existing comment
        existing_id = self.find_ai_comment(owner, repo, issue_index, marker)
        if existing_id:
            return self.gitea.update_issue_comment(owner, repo, existing_id, body)
        else:
            return self.gitea.create_issue_comment(owner, repo, issue_index, body)

    def format_with_disclaimer(self, content: str) -> str:
        """Add AI disclaimer to content.

        Args:
            content: The main content.

        Returns:
            Content with disclaimer (and AI marker) prepended.
        """
        return f"{self.AI_DISCLAIMER}\n\n{self.AI_MARKER}\n{content}"

    @abstractmethod
    def execute(self, context: AgentContext) -> AgentResult:
        """Execute the agent's main task.

        Args:
            context: Execution context with event data.

        Returns:
            Result of the agent execution.
        """
        pass

    @abstractmethod
    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent can handle the given event.

        Args:
            event_type: Type of event (issue, pull_request, etc).
            event_data: Event payload data.

        Returns:
            True if this agent can handle the event.
        """
        pass

    def run(self, context: AgentContext) -> AgentResult:
        """Run the agent with error handling.

        Args:
            context: Execution context.

        Returns:
            Agent result, including any errors.
        """
        try:
            self.logger.info(
                f"Running {self.__class__.__name__} for {context.owner}/{context.repo}"
            )
            result = self.execute(context)
            self.logger.info(
                f"Completed with success={result.success}: {result.message}"
            )
            return result
        except Exception as e:
            # Never let an agent crash the dispatcher; surface the error in the result.
            self.logger.exception(f"Agent execution failed: {e}")
            return AgentResult(
                success=False,
                message="Agent execution failed",
                error=str(e),
            )

View File

@@ -0,0 +1,470 @@
"""Chat Agent (Bartender)
Interactive AI chat agent with tool use capabilities.
Can search the codebase and web to answer user questions.
"""
import base64
import logging
import os
import re
from dataclasses import dataclass
import requests
from agents.base_agent import AgentContext, AgentResult, BaseAgent
from clients.llm_client import ToolCall
@dataclass
class ChatMessage:
    """A message in the chat conversation."""
    # NOTE(review): not referenced anywhere in this module's visible code —
    # presumably reserved for future structured conversation handling; confirm
    # before removing.

    role: str  # 'user', 'assistant', or 'tool'
    content: str  # Message text.
    tool_call_id: str | None = None  # Set on tool-result messages.
    name: str | None = None  # Tool name for tool responses
class ChatAgent(BaseAgent):
    """Interactive chat agent with tool capabilities.

    Handles "@ai-bot <question>" issue comments (and direct "chat" events),
    runs a bounded tool-calling loop against the LLM, and posts the answer
    back as a marker-tagged comment.
    """

    # Marker for chat responses
    CHAT_AI_MARKER = "<!-- AI_CHAT_RESPONSE -->"

    # Tool definitions in OpenAI format
    TOOLS = [
        {
            "type": "function",
            "function": {
                "name": "search_codebase",
                "description": "Search the repository codebase for files, functions, classes, or patterns. Use this to find relevant code.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query - can be a filename, function name, class name, or code pattern",
                        },
                        "file_pattern": {
                            "type": "string",
                            "description": "Optional file pattern to filter results (e.g., '*.py', 'src/*.js')",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "read_file",
                "description": "Read the contents of a specific file from the repository.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "filepath": {
                            "type": "string",
                            "description": "Path to the file to read",
                        },
                    },
                    "required": ["filepath"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the web for information using SearXNG. Use this for external documentation, tutorials, or general knowledge.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query",
                        },
                        "categories": {
                            "type": "string",
                            "description": "Optional: comma-separated categories (general, images, videos, news, science, it)",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
    ]

    # System prompt for the chat agent
    SYSTEM_PROMPT = """You are Bartender, a helpful AI assistant for code review and development tasks.
You have access to tools to help answer questions:
- search_codebase: Search the repository for code, files, functions, or patterns
- read_file: Read specific files from the repository
- search_web: Search the web for documentation, tutorials, or external information
When helping users:
1. Use tools to gather information before answering questions about code
2. Be concise but thorough in your explanations
3. Provide code examples when helpful
4. If you're unsure, say so and suggest alternatives
Repository context: {owner}/{repo}
"""

    def __init__(self, *args, **kwargs):
        """Initialize the chat agent.

        SearXNG URL resolution: config `agents.chat.searxng_url` wins, then
        the SEARXNG_URL environment variable; empty string disables web search.
        """
        super().__init__(*args, **kwargs)
        self._searxng_url = self.config.get("agents", {}).get("chat", {}).get(
            "searxng_url", os.environ.get("SEARXNG_URL", "")
        )

    def can_handle(self, event_type: str, event_data: dict) -> bool:
        """Check if this agent handles the given event.

        Accepts any bot mention in an issue comment that is NOT one of the
        specific commands (those are routed to dedicated agents), plus
        direct "chat" events.
        """
        agent_config = self.config.get("agents", {}).get("chat", {})
        if not agent_config.get("enabled", True):
            return False
        # Handle issue comment with @ai-bot chat or just @ai-bot
        if event_type == "issue_comment":
            comment_body = event_data.get("comment", {}).get("body", "")
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            # Check if this is a chat request (any @ai-bot mention that isn't a specific command)
            if mention_prefix in comment_body:
                # Check it's not another specific command
                specific_commands = ["summarize", "explain", "suggest", "security", "codebase"]
                body_lower = comment_body.lower()
                for cmd in specific_commands:
                    if f"{mention_prefix} {cmd}" in body_lower:
                        return False
                return True
        # Handle direct chat command
        if event_type == "chat":
            return True
        return False

    def execute(self, context: AgentContext) -> AgentResult:
        """Execute the chat agent.

        Extracts the user's question from the event, runs the tool-calling
        chat loop, and (for issue comments) posts the answer back.

        Returns:
            AgentResult with the response text and tools used in `data`.
        """
        self.logger.info(f"Starting chat for {context.owner}/{context.repo}")
        # Extract user message
        if context.event_type == "issue_comment":
            user_message = context.event_data.get("comment", {}).get("body", "")
            issue_index = context.event_data.get("issue", {}).get("number")
            # Remove the @ai-bot prefix
            mention_prefix = self.config.get("interaction", {}).get(
                "mention_prefix", "@ai-bot"
            )
            user_message = user_message.replace(mention_prefix, "").strip()
        else:
            user_message = context.event_data.get("message", "")
            issue_index = context.event_data.get("issue_number")
        if not user_message:
            return AgentResult(
                success=False,
                message="No message provided",
            )
        # Build conversation
        system_prompt = self.SYSTEM_PROMPT.format(
            owner=context.owner,
            repo=context.repo,
        )
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]
        # Run the chat loop with tool execution
        response_content, tools_used = self._run_chat_loop(
            context, messages, max_iterations=5
        )
        actions_taken = []
        if tools_used:
            actions_taken.append(f"Used tools: {', '.join(tools_used)}")
        # Post response if this is an issue comment
        if issue_index:
            comment_body = self._format_response(response_content)
            self.upsert_comment(
                context.owner,
                context.repo,
                issue_index,
                comment_body,
                marker=self.CHAT_AI_MARKER,
            )
            actions_taken.append("Posted chat response")
        return AgentResult(
            success=True,
            message="Chat completed",
            data={"response": response_content, "tools_used": tools_used},
            actions_taken=actions_taken,
        )

    def _run_chat_loop(
        self,
        context: AgentContext,
        messages: list[dict],
        max_iterations: int = 5,
    ) -> tuple[str, list[str]]:
        """Run the chat loop with tool execution.

        Args:
            context: Execution context (used for tool execution).
            messages: Mutable OpenAI-style message list; grows in place.
            max_iterations: Maximum tool-calling round trips before forcing
                a final, tool-free answer.

        Returns:
            Tuple of (final response content, list of tools used)
        """
        import json  # Local import: only needed to re-serialize tool arguments.

        tools_used = []
        for _ in range(max_iterations):
            self._rate_limit()
            response = self.llm.call_with_tools(messages, tools=self.TOOLS)
            # If no tool calls, we're done
            if not response.tool_calls:
                return response.content, tools_used
            # Add assistant message with tool calls
            messages.append({
                "role": "assistant",
                "content": response.content or "",
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.name,
                            # BUGFIX: the chat API expects `arguments` to be a
                            # JSON-encoded string. str() produced a Python
                            # repr (single quotes), which OpenAI-compatible
                            # providers can reject or mis-parse on the
                            # follow-up request.
                            "arguments": json.dumps(tc.arguments),
                        },
                    }
                    for tc in response.tool_calls
                ],
            })
            # Execute each tool call
            for tool_call in response.tool_calls:
                tool_result = self._execute_tool(context, tool_call)
                tools_used.append(tool_call.name)
                # Add tool result to messages
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_result,
                })
        # If we hit max iterations, make one final call without tools
        self._rate_limit()
        final_response = self.llm.call_with_tools(
            messages, tools=None, tool_choice="none"
        )
        return final_response.content, tools_used

    def _execute_tool(self, context: AgentContext, tool_call: ToolCall) -> str:
        """Execute a tool call and return the result.

        Dispatches on tool name; unknown tools and tool failures are
        reported as text so the LLM can recover rather than crashing
        the whole chat.
        """
        self.logger.info(f"Executing tool: {tool_call.name}")
        try:
            if tool_call.name == "search_codebase":
                return self._tool_search_codebase(
                    context,
                    tool_call.arguments.get("query", ""),
                    tool_call.arguments.get("file_pattern"),
                )
            elif tool_call.name == "read_file":
                return self._tool_read_file(
                    context,
                    tool_call.arguments.get("filepath", ""),
                )
            elif tool_call.name == "search_web":
                return self._tool_search_web(
                    tool_call.arguments.get("query", ""),
                    tool_call.arguments.get("categories"),
                )
            else:
                return f"Unknown tool: {tool_call.name}"
        except Exception as e:
            self.logger.error(f"Tool execution failed: {e}")
            return f"Error executing tool: {e}"

    def _tool_search_codebase(
        self,
        context: AgentContext,
        query: str,
        file_pattern: str | None = None,
    ) -> str:
        """Search the codebase for files matching a query.

        Matches the query (case-insensitive) against file paths first,
        then against file contents, line by line.
        """
        results = []
        # Get repository file list
        try:
            files = self._collect_files(context.owner, context.repo, file_pattern)
        except Exception as e:
            return f"Error listing files: {e}"
        query_lower = query.lower()
        # Search through files
        for file_info in files[:50]:  # Limit to prevent API exhaustion
            filepath = file_info.get("path", "")
            # Check filename match
            if query_lower in filepath.lower():
                results.append(f"File: {filepath}")
                continue
            # Check content for code patterns
            try:
                content_data = self.gitea.get_file_contents(
                    context.owner, context.repo, filepath
                )
                if content_data.get("content"):
                    # Contents arrive base64-encoded from the API.
                    content = base64.b64decode(content_data["content"]).decode(
                        "utf-8", errors="ignore"
                    )
                    # Search for query in content
                    lines = content.splitlines()
                    matching_lines = []
                    for i, line in enumerate(lines, 1):
                        if query_lower in line.lower():
                            matching_lines.append(f"  L{i}: {line.strip()[:100]}")
                    if matching_lines:
                        results.append(f"File: {filepath}")
                        results.extend(matching_lines[:5])  # Max 5 matches per file
            except Exception:
                pass
        if not results:
            return f"No results found for '{query}'"
        return "\n".join(results[:30])  # Limit total results

    def _collect_files(
        self,
        owner: str,
        repo: str,
        file_pattern: str | None = None,
    ) -> list[dict]:
        """Collect files from the repository.

        Recursively walks the contents API, keeping only known code/doc
        extensions, skipping vendored/generated paths, and optionally
        filtering by a glob pattern.
        """
        files = []
        # Code extensions to search
        code_extensions = {
            ".py", ".js", ".ts", ".go", ".rs", ".java", ".rb",
            ".php", ".c", ".cpp", ".h", ".cs", ".swift", ".kt",
            ".md", ".yml", ".yaml", ".json", ".toml",
        }
        # Patterns to ignore
        ignore_patterns = [
            "node_modules/", "vendor/", ".git/", "__pycache__/",
            ".venv/", "dist/", "build/", ".min.js", ".min.css",
        ]

        def traverse(path: str = ""):
            try:
                contents = self.gitea.get_file_contents(owner, repo, path or ".")
                if isinstance(contents, list):
                    for item in contents:
                        item_path = item.get("path", "")
                        if any(p in item_path for p in ignore_patterns):
                            continue
                        if item.get("type") == "file":
                            ext = os.path.splitext(item_path)[1]
                            if ext in code_extensions:
                                # Check file pattern if provided
                                if file_pattern:
                                    if not self._match_pattern(item_path, file_pattern):
                                        continue
                                files.append(item)
                        elif item.get("type") == "dir":
                            traverse(item_path)
            except Exception as e:
                self.logger.warning(f"Failed to list {path}: {e}")

        traverse()
        return files[:100]  # Limit to prevent API exhaustion

    def _match_pattern(self, filepath: str, pattern: str) -> bool:
        """Check if filepath matches a simple glob pattern."""
        import fnmatch
        return fnmatch.fnmatch(filepath, pattern)

    def _tool_read_file(self, context: AgentContext, filepath: str) -> str:
        """Read a file from the repository.

        Returns the file wrapped in a fenced code block, truncated to
        8000 characters to keep the LLM context bounded.
        """
        try:
            content_data = self.gitea.get_file_contents(
                context.owner, context.repo, filepath
            )
            if content_data.get("content"):
                content = base64.b64decode(content_data["content"]).decode(
                    "utf-8", errors="ignore"
                )
                # Truncate if too long
                if len(content) > 8000:
                    content = content[:8000] + "\n... (truncated)"
                return f"File: {filepath}\n\n```\n{content}\n```"
            return f"File not found: {filepath}"
        except Exception as e:
            return f"Error reading file: {e}"

    def _tool_search_web(
        self,
        query: str,
        categories: str | None = None,
    ) -> str:
        """Search the web using SearXNG.

        Returns a markdown list of the top 5 results, or an explanatory
        message when search is unconfigured or fails.
        """
        if not self._searxng_url:
            return "Web search is not configured. Set SEARXNG_URL environment variable."
        try:
            params = {
                "q": query,
                "format": "json",
            }
            if categories:
                params["categories"] = categories
            response = requests.get(
                f"{self._searxng_url}/search",
                params=params,
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            if not results:
                return f"No web results found for '{query}'"
            # Format results
            output = []
            for i, result in enumerate(results[:5], 1):  # Top 5 results
                title = result.get("title", "No title")
                url = result.get("url", "")
                content = result.get("content", "")[:200]
                output.append(f"{i}. **{title}**\n   {url}\n   {content}")
            return "\n\n".join(output)
        except requests.exceptions.RequestException as e:
            return f"Web search failed: {e}"

    def _format_response(self, content: str) -> str:
        """Format the chat response with disclaimer."""
        lines = [
            f"{self.AI_DISCLAIMER}",
            "",
            "---",
            "",
            content,
        ]
        return "\n".join(lines)

View File

@@ -0,0 +1,457 @@
"""Codebase Quality Agent
AI agent for analyzing overall codebase health, architecture,
technical debt, and documentation coverage.
"""
import base64
import os
from dataclasses import dataclass, field
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class CodebaseMetrics:
    """Metrics collected from codebase analysis."""

    total_files: int = 0  # Number of code files discovered.
    total_lines: int = 0  # Sum of line counts over the sampled files.
    languages: dict = field(default_factory=dict)  # Language name -> file count.
    todo_count: int = 0  # Lines containing "TODO" (case-insensitive).
    fixme_count: int = 0  # Lines containing "FIXME" (case-insensitive).
    deprecated_count: int = 0  # Lines containing "DEPRECATED" (case-insensitive).
    missing_docstrings: int = 0  # Python files with defs but no triple-quoted strings.
@dataclass
class CodebaseReport:
    """Complete codebase analysis report."""

    summary: str  # Short overall assessment.
    health_score: float  # 0-100
    metrics: CodebaseMetrics  # Raw metrics backing the report.
    issues: list[dict]  # Findings with severity/category/description/recommendation.
    recommendations: list[str]  # Prioritized, actionable suggestions.
    architecture_notes: list[str]  # Observations about structure and patterns.
class CodebaseAgent(BaseAgent):
    """Agent for codebase quality analysis."""

    # Marker for codebase reports
    CODEBASE_AI_MARKER = "<!-- AI_CODEBASE_REVIEW -->"

    # File extensions to analyze, mapped to a display name for the report.
    CODE_EXTENSIONS = {
        ".py": "Python",
        ".js": "JavaScript",
        ".ts": "TypeScript",
        ".go": "Go",
        ".rs": "Rust",
        ".java": "Java",
        ".rb": "Ruby",
        ".php": "PHP",
        ".c": "C",
        ".cpp": "C++",
        ".h": "C/C++ Header",
        ".cs": "C#",
        ".swift": "Swift",
        ".kt": "Kotlin",
    }

    # Files to ignore — matched as substrings of the repository path.
    IGNORE_PATTERNS = [
        "node_modules/",
        "vendor/",
        ".git/",
        "__pycache__/",
        ".venv/",
        "dist/",
        "build/",
        ".min.js",
        ".min.css",
    ]
def can_handle(self, event_type: str, event_data: dict) -> bool:
"""Check if this agent handles the given event."""
agent_config = self.config.get("agents", {}).get("codebase", {})
if not agent_config.get("enabled", True):
return False
# Handle manual trigger via workflow_dispatch or schedule
if event_type in ("workflow_dispatch", "schedule"):
return True
# Handle special issue command
if event_type == "issue_comment":
comment_body = event_data.get("comment", {}).get("body", "")
mention_prefix = self.config.get("interaction", {}).get(
"mention_prefix", "@ai-bot"
)
if f"{mention_prefix} codebase" in comment_body.lower():
return True
return False
    def execute(self, context: AgentContext) -> AgentResult:
        """Execute codebase analysis.

        Pipeline: list repository files -> compute raw metrics -> run the
        LLM analysis -> publish the report as a repository issue.

        Args:
            context: Execution context with owner/repo and event data.

        Returns:
            AgentResult whose `data` carries the health score, file count,
            and number of issues found.
        """
        self.logger.info(f"Starting codebase analysis for {context.owner}/{context.repo}")
        actions_taken = []
        # Step 1: Collect file list from repository
        files = self._collect_files(context.owner, context.repo)
        self.logger.info(f"Found {len(files)} files to analyze")
        # Step 2: Analyze metrics
        metrics = self._analyze_metrics(context.owner, context.repo, files)
        actions_taken.append(f"Analyzed {metrics.total_files} files")
        # Step 3: Run AI analysis on key files
        report = self._run_ai_analysis(context, files, metrics)
        actions_taken.append("Generated AI analysis report")
        # Step 4: Create or update report issue
        issue_number = self._create_report_issue(context, report)
        actions_taken.append(f"Created/updated report issue #{issue_number}")
        return AgentResult(
            success=True,
            message=f"Codebase analysis complete - Health Score: {report.health_score:.0f}/100",
            data={
                "health_score": report.health_score,
                "total_files": metrics.total_files,
                "issues_found": len(report.issues),
            },
            actions_taken=actions_taken,
        )
def _collect_files(self, owner: str, repo: str) -> list[dict]:
"""Collect list of files from the repository."""
files = []
def traverse(path: str = ""):
try:
contents = self.gitea.get_file_contents(owner, repo, path or ".")
if isinstance(contents, list):
for item in contents:
item_path = item.get("path", "")
# Skip ignored patterns
if any(p in item_path for p in self.IGNORE_PATTERNS):
continue
if item.get("type") == "file":
ext = os.path.splitext(item_path)[1]
if ext in self.CODE_EXTENSIONS:
files.append(item)
elif item.get("type") == "dir":
traverse(item_path)
except Exception as e:
self.logger.warning(f"Failed to list {path}: {e}")
traverse()
return files[:100] # Limit to prevent API exhaustion
    def _analyze_metrics(
        self,
        owner: str,
        repo: str,
        files: list[dict],
    ) -> CodebaseMetrics:
        """Analyze metrics from files.

        Counts language distribution over the first 50 files and, for those
        files, total lines and TODO/FIXME/DEPRECATED marker lines.

        Args:
            owner: Repository owner.
            repo: Repository name.
            files: File entries as returned by the Gitea contents API.

        Returns:
            Populated CodebaseMetrics. `total_files` covers the full input
            list; line and marker counts only the sampled files.
        """
        metrics = CodebaseMetrics()
        metrics.total_files = len(files)
        for file_info in files[:50]:  # Analyze top 50 files
            filepath = file_info.get("path", "")
            ext = os.path.splitext(filepath)[1]
            lang = self.CODE_EXTENSIONS.get(ext, "Unknown")
            metrics.languages[lang] = metrics.languages.get(lang, 0) + 1
            try:
                content_data = self.gitea.get_file_contents(owner, repo, filepath)
                if content_data.get("content"):
                    # Contents arrive base64-encoded from the API.
                    content = base64.b64decode(content_data["content"]).decode(
                        "utf-8", errors="ignore"
                    )
                    lines = content.splitlines()
                    metrics.total_lines += len(lines)
                    # Count markers (case-insensitive, at most one hit per
                    # marker per line).
                    for line in lines:
                        line_upper = line.upper()
                        if "TODO" in line_upper:
                            metrics.todo_count += 1
                        if "FIXME" in line_upper:
                            metrics.fixme_count += 1
                        if "DEPRECATED" in line_upper:
                            metrics.deprecated_count += 1
                    # Check for docstrings (Python)
                    # NOTE(review): per-file heuristic — increments once for
                    # any .py file that defines functions but contains no
                    # triple-quoted string at all, not per missing docstring.
                    if ext == ".py":
                        if 'def ' in content and '"""' not in content:
                            metrics.missing_docstrings += 1
            except Exception as e:
                # Best-effort: a single unreadable file must not abort the scan.
                self.logger.debug(f"Could not analyze {filepath}: {e}")
        return metrics
    def _run_ai_analysis(
        self,
        context: AgentContext,
        files: list[dict],
        metrics: CodebaseMetrics,
    ) -> CodebaseReport:
        """Run AI analysis on the codebase.

        Builds a prompt from the metrics, a sample of the file tree, and key
        configuration files, then asks the LLM for a structured JSON report.
        Falls back to a metrics-only heuristic report when the LLM call or
        JSON parsing fails.

        Args:
            context: Execution context (owner/repo used in the prompt).
            files: File entries as returned by the Gitea contents API.
            metrics: Pre-computed metrics to embed in the prompt.

        Returns:
            A CodebaseReport, always — never raises.
        """
        # Prepare context for AI
        file_list = "\n".join([f"- {f.get('path', '')}" for f in files[:30]])
        language_breakdown = "\n".join(
            [f"- {lang}: {count} files" for lang, count in metrics.languages.items()]
        )
        # Sample some key files for deeper analysis
        key_files_content = self._get_key_files_content(
            context.owner, context.repo, files
        )
        # NOTE: double braces in the JSON example below are f-string escapes.
        prompt = f"""Analyze this codebase and provide a comprehensive quality assessment.
## Repository: {context.owner}/{context.repo}
## Metrics
- Total Files: {metrics.total_files}
- Total Lines: {metrics.total_lines}
- TODO Comments: {metrics.todo_count}
- FIXME Comments: {metrics.fixme_count}
- Deprecated Markers: {metrics.deprecated_count}
## Language Breakdown
{language_breakdown}
## File Structure (sample)
{file_list}
## Key Files Content
{key_files_content}
## Analysis Required
Provide your analysis as JSON with this structure:
```json
{{
"summary": "Overall assessment in 2-3 sentences",
"health_score": 0-100,
"issues": [
{{
"severity": "HIGH|MEDIUM|LOW",
"category": "Architecture|Code Quality|Security|Testing|Documentation",
"description": "Issue description",
"recommendation": "How to fix"
}}
],
"recommendations": ["Top 3-5 actionable recommendations"],
"architecture_notes": ["Observations about code structure and patterns"]
}}
```
Be constructive and actionable. Focus on the most impactful improvements.
"""
        try:
            result = self.call_llm_json(prompt)
            return CodebaseReport(
                summary=result.get("summary", "Analysis complete"),
                health_score=float(result.get("health_score", 50)),
                metrics=metrics,
                issues=result.get("issues", []),
                recommendations=result.get("recommendations", []),
                architecture_notes=result.get("architecture_notes", []),
            )
        except Exception as e:
            self.logger.error(f"AI analysis failed: {e}")
            # The raw LLM response is not accessible here; rely on LLMClient
            # logging for diagnostics and surface the failure in the summary.
            # Calculate basic health score from metrics
            health_score = 70
            if metrics.todo_count > 10:
                health_score -= 10
            if metrics.fixme_count > 5:
                health_score -= 10
            return CodebaseReport(
                summary=f"Basic analysis complete (AI unavailable: {e})",
                health_score=health_score,
                metrics=metrics,
                issues=[],
                recommendations=["Manual review recommended"],
                architecture_notes=[],
            )
def _get_key_files_content(
self,
owner: str,
repo: str,
files: list[dict],
) -> str:
"""Get content of key files for AI analysis."""
key_file_names = [
"README.md",
"setup.py",
"pyproject.toml",
"package.json",
"Cargo.toml",
"go.mod",
"Makefile",
"Dockerfile",
]
content_parts = []
for file_info in files:
filepath = file_info.get("path", "")
filename = os.path.basename(filepath)
if filename in key_file_names:
try:
content_data = self.gitea.get_file_contents(owner, repo, filepath)
if content_data.get("content"):
content = base64.b64decode(content_data["content"]).decode(
"utf-8", errors="ignore"
)
# Truncate long files
if len(content) > 2000:
content = content[:2000] + "\n... (truncated)"
content_parts.append(f"### {filepath}\n```\n{content}\n```")
except Exception:
pass
return "\n\n".join(content_parts[:5]) or "No key configuration files found."
    def _create_report_issue(
        self,
        context: AgentContext,
        report: CodebaseReport,
    ) -> int:
        """Create or update a report issue.

        Looks for an open issue labelled ``ai-codebase-report`` whose body
        carries our marker and updates it in place; otherwise creates a new
        issue (labelled, when the label exists in the repository).

        Args:
            context: Agent execution context (provides owner/repo).
            report: The codebase report to publish.

        Returns:
            The issue number, or 0 if creation failed.
        """
        # Generate issue body
        body = self._generate_report_body(report)
        # Look for existing report issue
        try:
            issues = self.gitea.list_issues(
                context.owner, context.repo, state="open", labels=["ai-codebase-report"]
            )
            for issue in issues:
                # The marker distinguishes our report from user issues that
                # merely share the label.
                if self.CODEBASE_AI_MARKER in issue.get("body", ""):
                    # Update existing issue body
                    self.gitea.update_issue(
                        context.owner,
                        context.repo,
                        issue["number"],
                        body=body,
                    )
                    return issue["number"]
        except Exception as e:
            # Non-fatal: fall through and create a fresh issue instead.
            self.logger.warning(f"Failed to check for existing report: {e}")
        # Create new issue
        try:
            # Check for label ID
            labels = []
            try:
                repo_labels = self.gitea.get_repo_labels(context.owner, context.repo)
                for label in repo_labels:
                    if label["name"] == "ai-codebase-report":
                        labels.append(label["id"])
                        break
            except Exception:
                # Label lookup is optional; the issue is still created unlabelled.
                pass
            issue = self.gitea.create_issue(
                context.owner,
                context.repo,
                title=f"AI Codebase Report - {context.repo}",
                body=body,
                labels=labels,
            )
            return issue["number"]
        except Exception as e:
            # 0 signals to the caller that no issue could be created.
            self.logger.error(f"Failed to create report issue: {e}")
            return 0
def _generate_report_body(self, report: CodebaseReport) -> str:
"""Generate the report issue body."""
health_emoji = "🟢" if report.health_score >= 80 else ("🟡" if report.health_score >= 60 else "🔴")
lines = [
f"{self.AI_DISCLAIMER}",
"",
"# AI Codebase Quality Report",
"",
f"## Health Score: {report.health_score:.0f}/100",
"",
report.summary,
"",
"---",
"",
"## Metrics",
"",
"| Metric | Value |",
"|--------|-------|",
f"| Total Files | {report.metrics.total_files} |",
f"| Total Lines | {report.metrics.total_lines:,} |",
f"| TODO Comments | {report.metrics.todo_count} |",
f"| FIXME Comments | {report.metrics.fixme_count} |",
f"| Deprecated | {report.metrics.deprecated_count} |",
"",
]
# Languages
if report.metrics.languages:
lines.append("### Languages")
lines.append("")
for lang, count in sorted(
report.metrics.languages.items(), key=lambda x: -x[1]
):
lines.append(f"- **{lang}**: {count} files")
lines.append("")
# Issues
if report.issues:
lines.append("## Issues Found")
lines.append("")
for issue in report.issues[:10]:
severity = issue.get("severity", "MEDIUM")
emoji = "🔴" if severity == "HIGH" else ("🟡" if severity == "MEDIUM" else "🟢")
lines.append(f"### [{severity}] {issue.get('category', 'General')}")
lines.append("")
lines.append(issue.get("description", ""))
lines.append("")
lines.append(f"**Recommendation:** {issue.get('recommendation', '')}")
lines.append("")
# Recommendations
if report.recommendations:
lines.append("## Recommendations")
lines.append("")
for i, rec in enumerate(report.recommendations[:5], 1):
lines.append(f"{i}. {rec}")
lines.append("")
# Architecture notes
if report.architecture_notes:
lines.append("## Architecture Notes")
lines.append("")
for note in report.architecture_notes[:5]:
lines.append(f"- {note}")
lines.append("")
lines.append("---")
lines.append(f"*Generated by AI Codebase Agent*")
return "\n".join(lines)

View File

@@ -0,0 +1,392 @@
"""Issue Review Agent
AI agent for triaging, labeling, and responding to issues.
Handles issue.opened, issue.labeled, and issue_comment events.
"""
import logging
from dataclasses import dataclass
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class TriageResult:
    """Result of issue triage analysis."""

    # Classification of the issue; defaults to "question" when triage fails.
    issue_type: str
    # Priority bucket (e.g. "low"/"medium"/"high"); default is "medium".
    priority: str
    # Model confidence in the classification, 0.0-1.0.
    confidence: float
    # One-line summary of the issue (falls back to the issue title).
    summary: str
    # Label names the model suggests applying.
    suggested_labels: list[str]
    # Whether this looks like a duplicate of an existing issue.
    is_duplicate: bool
    # Issue number this duplicates, when known.
    duplicate_of: int | None
    # Whether the reporter should be asked for more details.
    needs_more_info: bool
    # Descriptions of the missing details.
    missing_info: list[str]
    # Affected components/areas, when identifiable.
    components: list[str]
    # Model's free-text reasoning, shown at the bottom of the triage comment.
    reasoning: str
class IssueAgent(BaseAgent):
    """Agent for handling issue events.

    Triages newly opened issues with the LLM, applies labels, and answers
    @mention commands posted in issue comments.
    """

    # Marker specific to issue comments
    # Embedded (invisible in rendered Markdown) in bot comments so they can
    # be located and updated in place later.
    ISSUE_AI_MARKER = "<!-- AI_ISSUE_TRIAGE -->"
def can_handle(self, event_type: str, event_data: dict) -> bool:
"""Check if this agent handles the given event."""
# Check if agent is enabled
agent_config = self.config.get("agents", {}).get("issue", {})
if not agent_config.get("enabled", True):
return False
# Handle issue events
if event_type == "issues":
action = event_data.get("action", "")
allowed_events = agent_config.get("events", ["opened", "labeled"])
if action not in allowed_events:
return False
# Ignore our own codebase reports to prevent double-commenting
issue = event_data.get("issue", {})
title = issue.get("title", "")
labels = [l.get("name") for l in issue.get("labels", [])]
if "AI Codebase Report" in title or "ai-codebase-report" in labels:
return False
return True
# Handle issue comment events (for @mentions)
if event_type == "issue_comment":
action = event_data.get("action", "")
if action == "created":
comment_body = event_data.get("comment", {}).get("body", "")
mention_prefix = self.config.get("interaction", {}).get(
"mention_prefix", "@ai-bot"
)
return mention_prefix in comment_body
return False
def execute(self, context: AgentContext) -> AgentResult:
"""Execute the issue agent."""
event_data = context.event_data
action = event_data.get("action", "")
if context.event_type == "issues":
if action == "opened":
return self._handle_issue_opened(context)
elif action == "labeled":
return self._handle_issue_labeled(context)
if context.event_type == "issue_comment":
return self._handle_issue_comment(context)
return AgentResult(
success=False,
message=f"Unknown action: {action}",
)
def _handle_issue_opened(self, context: AgentContext) -> AgentResult:
"""Handle a newly opened issue."""
issue = context.event_data.get("issue", {})
issue_index = issue.get("number")
title = issue.get("title", "")
body = issue.get("body", "")
author = issue.get("user", {}).get("login", "unknown")
existing_labels = [l.get("name", "") for l in issue.get("labels", [])]
self.logger.info(f"Triaging issue #{issue_index}: {title}")
# Step 1: Triage the issue
triage = self._triage_issue(title, body, author, existing_labels)
actions_taken = []
# Step 2: Apply labels if auto-label is enabled
agent_config = self.config.get("agents", {}).get("issue", {})
if agent_config.get("auto_label", True):
labels_applied = self._apply_labels(
context.owner, context.repo, issue_index, triage
)
if labels_applied:
actions_taken.append(f"Applied labels: {labels_applied}")
# Step 3: Post triage comment
comment = self._generate_triage_comment(triage, issue)
self.upsert_comment(
context.owner,
context.repo,
issue_index,
comment,
marker=self.ISSUE_AI_MARKER,
)
actions_taken.append("Posted triage comment")
return AgentResult(
success=True,
message=f"Triaged issue #{issue_index} as {triage.issue_type} ({triage.priority} priority)",
data={
"triage": {
"type": triage.issue_type,
"priority": triage.priority,
"confidence": triage.confidence,
}
},
actions_taken=actions_taken,
)
def _handle_issue_labeled(self, context: AgentContext) -> AgentResult:
"""Handle label addition to an issue."""
# Could be used for specific label-triggered actions
issue = context.event_data.get("issue", {})
label = context.event_data.get("label", {})
return AgentResult(
success=True,
message=f"Noted label '{label.get('name')}' added to issue #{issue.get('number')}",
)
    def _handle_issue_comment(self, context: AgentContext) -> AgentResult:
        """Handle @mention in issue comment.

        Parses a configured command out of the comment and, when one is
        found, posts the command's response as a new comment.

        Returns:
            Success result either way; the message notes whether a command
            was handled.
        """
        issue = context.event_data.get("issue", {})
        comment = context.event_data.get("comment", {})
        issue_index = issue.get("number")
        comment_body = comment.get("body", "")
        # Parse command from mention
        command = self._parse_command(comment_body)
        if command:
            response = self._handle_command(context, issue, command)
            self.gitea.create_issue_comment(
                context.owner, context.repo, issue_index, response
            )
            return AgentResult(
                success=True,
                message=f"Responded to command: {command}",
                actions_taken=["Posted command response"],
            )
        # A bare mention without a recognized command is silently ignored.
        return AgentResult(
            success=True,
            message="No actionable command found in mention",
        )
    def _triage_issue(
        self,
        title: str,
        body: str,
        author: str,
        existing_labels: list[str],
    ) -> TriageResult:
        """Use LLM to triage the issue.

        Fills the "issue_triage" prompt template and parses the model's JSON
        answer into a TriageResult. On any failure a conservative default
        (type "question", medium priority, low confidence, needs-more-info)
        is returned instead of raising.

        Args:
            title: Issue title.
            body: Issue body (may be empty).
            author: Login of the issue author.
            existing_labels: Label names already on the issue.

        Returns:
            A TriageResult, never None.
        """
        prompt_template = self.load_prompt("issue_triage")
        prompt = prompt_template.format(
            title=title,
            body=body or "(no description provided)",
            author=author,
            existing_labels=", ".join(existing_labels) if existing_labels else "none",
        )
        try:
            result = self.call_llm_json(prompt)
            # Every field is defaulted so a partially-valid JSON answer
            # still produces a usable triage.
            return TriageResult(
                issue_type=result.get("type", "question"),
                priority=result.get("priority", "medium"),
                confidence=result.get("confidence", 0.5),
                summary=result.get("summary", title),
                suggested_labels=result.get("suggested_labels", []),
                is_duplicate=result.get("is_duplicate", False),
                duplicate_of=result.get("duplicate_of"),
                needs_more_info=result.get("needs_more_info", False),
                missing_info=result.get("missing_info", []),
                components=result.get("components", []),
                reasoning=result.get("reasoning", ""),
            )
        except Exception as e:
            self.logger.warning(f"LLM triage failed: {e}")
            # Return default triage on failure
            return TriageResult(
                issue_type="question",
                priority="medium",
                confidence=0.3,
                summary=title,
                suggested_labels=[],
                is_duplicate=False,
                duplicate_of=None,
                needs_more_info=True,
                missing_info=["Unable to parse issue automatically"],
                components=[],
                reasoning="Automatic triage failed, needs human review",
            )
def _apply_labels(
self,
owner: str,
repo: str,
issue_index: int,
triage: TriageResult,
) -> list[str]:
"""Apply labels based on triage result."""
labels_config = self.config.get("labels", {})
# Get all repo labels
try:
repo_labels = self.gitea.get_repo_labels(owner, repo)
label_map = {l["name"]: l["id"] for l in repo_labels}
except Exception as e:
self.logger.warning(f"Failed to get repo labels: {e}")
return []
labels_to_add = []
# Map priority
priority_labels = labels_config.get("priority", {})
priority_label = priority_labels.get(triage.priority)
if priority_label and priority_label in label_map:
labels_to_add.append(label_map[priority_label])
# Map type
type_labels = labels_config.get("type", {})
type_label = type_labels.get(triage.issue_type)
if type_label and type_label in label_map:
labels_to_add.append(label_map[type_label])
# Add AI reviewed label
status_labels = labels_config.get("status", {})
reviewed_label = status_labels.get("ai_reviewed")
if reviewed_label and reviewed_label in label_map:
labels_to_add.append(label_map[reviewed_label])
if labels_to_add:
try:
self.gitea.add_issue_labels(owner, repo, issue_index, labels_to_add)
return [
name for name, id in label_map.items() if id in labels_to_add
]
except Exception as e:
self.logger.warning(f"Failed to add labels: {e}")
return []
def _generate_triage_comment(self, triage: TriageResult, issue: dict) -> str:
"""Generate a triage summary comment."""
lines = [
f"{self.AI_DISCLAIMER}",
"",
"## AI Issue Triage",
"",
f"| Field | Value |",
f"|-------|--------|",
f"| **Type** | {triage.issue_type.capitalize()} |",
f"| **Priority** | {triage.priority.capitalize()} |",
f"| **Confidence** | {triage.confidence:.0%} |",
"",
]
if triage.summary != issue.get("title"):
lines.append(f"**Summary:** {triage.summary}")
lines.append("")
if triage.components:
lines.append(f"**Components:** {', '.join(triage.components)}")
lines.append("")
if triage.needs_more_info and triage.missing_info:
lines.append("### Additional Information Needed")
lines.append("")
for info in triage.missing_info:
lines.append(f"- {info}")
lines.append("")
if triage.is_duplicate and triage.duplicate_of:
lines.append(f"### Possible Duplicate")
lines.append(f"This issue may be a duplicate of #{triage.duplicate_of}")
lines.append("")
lines.append("---")
lines.append(f"*{triage.reasoning}*")
return "\n".join(lines)
def _parse_command(self, body: str) -> str | None:
"""Parse a command from a comment body."""
mention_prefix = self.config.get("interaction", {}).get(
"mention_prefix", "@ai-bot"
)
commands = self.config.get("interaction", {}).get(
"commands", ["explain", "suggest", "security", "summarize"]
)
for command in commands:
if f"{mention_prefix} {command}" in body.lower():
return command
return None
def _handle_command(self, context: AgentContext, issue: dict, command: str) -> str:
"""Handle a command from an @mention."""
title = issue.get("title", "")
body = issue.get("body", "")
if command == "summarize":
return self._command_summarize(title, body)
elif command == "explain":
return self._command_explain(title, body)
elif command == "suggest":
return self._command_suggest(title, body)
return f"{self.AI_DISCLAIMER}\n\nSorry, I don't understand the command `{command}`."
    def _command_summarize(self, title: str, body: str) -> str:
        """Generate a summary of the issue.

        Returns a Markdown response prefixed with the AI disclaimer; on LLM
        failure the error is reported in the response instead of raising.
        """
        prompt = f"""Summarize the following issue in 2-3 concise sentences:
Title: {title}
Body: {body}
Provide only the summary, no additional formatting."""
        try:
            response = self.call_llm(prompt)
            return f"{self.AI_DISCLAIMER}\n\n**Summary:**\n{response.content}"
        except Exception as e:
            return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate a summary. Error: {e}"
    def _command_explain(self, title: str, body: str) -> str:
        """Explain the issue in more detail.

        Returns a Markdown response prefixed with the AI disclaimer; on LLM
        failure the error is reported in the response instead of raising.
        """
        prompt = f"""Analyze this issue and provide a clear explanation of what the user is asking for or reporting:
Title: {title}
Body: {body}
Provide:
1. What the issue is about
2. What the user expects
3. Any technical context that might be relevant
Be concise and helpful."""
        try:
            response = self.call_llm(prompt)
            return f"{self.AI_DISCLAIMER}\n\n**Explanation:**\n{response.content}"
        except Exception as e:
            return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to explain this issue. Error: {e}"
    def _command_suggest(self, title: str, body: str) -> str:
        """Suggest solutions for the issue.

        Returns a Markdown response prefixed with the AI disclaimer; on LLM
        failure the error is reported in the response instead of raising.
        """
        prompt = f"""Based on this issue, suggest potential solutions or next steps:
Title: {title}
Body: {body}
Provide 2-3 actionable suggestions. If this is a bug, suggest debugging steps. If this is a feature request, suggest implementation approaches.
Be practical and concise."""
        try:
            response = self.call_llm(prompt)
            return f"{self.AI_DISCLAIMER}\n\n**Suggestions:**\n{response.content}"
        except Exception as e:
            return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate suggestions. Error: {e}"

View File

@@ -0,0 +1,436 @@
"""Pull Request Review Agent
Enhanced AI agent for comprehensive PR reviews with inline comments,
security scanning, and automatic label management.
"""
import re
from dataclasses import dataclass, field
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class ReviewIssue:
    """A single issue found in the PR."""

    # File path the issue was found in (new-side path from the diff).
    file: str
    # 1-based line number in the new file, or None when not line-specific.
    line: int | None
    severity: str  # HIGH, MEDIUM, LOW
    category: str  # Security, Correctness, Performance, etc.
    # Human-readable description of the problem.
    description: str
    # Suggested fix or mitigation.
    recommendation: str
    # Offending source excerpt (truncated), when available.
    code_snippet: str | None = None
@dataclass
class PRReviewResult:
    """Result of a PR review."""

    # Short natural-language summary of the review.
    summary: str
    # Issues reported by the LLM review.
    issues: list[ReviewIssue]
    # Aggregate severity (HIGH/MEDIUM/LOW; "UNKNOWN" when the review failed).
    overall_severity: str
    # True when the AI recommends approving the PR.
    approval: bool
    # Issues found by the regex security scan, kept separate from LLM issues.
    security_issues: list[ReviewIssue] = field(default_factory=list)
class PRAgent(BaseAgent):
    """Agent for handling pull request reviews.

    Fetches the PR diff, runs a regex security scan plus an LLM review,
    posts inline and summary comments, and applies status labels.
    """

    # Marker specific to PR reviews
    # Embedded in the summary comment so it can be found and updated in place.
    PR_AI_MARKER = "<!-- AI_PR_REVIEW -->"
def can_handle(self, event_type: str, event_data: dict) -> bool:
"""Check if this agent handles the given event."""
# Check if agent is enabled
agent_config = self.config.get("agents", {}).get("pr", {})
if not agent_config.get("enabled", True):
return False
if event_type == "pull_request":
action = event_data.get("action", "")
allowed_events = agent_config.get("events", ["opened", "synchronize"])
return action in allowed_events
return False
    def execute(self, context: AgentContext) -> AgentResult:
        """Execute the PR review agent.

        Pipeline: fetch diff -> parse changed files -> optional security
        scan -> LLM review -> optional inline comments -> summary comment
        -> status labels.

        Returns:
            Success result with severity/approval/issue counts in ``data``.
        """
        pr = context.event_data.get("pull_request", {})
        pr_number = pr.get("number")
        self.logger.info(f"Reviewing PR #{pr_number}: {pr.get('title')}")
        actions_taken = []
        # Step 1: Get PR diff
        diff = self._get_diff(context.owner, context.repo, pr_number)
        if not diff.strip():
            # Nothing to review (empty PR, or the diff fetch failed).
            return AgentResult(
                success=True,
                message="PR has no changes to review",
            )
        # Step 2: Parse changed files
        changed_files = self._parse_diff_files(diff)
        # Step 3: Run security scan if enabled
        security_issues = []
        agent_config = self.config.get("agents", {}).get("pr", {})
        if agent_config.get("security_scan", True):
            security_issues = self._run_security_scan(changed_files, diff)
            if security_issues:
                actions_taken.append(f"Found {len(security_issues)} security issues")
        # Step 4: Run AI review (security findings are fed in as context)
        review_result = self._run_ai_review(diff, context, security_issues)
        # Step 5: Post inline comments if enabled
        if agent_config.get("inline_comments", True) and review_result.issues:
            inline_count = self._post_inline_comments(
                context.owner, context.repo, pr_number, review_result
            )
            actions_taken.append(f"Posted {inline_count} inline comments")
        # Step 6: Post summary comment (upsert keeps a single bot comment per PR)
        summary_comment = self._generate_summary_comment(review_result)
        self.upsert_comment(
            context.owner,
            context.repo,
            pr_number,
            summary_comment,
            marker=self.PR_AI_MARKER,
        )
        actions_taken.append("Posted summary comment")
        # Step 7: Apply labels
        labels_applied = self._apply_review_labels(
            context.owner, context.repo, pr_number, review_result
        )
        if labels_applied:
            actions_taken.append(f"Applied labels: {labels_applied}")
        return AgentResult(
            success=True,
            message=f"Reviewed PR #{pr_number}: {review_result.overall_severity} severity",
            data={
                "severity": review_result.overall_severity,
                "approval": review_result.approval,
                "issues_count": len(review_result.issues),
                "security_issues_count": len(review_result.security_issues),
            },
            actions_taken=actions_taken,
        )
    def _get_diff(self, owner: str, repo: str, pr_number: int) -> str:
        """Get the PR diff, truncated if necessary.

        Returns:
            Diff text limited to review.max_diff_lines (default 800) lines,
            or an empty string when the fetch failed.
        """
        max_lines = self.config.get("review", {}).get("max_diff_lines", 800)
        try:
            diff = self.gitea.get_pull_request_diff(owner, repo, pr_number)
            lines = diff.splitlines()
            if len(lines) > max_lines:
                # NOTE(review): truncation is silent and may cut mid-file;
                # the LLM never sees the tail of very large diffs.
                return "\n".join(lines[:max_lines])
            return diff
        except Exception as e:
            # Empty string signals "nothing to review" to execute().
            self.logger.error(f"Failed to get diff: {e}")
            return ""
def _parse_diff_files(self, diff: str) -> dict[str, str]:
"""Parse diff into file -> content mapping."""
files = {}
current_file = None
current_content = []
for line in diff.splitlines():
if line.startswith("diff --git"):
if current_file:
files[current_file] = "\n".join(current_content)
# Extract file path from "diff --git a/path b/path"
match = re.search(r"b/(.+)$", line)
if match:
current_file = match.group(1)
current_content = []
elif current_file:
current_content.append(line)
if current_file:
files[current_file] = "\n".join(current_content)
return files
    def _run_security_scan(
        self, changed_files: dict[str, str], diff: str
    ) -> list[ReviewIssue]:
        """Run security pattern scanning on the diff.

        Applies a fixed set of regex heuristics (hardcoded secrets, SQL
        injection, hardcoded IPs, eval, shell execution) to lines *added*
        by the diff, tracking new-file line numbers from the @@ hunk headers.

        Args:
            changed_files: Mapping of file path -> that file's diff hunks
                (as produced by _parse_diff_files).
            diff: Full diff text. NOTE(review): currently unused here.

        Returns:
            One ReviewIssue per (added line, matching pattern) pair.
        """
        issues = []
        # Security patterns to detect
        patterns = [
            {
                "name": "Hardcoded Secrets",
                "pattern": r'(?i)(api_key|apikey|secret|password|token|auth)\s*[=:]\s*["\'][^"\']{8,}["\']',
                "severity": "HIGH",
                "category": "Security",
                "description": "Potential hardcoded secret or API key detected",
                "recommendation": "Move secrets to environment variables or a secrets manager",
            },
            {
                "name": "SQL Injection",
                "pattern": r'(?i)(execute|query)\s*\([^)]*\+[^)]*\)|f["\'].*\{.*\}.*(?:SELECT|INSERT|UPDATE|DELETE)',
                "severity": "HIGH",
                "category": "Security",
                "description": "Potential SQL injection vulnerability - string concatenation in query",
                "recommendation": "Use parameterized queries or prepared statements",
            },
            {
                "name": "Hardcoded IP",
                "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
                "severity": "LOW",
                "category": "Security",
                "description": "Hardcoded IP address detected",
                "recommendation": "Consider using configuration or DNS names instead",
            },
            {
                "name": "Eval Usage",
                "pattern": r'\beval\s*\(',
                "severity": "HIGH",
                "category": "Security",
                "description": "Use of eval() detected - potential code injection risk",
                "recommendation": "Avoid eval() - use safer alternatives like ast.literal_eval() for Python",
            },
            {
                "name": "Shell Injection",
                "pattern": r'(?i)(?:subprocess\.call|os\.system|shell\s*=\s*True)',
                "severity": "MEDIUM",
                "category": "Security",
                "description": "Potential shell command execution - verify input is sanitized",
                "recommendation": "Use subprocess with shell=False and pass arguments as a list",
            },
        ]
        for filename, content in changed_files.items():
            # Only check added lines (starting with +)
            added_lines = []
            line_numbers = []  # NOTE(review): unused; candidate for removal
            current_line = 0
            for line in content.splitlines():
                if line.startswith("@@"):
                    # Parse line number from @@ -x,y +a,b @@
                    match = re.search(r"\+(\d+)", line)
                    if match:
                        # Start one before the hunk's first new-file line;
                        # the first +/context line increments it to +a.
                        current_line = int(match.group(1)) - 1
                elif line.startswith("+") and not line.startswith("+++"):
                    current_line += 1
                    added_lines.append((current_line, line[1:]))
                elif not line.startswith("-"):
                    # Context line: advances the new-file line counter.
                    # NOTE(review): header lines before the first @@ also hit
                    # this branch, but the counter is reset at each hunk.
                    current_line += 1
            # Check patterns on added lines
            for line_num, line_content in added_lines:
                for pattern_def in patterns:
                    if re.search(pattern_def["pattern"], line_content):
                        issues.append(
                            ReviewIssue(
                                file=filename,
                                line=line_num,
                                severity=pattern_def["severity"],
                                category=pattern_def["category"],
                                description=pattern_def["description"],
                                recommendation=pattern_def["recommendation"],
                                code_snippet=line_content.strip()[:100],
                            )
                        )
        return issues
    def _run_ai_review(
        self,
        diff: str,
        context: AgentContext,
        security_issues: list[ReviewIssue],
    ) -> PRReviewResult:
        """Run AI-based code review.

        Builds the review prompt (base template + already-detected security
        findings + diff), asks the LLM for JSON, and maps the answer to a
        PRReviewResult. On failure a non-approving result is returned
        instead of raising.

        Args:
            diff: (Possibly truncated) PR diff text.
            context: Agent execution context. NOTE(review): currently unused.
            security_issues: Findings from the regex scan, passed through to
                the result and surfaced to the LLM for context.

        Returns:
            A PRReviewResult, never None.
        """
        prompt_template = self.load_prompt("base")
        # Add security context if issues were found
        security_context = ""
        if security_issues:
            security_context = "\n\nSECURITY SCAN RESULTS (already detected):\n"
            for issue in security_issues[:5]:  # Limit to first 5
                security_context += f"- [{issue.severity}] {issue.file}:{issue.line} - {issue.description}\n"
        prompt = f"{prompt_template}\n{security_context}\nDIFF:\n{diff}"
        try:
            result = self.call_llm_json(prompt)
            issues = []
            # Defensive defaults: tolerate partially-valid JSON answers.
            for issue_data in result.get("issues", []):
                issues.append(
                    ReviewIssue(
                        file=issue_data.get("file", "unknown"),
                        line=issue_data.get("line"),
                        severity=issue_data.get("severity", "MEDIUM"),
                        category=issue_data.get("category", "General"),
                        description=issue_data.get("description", ""),
                        recommendation=issue_data.get("recommendation", ""),
                        code_snippet=issue_data.get("code_snippet"),
                    )
                )
            return PRReviewResult(
                summary=result.get("summary", "Review completed"),
                issues=issues,
                overall_severity=result.get("overall_severity", "LOW"),
                approval=result.get("approval", True),
                security_issues=security_issues,
            )
        except Exception as e:
            self.logger.error(f"AI review failed: {e}")
            # Fail closed: no approval when the review could not run.
            return PRReviewResult(
                summary=f"AI review encountered an error: {e}",
                issues=[],
                overall_severity="UNKNOWN",
                approval=False,
                security_issues=security_issues,
            )
    def _post_inline_comments(
        self,
        owner: str,
        repo: str,
        pr_number: int,
        review: PRReviewResult,
    ) -> int:
        """Post inline comments for issues with line numbers.

        Issues lacking a file or line are skipped; at most 10 inline
        comments are submitted in a single PR review.

        Returns:
            Number of inline comments posted (0 on failure or no-op).
        """
        comments = []
        all_issues = review.issues + review.security_issues
        for issue in all_issues:
            if issue.line and issue.file:
                comment_body = (
                    f"**[{issue.severity}] {issue.category}**\n\n"
                    f"{issue.description}\n\n"
                    f"**Recommendation:** {issue.recommendation}"
                )
                comments.append(
                    {
                        "path": issue.file,
                        "line": issue.line,
                        "body": comment_body,
                    }
                )
        if not comments:
            return 0
        try:
            # Use Gitea's pull request review API for inline comments
            self.gitea.create_pull_request_review(
                owner=owner,
                repo=repo,
                index=pr_number,
                body="AI Code Review - Inline Comments",
                event="COMMENT",
                comments=comments[:10],  # Limit to 10 inline comments
            )
            return min(len(comments), 10)
        except Exception as e:
            # Inline comments are best-effort; the summary comment still goes out.
            self.logger.warning(f"Failed to post inline comments: {e}")
            return 0
def _generate_summary_comment(self, review: PRReviewResult) -> str:
"""Generate the summary comment for the PR."""
lines = [
f"{self.AI_DISCLAIMER}",
"",
"## AI Code Review",
"",
review.summary,
"",
]
# Statistics
all_issues = review.issues + review.security_issues
high = sum(1 for i in all_issues if i.severity == "HIGH")
medium = sum(1 for i in all_issues if i.severity == "MEDIUM")
low = sum(1 for i in all_issues if i.severity == "LOW")
lines.append("### Summary")
lines.append("")
lines.append(f"| Severity | Count |")
lines.append(f"|----------|-------|")
lines.append(f"| HIGH | {high} |")
lines.append(f"| MEDIUM | {medium} |")
lines.append(f"| LOW | {low} |")
lines.append("")
# Security issues section
if review.security_issues:
lines.append("### Security Issues")
lines.append("")
for issue in review.security_issues[:5]:
lines.append(f"- **[{issue.severity}]** `{issue.file}:{issue.line}` - {issue.description}")
lines.append("")
# Other issues (limit display)
other_issues = [i for i in review.issues if i not in review.security_issues]
if other_issues:
lines.append("### Review Findings")
lines.append("")
for issue in other_issues[:10]:
loc = f"`{issue.file}:{issue.line}`" if issue.line else f"`{issue.file}`"
lines.append(f"- **[{issue.severity}]** {loc} - {issue.description}")
if len(other_issues) > 10:
lines.append(f"- ...and {len(other_issues) - 10} more issues")
lines.append("")
# Verdict
lines.append("---")
lines.append(f"**Overall Severity:** `{review.overall_severity}`")
if review.approval:
lines.append("**AI Recommendation:** Approve")
else:
lines.append("**AI Recommendation:** Changes Requested")
return "\n".join(lines)
def _apply_review_labels(
self,
owner: str,
repo: str,
pr_number: int,
review: PRReviewResult,
) -> list[str]:
"""Apply labels based on review result."""
labels_config = self.config.get("labels", {}).get("status", {})
try:
repo_labels = self.gitea.get_repo_labels(owner, repo)
label_map = {l["name"]: l["id"] for l in repo_labels}
except Exception as e:
self.logger.warning(f"Failed to get repo labels: {e}")
return []
labels_to_add = []
# Add approval/changes required label
if review.approval:
label_name = labels_config.get("ai_approved", "ai-approved")
else:
label_name = labels_config.get("ai_changes_required", "ai-changes-required")
if label_name in label_map:
labels_to_add.append(label_map[label_name])
if labels_to_add:
try:
self.gitea.add_issue_labels(owner, repo, pr_number, labels_to_add)
return [name for name, id in label_map.items() if id in labels_to_add]
except Exception as e:
self.logger.warning(f"Failed to add labels: {e}")
return []

View File

@@ -0,0 +1,10 @@
"""API Clients Package
This package contains client wrappers for external services
like Gitea API and LLM providers.
"""
from clients.gitea_client import GiteaClient
from clients.llm_client import LLMClient
__all__ = ["GiteaClient", "LLMClient"]

View File

@@ -0,0 +1,447 @@
"""Gitea API Client
A unified client for interacting with the Gitea REST API.
Provides methods for issues, pull requests, comments, and repository operations.
"""
import os
from typing import Any
import requests
class GiteaClient:
"""Client for Gitea API operations."""
def __init__(
self,
api_url: str | None = None,
token: str | None = None,
timeout: int = 30,
):
"""Initialize the Gitea client.
Args:
api_url: Gitea API base URL. Defaults to AI_REVIEW_API_URL env var.
token: API token. Defaults to AI_REVIEW_TOKEN env var.
timeout: Request timeout in seconds.
"""
self.api_url = api_url or os.environ.get("AI_REVIEW_API_URL", "")
self.token = token or os.environ.get("AI_REVIEW_TOKEN", "")
self.timeout = timeout
if not self.api_url:
raise ValueError("Gitea API URL is required")
if not self.token:
raise ValueError("Gitea API token is required")
self.headers = {
"Authorization": f"token {self.token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
def _request(
self,
method: str,
endpoint: str,
json: dict | None = None,
params: dict | None = None,
) -> dict | list:
"""Make an API request.
Args:
method: HTTP method (GET, POST, PATCH, DELETE).
endpoint: API endpoint (without base URL).
json: Request body for POST/PATCH.
params: Query parameters.
Returns:
Response JSON data.
Raises:
requests.HTTPError: If the request fails.
"""
url = f"{self.api_url}{endpoint}"
response = requests.request(
method=method,
url=url,
headers=self.headers,
json=json,
params=params,
timeout=self.timeout,
)
response.raise_for_status()
if response.status_code == 204:
return {}
return response.json()
# -------------------------------------------------------------------------
# Issue Operations
# -------------------------------------------------------------------------
def create_issue(
self,
owner: str,
repo: str,
title: str,
body: str,
labels: list[int] | None = None,
) -> dict:
"""Create a new issue.
Args:
owner: Repository owner.
repo: Repository name.
title: Issue title.
body: Issue body.
labels: Optional list of label IDs.
Returns:
Created issue object.
"""
payload = {
"title": title,
"body": body,
}
if labels:
payload["labels"] = labels
return self._request(
"POST",
f"/repos/{owner}/{repo}/issues",
json=payload,
)
def update_issue(
self,
owner: str,
repo: str,
index: int,
title: str | None = None,
body: str | None = None,
state: str | None = None,
) -> dict:
"""Update an existing issue.
Args:
owner: Repository owner.
repo: Repository name.
index: Issue number.
title: New title.
body: New body.
state: New state (open, closed).
Returns:
Updated issue object.
"""
payload = {}
if title:
payload["title"] = title
if body:
payload["body"] = body
if state:
payload["state"] = state
return self._request(
"PATCH",
f"/repos/{owner}/{repo}/issues/{index}",
json=payload,
)
def list_issues(
self,
owner: str,
repo: str,
state: str = "open",
labels: list[str] | None = None,
page: int = 1,
limit: int = 30,
) -> list[dict]:
"""List issues in a repository.
Args:
owner: Repository owner.
repo: Repository name.
state: Issue state (open, closed, all).
labels: Filter by labels.
page: Page number.
limit: Items per page.
Returns:
List of issue objects.
"""
params = {
"state": state,
"page": page,
"limit": limit,
}
if labels:
params["labels"] = ",".join(labels)
return self._request("GET", f"/repos/{owner}/{repo}/issues", params=params)
    def get_issue(self, owner: str, repo: str, index: int) -> dict:
        """Get a single issue.

        Args:
            owner: Repository owner.
            repo: Repository name.
            index: Issue number.

        Returns:
            Issue object.

        Raises:
            requests.HTTPError: If the API call fails (e.g. 404).
        """
        return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}")
    def create_issue_comment(
        self,
        owner: str,
        repo: str,
        index: int,
        body: str,
    ) -> dict:
        """Create a comment on an issue.

        Args:
            owner: Repository owner.
            repo: Repository name.
            index: Issue number.
            body: Comment body.

        Returns:
            Created comment object.

        Raises:
            requests.HTTPError: If the API call fails.
        """
        return self._request(
            "POST",
            f"/repos/{owner}/{repo}/issues/{index}/comments",
            json={"body": body},
        )
    def update_issue_comment(
        self,
        owner: str,
        repo: str,
        comment_id: int,
        body: str,
    ) -> dict:
        """Update an existing comment.

        Note: the endpoint is addressed by comment ID, not issue number.

        Args:
            owner: Repository owner.
            repo: Repository name.
            comment_id: Comment ID.
            body: Updated comment body.

        Returns:
            Updated comment object.

        Raises:
            requests.HTTPError: If the API call fails.
        """
        return self._request(
            "PATCH",
            f"/repos/{owner}/{repo}/issues/comments/{comment_id}",
            json={"body": body},
        )
    def list_issue_comments(
        self,
        owner: str,
        repo: str,
        index: int,
    ) -> list[dict]:
        """List comments on an issue.

        Args:
            owner: Repository owner.
            repo: Repository name.
            index: Issue number.

        Returns:
            List of comment objects.

        Raises:
            requests.HTTPError: If the API call fails.
        """
        return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}/comments")
    def add_issue_labels(
        self,
        owner: str,
        repo: str,
        index: int,
        labels: list[int],
    ) -> list[dict]:
        """Add labels to an issue.

        Args:
            owner: Repository owner.
            repo: Repository name.
            index: Issue number.
            labels: List of label IDs to add.

        Returns:
            List of label objects.

        Raises:
            requests.HTTPError: If the API call fails.
        """
        return self._request(
            "POST",
            f"/repos/{owner}/{repo}/issues/{index}/labels",
            json={"labels": labels},
        )
def get_repo_labels(self, owner: str, repo: str) -> list[dict]:
"""Get all labels for a repository.
Args:
owner: Repository owner.
repo: Repository name.
Returns:
List of label objects.
"""
return self._request("GET", f"/repos/{owner}/{repo}/labels")
# -------------------------------------------------------------------------
# Pull Request Operations
# -------------------------------------------------------------------------
def get_pull_request(self, owner: str, repo: str, index: int) -> dict:
"""Get a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
Returns:
Pull request object.
"""
return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}")
def get_pull_request_diff(self, owner: str, repo: str, index: int) -> str:
"""Get the diff for a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
Returns:
Diff text.
"""
url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{index}.diff"
response = requests.get(
url,
headers={
"Authorization": f"token {self.token}",
"Accept": "text/plain",
},
timeout=self.timeout,
)
response.raise_for_status()
return response.text
def list_pull_request_files(
self,
owner: str,
repo: str,
index: int,
) -> list[dict]:
"""List files changed in a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
Returns:
List of changed file objects.
"""
return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}/files")
def create_pull_request_review(
self,
owner: str,
repo: str,
index: int,
body: str,
event: str = "COMMENT",
comments: list[dict] | None = None,
) -> dict:
"""Create a review on a pull request.
Args:
owner: Repository owner.
repo: Repository name.
index: PR number.
body: Review body.
event: Review event (APPROVE, REQUEST_CHANGES, COMMENT).
comments: List of inline comments.
Returns:
Created review object.
"""
payload: dict[str, Any] = {
"body": body,
"event": event,
}
if comments:
payload["comments"] = comments
return self._request(
"POST",
f"/repos/{owner}/{repo}/pulls/{index}/reviews",
json=payload,
)
# -------------------------------------------------------------------------
# Repository Operations
# -------------------------------------------------------------------------
def get_repository(self, owner: str, repo: str) -> dict:
"""Get repository information.
Args:
owner: Repository owner.
repo: Repository name.
Returns:
Repository object.
"""
return self._request("GET", f"/repos/{owner}/{repo}")
def get_file_contents(
self,
owner: str,
repo: str,
filepath: str,
ref: str | None = None,
) -> dict:
"""Get file contents from a repository.
Args:
owner: Repository owner.
repo: Repository name.
filepath: Path to file.
ref: Git ref (branch, tag, commit).
Returns:
File content object with base64-encoded content.
"""
params = {}
if ref:
params["ref"] = ref
return self._request(
"GET",
f"/repos/{owner}/{repo}/contents/{filepath}",
params=params,
)
def get_branch(self, owner: str, repo: str, branch: str) -> dict:
"""Get branch information.
Args:
owner: Repository owner.
repo: Repository name.
branch: Branch name.
Returns:
Branch object.
"""
return self._request("GET", f"/repos/{owner}/{repo}/branches/{branch}")

View File

@@ -0,0 +1,482 @@
"""LLM Client
A unified client for interacting with multiple LLM providers.
Supports OpenAI, OpenRouter, Ollama, and extensible for more providers.
"""
import json
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
import requests
@dataclass
class ToolCall:
    """Represents a tool call from the LLM."""

    # Provider-assigned identifier, echoed back when returning tool results.
    id: str
    # Name of the tool/function the model wants invoked.
    name: str
    # Arguments for the call, decoded from the provider's JSON string.
    arguments: dict
@dataclass
class LLMResponse:
    """Response from an LLM call."""

    # Generated text; empty string when the model returned only tool calls.
    content: str
    # Model name reported by the provider (falls back to the requested model).
    model: str
    # Provider key: "openai", "openrouter", or "ollama".
    provider: str
    # Total tokens consumed, when the provider reports usage.
    tokens_used: int | None = None
    # Provider finish reason (e.g. "stop"), when available.
    finish_reason: str | None = None
    # Parsed tool calls, when the model requested any.
    tool_calls: list[ToolCall] | None = None
class BaseLLMProvider(ABC):
    """Abstract base class for LLM providers."""

    @abstractmethod
    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Make a call to the LLM.

        Args:
            prompt: The prompt to send.
            **kwargs: Provider-specific options (e.g. model, temperature).

        Returns:
            LLMResponse with the generated content.
        """
        pass

    # Tool calling is opt-in: providers that support it override this method;
    # the base implementation deliberately raises.
    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Make a call to the LLM with tool/function calling support.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            tools: List of tool definitions in OpenAI format.
            **kwargs: Provider-specific options.

        Returns:
            LLMResponse with content and/or tool_calls.

        Raises:
            NotImplementedError: If this provider does not support tools.
        """
        raise NotImplementedError("Tool calling not supported by this provider")
class OpenAIProvider(BaseLLMProvider):
    """OpenAI chat-completions API provider."""

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "gpt-4o-mini",
        temperature: float = 0,
        max_tokens: int = 4096,
    ):
        """Initialize the provider.

        Args:
            api_key: OpenAI API key; falls back to the OPENAI_API_KEY env var.
            model: Default model name.
            temperature: Default sampling temperature.
            max_tokens: Default completion token limit.
        """
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.api_url = "https://api.openai.com/v1/chat/completions"

    def _base_body(self, **kwargs) -> dict:
        """Build the common request body, honoring per-call overrides."""
        return {
            "model": kwargs.get("model", self.model),
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens),
        }

    def _post(self, request_body: dict) -> dict:
        """POST a chat-completions payload and return the parsed JSON body.

        Raises:
            ValueError: If no API key is configured.
            requests.HTTPError: On a non-2xx response.
        """
        if not self.api_key:
            raise ValueError("OpenAI API key is required")
        response = requests.post(
            self.api_url,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            json=request_body,
            timeout=120,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _parse_tool_calls(message: dict) -> list[ToolCall] | None:
        """Convert raw tool_call entries into ToolCall objects, or None."""
        raw = message.get("tool_calls")
        if not raw:
            return None
        return [
            ToolCall(
                id=tc["id"],
                name=tc["function"]["name"],
                # Arguments arrive as a JSON-encoded string.
                arguments=json.loads(tc["function"]["arguments"]),
            )
            for tc in raw
        ]

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Call the OpenAI API with a single user prompt.

        Args:
            prompt: The prompt to send.
            **kwargs: Optional overrides: model, temperature, max_tokens.

        Returns:
            LLMResponse with the generated content.
        """
        body = self._base_body(**kwargs)
        body["messages"] = [{"role": "user", "content": prompt}]
        data = self._post(body)
        choice = data["choices"][0]
        usage = data.get("usage", {})
        return LLMResponse(
            content=choice["message"]["content"],
            model=data["model"],
            provider="openai",
            tokens_used=usage.get("total_tokens"),
            finish_reason=choice.get("finish_reason"),
        )

    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Call the OpenAI API with tool/function calling support.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            tools: List of tool definitions in OpenAI format.
            **kwargs: Optional overrides: model, temperature, max_tokens,
                tool_choice.

        Returns:
            LLMResponse with content and/or tool_calls.
        """
        body = self._base_body(**kwargs)
        body["messages"] = messages
        if tools:
            body["tools"] = tools
            body["tool_choice"] = kwargs.get("tool_choice", "auto")
        data = self._post(body)
        choice = data["choices"][0]
        usage = data.get("usage", {})
        message = choice["message"]
        return LLMResponse(
            # Content can be null when the model only returned tool calls.
            content=message.get("content") or "",
            model=data["model"],
            provider="openai",
            tokens_used=usage.get("total_tokens"),
            finish_reason=choice.get("finish_reason"),
            tool_calls=self._parse_tool_calls(message),
        )
class OpenRouterProvider(BaseLLMProvider):
    """OpenRouter chat-completions API provider."""

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "anthropic/claude-3.5-sonnet",
        temperature: float = 0,
        max_tokens: int = 4096,
    ):
        """Initialize the provider.

        Args:
            api_key: OpenRouter API key; falls back to OPENROUTER_API_KEY.
            model: Default model name.
            temperature: Default sampling temperature.
            max_tokens: Default completion token limit.
        """
        self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY", "")
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.api_url = "https://openrouter.ai/api/v1/chat/completions"

    def _base_body(self, **kwargs) -> dict:
        """Build the common request body, honoring per-call overrides."""
        return {
            "model": kwargs.get("model", self.model),
            "temperature": kwargs.get("temperature", self.temperature),
            "max_tokens": kwargs.get("max_tokens", self.max_tokens),
        }

    def _post(self, request_body: dict) -> dict:
        """POST a chat-completions payload and return the parsed JSON body.

        Raises:
            ValueError: If no API key is configured.
            requests.HTTPError: On a non-2xx response.
        """
        if not self.api_key:
            raise ValueError("OpenRouter API key is required")
        response = requests.post(
            self.api_url,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            json=request_body,
            timeout=120,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _parse_tool_calls(message: dict) -> list[ToolCall] | None:
        """Convert raw tool_call entries into ToolCall objects, or None."""
        raw = message.get("tool_calls")
        if not raw:
            return None
        return [
            ToolCall(
                id=tc["id"],
                name=tc["function"]["name"],
                # Arguments arrive as a JSON-encoded string.
                arguments=json.loads(tc["function"]["arguments"]),
            )
            for tc in raw
        ]

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Call the OpenRouter API with a single user prompt.

        Args:
            prompt: The prompt to send.
            **kwargs: Optional overrides: model, temperature, max_tokens.

        Returns:
            LLMResponse with the generated content.
        """
        body = self._base_body(**kwargs)
        body["messages"] = [{"role": "user", "content": prompt}]
        data = self._post(body)
        choice = data["choices"][0]
        usage = data.get("usage", {})
        return LLMResponse(
            content=choice["message"]["content"],
            model=data.get("model", self.model),
            provider="openrouter",
            tokens_used=usage.get("total_tokens"),
            finish_reason=choice.get("finish_reason"),
        )

    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Call the OpenRouter API with tool/function calling support.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            tools: List of tool definitions in OpenAI format.
            **kwargs: Optional overrides: model, temperature, max_tokens,
                tool_choice.

        Returns:
            LLMResponse with content and/or tool_calls.
        """
        body = self._base_body(**kwargs)
        body["messages"] = messages
        if tools:
            body["tools"] = tools
            body["tool_choice"] = kwargs.get("tool_choice", "auto")
        data = self._post(body)
        choice = data["choices"][0]
        usage = data.get("usage", {})
        message = choice["message"]
        return LLMResponse(
            # Content can be null when the model only returned tool calls.
            content=message.get("content") or "",
            model=data.get("model", self.model),
            provider="openrouter",
            tokens_used=usage.get("total_tokens"),
            finish_reason=choice.get("finish_reason"),
            tool_calls=self._parse_tool_calls(message),
        )
class OllamaProvider(BaseLLMProvider):
    """Ollama (self-hosted) provider."""

    def __init__(
        self,
        host: str | None = None,
        model: str = "codellama:13b",
        temperature: float = 0,
    ):
        """Initialize the provider.

        Args:
            host: Ollama server URL; falls back to the OLLAMA_HOST env var,
                then http://localhost:11434.
            model: Default model name.
            temperature: Default sampling temperature.
        """
        self.host = host or os.environ.get("OLLAMA_HOST", "http://localhost:11434")
        self.model = model
        self.temperature = temperature

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Call the Ollama /api/generate endpoint (non-streaming).

        Args:
            prompt: The prompt to send.
            **kwargs: Optional overrides: model, temperature.

        Returns:
            LLMResponse with the generated content.
        """
        payload = {
            "model": kwargs.get("model", self.model),
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": kwargs.get("temperature", self.temperature),
            },
        }
        # Local models can be slow to load, hence the generous timeout.
        response = requests.post(
            f"{self.host}/api/generate",
            json=payload,
            timeout=300,
        )
        response.raise_for_status()
        data = response.json()
        return LLMResponse(
            content=data["response"],
            model=data.get("model", self.model),
            provider="ollama",
            tokens_used=data.get("eval_count"),
            finish_reason="stop" if data.get("done") else None,
        )
class LLMClient:
    """Unified LLM client supporting multiple providers."""

    PROVIDERS = {
        "openai": OpenAIProvider,
        "openrouter": OpenRouterProvider,
        "ollama": OllamaProvider,
    }

    def __init__(
        self,
        provider: str = "openai",
        config: dict | None = None,
    ):
        """Initialize the LLM client.

        Args:
            provider: Provider name (openai, openrouter, ollama).
            config: Provider-specific configuration.

        Raises:
            ValueError: If the provider name is not registered.
        """
        if provider not in self.PROVIDERS:
            raise ValueError(f"Unknown provider: {provider}. Available: {list(self.PROVIDERS.keys())}")
        self.provider_name = provider
        self.config = config or {}
        self._provider = self.PROVIDERS[provider](**self.config)

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Make a call to the configured LLM provider.

        Args:
            prompt: The prompt to send.
            **kwargs: Provider-specific options.

        Returns:
            LLMResponse with the generated content.
        """
        return self._provider.call(prompt, **kwargs)

    def call_with_tools(
        self,
        messages: list[dict],
        tools: list[dict] | None = None,
        **kwargs,
    ) -> LLMResponse:
        """Make a call with tool/function calling support.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            tools: List of tool definitions in OpenAI format.
            **kwargs: Provider-specific options.

        Returns:
            LLMResponse with content and/or tool_calls.
        """
        return self._provider.call_with_tools(messages, tools, **kwargs)

    def call_json(self, prompt: str, **kwargs) -> dict:
        """Make a call and parse the response as JSON.

        Args:
            prompt: The prompt to send (should request JSON output).
            **kwargs: Provider-specific options.

        Returns:
            Parsed JSON response.

        Raises:
            ValueError: If the response cannot be parsed as JSON.
        """
        response = self.call(prompt, **kwargs)
        content = response.content.strip()
        return self._extract_json(content)

    def _extract_json(self, content: str) -> dict:
        """Extract and parse JSON from an LLM response string.

        Tries, in order: direct parse, fenced ```json blocks, the outermost
        {...} slice, and comment-stripped content.

        Raises:
            ValueError: If every parsing strategy fails.
        """
        import re  # local: only needed for the fallback strategies

        content = content.strip()
        # Attempt 1: direct parse.
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            pass
        # Attempt 2: extract from a markdown code block.
        if "```" in content:
            match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
            if match:
                try:
                    return json.loads(match.group(1))
                except json.JSONDecodeError:
                    pass
        # Attempt 3: slice between the first '{' and the last '}'.
        start = content.find("{")
        end = content.rfind("}")
        if start != -1 and end > start:
            try:
                return json.loads(content[start : end + 1])
            except json.JSONDecodeError:
                pass
        # Attempt 4: strip //-style and /* */ comments (risky but helpful
        # for LLM output that embeds annotations).
        try:
            cleaned = re.sub(r"//.*", "", content)
            cleaned = re.sub(r"/\*[\s\S]*?\*/", "", cleaned)
            return json.loads(cleaned)
        except json.JSONDecodeError as e:
            # Surface a snippet of the raw content for debugging.
            snippet = content[:500] + "..." if len(content) > 500 else content
            raise ValueError(f"Failed to parse JSON response: {e}. Raw content snippet: {snippet!r}")

    @classmethod
    def from_config(cls, config: dict) -> "LLMClient":
        """Create an LLM client from a configuration dictionary.

        Args:
            config: Configuration with a 'provider' key, an optional 'model'
                entry (either a per-provider mapping like
                ``{"openai": "gpt-4o-mini"}`` or a plain model-name string),
                and optional 'temperature' / 'max_tokens'.

        Returns:
            Configured LLMClient instance.
        """
        provider = config.get("provider", "openai")
        model_cfg = config.get("model", {})

        def pick_model(default: str) -> str:
            # 'model' may be a {provider: name} mapping or a bare string;
            # the original implementation crashed on the string form.
            if isinstance(model_cfg, dict):
                return model_cfg.get(provider, default)
            return model_cfg or default

        provider_config: dict = {}
        if provider == "openai":
            provider_config = {
                "model": pick_model("gpt-4o-mini"),
                "temperature": config.get("temperature", 0),
                "max_tokens": config.get("max_tokens", 16000),
            }
        elif provider == "openrouter":
            provider_config = {
                "model": pick_model("anthropic/claude-3.5-sonnet"),
                "temperature": config.get("temperature", 0),
                "max_tokens": config.get("max_tokens", 16000),
            }
        elif provider == "ollama":
            provider_config = {
                "model": pick_model("codellama:13b"),
                "temperature": config.get("temperature", 0),
            }
        return cls(provider=provider, config=provider_config)

View File

@@ -0,0 +1,23 @@
def to_markdown(result: dict) -> str:
    """Render an AI review result dict as a Markdown report.

    Args:
        result: Review result with optional keys ``summary`` (str),
            ``issues`` (list of issue dicts), ``overall_severity`` (str)
            and ``approval`` (bool).

    Returns:
        Markdown-formatted report string.
    """
    lines = []
    lines.append("## 🤖 Enterprise AI Code Review\n")
    # Guard against a missing or null summary (LLM output is not guaranteed).
    lines.append((result.get("summary") or "") + "\n")
    issues = result.get("issues") or []
    if not issues:
        lines.append("✅ No issues found.\n")
    else:
        for issue in issues:
            # Separate severity and category with a dash; the original
            # concatenated them into e.g. "HIGHSecurity".
            severity = issue.get("severity", "UNKNOWN")
            category = issue.get("category", "General")
            lines.append(f"### ❗ {severity} — {category}")
            lines.append(f"- **File:** `{issue.get('file', '?')}`")
            if issue.get("line"):
                lines.append(f"- **Line:** `{issue['line']}`")
            lines.append(f"- **Issue:** {issue.get('description', '')}")
            lines.append(f"- **Recommendation:** {issue.get('recommendation', '')}\n")
    lines.append("---")
    # .get avoids a KeyError when the model omitted overall_severity.
    lines.append(f"**Overall severity:** `{result.get('overall_severity', 'UNKNOWN')}`")
    lines.append(
        "✅ **AI Approval**" if result.get("approval") else "❌ **Changes required**"
    )
    return "\n".join(lines)

View File

@@ -0,0 +1,96 @@
---
provider: openai # openai | openrouter | ollama
model:
openai: gpt-4.1-mini
openrouter: anthropic/claude-3.5-sonnet
ollama: codellama:13b
temperature: 0
max_tokens: 4096
# Review settings
review:
fail_on_severity: HIGH
max_diff_lines: 800
inline_comments: true
security_scan: true
# Agent settings
agents:
issue:
enabled: true
auto_label: true
auto_triage: true
duplicate_threshold: 0.85
events:
- opened
- labeled
pr:
enabled: true
inline_comments: true
security_scan: true
events:
- opened
- synchronize
codebase:
enabled: true
schedule: "0 0 * * 0" # Weekly on Sunday
chat:
enabled: true
name: "Bartender"
max_iterations: 5 # Max tool call iterations per chat
tools:
- search_codebase
- read_file
- search_web
searxng_url: "" # Set via SEARXNG_URL env var or here
# Interaction settings
# CUSTOMIZE YOUR BOT NAME HERE!
# Change mention_prefix to your preferred bot name:
# "@ai-bot" - Default
# "@bartender" - Friendly bar theme
# "@uni" - Short and simple
# "@joey" - Personal assistant name
# "@codebot" - Code-focused name
# NOTE: Also update the workflow files (.github/workflows/ or .gitea/workflows/)
# to match this prefix in the 'if: contains(...)' condition
interaction:
respond_to_mentions: true
mention_prefix: "@ai-bot" # Change this to customize your bot's name!
commands:
- explain
- suggest
- security
- summarize
# Enterprise settings
enterprise:
audit_log: true
audit_path: "/var/log/ai-review/"
metrics_enabled: true
rate_limit:
requests_per_minute: 30
max_concurrent: 4
# Label mappings for auto-labeling
labels:
priority:
high: "priority: high"
medium: "priority: medium"
low: "priority: low"
type:
bug: "type: bug"
feature: "type: feature"
question: "type: question"
docs: "type: documentation"
status:
ai_approved: "ai-approved"
ai_changes_required: "ai-changes-required"
ai_reviewed: "ai-reviewed"
# Security scanning rules
security:
enabled: true
fail_on_high: true
rules_file: "security/security_rules.yml"

View File

@@ -0,0 +1,211 @@
"""Event Dispatcher
Routes incoming webhook events to the appropriate agent handlers.
Supports concurrent execution and queue management.
"""
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Type
import yaml
from agents.base_agent import AgentContext, AgentResult, BaseAgent
@dataclass
class DispatchResult:
    """Result of dispatching an event."""

    # Event type that was dispatched (e.g. "issue", "pull_request").
    event_type: str
    # Class names of the agents that ran, in execution order.
    agents_run: list[str]
    # One AgentResult per agent, aligned with agents_run.
    results: list[AgentResult]
    # Human-readable error strings for agents that failed or errored.
    errors: list[str]
class Dispatcher:
    """Event dispatcher that routes webhook events to registered agents."""

    def __init__(
        self,
        config: dict | None = None,
        max_workers: int = 4,
    ):
        """Initialize the dispatcher.

        Args:
            config: Configuration dictionary; loaded from config.yml when omitted.
            max_workers: Maximum concurrent agent executions.
        """
        self.config = config or self._load_config()
        self.max_workers = max_workers
        self.logger = logging.getLogger(__name__)
        self._agents: list[BaseAgent] = []
        self._executor = ThreadPoolExecutor(max_workers=max_workers)

    @staticmethod
    def _load_config() -> dict:
        """Load configuration from the config.yml next to this module.

        Returns:
            Parsed configuration dict; {} when the file is missing or empty.
        """
        config_path = os.path.join(os.path.dirname(__file__), "config.yml")
        if os.path.exists(config_path):
            with open(config_path) as f:
                # yaml.safe_load returns None for an empty file; never
                # propagate None as the configuration.
                return yaml.safe_load(f) or {}
        return {}

    def register_agent(self, agent: BaseAgent):
        """Register an agent instance with the dispatcher.

        Args:
            agent: Agent instance to register.
        """
        self._agents.append(agent)
        self.logger.info(f"Registered agent: {agent.__class__.__name__}")

    def register_agent_class(self, agent_class: Type[BaseAgent], **kwargs):
        """Instantiate and register an agent class.

        Args:
            agent_class: Agent class to instantiate and register.
            **kwargs: Extra arguments passed to the agent constructor.
        """
        agent = agent_class(config=self.config, **kwargs)
        self.register_agent(agent)

    def dispatch(
        self,
        event_type: str,
        event_data: dict,
        owner: str,
        repo: str,
    ) -> DispatchResult:
        """Dispatch an event to every registered agent that can handle it.

        Agents run sequentially; an exception in one agent is recorded and
        does not prevent the remaining agents from running.

        Args:
            event_type: Type of event (issue, pull_request, issue_comment, etc).
            event_data: Event payload data.
            owner: Repository owner.
            repo: Repository name.

        Returns:
            Dispatch result with all agent results and collected errors.
        """
        self.logger.info(f"Dispatching event: {event_type} for {owner}/{repo}")
        # Select agents whose can_handle() accepts this event.
        handlers = [
            agent for agent in self._agents if agent.can_handle(event_type, event_data)
        ]
        if not handlers:
            self.logger.info(f"No agents registered for event: {event_type}")
            return DispatchResult(
                event_type=event_type,
                agents_run=[],
                results=[],
                errors=[],
            )
        self.logger.info(
            f"Found {len(handlers)} agent(s) for event: {[a.__class__.__name__ for a in handlers]}"
        )
        # One shared context for all handlers of this event.
        context = AgentContext(
            owner=owner,
            repo=repo,
            event_type=event_type,
            event_data=event_data,
            config=self.config,
        )
        results = []
        errors = []
        agents_run = []
        for agent in handlers:
            agent_name = agent.__class__.__name__
            agents_run.append(agent_name)
            try:
                result = agent.run(context)
                results.append(result)
                if not result.success:
                    errors.append(f"{agent_name}: {result.error or result.message}")
            except Exception as e:
                # Keep going: one failing agent must not block the others.
                self.logger.exception(f"Agent {agent_name} failed: {e}")
                errors.append(f"{agent_name}: {str(e)}")
                results.append(
                    AgentResult(
                        success=False,
                        message="Unexpected error",
                        error=str(e),
                    )
                )
        return DispatchResult(
            event_type=event_type,
            agents_run=agents_run,
            results=results,
            errors=errors,
        )

    def dispatch_async(
        self,
        event_type: str,
        event_data: dict,
        owner: str,
        repo: str,
    ):
        """Dispatch an event on the internal thread pool.

        Args:
            event_type: Type of event.
            event_data: Event payload data.
            owner: Repository owner.
            repo: Repository name.

        Returns:
            concurrent.futures.Future resolving to a DispatchResult.
        """
        return self._executor.submit(
            self.dispatch, event_type, event_data, owner, repo
        )

    def shutdown(self):
        """Shut down the executor, waiting for in-flight dispatches."""
        self._executor.shutdown(wait=True)
# Singleton dispatcher for easy access
_dispatcher: Dispatcher | None = None


def get_dispatcher() -> Dispatcher:
    """Return the process-wide Dispatcher, creating it lazily on first use."""
    global _dispatcher
    if _dispatcher is None:
        _dispatcher = Dispatcher()
    return _dispatcher
def dispatch_event(
    event_type: str,
    event_data: dict,
    owner: str,
    repo: str,
) -> DispatchResult:
    """Dispatch an event through the global singleton dispatcher.

    Args:
        event_type: Type of event.
        event_data: Event payload data.
        owner: Repository owner.
        repo: Repository name.

    Returns:
        Dispatch result.
    """
    dispatcher = get_dispatcher()
    return dispatcher.dispatch(event_type, event_data, owner, repo)

View File

@@ -0,0 +1,10 @@
"""Enterprise Features Package
This package contains enterprise-grade features like
audit logging and metrics collection.
"""
from enterprise.audit_logger import AuditLogger
from enterprise.metrics import MetricsCollector
__all__ = ["AuditLogger", "MetricsCollector"]

View File

@@ -0,0 +1,303 @@
"""Audit Logger
Enterprise audit logging for tracking all AI agent actions,
decisions, and interactions for compliance and debugging.
"""
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
class AuditLogger:
"""Audit logger for enterprise compliance."""
def __init__(
self,
log_path: str | None = None,
enabled: bool = True,
):
"""Initialize the audit logger.
Args:
log_path: Directory to write audit logs.
enabled: Whether audit logging is enabled.
"""
self.enabled = enabled
self.log_path = Path(
log_path or os.environ.get("AI_AUDIT_PATH", "/var/log/ai-review/")
)
self.logger = logging.getLogger("audit")
if self.enabled:
self._ensure_log_dir()
def _ensure_log_dir(self):
"""Ensure the log directory exists."""
try:
self.log_path.mkdir(parents=True, exist_ok=True)
except Exception as e:
self.logger.warning(f"Could not create audit log directory: {e}")
self.enabled = False
def _get_log_file(self) -> Path:
"""Get the current log file path (daily rotation)."""
date_str = datetime.utcnow().strftime("%Y-%m-%d")
return self.log_path / f"audit-{date_str}.jsonl"
def log(
self,
action: str,
agent: str,
owner: str,
repo: str,
details: dict[str, Any] | None = None,
success: bool = True,
error: str | None = None,
):
"""Log an audit event.
Args:
action: Action performed (e.g., "review_pr", "triage_issue").
agent: Agent name that performed the action.
owner: Repository owner.
repo: Repository name.
details: Additional details about the action.
success: Whether the action succeeded.
error: Error message if failed.
"""
if not self.enabled:
return
event = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"action": action,
"agent": agent,
"repository": f"{owner}/{repo}",
"success": success,
"details": details or {},
}
if error:
event["error"] = error
try:
log_file = self._get_log_file()
with open(log_file, "a") as f:
f.write(json.dumps(event) + "\n")
except Exception as e:
self.logger.error(f"Failed to write audit log: {e}")
def log_llm_call(
self,
agent: str,
owner: str,
repo: str,
provider: str,
model: str,
tokens_used: int | None = None,
duration_ms: int | None = None,
):
"""Log an LLM API call.
Args:
agent: Agent making the call.
owner: Repository owner.
repo: Repository name.
provider: LLM provider used.
model: Model name.
tokens_used: Number of tokens consumed.
duration_ms: Call duration in milliseconds.
"""
self.log(
action="llm_call",
agent=agent,
owner=owner,
repo=repo,
details={
"provider": provider,
"model": model,
"tokens_used": tokens_used,
"duration_ms": duration_ms,
},
)
def log_comment_posted(
self,
agent: str,
owner: str,
repo: str,
issue_number: int,
comment_type: str,
):
"""Log a comment being posted.
Args:
agent: Agent posting the comment.
owner: Repository owner.
repo: Repository name.
issue_number: Issue or PR number.
comment_type: Type of comment (triage, review, response).
"""
self.log(
action="comment_posted",
agent=agent,
owner=owner,
repo=repo,
details={
"issue_number": issue_number,
"comment_type": comment_type,
},
)
def log_labels_applied(
self,
agent: str,
owner: str,
repo: str,
issue_number: int,
labels: list[str],
):
"""Log labels being applied.
Args:
agent: Agent applying labels.
owner: Repository owner.
repo: Repository name.
issue_number: Issue or PR number.
labels: Labels applied.
"""
self.log(
action="labels_applied",
agent=agent,
owner=owner,
repo=repo,
details={
"issue_number": issue_number,
"labels": labels,
},
)
def get_logs(
self,
start_date: str | None = None,
end_date: str | None = None,
action: str | None = None,
repository: str | None = None,
) -> list[dict]:
"""Retrieve audit logs with optional filtering.
Args:
start_date: Start date (YYYY-MM-DD).
end_date: End date (YYYY-MM-DD).
action: Filter by action type.
repository: Filter by repository (owner/repo).
Returns:
List of audit log entries.
"""
if not self.enabled:
return []
logs = []
log_files = sorted(self.log_path.glob("audit-*.jsonl"))
for log_file in log_files:
# Date filter on filename
file_date = log_file.stem.replace("audit-", "")
if start_date and file_date < start_date:
continue
if end_date and file_date > end_date:
continue
try:
with open(log_file) as f:
for line in f:
try:
entry = json.loads(line.strip())
# Apply filters
if action and entry.get("action") != action:
continue
if repository and entry.get("repository") != repository:
continue
logs.append(entry)
except json.JSONDecodeError:
continue
except Exception:
continue
return logs
def generate_report(
self,
start_date: str | None = None,
end_date: str | None = None,
) -> dict:
"""Generate a summary report of audit activity.
Args:
start_date: Report start date.
end_date: Report end date.
Returns:
Summary report dictionary.
"""
logs = self.get_logs(start_date=start_date, end_date=end_date)
report = {
"period": {
"start": start_date or "all",
"end": end_date or "all",
},
"total_events": len(logs),
"by_action": {},
"by_repository": {},
"by_agent": {},
"success_rate": 0.0,
"llm_usage": {
"total_calls": 0,
"total_tokens": 0,
},
}
success_count = 0
for log in logs:
action = log.get("action", "unknown")
repo = log.get("repository", "unknown")
agent = log.get("agent", "unknown")
report["by_action"][action] = report["by_action"].get(action, 0) + 1
report["by_repository"][repo] = report["by_repository"].get(repo, 0) + 1
report["by_agent"][agent] = report["by_agent"].get(agent, 0) + 1
if log.get("success"):
success_count += 1
if action == "llm_call":
report["llm_usage"]["total_calls"] += 1
tokens = log.get("details", {}).get("tokens_used")
if tokens:
report["llm_usage"]["total_tokens"] += tokens
if logs:
report["success_rate"] = success_count / len(logs)
return report
# Global instance
_audit_logger: AuditLogger | None = None


def get_audit_logger() -> AuditLogger:
    """Return the process-wide AuditLogger, creating it lazily on first use."""
    global _audit_logger
    if _audit_logger is None:
        _audit_logger = AuditLogger()
    return _audit_logger

View File

@@ -0,0 +1,371 @@
"""Metrics Collector
Observability metrics for AI agent performance monitoring.
Tracks request counts, latencies, errors, and LLM usage.
"""
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from threading import Lock
@dataclass
class MetricPoint:
    """A single metric data point."""

    # When the observation was taken.
    timestamp: datetime
    # Observed value.
    value: float
    # Optional metric labels (name -> value).
    labels: dict = field(default_factory=dict)
class Counter:
    """Thread-safe, monotonically increasing counter metric."""

    def __init__(self, name: str, description: str = ""):
        """Create a counter starting at zero.

        Args:
            name: Metric name.
            description: Human-readable description.
        """
        self.name = name
        self.description = description
        self._lock = Lock()
        self._total = 0.0

    def inc(self, value: float = 1.0):
        """Add ``value`` (default 1.0) to the counter."""
        with self._lock:
            self._total += value

    @property
    def value(self) -> float:
        """Current accumulated total."""
        with self._lock:
            return self._total
class Gauge:
    """Thread-safe gauge metric whose value may move up or down."""

    def __init__(self, name: str, description: str = ""):
        """Create a gauge starting at zero.

        Args:
            name: Metric name.
            description: Human-readable description.
        """
        self.name = name
        self.description = description
        self._lock = Lock()
        self._current = 0.0

    def set(self, value: float):
        """Replace the gauge value."""
        with self._lock:
            self._current = value

    def inc(self, value: float = 1.0):
        """Raise the gauge by ``value`` (default 1.0)."""
        with self._lock:
            self._current += value

    def dec(self, value: float = 1.0):
        """Lower the gauge by ``value`` (default 1.0)."""
        with self._lock:
            self._current -= value

    @property
    def value(self) -> float:
        """Current gauge value."""
        with self._lock:
            return self._current
class Histogram:
"""Simple histogram for tracking distributions."""
def __init__(
self,
name: str,
description: str = "",
buckets: list[float] | None = None,
):
self.name = name
self.description = description
self.buckets = buckets or [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
self._values: list[float] = []
self._lock = Lock()
def observe(self, value: float):
"""Record an observation."""
with self._lock:
self._values.append(value)
# Keep only last 1000 observations
if len(self._values) > 1000:
self._values = self._values[-1000:]
def get_percentile(self, percentile: float) -> float:
"""Get a percentile value."""
with self._lock:
if not self._values:
return 0.0
sorted_values = sorted(self._values)
idx = int(len(sorted_values) * percentile / 100)
return sorted_values[min(idx, len(sorted_values) - 1)]
@property
def count(self) -> int:
"""Get observation count."""
with self._lock:
return len(self._values)
@property
def sum(self) -> float:
"""Get sum of observations."""
with self._lock:
return sum(self._values)
class MetricsCollector:
    """Central metrics collector for AI agents.

    Aggregates counters, gauges and histograms for review requests, LLM
    calls and agent actions, plus a small per-agent breakdown.  The
    Counter/Gauge/Histogram primitives handle their own locking;
    NOTE(review): the per-agent dict here is updated without a lock —
    assumed safe for the current recording path; confirm before
    recording from multiple threads.
    """

    def __init__(self, enabled: bool = True):
        """Initialize metrics collector.

        Args:
            enabled: Whether metrics collection is enabled.  When False,
                every record_* method is a no-op, but get_summary() and
                export_prometheus() still work (reporting zeros).
        """
        self.enabled = enabled
        self._start_time = time.time()  # basis for uptime_seconds in get_summary()

        # Counters
        self.requests_total = Counter(
            "ai_review_requests_total",
            "Total number of review requests processed",
        )
        self.requests_success = Counter(
            "ai_review_requests_success",
            "Number of successful review requests",
        )
        self.requests_failed = Counter(
            "ai_review_requests_failed",
            "Number of failed review requests",
        )
        self.llm_calls_total = Counter(
            "ai_review_llm_calls_total",
            "Total number of LLM API calls",
        )
        self.llm_tokens_total = Counter(
            "ai_review_llm_tokens_total",
            "Total LLM tokens consumed",
        )
        self.comments_posted = Counter(
            "ai_review_comments_posted_total",
            "Total comments posted",
        )
        self.labels_applied = Counter(
            "ai_review_labels_applied_total",
            "Total labels applied",
        )
        self.security_findings = Counter(
            "ai_review_security_findings_total",
            "Total security findings detected",
        )

        # Gauges
        self.active_requests = Gauge(
            "ai_review_active_requests",
            "Currently active review requests",
        )

        # Histograms
        self.request_duration = Histogram(
            "ai_review_request_duration_seconds",
            "Request processing duration",
        )
        self.llm_duration = Histogram(
            "ai_review_llm_duration_seconds",
            "LLM API call duration",
        )

        # Per-agent metrics: agent name -> {"total", "success", "failed"}
        self._agent_metrics: dict[str, dict] = {}

    def record_request_start(self, agent: str):
        """Record the start of a request.

        Args:
            agent: Name of the agent handling the request.
        """
        if not self.enabled:
            return
        self.requests_total.inc()
        self.active_requests.inc()
        # Lazily create the per-agent bucket on first sight of an agent.
        if agent not in self._agent_metrics:
            self._agent_metrics[agent] = {
                "total": 0,
                "success": 0,
                "failed": 0,
            }
        self._agent_metrics[agent]["total"] += 1

    def record_request_end(
        self,
        agent: str,
        success: bool,
        duration_seconds: float,
    ):
        """Record the end of a request.

        Args:
            agent: Name of the agent.
            success: Whether the request succeeded.
            duration_seconds: Request duration.
        """
        if not self.enabled:
            return
        self.active_requests.dec()
        self.request_duration.observe(duration_seconds)
        if success:
            self.requests_success.inc()
            if agent in self._agent_metrics:
                self._agent_metrics[agent]["success"] += 1
        else:
            self.requests_failed.inc()
            if agent in self._agent_metrics:
                self._agent_metrics[agent]["failed"] += 1

    def record_llm_call(
        self,
        provider: str,
        model: str,
        tokens: int | None,
        duration_seconds: float,
    ):
        """Record an LLM API call.

        Args:
            provider: LLM provider name (currently unused; reserved for
                a future per-provider breakdown).
            model: Model used (currently unused, as above).
            tokens: Tokens consumed; None or 0 adds nothing.
            duration_seconds: Call duration.
        """
        if not self.enabled:
            return
        self.llm_calls_total.inc()
        self.llm_duration.observe(duration_seconds)
        if tokens:
            self.llm_tokens_total.inc(tokens)

    def record_comment_posted(self):
        """Record a comment being posted."""
        if self.enabled:
            self.comments_posted.inc()

    def record_labels_applied(self, count: int = 1):
        """Record labels being applied.

        Args:
            count: Number of labels applied in this batch.
        """
        if self.enabled:
            self.labels_applied.inc(count)

    def record_security_finding(self, severity: str):
        """Record a security finding.

        Args:
            severity: Finding severity.  Only the total is tracked for
                now; the parameter is kept for a future per-severity
                breakdown.
        """
        if self.enabled:
            self.security_findings.inc()

    def get_summary(self) -> dict:
        """Get a summary of all metrics.

        Returns:
            Dictionary with metric summaries: counts, success rate,
            average/percentile latencies in milliseconds, and the
            per-agent breakdown.
        """
        uptime = time.time() - self._start_time
        return {
            "uptime_seconds": uptime,
            "requests": {
                "total": self.requests_total.value,
                "success": self.requests_success.value,
                "failed": self.requests_failed.value,
                "active": self.active_requests.value,
                # max(..., 1) guards against division by zero before any traffic.
                "success_rate": (
                    self.requests_success.value / max(self.requests_total.value, 1)
                ),
            },
            "llm": {
                "calls": self.llm_calls_total.value,
                "tokens": self.llm_tokens_total.value,
                "avg_duration_ms": (
                    (self.llm_duration.sum / max(self.llm_duration.count, 1)) * 1000
                ),
                "p50_duration_ms": self.llm_duration.get_percentile(50) * 1000,
                "p95_duration_ms": self.llm_duration.get_percentile(95) * 1000,
            },
            "actions": {
                "comments_posted": self.comments_posted.value,
                "labels_applied": self.labels_applied.value,
                "security_findings": self.security_findings.value,
            },
            "latency": {
                "avg_ms": (
                    (self.request_duration.sum / max(self.request_duration.count, 1))
                    * 1000
                ),
                "p50_ms": self.request_duration.get_percentile(50) * 1000,
                "p95_ms": self.request_duration.get_percentile(95) * 1000,
                "p99_ms": self.request_duration.get_percentile(99) * 1000,
            },
            "by_agent": self._agent_metrics,
        }

    def export_prometheus(self) -> str:
        """Export metrics in the Prometheus text exposition format.

        Returns:
            Prometheus-formatted metrics string (HELP/TYPE metadata
            followed by one sample line per metric).
        """
        lines: list[str] = []

        def add_metric(
            name: str,
            value: float,
            help_text: str = "",
            metric_type: str = "counter",
        ):
            # Emit HELP/TYPE metadata followed by the sample line, per
            # the Prometheus exposition format.
            if help_text:
                lines.append(f"# HELP {name} {help_text}")
            lines.append(f"# TYPE {name} {metric_type}")
            lines.append(f"{name} {value}")

        # NOTE: the success/failed counters are exported with a "_total"
        # suffix (Prometheus naming convention) even though the internal
        # Counter names lack it; kept as-is for compatibility.
        add_metric(
            "ai_review_requests_total",
            self.requests_total.value,
            "Total review requests",
        )
        add_metric(
            "ai_review_requests_success_total",
            self.requests_success.value,
            "Successful requests",
        )
        add_metric(
            "ai_review_requests_failed_total",
            self.requests_failed.value,
            "Failed requests",
        )
        add_metric(
            "ai_review_llm_calls_total",
            self.llm_calls_total.value,
            "Total LLM calls",
        )
        add_metric(
            "ai_review_llm_tokens_total",
            self.llm_tokens_total.value,
            "Total LLM tokens",
        )
        add_metric(
            "ai_review_comments_posted_total",
            self.comments_posted.value,
            "Comments posted",
        )
        # BUG FIX: these metrics were collected but never exported.
        add_metric(
            "ai_review_labels_applied_total",
            self.labels_applied.value,
            "Labels applied",
        )
        add_metric(
            "ai_review_security_findings_total",
            self.security_findings.value,
            "Security findings detected",
        )
        add_metric(
            "ai_review_active_requests",
            self.active_requests.value,
            "Currently active review requests",
            metric_type="gauge",
        )
        return "\n".join(lines)
# Global instance (module-level singleton)
_metrics: MetricsCollector | None = None


def get_metrics() -> MetricsCollector:
    """Return the process-wide MetricsCollector, creating it lazily."""
    global _metrics
    collector = _metrics
    if collector is None:
        collector = MetricsCollector()
        _metrics = collector
    return collector

350
tools/ai-review/main.py Normal file
View File

@@ -0,0 +1,350 @@
#!/usr/bin/env python3
"""AI Code Review Agent - Main Entry Point
This is the main CLI for running AI code review agents.
Can be invoked directly or through CI/CD workflows.
"""
import argparse
import json
import logging
import os
import sys
import yaml
# Add the package to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from agents.issue_agent import IssueAgent
from agents.pr_agent import PRAgent
from agents.codebase_agent import CodebaseAgent
from agents.chat_agent import ChatAgent
from dispatcher import Dispatcher, get_dispatcher
def setup_logging(verbose: bool = False):
"""Configure logging."""
level = logging.DEBUG if verbose else logging.INFO
logging.basicConfig(
level=level,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
def load_config(config_path: str | None = None) -> dict:
"""Load configuration from file."""
if config_path and os.path.exists(config_path):
with open(config_path) as f:
return yaml.safe_load(f)
default_path = os.path.join(os.path.dirname(__file__), "config.yml")
if os.path.exists(default_path):
with open(default_path) as f:
return yaml.safe_load(f)
return {}
def run_pr_review(args, config: dict):
    """Run PR review agent."""
    from agents.base_agent import AgentContext

    owner, repo = args.repo.split("/")
    pr_number = args.pr_number

    # Build context from environment or arguments
    ctx = AgentContext(
        owner=owner,
        repo=repo,
        event_type="pull_request",
        event_data={
            "action": "opened",
            "pull_request": {
                "number": pr_number,
                "title": args.title or f"PR #{pr_number}",
            },
        },
        config=config,
    )

    outcome = PRAgent(config=config).run(ctx)
    if not outcome.success:
        print(f"❌ PR Review Failed: {outcome.message}")
        if outcome.error:
            print(f"   Error: {outcome.error}")
        sys.exit(1)
    print(f"✅ PR Review Complete: {outcome.message}")
    print(f"   Actions: {', '.join(outcome.actions_taken)}")
def run_issue_triage(args, config: dict):
    """Run issue triage agent."""
    from agents.base_agent import AgentContext
    from clients.gitea_client import GiteaClient

    owner, repo = args.repo.split("/")
    issue_number = args.issue_number

    # Fetch the actual issue data from Gitea API to get the complete body
    try:
        issue_data = GiteaClient().get_issue(owner, repo, issue_number)
    except Exception as e:
        print(f"❌ Failed to fetch issue: {e}")
        sys.exit(1)

    ctx = AgentContext(
        owner=owner,
        repo=repo,
        event_type="issues",
        event_data={
            "action": "opened",
            "issue": issue_data,
        },
        config=config,
    )

    outcome = IssueAgent(config=config).run(ctx)
    if not outcome.success:
        print(f"❌ Issue Triage Failed: {outcome.message}")
        if outcome.error:
            print(f"   Error: {outcome.error}")
        sys.exit(1)
    print(f"✅ Issue Triage Complete: {outcome.message}")
    print(f"   Actions: {', '.join(outcome.actions_taken)}")
def run_issue_comment(args, config: dict):
    """Handle @ai-bot command in issue comment.

    Fetches the issue from the Gitea API, then dispatches an
    ``issue_comment`` event with the comment body to the IssueAgent.
    Exits non-zero on failure.
    """
    from agents.base_agent import AgentContext
    # CONSISTENCY: import hoisted to the top of the function to match
    # run_issue_triage (it previously sat mid-function).
    from clients.gitea_client import GiteaClient

    agent = IssueAgent(config=config)
    owner, repo = args.repo.split("/")
    issue_number = args.issue_number

    # Fetch the actual issue data from Gitea API
    gitea = GiteaClient()
    try:
        issue_data = gitea.get_issue(owner, repo, issue_number)
    except Exception as e:
        print(f"❌ Failed to fetch issue: {e}")
        sys.exit(1)

    context = AgentContext(
        owner=owner,
        repo=repo,
        event_type="issue_comment",
        event_data={
            "action": "created",
            "issue": issue_data,
            "comment": {
                "body": args.comment_body,
            },
        },
        config=config,
    )
    result = agent.run(context)

    if result.success:
        print(f"✅ Comment Response Complete: {result.message}")
        print(f"   Actions: {', '.join(result.actions_taken)}")
    else:
        print(f"❌ Comment Response Failed: {result.message}")
        if result.error:
            print(f"   Error: {result.error}")
        sys.exit(1)
def run_codebase_analysis(args, config: dict):
    """Run codebase analysis agent."""
    from agents.base_agent import AgentContext

    owner, repo = args.repo.split("/")
    ctx = AgentContext(
        owner=owner,
        repo=repo,
        event_type="workflow_dispatch",
        event_data={},
        config=config,
    )

    outcome = CodebaseAgent(config=config).run(ctx)
    if not outcome.success:
        print(f"❌ Codebase Analysis Failed: {outcome.message}")
        if outcome.error:
            print(f"   Error: {outcome.error}")
        sys.exit(1)
    print(f"✅ Codebase Analysis Complete: {outcome.message}")
    print(f"   Health Score: {outcome.data.get('health_score', 'N/A')}")
    print(f"   Actions: {', '.join(outcome.actions_taken)}")
def run_chat(args, config: dict):
    """Run interactive chat with the Bartender bot.

    Optionally enriches the chat context with an issue fetched from the
    Gitea API when --issue was given; a fetch failure there is
    non-fatal.  Exits non-zero if the chat agent itself fails.
    """
    from agents.base_agent import AgentContext
    from clients.gitea_client import GiteaClient

    agent = ChatAgent(config=config)
    owner, repo = args.repo.split("/")

    # Build context
    event_data = {"message": args.message}

    # If issue number provided, add issue context
    if args.issue_number:
        gitea = GiteaClient()
        try:
            issue_data = gitea.get_issue(owner, repo, args.issue_number)
            event_data["issue"] = issue_data
            event_data["issue_number"] = args.issue_number
        except Exception as e:
            # Best-effort: chat still proceeds without issue context.
            print(f"Warning: Could not fetch issue #{args.issue_number}: {e}")

    context = AgentContext(
        owner=owner,
        repo=repo,
        event_type="chat",
        event_data=event_data,
        config=config,
    )
    result = agent.run(context)

    if result.success:
        # FIX: plain string instead of a placeholder-free f-string.
        print("\n🍸 Bartender says:\n")
        print(result.data.get("response", ""))
        print()
        if result.data.get("tools_used"):
            print(f"  [Tools used: {', '.join(result.data['tools_used'])}]")
    else:
        print(f"❌ Chat Failed: {result.message}")
        if result.error:
            print(f"   Error: {result.error}")
        sys.exit(1)
def run_webhook_dispatch(args, config: dict):
    """Dispatch a webhook event.

    Registers all agents on the shared dispatcher, parses the JSON
    event payload, dispatches it, prints one status line per agent and
    exits non-zero if any agent reported an error.
    """
    dispatcher = get_dispatcher()

    # Register all agents
    dispatcher.register_agent(IssueAgent(config=config))
    dispatcher.register_agent(PRAgent(config=config))
    dispatcher.register_agent(CodebaseAgent(config=config))
    dispatcher.register_agent(ChatAgent(config=config))

    # Parse event data
    event_data = json.loads(args.event_data)
    owner, repo = args.repo.split("/")

    result = dispatcher.dispatch(
        event_type=args.event_type,
        event_data=event_data,
        owner=owner,
        repo=repo,
    )

    print(f"Dispatched event: {result.event_type}")
    print(f"Agents run: {result.agents_run}")
    # Pair agent names with their results instead of indexing (avoids an
    # IndexError if the two lists ever diverge in length).
    for agent_name, agent_result in zip(result.agents_run, result.results):
        # BUG FIX: the status markers were empty strings (stripped
        # emoji); restore ✅/❌ for consistency with the other commands.
        status = "✅" if agent_result.success else "❌"
        print(f"  {status} {agent_name}: {agent_result.message}")
    if result.errors:
        sys.exit(1)
def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="AI Code Review Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("-c", "--config", help="Path to config file")

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # PR review command
    pr_parser = subparsers.add_parser("pr", help="Review a pull request")
    pr_parser.add_argument("repo", help="Repository (owner/repo)")
    pr_parser.add_argument("pr_number", type=int, help="PR number")
    pr_parser.add_argument("--title", help="PR title (optional)")

    # Issue triage command
    issue_parser = subparsers.add_parser("issue", help="Triage an issue")
    issue_parser.add_argument("repo", help="Repository (owner/repo)")
    issue_parser.add_argument("issue_number", type=int, help="Issue number")
    issue_parser.add_argument("--title", help="Issue title")
    issue_parser.add_argument("--body", help="Issue body")

    # Issue comment command (for @ai-bot mentions)
    comment_parser = subparsers.add_parser("comment", help="Respond to @ai-bot command")
    comment_parser.add_argument("repo", help="Repository (owner/repo)")
    comment_parser.add_argument("issue_number", type=int, help="Issue number")
    comment_parser.add_argument("comment_body", help="Comment body with @ai-bot command")

    # Codebase analysis command
    codebase_parser = subparsers.add_parser("codebase", help="Analyze codebase")
    codebase_parser.add_argument("repo", help="Repository (owner/repo)")

    # Chat command (Bartender)
    chat_parser = subparsers.add_parser("chat", help="Chat with Bartender bot")
    chat_parser.add_argument("repo", help="Repository (owner/repo)")
    chat_parser.add_argument("message", help="Message to send to Bartender")
    chat_parser.add_argument(
        "--issue", dest="issue_number", type=int,
        help="Optional issue number to post response to"
    )

    # Webhook dispatch command
    webhook_parser = subparsers.add_parser("dispatch", help="Dispatch webhook event")
    webhook_parser.add_argument("repo", help="Repository (owner/repo)")
    webhook_parser.add_argument("event_type", help="Event type")
    webhook_parser.add_argument("event_data", help="Event data (JSON)")

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        sys.exit(1)

    setup_logging(args.verbose)
    config = load_config(args.config)

    # Dispatch table instead of an if/elif chain; the subparser setup
    # above guarantees args.command is one of these keys.
    handlers = {
        "pr": run_pr_review,
        "issue": run_issue_triage,
        "comment": run_issue_comment,
        "codebase": run_codebase_analysis,
        "chat": run_chat,
        "dispatch": run_webhook_dispatch,
    }
    handlers[args.command](args, config)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,64 @@
You are an experienced senior software engineer with deep expertise in:
- Secure coding and security analysis
- System design and architecture
- Performance optimization
- Maintainable, readable code
- Test coverage and documentation
- CI/CD pipeline best practices
You are reviewing the following **pull request diff**. Your goal is to provide a **comprehensive, actionable, and clear review** as a structured JSON response.
---
## Requirements
Review the diff and identify issues in these categories:
- **Security**: Vulnerabilities, hardcoded secrets, injection risks
- **Correctness**: Logic errors, edge cases, bugs
- **Performance**: Inefficiencies, N+1 queries, memory issues
- **Maintainability**: Code complexity, duplication, unclear logic
- **Readability**: Naming, formatting, documentation
- **Testing**: Missing tests, untested paths
- **Architecture**: Design issues, coupling, separation of concerns
---
## Output Format
Return a JSON object with this structure:
```json
{{
"summary": "Brief overall assessment of the PR",
"overall_severity": "HIGH" | "MEDIUM" | "LOW",
"approval": true | false,
"issues": [
{{
"file": "path/to/file.py",
"line": 42,
"severity": "HIGH" | "MEDIUM" | "LOW",
"category": "Security" | "Correctness" | "Performance" | "Maintainability" | "Readability" | "Testing" | "Architecture",
"description": "Clear description of the issue",
"recommendation": "Specific fix or improvement",
"code_snippet": "relevant code if applicable"
}}
]
}}
```
---
## Rules
1. **Be specific**: Include file paths and line numbers when possible
2. **Be actionable**: Every issue must have a clear recommendation
3. **Prioritize**: HIGH severity for security/data-loss issues, MEDIUM for bugs, LOW for style
4. **Be honest**: If uncertain, note it in the description
5. **Stay focused**: Only report real issues, not style preferences
6. Set `approval: false` if any HIGH severity issues exist
7. Output ONLY valid JSON, no additional text
---
## Diff to Review

View File

@@ -0,0 +1,63 @@
You are a helpful AI assistant responding to a GitHub/Gitea issue. Your goal is to provide a helpful, professional response that assists the issue author.
## Context
**Issue Type:** {issue_type}
**Priority:** {priority}
**Title:** {title}
**Body:**
{body}
## Triage Analysis
{triage_analysis}
## Your Task
Generate a helpful comment response based on the issue type:
### For Bug Reports:
1. Acknowledge the issue
2. If missing info, politely request specific details needed
3. Suggest any immediate workarounds if obvious
4. Indicate next steps (investigation, need reproduction, etc.)
### For Feature Requests:
1. Thank the user for the suggestion
2. Summarize understanding of the request
3. Ask clarifying questions if needed
4. Note any related existing features
### For Questions:
1. Directly answer the question if possible
2. Link to relevant documentation
3. Provide code examples if helpful
4. Suggest alternatives if applicable
### For Documentation Issues:
1. Acknowledge the gap/issue
2. Clarify the correct information if known
3. Note what documentation updates are needed
## Response Guidelines
1. Be concise but thorough
2. Use a friendly, professional tone
3. Format with Markdown appropriately
4. Include code blocks where relevant
5. DO NOT promise timelines or fixes
6. DO NOT make up information - say "I'm not certain" if unsure
7. Always end with an offer to help further
## Output Format
Return a JSON object:
```json
{{
"comment": "Your markdown-formatted response here",
"needs_human_review": true/false,
"suggested_assignee": null or "username",
"follow_up_questions": ["question1", "question2"]
}}
```
Generate your response:

View File

@@ -0,0 +1,69 @@
You are an expert issue triage specialist. Analyze the following GitHub/Gitea issue and provide a structured classification.
## Your Task
Analyze the issue and return a JSON object with the following structure:
```json
{{
"type": "bug" | "feature" | "question" | "documentation" | "support" | "enhancement",
"priority": "high" | "medium" | "low",
"confidence": 0.0-1.0,
"summary": "Brief one-line summary of the issue",
"suggested_labels": ["label1", "label2"],
"is_duplicate": false,
"duplicate_of": null,
"needs_more_info": false,
"missing_info": [],
"components": ["component1", "component2"],
"reasoning": "Brief explanation of your classification"
}}
```
## Classification Guidelines
### Type Classification
- **bug**: Something is broken, not working as expected, error messages, crashes
- **feature**: Request for new functionality that doesn't exist
- **enhancement**: Improvement to existing functionality
- **question**: User asking how to do something, seeking clarification
- **documentation**: Issues with docs, missing docs, unclear docs
- **support**: General help request, troubleshooting
### Priority Classification
- **high**: Security issues, data loss, complete feature broken, blocking issues
- **medium**: Significant functionality impacted, workaround exists
- **low**: Minor issues, cosmetic, nice-to-have improvements
### Missing Information Indicators
Look for missing:
- Steps to reproduce (for bugs)
- Expected vs actual behavior
- Environment details (OS, version, etc.)
- Error messages or logs
- Screenshots (for UI issues)
## Important Rules
1. Be conservative with "high" priority - use it sparingly
2. If uncertain between two types, choose the more actionable one
3. Always provide reasoning for your classification
4. Set confidence lower if the issue is vague or ambiguous
5. Output ONLY valid JSON, no additional text
---
## Issue to Analyze
**Title:** {title}
**Body:**
{body}
**Author:** {author}
**Labels (if any):** {existing_labels}
---
Provide your JSON classification:

View File

@@ -0,0 +1,17 @@
# AI Code Review - Python Dependencies
# Core dependencies
requests>=2.31.0
pyyaml>=6.0
# Optional: For webhook server mode
# flask>=3.0.0
# Optional: For async operations
# aiohttp>=3.9.0
# Development dependencies — uncomment to install, or use `pip install -e .[dev]` if a package config defines the dev extra
# pytest>=7.4.0
# pytest-mock>=3.12.0
# black>=24.0.0
# mypy>=1.8.0

174
tools/ai-review/review.py Normal file
View File

@@ -0,0 +1,174 @@
import json
import os
import subprocess
import sys
import requests
import yaml
from comment import to_markdown
ROOT = os.path.dirname(__file__)

# Load the review configuration once at import time.  RESOURCE FIX: use
# a context manager so the config file handle is closed deterministically
# (the original bare open() leaked the handle).
with open(f"{ROOT}/config.yml") as _cfg_file:
    CFG = yaml.safe_load(_cfg_file)

# Marker to identify the AI comment
AI_MARKER = "<!-- AI_CODE_REVIEW -->"

# Disclaimer text to prepend
AI_DISCLAIMER = (
    "**Note:** This review was generated by an AI assistant. "
    "While it aims to be accurate and helpful, it may contain mistakes "
    "or miss important issues. Please verify all findings before taking action."
)
# -------------------------------
# Helper functions
# -------------------------------
def get_diff() -> str:
    """Get git diff against main branch, limited by config"""
    diff = subprocess.check_output(["git", "diff", "origin/main...HEAD"], text=True)
    max_lines = CFG["review"]["max_diff_lines"]
    lines = diff.splitlines()
    if len(lines) > max_lines:
        diff = "\n".join(lines[:max_lines])
    return diff
def build_prompt(diff: str) -> str:
    """Prepare the AI prompt with the diff.

    RESOURCE FIX: read the prompt template with a context manager so the
    file handle is closed (the original open().read() leaked it).
    """
    with open(f"{ROOT}/prompts/base.md") as f:
        base = f.read()
    return f"{base}\n\nDIFF:\n{diff}"
def call_llm(prompt: str) -> str:
    """Call the configured LLM provider and return the response text.

    Raises:
        requests.HTTPError: if the provider returns a non-2xx status
            (previously a failed call surfaced as an opaque KeyError
            when indexing the error body).
        RuntimeError: if the configured provider is unknown.
    """
    provider = CFG["provider"]

    if provider == "openai":
        r = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openai"],
                "temperature": CFG["temperature"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        # ROBUSTNESS: fail loudly with the HTTP status on API errors.
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    if provider == "openrouter":
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openrouter"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    if provider == "ollama":
        # Longer timeout: local models can be slow to generate.
        r = requests.post(
            f"{os.environ['OLLAMA_HOST']}/api/generate",
            json={
                "model": CFG["model"]["ollama"],
                "prompt": prompt,
                "stream": False,
            },
            timeout=120,
        )
        r.raise_for_status()
        return r.json()["response"]

    # Include the offending value so misconfiguration is easy to spot.
    raise RuntimeError(f"Unknown provider: {provider}")
# -------------------------------
# Gitea PR comment functions
# -------------------------------
def find_existing_comment() -> int | None:
    """Find existing AI review comment in the PR.

    Returns:
        The id of the first comment containing AI_MARKER, or None.
    """
    url = (
        f"{os.environ['AI_REVIEW_API_URL']}/repos/"
        f"{os.environ['AI_REVIEW_REPO']}/issues/"
        f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
    )
    r = requests.get(
        url,
        headers={"Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}"},
        timeout=15,
    )
    # BUG FIX: on an API error the JSON body is an error object, and
    # iterating it produced a confusing TypeError; fail loudly instead.
    r.raise_for_status()
    for c in r.json():
        if AI_MARKER in c["body"]:
            return c["id"]
    return None
def upsert_pr_comment(markdown: str):
    """Create or update the PR comment.

    Raises:
        RuntimeError: if the Gitea API rejects the create/update.
    """
    comment_id = find_existing_comment()
    headers = {
        "Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}",
        "Content-Type": "application/json",
    }
    if comment_id:
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/comments/{comment_id}"
        )
        # FIX: add a timeout (consistent with the GET in
        # find_existing_comment) so a hung API call cannot stall CI.
        r = requests.patch(url, headers=headers, json={"body": markdown}, timeout=15)
    else:
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/"
            f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
        )
        r = requests.post(url, headers=headers, json={"body": markdown}, timeout=15)
    if r.status_code not in (200, 201):
        raise RuntimeError(f"Failed to upsert PR comment: {r.text}")
# -------------------------------
# Main workflow
# -------------------------------
def _strip_code_fences(text: str) -> str:
    """Remove a surrounding markdown ``` fence that LLMs often emit."""
    text = text.strip()
    if text.startswith("```"):
        # Drop the opening fence line (which may be ```json).
        first_newline = text.find("\n")
        if first_newline != -1:
            text = text[first_newline + 1:]
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


def main():
    """Run the review pipeline: diff -> LLM -> comment -> CI gate."""
    diff = get_diff()
    if not diff.strip():
        # Nothing changed relative to origin/main; nothing to review.
        sys.exit(0)

    raw = call_llm(build_prompt(diff))
    # ROBUSTNESS: models frequently wrap JSON in markdown fences even
    # when told not to; strip them before parsing.
    result = json.loads(_strip_code_fences(raw))

    # Convert JSON review to Markdown
    markdown = to_markdown(result)

    # Prepend AI disclaimer and marker
    full_comment = AI_DISCLAIMER + "\n\n" + AI_MARKER + "\n" + markdown
    upsert_pr_comment(full_comment)

    # Fail CI if severity matches the configured threshold and the PR
    # was not approved by the reviewer model.  .get avoids a KeyError
    # on a schema-incomplete LLM response after the comment is posted.
    if result.get("overall_severity") == CFG["review"][
        "fail_on_severity"
    ] and not result.get("approval", False):
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,9 @@
"""Security Scanning Package
This package contains security scanning utilities for
detecting vulnerabilities in code.
"""
from security.security_scanner import SecurityScanner
__all__ = ["SecurityScanner"]

View File

@@ -0,0 +1,335 @@
"""Security Scanner
Pattern-based security vulnerability detection for code analysis.
Covers OWASP Top 10 and common security anti-patterns.
"""
import re
from dataclasses import dataclass
from typing import Iterator
import yaml
import os
@dataclass
class SecurityFinding:
    """A single security finding."""

    rule_id: str            # stable rule identifier, e.g. "SEC001"
    rule_name: str          # short human-readable rule name
    severity: str  # HIGH, MEDIUM, LOW
    category: str  # OWASP category
    file: str               # path of the file the finding was detected in
    line: int               # 1-based line number of the match
    code_snippet: str       # offending line, stripped and truncated
    description: str        # what the matched rule means
    recommendation: str     # suggested remediation
    cwe: str | None = None  # CWE reference
class SecurityScanner:
    """Security scanner using pattern matching and rules."""

    # Default rules covering OWASP Top 10
    # Each rule is a dict with the keys:
    #   id             - stable rule identifier (SECnnn)
    #   name           - short human-readable name
    #   pattern        - regex applied per line (compiled in __init__)
    #   severity       - HIGH / MEDIUM / LOW
    #   category       - OWASP 2021 category (or a generic bucket)
    #   cwe            - CWE reference string, or None
    #   description    - what a match means
    #   recommendation - suggested remediation
    DEFAULT_RULES = [
        # A01:2021 Broken Access Control
        {
            "id": "SEC001",
            "name": "Hardcoded Credentials",
            "pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']',
            "severity": "HIGH",
            "category": "A01:2021 Broken Access Control",
            "cwe": "CWE-798",
            "description": "Hardcoded credentials detected in source code",
            "recommendation": "Use environment variables or a secrets management system",
        },
        {
            "id": "SEC002",
            "name": "Exposed Private Key",
            "pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
            "severity": "HIGH",
            "category": "A01:2021 Broken Access Control",
            "cwe": "CWE-321",
            "description": "Private key embedded in source code",
            "recommendation": "Never commit private keys. Use secure key management",
        },
        # A02:2021 Cryptographic Failures
        {
            "id": "SEC003",
            "name": "Weak Crypto Algorithm",
            "pattern": r"(?i)\b(md5|sha1)\s*\(",
            "severity": "MEDIUM",
            "category": "A02:2021 Cryptographic Failures",
            "cwe": "CWE-328",
            "description": "Use of weak cryptographic hash function",
            "recommendation": "Use SHA-256 or stronger hashing algorithms",
        },
        {
            "id": "SEC004",
            "name": "Insecure Random",
            "pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(",
            "severity": "MEDIUM",
            "category": "A02:2021 Cryptographic Failures",
            "cwe": "CWE-330",
            "description": "Use of non-cryptographic random number generator for security purposes",
            "recommendation": "Use secrets module or os.urandom() for security-critical randomness",
        },
        # A03:2021 Injection
        {
            "id": "SEC005",
            "name": "SQL Injection",
            "pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)',
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-89",
            "description": "Potential SQL injection through string formatting",
            "recommendation": "Use parameterized queries with placeholders",
        },
        {
            "id": "SEC006",
            "name": "Command Injection",
            "pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)",
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-78",
            "description": "Potential command injection through string concatenation",
            "recommendation": "Use subprocess with shell=False and pass arguments as list",
        },
        {
            "id": "SEC007",
            "name": "Eval Usage",
            "pattern": r"\beval\s*\(",
            "severity": "HIGH",
            "category": "A03:2021 Injection",
            "cwe": "CWE-95",
            "description": "Use of eval() can lead to code injection",
            "recommendation": "Avoid eval(). Use ast.literal_eval() for data or safer alternatives",
        },
        {
            "id": "SEC008",
            "name": "XSS Risk",
            "pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=',
            "severity": "MEDIUM",
            "category": "A03:2021 Injection",
            "cwe": "CWE-79",
            "description": "Direct DOM manipulation may allow XSS",
            "recommendation": "Use textContent or proper sanitization libraries",
        },
        # A04:2021 Insecure Design
        {
            "id": "SEC009",
            "name": "Debug Mode",
            "pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))",
            "severity": "MEDIUM",
            "category": "A04:2021 Insecure Design",
            "cwe": "CWE-489",
            "description": "Debug mode enabled in code",
            "recommendation": "Ensure debug mode is disabled in production",
        },
        # A05:2021 Security Misconfiguration
        {
            "id": "SEC010",
            "name": "CORS Wildcard",
            "pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*',
            "severity": "MEDIUM",
            "category": "A05:2021 Security Misconfiguration",
            "cwe": "CWE-942",
            "description": "CORS configured to allow all origins",
            "recommendation": "Specify allowed origins explicitly",
        },
        {
            "id": "SEC011",
            "name": "SSL Verification Disabled",
            "pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)",
            "severity": "HIGH",
            "category": "A05:2021 Security Misconfiguration",
            "cwe": "CWE-295",
            "description": "SSL certificate verification disabled",
            "recommendation": "Always verify SSL certificates in production",
        },
        # A07:2021 Identification and Authentication Failures
        {
            "id": "SEC012",
            "name": "Hardcoded JWT Secret",
            "pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']',
            "severity": "HIGH",
            "category": "A07:2021 Authentication Failures",
            "cwe": "CWE-798",
            "description": "JWT secret hardcoded in source code",
            "recommendation": "Use environment variables for JWT secrets",
        },
        # A08:2021 Software and Data Integrity Failures
        {
            "id": "SEC013",
            "name": "Pickle Usage",
            "pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(",
            "severity": "MEDIUM",
            "category": "A08:2021 Integrity Failures",
            "cwe": "CWE-502",
            "description": "Pickle can execute arbitrary code during deserialization",
            "recommendation": "Use JSON or other safe serialization formats",
        },
        # A09:2021 Security Logging and Monitoring Failures
        {
            "id": "SEC014",
            "name": "Sensitive Data Logging",
            "pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b',
            "severity": "MEDIUM",
            "category": "A09:2021 Logging Failures",
            "cwe": "CWE-532",
            "description": "Potentially logging sensitive information",
            "recommendation": "Never log passwords, tokens, or secrets",
        },
        # A10:2021 Server-Side Request Forgery
        {
            "id": "SEC015",
            "name": "SSRF Risk",
            "pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+',
            "severity": "MEDIUM",
            "category": "A10:2021 SSRF",
            "cwe": "CWE-918",
            "description": "URL constructed from user input may allow SSRF",
            "recommendation": "Validate and sanitize URLs, use allowlists",
        },
        # Additional common issues
        {
            "id": "SEC016",
            "name": "Hardcoded IP Address",
            "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
            "severity": "LOW",
            "category": "Configuration",
            "cwe": "CWE-547",
            "description": "Hardcoded IP address found",
            "recommendation": "Use configuration files or environment variables for IP addresses",
        },
        {
            "id": "SEC017",
            "name": "TODO/FIXME Security",
            "pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b",
            "severity": "MEDIUM",
            "category": "Code Quality",
            "cwe": None,
            "description": "Security-related TODO/FIXME comment found",
            "recommendation": "Address security-related TODO items before deployment",
        },
    ]
def __init__(self, rules_file: str | None = None):
    """Initialize scanner with rules.

    Args:
        rules_file: Optional path to custom rules YAML file.
    """
    self.rules = self.DEFAULT_RULES.copy()

    # Merge in user-supplied rules, best-effort: a malformed file must
    # not prevent scanning with the built-in rule set.
    if rules_file and os.path.exists(rules_file):
        try:
            with open(rules_file) as handle:
                loaded = yaml.safe_load(handle)
                if loaded and "rules" in loaded:
                    self.rules.extend(loaded["rules"])
        except Exception:
            pass  # Use defaults if custom rules fail to load

    # Pre-compile the regexes once; rules whose pattern fails to
    # compile are dropped silently.
    compiled = []
    for rule in self.rules:
        try:
            compiled.append({**rule, "_pattern": re.compile(rule["pattern"])})
        except re.error:
            pass  # Skip invalid patterns
    self._compiled_rules = compiled
def scan_content(
    self,
    content: str,
    filename: str,
) -> Iterator[SecurityFinding]:
    """Scan content for security issues.

    Args:
        content: File content to scan.
        filename: Name of the file (for reporting).

    Yields:
        SecurityFinding for each detected issue.
    """
    for line_no, text in enumerate(content.splitlines(), start=1):
        for rule in self._compiled_rules:
            if not rule["_pattern"].search(text):
                continue
            yield SecurityFinding(
                rule_id=rule["id"],
                rule_name=rule["name"],
                severity=rule["severity"],
                category=rule["category"],
                file=filename,
                line=line_no,
                code_snippet=text.strip()[:120],
                description=rule["description"],
                recommendation=rule["recommendation"],
                cwe=rule.get("cwe"),
            )
def scan_diff(self, diff: str) -> Iterator[SecurityFinding]:
    """Scan a git diff for security issues.

    Only scans added lines (lines starting with +).

    Args:
        diff: Git diff content.

    Yields:
        SecurityFinding for each detected issue, with ``line`` rewritten
        to the post-image line number in the target file.
    """
    current_file = None  # file named by the most recent "diff --git" header
    current_line = 0  # post-image line number within the current hunk
    for line in diff.splitlines():
        # Track current file
        if line.startswith("diff --git"):
            match = re.search(r"b/(.+)$", line)
            if match:
                current_file = match.group(1)
                current_line = 0
        # Track line numbers: "@@ -a,b +c,d @@" — start counting from c.
        elif line.startswith("@@"):
            match = re.search(r"\+(\d+)", line)
            if match:
                current_line = int(match.group(1)) - 1
        # Check added lines
        elif line.startswith("+") and not line.startswith("+++"):
            current_line += 1
            for finding in self.scan_content(line[1:], current_file or "unknown"):
                # scan_content reports line 1 for the single-line input;
                # overwrite with the tracked position in the diff.
                finding.line = current_line
                yield finding
        # Context lines advance the counter too.  NOTE(review): header
        # lines such as "+++ b/..." and "index ..." also fall through
        # here and bump the counter, but the following "@@" header
        # resets it before any added line is scanned, so numbering
        # inside hunks stays correct.
        elif not line.startswith("-"):
            current_line += 1
def get_summary(self, findings: list[SecurityFinding]) -> dict:
    """Get summary statistics for findings.

    Args:
        findings: List of security findings.

    Returns:
        Summary dictionary with counts by severity and category.
    """
    by_severity = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
    by_category: dict = {}
    for item in findings:
        by_severity[item.severity] = by_severity.get(item.severity, 0) + 1
        by_category[item.category] = by_category.get(item.category, 0) + 1
    return {
        "total": len(findings),
        "by_severity": by_severity,
        "by_category": by_category,
    }