diff --git a/.gitea/workflows/ai-chat.yml b/.gitea/workflows/ai-chat.yml new file mode 100644 index 0000000..1bff424 --- /dev/null +++ b/.gitea/workflows/ai-chat.yml @@ -0,0 +1,42 @@ +name: AI Chat (Bartender) + +on: + issue_comment: + types: [created] + +# CUSTOMIZE YOUR BOT NAME: +# Change '@ai-bot' below to match your config.yml mention_prefix +# Examples: '@bartender', '@uni', '@joey', '@codebot' + +jobs: + ai-chat: + # Only run if comment mentions the bot + if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/checkout@v4 + with: + repository: Hiddenden/OpenRabbit + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Chat + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + SEARXNG_URL: ${{ secrets.SEARXNG_URL }} + run: | + cd .ai-review/tools/ai-review + python main.py comment ${{ gitea.repository }} ${{ gitea.event.issue.number }} "${{ gitea.event.comment.body }}" diff --git a/.gitea/workflows/ai-codebase-review.yml b/.gitea/workflows/ai-codebase-review.yml new file mode 100644 index 0000000..944718d --- /dev/null +++ b/.gitea/workflows/ai-codebase-review.yml @@ -0,0 +1,58 @@ +name: AI Codebase Quality Review + +on: + # Weekly scheduled run + schedule: + - cron: "0 0 * * 0" # Every Sunday at midnight + + # Manual trigger + workflow_dispatch: + inputs: + report_type: + description: "Type of report to generate" + required: false + default: "full" + type: choice + options: + - full + - security + - quick + +jobs: + ai-codebase-review: + runs-on: ubuntu-latest + + steps: + # 
Checkout the repository + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history for analysis + + # Checkout central AI tooling + - uses: actions/checkout@v4 + with: + repository: Hiddenden/AI-code-review-workflow + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + # Setup Python + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + # Install dependencies + - run: pip install requests pyyaml + + # Run AI codebase analysis + - name: Run AI Codebase Analysis + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1 + + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + run: | + cd .ai-review/tools/ai-review + python main.py codebase ${{ gitea.repository }} diff --git a/.gitea/workflows/ai-comment-reply.yml b/.gitea/workflows/ai-comment-reply.yml new file mode 100644 index 0000000..a6e662e --- /dev/null +++ b/.gitea/workflows/ai-comment-reply.yml @@ -0,0 +1,41 @@ +name: AI Comment Reply + +on: + issue_comment: + types: [created] + +# CUSTOMIZE YOUR BOT NAME: +# Change '@ai-bot' below to match your config.yml mention_prefix +# Examples: '@bartender', '@uni', '@joey', '@codebot' + +jobs: + ai-reply: + runs-on: ubuntu-latest + if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name + steps: + - uses: actions/checkout@v4 + + - uses: actions/checkout@v4 + with: + repository: Hiddenden/AI-code-review-workflow + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Comment Response + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + 
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + run: | + cd .ai-review/tools/ai-review + python main.py comment ${{ gitea.repository }} ${{ gitea.event.issue.number }} \ + "${{ gitea.event.comment.body }}" diff --git a/.gitea/workflows/ai-issue-triage.yml b/.gitea/workflows/ai-issue-triage.yml new file mode 100644 index 0000000..9d1de47 --- /dev/null +++ b/.gitea/workflows/ai-issue-triage.yml @@ -0,0 +1,36 @@ +name: AI Issue Triage + +on: + issues: + types: [opened, labeled] + +jobs: + ai-triage: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/checkout@v4 + with: + repository: Hiddenden/AI-code-review-workflow + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Issue Triage + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + run: | + cd .ai-review/tools/ai-review + python main.py issue ${{ gitea.repository }} ${{ gitea.event.issue.number }} \ + --title "${{ gitea.event.issue.title }}" diff --git a/.gitea/workflows/enterprise-ai-review.yml b/.gitea/workflows/enterprise-ai-review.yml new file mode 100644 index 0000000..b1e0073 --- /dev/null +++ b/.gitea/workflows/enterprise-ai-review.yml @@ -0,0 +1,53 @@ +name: Enterprise AI Code Review + +on: + pull_request: + types: [opened, synchronize] + +jobs: + ai-review: + runs-on: ubuntu-latest + + steps: + # Checkout the PR repository + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Checkout the CENTRAL AI tooling repo + - uses: actions/checkout@v4 + with: + repository: Hiddenden/AI-code-review-workflow + path: .ai-review + token: ${{ 
secrets.AI_REVIEW_TOKEN }} + + # Setup Python + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + # Install dependencies + - run: pip install requests pyyaml + + # Run the AI review + - name: Run Enterprise AI Review + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://git.hiddenden.cafe/api/v1 + AI_REVIEW_PR_NUMBER: ${{ gitea.event.pull_request.number }} + + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + run: | + cd .ai-review/tools/ai-review + python main.py pr ${{ gitea.repository }} ${{ gitea.event.pull_request.number }} \ + --title "${{ gitea.event.pull_request.title }}" + + # Fail CI on HIGH severity (optional) + - name: Check Review Result + if: failure() + run: | + echo "AI Review found HIGH severity issues. Please address them before merging." + exit 1 diff --git a/.github/workflows/ai-chat.yml b/.github/workflows/ai-chat.yml new file mode 100644 index 0000000..2bdcac9 --- /dev/null +++ b/.github/workflows/ai-chat.yml @@ -0,0 +1,36 @@ +name: AI Chat (Bartender) + +on: + issue_comment: + types: [created] + +# CUSTOMIZE YOUR BOT NAME: +# Change '@ai-bot' below to match your config.yml mention_prefix +# Examples: '@bartender', '@uni', '@joey', '@codebot' + +jobs: + ai-chat: + # Only run if comment mentions the bot + if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Chat + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ 
secrets.OLLAMA_HOST }} + SEARXNG_URL: ${{ secrets.SEARXNG_URL }} + run: | + cd tools/ai-review + python main.py comment ${{ github.repository }} ${{ github.event.issue.number }} "${{ github.event.comment.body }}" diff --git a/.github/workflows/ai-codebase-review.yml b/.github/workflows/ai-codebase-review.yml new file mode 100644 index 0000000..f384059 --- /dev/null +++ b/.github/workflows/ai-codebase-review.yml @@ -0,0 +1,51 @@ +name: AI Codebase Quality Review + +on: + # Weekly scheduled run + schedule: + - cron: "0 0 * * 0" # Every Sunday at midnight + + # Manual trigger + workflow_dispatch: + inputs: + report_type: + description: "Type of report to generate" + required: false + default: "full" + type: choice + options: + - full + - security + - quick + +jobs: + ai-codebase-review: + runs-on: ubuntu-latest + + steps: + # Checkout the repository + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history for analysis + + # Setup Python + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + # Install dependencies + - run: pip install requests pyyaml + + # Run AI codebase analysis + - name: Run AI Codebase Analysis + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + run: | + cd tools/ai-review + python main.py codebase ${{ github.repository }} diff --git a/.github/workflows/ai-comment-reply.yml b/.github/workflows/ai-comment-reply.yml new file mode 100644 index 0000000..0b88177 --- /dev/null +++ b/.github/workflows/ai-comment-reply.yml @@ -0,0 +1,36 @@ +name: AI Comment Reply + +on: + issue_comment: + types: [created] + +# CUSTOMIZE YOUR BOT NAME: +# Change '@ai-bot' below to match your config.yml mention_prefix +# Examples: '@bartender', '@uni', '@joey', '@codebot' + +jobs: + ai-reply: + runs-on: 
ubuntu-latest + if: contains(github.event.comment.body, '@ai-bot') # <-- Change this to your bot name + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Comment Response + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + SEARXNG_URL: ${{ secrets.SEARXNG_URL }} + run: | + cd tools/ai-review + python main.py comment ${{ github.repository }} ${{ github.event.issue.number }} \ + "${{ github.event.comment.body }}" diff --git a/.github/workflows/ai-issue-triage.yml b/.github/workflows/ai-issue-triage.yml new file mode 100644 index 0000000..3c4a2dd --- /dev/null +++ b/.github/workflows/ai-issue-triage.yml @@ -0,0 +1,30 @@ +name: AI Issue Triage + +on: + issues: + types: [opened, labeled] + +jobs: + ai-triage: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Issue Triage + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + run: | + cd tools/ai-review + python main.py issue ${{ github.repository }} ${{ github.event.issue.number }} \ + --title "${{ github.event.issue.title }}" diff --git a/.github/workflows/ai-review.yml b/.github/workflows/ai-review.yml new file mode 100644 index 0000000..d637c16 --- /dev/null +++ b/.github/workflows/ai-review.yml @@ -0,0 +1,52 @@ +name: AI Code Review + +on: + pull_request: + types: [opened, synchronize] + +jobs: + 
ai-review: + runs-on: ubuntu-latest + + steps: + # Checkout the PR repository + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # Checkout the AI tooling from this repo's tools directory + - name: Setup AI Review Tools + run: | + # Tools are already in this repo under tools/ai-review + echo "AI Review tools available at tools/ai-review" + + # Setup Python + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + # Install dependencies + - run: pip install requests pyyaml + + # Run the AI review + - name: Run AI Review + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + AI_REVIEW_PR_NUMBER: ${{ github.event.pull_request.number }} + + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + OLLAMA_HOST: ${{ secrets.OLLAMA_HOST }} + run: | + cd tools/ai-review + python main.py pr ${{ github.repository }} ${{ github.event.pull_request.number }} \ + --title "${{ github.event.pull_request.title }}" + + # Fail CI on HIGH severity (optional) + - name: Check Review Result + if: failure() + run: | + echo "AI Review found HIGH severity issues. Please address them before merging." + exit 1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..727cbe7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,32 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +.eggs/ +dist/ +build/ + +# Virtual environments +.venv/ +venv/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log + +# Environment +.env +.env.local diff --git a/README.md b/README.md index 1e06343..73e6193 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,342 @@ -# openrabbit +# OpenRabbit +Enterprise-grade AI code review system for **GitHub** and **Gitea** with automated PR review, issue triage, interactive chat (Bartender), and codebase analysis. 
+ +--- + +## Features + +| Feature | Description | +|---------|-------------| +| **PR Review** | Inline comments, security scanning, severity-based CI failure | +| **Issue Triage** | Auto-classification, labeling, priority assignment | +| **Chat (Bartender)** | Interactive AI chat with codebase search and web search tools | +| **@ai-bot Commands** | `@ai-bot summarize`, `explain`, `suggest` in issue comments | +| **Codebase Analysis** | Health scores, tech debt tracking, weekly reports | +| **Security Scanner** | 17 OWASP-aligned rules for vulnerability detection | +| **Enterprise Ready** | Audit logging, metrics, Prometheus export | +| **Multi-Platform** | Works with both GitHub and Gitea | + +--- + +## Quick Start + +### 1. Set Repository/Organization Secrets + +``` +OPENAI_API_KEY - OpenAI API key (or use OpenRouter/Ollama) +SEARXNG_URL - (Optional) SearXNG instance URL for web search +``` + +**For Gitea:** +``` +AI_REVIEW_TOKEN - Bot token with repo + issue permissions +``` + +**For GitHub:** +The built-in `GITHUB_TOKEN` is used automatically. + +### 2. 
Add Workflows to Repository + +Workflows are provided for both platforms: + +| Platform | Location | +|----------|----------| +| GitHub | `.github/workflows/` | +| Gitea | `.gitea/workflows/` | + +#### GitHub Example + +```yaml +# .github/workflows/ai-review.yml +name: AI PR Review +on: [pull_request] + +jobs: + ai-review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Review + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd tools/ai-review + python main.py pr ${{ github.repository }} ${{ github.event.pull_request.number }} +``` + +#### Gitea Example + +```yaml +# .gitea/workflows/ai-review.yml +name: AI PR Review +on: [pull_request] + +jobs: + ai-review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/checkout@v4 + with: + repository: YourOrg/OpenRabbit + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Review + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd .ai-review/tools/ai-review + python main.py pr ${{ gitea.repository }} ${{ gitea.event.pull_request.number }} +``` + +For full workflow examples, see [Workflows Documentation](docs/workflows.md). + +### 3. 
Create Labels + +Create these labels in your repository for auto-labeling: +- `priority: high`, `priority: medium`, `priority: low` +- `type: bug`, `type: feature`, `type: question` +- `ai-approved`, `ai-changes-required` + +--- + +## Project Structure + +``` +tools/ai-review/ +├── agents/ # Agent implementations +│ ├── base_agent.py # Abstract base agent +│ ├── issue_agent.py # Issue triage & @ai-bot commands +│ ├── pr_agent.py # PR review with security scan +│ ├── codebase_agent.py # Codebase health analysis +│ └── chat_agent.py # Bartender chat with tool calling +├── clients/ # API clients +│ ├── gitea_client.py # Gitea REST API wrapper +│ └── llm_client.py # Multi-provider LLM client with tool support +├── security/ # Security scanning +│ └── security_scanner.py # 17 OWASP-aligned rules +├── enterprise/ # Enterprise features +│ ├── audit_logger.py # JSONL audit logging +│ └── metrics.py # Prometheus-compatible metrics +├── prompts/ # AI prompt templates +├── main.py # CLI entry point +└── config.yml # Configuration + +.github/workflows/ # GitHub Actions workflows +├── ai-review.yml # PR review workflow +├── ai-issue-triage.yml # Issue triage workflow +├── ai-codebase-review.yml # Codebase analysis +├── ai-comment-reply.yml # @ai-bot command responses +└── ai-chat.yml # Bartender chat + +.gitea/workflows/ # Gitea Actions workflows +├── enterprise-ai-review.yml +├── ai-issue-triage.yml +├── ai-codebase-review.yml +├── ai-comment-reply.yml +└── ai-chat.yml +``` + +--- + +## CLI Commands + +```bash +# Review a pull request +python main.py pr owner/repo 123 + +# Triage an issue +python main.py issue owner/repo 456 + +# Respond to @ai-bot command +python main.py comment owner/repo 456 "@ai-bot explain" + +# Analyze codebase +python main.py codebase owner/repo + +# Chat with Bartender +python main.py chat owner/repo "How does authentication work?" 
+python main.py chat owner/repo "Find all API endpoints" --issue 789 +``` + +--- + +## @ai-bot Commands + +In any issue comment: + +| Command | Description | +|---------|-------------| +| `@ai-bot summarize` | Summarize the issue in 2-3 sentences | +| `@ai-bot explain` | Explain what the issue is about | +| `@ai-bot suggest` | Suggest solutions or next steps | +| `@ai-bot` (any question) | Chat with Bartender using codebase/web search | + +--- + +## Bartender Chat + +Bartender is an interactive AI assistant with tool-calling capabilities: + +**Tools Available:** +- `search_codebase` - Search repository files and code +- `read_file` - Read specific files +- `search_web` - Search the web via SearXNG + +**Example:** +``` +@ai-bot How do I configure rate limiting in this project? +``` + +Bartender will search the codebase, read relevant files, and provide a comprehensive answer. + +--- + +## Configuration + +Edit `tools/ai-review/config.yml`: + +```yaml +provider: openai # openai | openrouter | ollama + +model: + openai: gpt-4.1-mini + openrouter: anthropic/claude-3.5-sonnet + ollama: codellama:13b + +agents: + issue: + enabled: true + auto_label: true + pr: + enabled: true + inline_comments: true + security_scan: true + codebase: + enabled: true + chat: + enabled: true + name: "Bartender" + searxng_url: "" # Or set SEARXNG_URL env var + +interaction: + respond_to_mentions: true + mention_prefix: "@ai-bot" # Customize your bot name here! + commands: + - summarize + - explain + - suggest +``` + +--- + +## Customizing the Bot Name + +You can change the bot's mention trigger from `@ai-bot` to any name you prefer: + +**Step 1:** Edit `tools/ai-review/config.yml`: +```yaml +interaction: + mention_prefix: "@bartender" # or "@uni", "@joey", "@codebot", etc. 
+``` + +**Step 2:** Update the workflow files to match: + +For GitHub (`.github/workflows/ai-comment-reply.yml` and `ai-chat.yml`): +```yaml +if: contains(github.event.comment.body, '@bartender') +``` + +For Gitea (`.gitea/workflows/ai-comment-reply.yml` and `ai-chat.yml`): +```yaml +if: contains(github.event.comment.body, '@bartender') +``` + +**Example bot names:** +| Name | Use Case | +|------|----------| +| `@bartender` | Friendly, conversational | +| `@uni` | Short, quick to type | +| `@joey` | Personal assistant feel | +| `@codebot` | Technical, code-focused | +| `@reviewer` | Review-focused | + +--- + +## Security Scanning + +17 rules covering OWASP Top 10: + +| Category | Examples | +|----------|----------| +| Injection | SQL injection, command injection, XSS | +| Access Control | Hardcoded secrets, private keys | +| Crypto Failures | Weak hashing (MD5/SHA1), insecure random | +| Misconfiguration | Debug mode, CORS wildcard, SSL bypass | + +--- + +## Documentation + +| Document | Description | +|----------|-------------| +| [Getting Started](docs/getting-started.md) | Quick setup guide | +| [Configuration](docs/configuration.md) | All options explained | +| [Agents](docs/agents.md) | Agent documentation | +| [Security](docs/security.md) | Security rules reference | +| [Workflows](docs/workflows.md) | GitHub & Gitea workflow examples | +| [API Reference](docs/api-reference.md) | Client and agent APIs | +| [Enterprise](docs/enterprise.md) | Audit logging, metrics | +| [Troubleshooting](docs/troubleshooting.md) | Common issues | + +--- + +## LLM Providers + +| Provider | Model | Use Case | +|----------|-------|----------| +| OpenAI | gpt-4.1-mini | Fast, reliable | +| OpenRouter | claude-3.5-sonnet | Multi-provider access | +| Ollama | codellama:13b | Self-hosted, private | + +--- + +## Enterprise Features + +- **Audit Logging**: JSONL logs with daily rotation +- **Metrics**: Prometheus-compatible export +- **Rate Limiting**: Configurable request limits +- 
**Custom Security Rules**: Define your own patterns via YAML +- **Tool Calling**: LLM function calling for interactive chat + +--- + +## License + +MIT diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..230def8 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,52 @@ +# AI Code Review Workflow Documentation + +Enterprise-grade AI code review system for Gitea with automated issue triage, PR review, and codebase analysis. + +## 📚 Documentation + +| Document | Description | +|----------|-------------| +| [Getting Started](getting-started.md) | Quick setup guide | +| [Configuration](configuration.md) | All configuration options | +| [Agents](agents.md) | Detailed agent documentation | +| [Security](security.md) | Security scanning features | +| [API Reference](api-reference.md) | Client and agent APIs | +| [Workflows](workflows.md) | Gitea workflow examples | +| [Troubleshooting](troubleshooting.md) | Common issues and solutions | + +## Quick Links + +- **Setup**: See [Getting Started](getting-started.md) +- **Configuration**: See [Configuration](configuration.md) +- **Enterprise Features**: See [Enterprise](enterprise.md) + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Event Sources │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ PR Event │ │ Issue │ │ Schedule │ │ +│ └────┬─────┘ └────┬─────┘ └────┬─────┘ │ +└───────┼─────────────┼─────────────┼─────────────────────────┘ + │ │ │ + └─────────────┼─────────────┘ + ▼ + ┌───────────────┐ + │ Dispatcher │ + └───────┬───────┘ + │ + ┌─────────────┼─────────────┐ + ▼ ▼ ▼ +┌───────────┐ ┌───────────┐ ┌───────────┐ +│ Issue │ │ PR │ │ Codebase │ +│ Agent │ │ Agent │ │ Agent │ +└─────┬─────┘ └─────┬─────┘ └─────┬─────┘ + │ │ │ + └──────────────┼──────────────┘ + ▼ + ┌─────────────────┐ + │ Gitea API │ + │ LLM Provider │ + └─────────────────┘ +``` diff --git a/docs/agents.md b/docs/agents.md new file mode 100644 index 0000000..8df0dc0 --- 
/dev/null +++ b/docs/agents.md @@ -0,0 +1,298 @@ +# Agents Documentation + +The AI Code Review system includes four specialized agents. + +## Issue Agent + +Handles issue triage, classification, and interaction. + +### Triggers + +- `issues.opened` - New issue created (handled by `run_issue_triage`) +- `issues.labeled` - Label added to issue +- `issue_comment.created` - Comment with @mention (handled by `run_issue_comment`) + +### Features + +**Automatic Triage:** +- Classifies issue type: bug, feature, question, documentation, support +- Assigns priority: high, medium, low +- Calculates confidence score + +**Auto-Labeling:** +- Applies type labels (`type: bug`, etc.) +- Applies priority labels (`priority: high`, etc.) +- Adds `ai-reviewed` status label + +**@Mention Commands:** + +| Command | Description | +|---------|-------------| +| `@ai-bot summarize` | Generate concise summary | +| `@ai-bot explain` | Detailed explanation | +| `@ai-bot suggest` | Solution suggestions | + +### Output + +Posts a triage comment: + +```markdown +## AI Issue Triage + +| Field | Value | +|-------|--------| +| **Type** | Bug | +| **Priority** | High | +| **Confidence** | 85% | + +### Additional Information Needed + +- Steps to reproduce +- Error logs + +--- +*Classification based on issue content* +``` + +--- + +## PR Agent + +Comprehensive pull request review with security scanning. 
+ +### Triggers + +- `pull_request.opened` - New PR created +- `pull_request.synchronize` - PR updated with new commits + +### Features + +**AI Code Review:** +- Analyzes diff for issues +- Categorizes: Security, Correctness, Performance, Maintainability +- Assigns severity: HIGH, MEDIUM, LOW + +**Inline Comments:** +- Posts comments on specific lines +- Links to file and line number +- Provides recommendations + +**Security Scanning:** +- 17 OWASP-aligned rules +- Detects hardcoded secrets, SQL injection, XSS +- Fails CI on HIGH severity + +**Label Management:** +- `ai-approved` - No blocking issues +- `ai-changes-required` - HIGH severity issues found + +### Output + +Posts summary comment: + +```markdown +## AI Code Review + +Review of changes in this PR. + +### Summary + +| Severity | Count | +|----------|-------| +| HIGH | 1 | +| MEDIUM | 2 | +| LOW | 3 | + +### Security Issues + +- **[HIGH]** `src/auth.py:45` - Hardcoded API key detected + +### Review Findings + +- **[MEDIUM]** `src/db.py:12` - SQL query uses string formatting +- **[LOW]** `src/utils.py:30` - Missing docstring + +--- +**Overall Severity:** `HIGH` +**AI Recommendation:** Changes Requested +``` + +--- + +## Codebase Agent + +Repository-wide quality and health analysis. + +### Triggers + +- `schedule` - Cron schedule (default: weekly) +- `workflow_dispatch` - Manual trigger +- `@ai-bot codebase` - Comment command + +### Features + +**Metrics Collection:** +- Total files and lines of code +- Language distribution +- TODO/FIXME/DEPRECATED counts + +**AI Analysis:** +- Overall health score (0-100) +- Architecture observations +- Technical debt identification +- Improvement recommendations + +### Output + +Creates/updates report issue: + +```markdown +# AI Codebase Quality Report + +## Health Score: 72/100 + +The codebase is in reasonable condition with some areas for improvement. 
+ +--- + +## Metrics + +| Metric | Value | +|--------|-------| +| Total Files | 45 | +| Total Lines | 12,500 | +| TODO Comments | 23 | +| FIXME Comments | 8 | + +### Languages + +- **Python**: 35 files +- **JavaScript**: 10 files + +## Issues Found + +### [MEDIUM] Code Quality +Missing docstrings in 15 functions. +**Recommendation:** Add docstrings for public functions. + +## Recommendations + +1. Add comprehensive test coverage +2. Document API endpoints +3. Reduce TODO backlog +``` + +--- + +## Chat Agent (Bartender) + +Interactive AI chat assistant with tool-calling capabilities. + +### Triggers + +- `issue_comment.created` - Any @ai-bot mention that isn't a specific command +- `chat` - Direct CLI invocation + +### Features + +**Tool Calling:** +The Chat Agent uses LLM function calling to gather information before responding: + +| Tool | Description | +|------|-------------| +| `search_codebase` | Search repository files and code patterns | +| `read_file` | Read specific files from the repository | +| `search_web` | Search the web via SearXNG instance | + +**Iterative Reasoning:** +- Makes up to 5 tool calls per request +- Combines information from multiple sources +- Provides comprehensive, contextual answers + +**Web Search:** +- Requires SearXNG instance URL (via `SEARXNG_URL` env var or config) +- Searches for documentation, tutorials, external resources + +### Configuration + +```yaml +agents: + chat: + enabled: true + name: "Bartender" + max_iterations: 5 + tools: + - search_codebase + - read_file + - search_web + searxng_url: "" # Or set SEARXNG_URL env var +``` + +### CLI Usage + +```bash +# Simple chat +python main.py chat owner/repo "How does authentication work?" + +# Chat and post response to issue +python main.py chat owner/repo "Explain this bug" --issue 123 +``` + +### Issue Comment Usage + +``` +@ai-bot How do I configure rate limiting? +@ai-bot Find all files that handle user authentication +@ai-bot What does the dispatcher module do? 
+``` + +### Output + +Posts a response comment: + +```markdown +**Note:** This review was generated by an AI assistant... + +--- + +Based on my analysis of the codebase, rate limiting is configured in +`tools/ai-review/config.yml` under the `enterprise.rate_limit` section: + +- `requests_per_minute`: Maximum requests per minute (default: 30) +- `max_concurrent`: Maximum concurrent requests (default: 4) + +The rate limiting logic is implemented in `enterprise/rate_limiter.py`... +``` + +--- + +## Agent Interface + +All agents extend `BaseAgent`: + +```python +from agents import BaseAgent, AgentContext, AgentResult + +class CustomAgent(BaseAgent): + def can_handle(self, event_type: str, event_data: dict) -> bool: + # Return True if this agent handles the event + return event_type == "custom_event" + + def execute(self, context: AgentContext) -> AgentResult: + # Perform agent logic + return AgentResult( + success=True, + message="Custom action completed", + actions_taken=["action1", "action2"], + ) +``` + +Register with dispatcher: + +```python +from dispatcher import get_dispatcher +from agents import CustomAgent + +dispatcher = get_dispatcher() +dispatcher.register_agent(CustomAgent()) +``` diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..aa489ce --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,280 @@ +# API Reference + +## Gitea Client + +`clients/gitea_client.py` + +### Initialization + +```python +from clients import GiteaClient + +client = GiteaClient( + api_url="https://gitea.example.com/api/v1", + token="your_token", + timeout=30, +) +``` + +### Issue Methods + +```python +# List issues +issues = client.list_issues( + owner="user", + repo="repo", + state="open", # open, closed, all + labels=["bug"], + page=1, + limit=30, +) + +# Get single issue +issue = client.get_issue(owner, repo, index=123) + +# Create comment +comment = client.create_issue_comment(owner, repo, index=123, body="Comment text") + +# Update 
comment +client.update_issue_comment(owner, repo, comment_id=456, body="Updated text") + +# List comments +comments = client.list_issue_comments(owner, repo, index=123) + +# Add labels +client.add_issue_labels(owner, repo, index=123, labels=[1, 2, 3]) + +# Get repo labels +labels = client.get_repo_labels(owner, repo) +``` + +### Pull Request Methods + +```python +# Get PR +pr = client.get_pull_request(owner, repo, index=123) + +# Get diff +diff = client.get_pull_request_diff(owner, repo, index=123) + +# List changed files +files = client.list_pull_request_files(owner, repo, index=123) + +# Create review with inline comments +client.create_pull_request_review( + owner, repo, index=123, + body="Review summary", + event="COMMENT", # APPROVE, REQUEST_CHANGES, COMMENT + comments=[ + {"path": "file.py", "line": 10, "body": "Issue here"}, + ], +) +``` + +### Repository Methods + +```python +# Get repository info +repo = client.get_repository(owner, repo) + +# Get file contents (base64 encoded) +content = client.get_file_contents(owner, repo, "path/to/file.py", ref="main") + +# Get branch +branch = client.get_branch(owner, repo, "main") +``` + +--- + +## LLM Client + +`clients/llm_client.py` + +### Initialization + +```python +from clients import LLMClient + +# Direct initialization +client = LLMClient( + provider="openai", # openai, openrouter, ollama + config={"model": "gpt-4", "temperature": 0}, +) + +# From config file +client = LLMClient.from_config(config_dict) +``` + +### Methods + +```python +# Basic call +response = client.call("Explain this code") +print(response.content) +print(response.tokens_used) + +# JSON response +result = client.call_json("Return JSON: {\"key\": \"value\"}") +print(result["key"]) +``` + +### Response Object + +```python +@dataclass +class LLMResponse: + content: str # Generated text + model: str # Model used + provider: str # Provider name + tokens_used: int # Token count + finish_reason: str # stop, length, etc. 
+``` + +--- + +## Base Agent + +`agents/base_agent.py` + +### Creating Custom Agent + +```python +from agents import BaseAgent, AgentContext, AgentResult + +class MyAgent(BaseAgent): + def can_handle(self, event_type: str, event_data: dict) -> bool: + return event_type == "my_event" + + def execute(self, context: AgentContext) -> AgentResult: + # Use built-in methods + prompt = self.load_prompt("my_prompt") + response = self.call_llm(prompt) + + self.upsert_comment( + context.owner, + context.repo, + issue_index=123, + body=response.content, + ) + + return AgentResult( + success=True, + message="Done", + actions_taken=["posted comment"], + ) +``` + +### Built-in Methods + +```python +# Load prompt template +prompt = self.load_prompt("prompt_name") # From prompts/prompt_name.md + +# LLM calls (with rate limiting) +response = self.call_llm(prompt) +json_result = self.call_llm_json(prompt) + +# Comment management +comment_id = self.find_ai_comment(owner, repo, issue_index) +self.upsert_comment(owner, repo, issue_index, body) + +# Format with disclaimer +formatted = self.format_with_disclaimer(content) +``` + +### Context Object + +```python +@dataclass +class AgentContext: + owner: str # Repository owner + repo: str # Repository name + event_type: str # Event type + event_data: dict # Event payload + config: dict # Configuration +``` + +### Result Object + +```python +@dataclass +class AgentResult: + success: bool + message: str + data: dict = {} + actions_taken: list[str] = [] + error: str | None = None +``` + +--- + +## Dispatcher + +`dispatcher.py` + +### Usage + +```python +from dispatcher import Dispatcher, get_dispatcher + +# Get global dispatcher +dispatcher = get_dispatcher() + +# Register agents +dispatcher.register_agent(MyAgent()) + +# Dispatch event +result = dispatcher.dispatch( + event_type="pull_request", + event_data={"action": "opened", ...}, + owner="user", + repo="repo", +) + +# Async dispatch +future = dispatcher.dispatch_async(event_type, 
event_data, owner, repo) +result = future.result() +``` + +--- + +## Security Scanner + +`security/security_scanner.py` + +### Usage + +```python +from security import SecurityScanner + +scanner = SecurityScanner() + +# Scan content +for finding in scanner.scan_content(code, "file.py"): + print(finding.rule_id, finding.severity, finding.line) + +# Scan diff (only added lines) +for finding in scanner.scan_diff(diff): + print(finding.file, finding.line, finding.code_snippet) + +# Summary +findings = list(scanner.scan_diff(diff)) +summary = scanner.get_summary(findings) +``` + +### Finding Object + +```python +@dataclass +class SecurityFinding: + rule_id: str # SEC001, SEC002, etc. + rule_name: str # Human-readable name + severity: str # HIGH, MEDIUM, LOW + category: str # OWASP category + file: str # File path + line: int # Line number + code_snippet: str # Matched code + description: str # Issue description + recommendation: str # How to fix + cwe: str | None # CWE reference +``` diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..8d4fbcd --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,196 @@ +# Configuration Reference + +All configuration is managed in `tools/ai-review/config.yml`. 
+ +## Provider Settings + +```yaml +# LLM Provider: openai | openrouter | ollama +provider: openai + +# Model per provider +model: + openai: gpt-4.1-mini + openrouter: anthropic/claude-3.5-sonnet + ollama: codellama:13b + +# Generation settings +temperature: 0 # 0 = deterministic +max_tokens: 4096 # Max response tokens +``` + +## Review Settings + +```yaml +review: + fail_on_severity: HIGH # Fail CI on this severity + max_diff_lines: 800 # Truncate large diffs + inline_comments: true # Post inline PR comments + security_scan: true # Run security scanner +``` + +## Agent Configuration + +### Issue Agent + +```yaml +agents: + issue: + enabled: true + auto_label: true # Apply labels automatically + auto_triage: true # Run triage on new issues + duplicate_threshold: 0.85 # Similarity threshold + events: + - opened + - labeled +``` + +### PR Agent + +```yaml +agents: + pr: + enabled: true + inline_comments: true # Post inline comments + security_scan: true # Run security scanner + events: + - opened + - synchronize +``` + +### Codebase Agent + +```yaml +agents: + codebase: + enabled: true + schedule: "0 0 * * 0" # Cron schedule (weekly) +``` + +### Chat Agent (Bartender) + +```yaml +agents: + chat: + enabled: true + name: "Bartender" # Display name for the bot + max_iterations: 5 # Max tool calls per chat + tools: + - search_codebase # Search repository files + - read_file # Read file contents + - search_web # Web search via SearXNG + searxng_url: "" # SearXNG instance URL (or use SEARXNG_URL env var) +``` + +## Interaction Settings + +### Customizing the Bot Name + +The `mention_prefix` controls what trigger the bot responds to. You can change it to any name you prefer: + +```yaml +interaction: + mention_prefix: "@bartender" # Users will type @bartender to invoke the bot +``` + +**Important:** When changing the bot name, you must also update the workflow files: + +1. Edit `.github/workflows/ai-comment-reply.yml` and `ai-chat.yml` (for GitHub) +2. 
Edit `.gitea/workflows/ai-comment-reply.yml` and `ai-chat.yml` (for Gitea) +3. Change the `if:` condition to match your new prefix: + ```yaml + if: contains(github.event.comment.body, '@bartender') + ``` + +**Example bot names:** +- `@ai-bot` - Default, generic +- `@bartender` - Friendly, conversational +- `@uni` - Short, quick to type +- `@joey` - Personal assistant +- `@codebot` - Technical focus + +```yaml +interaction: + respond_to_mentions: true + mention_prefix: "@ai-bot" + commands: + - explain # Explain code/issue + - suggest # Suggest solutions + - security # Run security check + - summarize # Summarize content +``` + +## Label Mappings + +```yaml +labels: + priority: + high: "priority: high" + medium: "priority: medium" + low: "priority: low" + type: + bug: "type: bug" + feature: "type: feature" + question: "type: question" + docs: "type: documentation" + status: + ai_approved: "ai-approved" + ai_changes_required: "ai-changes-required" + ai_reviewed: "ai-reviewed" +``` + +## Enterprise Settings + +```yaml +enterprise: + audit_log: true + audit_path: "/var/log/ai-review/" + metrics_enabled: true + rate_limit: + requests_per_minute: 30 + max_concurrent: 4 +``` + +## Security Configuration + +```yaml +security: + enabled: true + fail_on_high: true + rules_file: "security/security_rules.yml" # Custom rules +``` + +## Environment Variables + +These override config file settings: + +| Variable | Description | +|----------|-------------| +| `AI_REVIEW_TOKEN` | Gitea/GitHub API token | +| `AI_REVIEW_API_URL` | API base URL (`https://api.github.com` or Gitea URL) | +| `AI_REVIEW_REPO` | Target repository (owner/repo) | +| `OPENAI_API_KEY` | OpenAI API key | +| `OPENROUTER_API_KEY` | OpenRouter API key | +| `OLLAMA_HOST` | Ollama server URL | +| `SEARXNG_URL` | SearXNG instance URL for web search | +| `AI_AUDIT_PATH` | Audit log directory | + +## Per-Repository Overrides + +Create `.ai-review.yml` in repository root: + +```yaml +# Override global config for this 
repo +agents: + pr: + security_scan: false # Disable security scan + issue: + auto_label: false # Disable auto-labeling + +# Custom labels +labels: + priority: + high: "P0" + medium: "P1" + low: "P2" +``` diff --git a/docs/enterprise.md b/docs/enterprise.md new file mode 100644 index 0000000..867df14 --- /dev/null +++ b/docs/enterprise.md @@ -0,0 +1,223 @@ +# Enterprise Features + +Advanced features for enterprise deployments. + +## Audit Logging + +All AI actions are logged for compliance and debugging. + +### Configuration + +```yaml +enterprise: + audit_log: true + audit_path: "/var/log/ai-review/" +``` + +### Log Format + +Logs are stored as JSONL (JSON Lines) with daily rotation: + +``` +/var/log/ai-review/audit-2024-01-15.jsonl +``` + +Each line is a JSON object: + +```json +{ + "timestamp": "2024-01-15T10:30:45.123Z", + "action": "review_pr", + "agent": "PRAgent", + "repository": "org/repo", + "success": true, + "details": { + "pr_number": 123, + "severity": "MEDIUM", + "issues_found": 3 + } +} +``` + +### Actions Logged + +| Action | Description | +|--------|-------------| +| `review_pr` | PR review completed | +| `triage_issue` | Issue triaged | +| `llm_call` | LLM API call made | +| `comment_posted` | Comment created/updated | +| `labels_applied` | Labels added | +| `security_scan` | Security scan completed | + +### Querying Logs + +```python +from enterprise import get_audit_logger + +logger = get_audit_logger() + +# Get all logs for a date range +logs = logger.get_logs( + start_date="2024-01-01", + end_date="2024-01-31", + action="review_pr", + repository="org/repo", +) + +# Generate summary report +report = logger.generate_report( + start_date="2024-01-01", + end_date="2024-01-31", +) +print(f"Total events: {report['total_events']}") +print(f"Success rate: {report['success_rate']:.1%}") +``` + +--- + +## Metrics & Observability + +Track performance and usage metrics. 
+ +### Configuration + +```yaml +enterprise: + metrics_enabled: true +``` + +### Available Metrics + +**Counters:** +- `ai_review_requests_total` - Total requests processed +- `ai_review_requests_success` - Successful requests +- `ai_review_requests_failed` - Failed requests +- `ai_review_llm_calls_total` - Total LLM API calls +- `ai_review_llm_tokens_total` - Total tokens consumed +- `ai_review_comments_posted` - Comments posted +- `ai_review_security_findings` - Security issues found + +**Gauges:** +- `ai_review_active_requests` - Currently processing + +**Histograms:** +- `ai_review_request_duration_seconds` - Request latency +- `ai_review_llm_duration_seconds` - LLM call latency + +### Getting Metrics + +```python +from enterprise import get_metrics + +metrics = get_metrics() + +# Get summary +summary = metrics.get_summary() +print(f"Total requests: {summary['requests']['total']}") +print(f"Success rate: {summary['requests']['success_rate']:.1%}") +print(f"Avg latency: {summary['latency']['avg_ms']:.0f}ms") +print(f"P95 latency: {summary['latency']['p95_ms']:.0f}ms") +print(f"LLM tokens used: {summary['llm']['tokens']}") + +# Export Prometheus format +prometheus_output = metrics.export_prometheus() +``` + +### Prometheus Integration + +Expose metrics endpoint: + +```python +from flask import Flask +from enterprise import get_metrics + +app = Flask(__name__) + +@app.route("/metrics") +def metrics(): + return get_metrics().export_prometheus() +``` + +--- + +## Rate Limiting + +Prevent API overload and manage costs. 
+ +### Configuration + +```yaml +enterprise: + rate_limit: + requests_per_minute: 30 + max_concurrent: 4 +``` + +### Built-in Rate Limiting + +The `BaseAgent` class includes automatic rate limiting: + +```python +class BaseAgent: + def __init__(self): + self._min_request_interval = 1.0 # seconds + + def _rate_limit(self): + elapsed = time.time() - self._last_request_time + if elapsed < self._min_request_interval: + time.sleep(self._min_request_interval - elapsed) +``` + +--- + +## Queue Management + +The dispatcher handles concurrent execution: + +```python +dispatcher = Dispatcher(max_workers=4) +``` + +For high-volume environments, use async dispatch: + +```python +future = dispatcher.dispatch_async(event_type, event_data, owner, repo) +# Continue with other work +result = future.result() # Block when needed +``` + +--- + +## Security Considerations + +### Token Permissions + +Minimum required permissions for `AI_REVIEW_TOKEN`: +- `repo:read` - Read repository contents +- `repo:write` - Create branches (if needed) +- `issue:read` - Read issues and PRs +- `issue:write` - Create comments, labels + +### Network Isolation + +For air-gapped environments, use Ollama: + +```yaml +provider: ollama + +# Internal network address +# Set via environment: OLLAMA_HOST=http://ollama.internal:11434 +``` + +### Data Privacy + +By default: +- Code is sent to LLM provider for analysis +- Review comments are stored in Gitea +- Audit logs are stored locally + +For sensitive codebases: +1. Use self-hosted Ollama +2. Disable external LLM providers +3. Review audit log retention policies diff --git a/docs/future_roadmap.md b/docs/future_roadmap.md new file mode 100644 index 0000000..a958686 --- /dev/null +++ b/docs/future_roadmap.md @@ -0,0 +1,82 @@ +# Future Features Roadmap + +This document outlines the strategic plan for evolving the AI Code Review system. These features are proposed for future implementation to enhance security coverage, context awareness, and user interaction. 
+ +--- + +## Phase 1: Advanced Security Scanning + +Expand the current 17-rule regex scanner with dedicated industry-standard tools for **Static Application Security Testing (SAST)** and **Software Composition Analysis (SCA)**. + +### Proposed Integrations + +| Tool | Type | Purpose | Implementation Plan | +|------|------|---------|---------------------| +| **Bandit** | SAST | Analyze Python code for common vulnerability patterns (e.g., `exec`, weak crypto). | Run `bandit -r . -f json` and parse results into the review report. | +| **Semgrep** | SAST | Polyglot scanning with custom rule support. | Integrate `semgrep --config=p/security-audit` for broader language support (JS, Go, Java). | +| **Safety** | SCA | Check installed dependencies against known vulnerability databases. | Run `safety check --json` during CI to flag vulnerable packages in `requirements.txt`. | +| **Trivy** | SCA/Container | Scan container images (Dockerfiles) and filesystem. | Add a workflow step to run Trivy for container-based projects. | + +**Impact:** significantly reduces false negatives and covers dependency chain risks (Supply Chain Security). + +--- + +## Phase 2: "Chat with Codebase" (RAG) + +Move beyond single-file context by implementing **Retrieval-Augmented Generation (RAG)**. This allows the AI to answer questions like *"Where is authentication handled?"* by searching the entire codebase semantically. + +### Architecture + +1. **Vector Database:** + * **ChromaDB** or **Qdrant**: Lightweight, open-source choices for storing code embeddings. +2. **Embeddings Model:** + * **OpenAI `text-embedding-3-small`** or **FastEmbed**: To convert code chunks (functions/classes) into vectors. +3. **Workflow:** + * **Index:** Run a nightly job to parse the codebase -> chunk it -> embed it -> store in Vector DB. + * **Query:** When `@ai-bot` receives a question, convert the question to a vector -> search Vector DB -> inject relevant snippets into the LLM prompt. 
+ +**Impact:** Enables high-accuracy architectural advice and deep-dive explanations spanning multiple files. + +--- + +## Phase 3: Interactive Code Repair + +Transform the bot from a passive reviewer into an active collaborator. + +### Features + +* **`@ai-bot apply `**: + * The bot generates a secure `git patch` for a specific recommendation. + * The system commits the patch directly to the PR branch. +* **Refactoring Assistance**: + * Command: `@ai-bot refactor this function to use dependency injection`. + * Bot proposes the changed code block and offers to commit it. + +**Risk Mitigation:** +* Require human approval (comment reply) before any commit is pushed. +* Run tests automatically after bot commits. + +--- + +## Phase 4: Enterprise Dashboard + +Provide a high-level view of engineering health across the organization. + +### Metrics to Visualize + +* **Security Health:** Trend of High/Critical issues over time. +* **Code Quality:** Technical debt accumulation vs. reduction rate. +* **Review Velocity:** Average time to AI review vs. Human review. +* **Bot Usage:** Most frequent commands and value-add interactions. + +### Tech Stack +* **Prometheus** (already implemented) + **Grafana**: For time-series tracking. +* **Streamlit** / **Next.js**: For a custom management console to configure rules and view logs. + +--- + +## Strategic Recommendations + +1. **Immediate Win:** Implement **Bandit** integration. It is low-effort (Python library) and high-value (detects real vulnerabilities). +2. **High Impact:** **Safety** dependency scanning. Vulnerable dependencies are the #1 attack vector for modern apps. +3. **Long Term:** Work on **Vector DB** integration only after the core review logic is flawless, as it introduces significant infrastructure complexity. 
diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..0899c6a --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,142 @@ +# Getting Started + +This guide will help you set up the AI Code Review system for your Gitea repositories. + +## Prerequisites + +- Gitea instance (self-hosted or managed) +- Python 3.11+ +- LLM API access (OpenAI, OpenRouter, or Ollama) + +--- + +## Step 1: Create a Bot Account + +1. Create a new Gitea user account for the bot (e.g., `ai-reviewer`) +2. Generate an access token with these permissions: + - `repo` - Full repository access + - `issue` - Issue read/write access +3. Save the token securely + +--- + +## Step 2: Configure Organization Secrets + +In your Gitea organization or repository settings, add these secrets: + +| Secret | Description | +|--------|-------------| +| `AI_REVIEW_TOKEN` | Bot's Gitea access token | +| `OPENAI_API_KEY` | OpenAI API key (if using OpenAI) | +| `OPENROUTER_API_KEY` | OpenRouter key (if using OpenRouter) | +| `OLLAMA_HOST` | Ollama URL (if using Ollama, e.g., `http://localhost:11434`) | + +--- + +## Step 3: Add Workflows to Your Repository + +Copy the workflow files from this repository to your target repo: + +```bash +# Create workflows directory +mkdir -p .gitea/workflows + +# Copy workflow files +# Option 1: Copy manually from this repo's .gitea/workflows/ +# Option 2: Reference this repo in your workflows (see README) +``` + +### Workflow Files: + +| File | Trigger | Purpose | +|------|---------|---------| +| `enterprise-ai-review.yml` | PR opened/updated | Run AI code review | +| `ai-issue-review.yml` | Issue opened, @ai-bot | Triage issues & respond to commands | +| `ai-codebase-review.yml` | Weekly/manual | Analyze codebase health | + +--- + +## Step 4: Create Labels + +Create these labels in your repository for auto-labeling: + +**Priority Labels:** +- `priority: high` (red) +- `priority: medium` (yellow) +- `priority: low` (green) + +**Type 
Labels:** +- `type: bug` +- `type: feature` +- `type: question` +- `type: documentation` + +**AI Status Labels:** +- `ai-approved` +- `ai-changes-required` +- `ai-reviewed` + +--- + +## Step 5: Test the Setup + +### Test PR Review: +1. Create a new pull request +2. Wait for the AI review workflow to run +3. Check for the AI review comment + +### Test Issue Triage: +1. Create a new issue +2. The AI should automatically triage and comment + +### Test @ai-bot Commands: +1. On any issue, comment: `@ai-bot summarize` +2. The AI should respond with a summary + +--- + +## Troubleshooting + +### Common Issues: + +**"Missing token" error:** +- Verify `AI_REVIEW_TOKEN` is set in secrets +- Ensure the token has correct permissions + +**"LLM call failed" error:** +- Verify your LLM API key is set +- Check the `provider` setting in `config.yml` + +**Workflow not triggering:** +- Verify workflow files are in `.gitea/workflows/` +- Check that Actions are enabled for your repository + +See [Troubleshooting Guide](troubleshooting.md) for more. + +--- + +## Helper: CLI Usage + +If you need to run the agents manually (e.g. for debugging or local testing), you can use the CLI: + +```bash +# Review a pull request +python main.py pr owner/repo 123 + +# Triage a new issue +python main.py issue owner/repo 456 + +# Handle @ai-bot command in comment +python main.py comment owner/repo 456 "@ai-bot summarize" + +# Analyze codebase +python main.py codebase owner/repo +``` + +--- + +## Next Steps + +- [Configuration Reference](configuration.md) - Customize behavior +- [Agents Documentation](agents.md) - Learn about each agent +- [Security Scanning](security.md) - Understand security rules diff --git a/docs/security.md b/docs/security.md new file mode 100644 index 0000000..5e56789 --- /dev/null +++ b/docs/security.md @@ -0,0 +1,163 @@ +# Security Scanning + +The security scanner detects vulnerabilities aligned with OWASP Top 10. 
+ +## Supported Rules + +### A01:2021 – Broken Access Control + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC001 | HIGH | Hardcoded credentials (passwords, API keys) | +| SEC002 | HIGH | Exposed private keys | + +### A02:2021 – Cryptographic Failures + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC003 | MEDIUM | Weak hash algorithms (MD5, SHA1) | +| SEC004 | MEDIUM | Non-cryptographic random for security | + +### A03:2021 – Injection + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC005 | HIGH | SQL injection via string formatting | +| SEC006 | HIGH | Command injection in subprocess | +| SEC007 | HIGH | eval() usage | +| SEC008 | MEDIUM | XSS via innerHTML | + +### A04:2021 – Insecure Design + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC009 | MEDIUM | Debug mode enabled | + +### A05:2021 – Security Misconfiguration + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC010 | MEDIUM | CORS wildcard (*) | +| SEC011 | HIGH | SSL verification disabled | + +### A07:2021 – Authentication Failures + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC012 | HIGH | Hardcoded JWT secrets | + +### A08:2021 – Integrity Failures + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC013 | MEDIUM | Pickle deserialization | + +### A09:2021 – Logging Failures + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC014 | MEDIUM | Logging sensitive data | + +### A10:2021 – Server-Side Request Forgery + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC015 | MEDIUM | SSRF via dynamic URLs | + +### Additional Rules + +| Rule | Severity | Description | +|------|----------|-------------| +| SEC016 | LOW | Hardcoded IP addresses | +| SEC017 | MEDIUM | Security-related TODO/FIXME | + +## Usage + +### In PR Reviews + +Security scanning runs automatically 
during PR review: + +```yaml +agents: + pr: + security_scan: true +``` + +### Standalone + +```python +from security import SecurityScanner + +scanner = SecurityScanner() + +# Scan file content +for finding in scanner.scan_content(code, "file.py"): + print(f"[{finding.severity}] {finding.rule_name}") + print(f" Line {finding.line}: {finding.code_snippet}") + print(f" {finding.description}") + +# Scan git diff +for finding in scanner.scan_diff(diff): + print(f"{finding.file}:{finding.line} - {finding.rule_name}") +``` + +### Get Summary + +```python +findings = list(scanner.scan_content(code, "file.py")) +summary = scanner.get_summary(findings) + +print(f"Total: {summary['total']}") +print(f"HIGH: {summary['by_severity']['HIGH']}") +print(f"Categories: {summary['by_category']}") +``` + +## Custom Rules + +Create `security/security_rules.yml`: + +```yaml +rules: + - id: "CUSTOM001" + name: "Custom Pattern" + pattern: "dangerous_function\\s*\\(" + severity: "HIGH" + category: "Custom" + cwe: "CWE-xxx" + description: "Usage of dangerous function detected" + recommendation: "Use safe_function() instead" +``` + +Load custom rules: + +```python +scanner = SecurityScanner(rules_file="security/custom_rules.yml") +``` + +## CI Integration + +Fail CI on HIGH severity findings: + +```yaml +security: + fail_on_high: true +``` + +Or in code: + +```python +findings = list(scanner.scan_diff(diff)) +high_count = sum(1 for f in findings if f.severity == "HIGH") + +if high_count > 0: + sys.exit(1) +``` + +## CWE References + +All rules include CWE (Common Weakness Enumeration) references: + +- [CWE-78](https://cwe.mitre.org/data/definitions/78.html): OS Command Injection +- [CWE-79](https://cwe.mitre.org/data/definitions/79.html): XSS +- [CWE-89](https://cwe.mitre.org/data/definitions/89.html): SQL Injection +- [CWE-798](https://cwe.mitre.org/data/definitions/798.html): Hardcoded Credentials diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 
0000000..7029e73 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,263 @@ +# Troubleshooting + +Common issues and solutions for the AI Code Review system. + +## Installation Issues + +### `ModuleNotFoundError: No module named 'requests'` + +Install dependencies: + +```bash +pip install requests pyyaml +``` + +### `ImportError: cannot import name 'BaseAgent'` + +Ensure you're running from the correct directory: + +```bash +cd tools/ai-review +python main.py pr owner/repo 123 +``` + +--- + +## Authentication Issues + +### `repository not found` + +**Causes:** +- Bot token lacks access to the repository +- Repository path is incorrect + +**Solutions:** +1. Verify token has `repo` permissions +2. Check repository path format: `owner/repo` +3. Ensure token can access both the target repo and the AI tooling repo + +### `401 Unauthorized` + +**Causes:** +- Invalid or expired token +- Missing token in environment + +**Solutions:** +1. Regenerate the bot token +2. Verify `AI_REVIEW_TOKEN` is set correctly +3. Check organization secret scope is "All Repositories" + +### `403 Forbidden` + +**Causes:** +- Token lacks write permissions +- Repository is private and token doesn't have access + +**Solutions:** +1. Ensure token has `issue:write` permission +2. Add bot account as collaborator to private repos + +--- + +## LLM Issues + +### `OPENAI_API_KEY not set` + +Set the environment variable: + +```bash +export OPENAI_API_KEY="sk-..." +``` + +Or in workflow: + +```yaml +env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} +``` + +### `Rate limit exceeded` + +**Causes:** +- Too many requests to LLM provider +- API quota exhausted + +**Solutions:** +1. Increase rate limit interval in config +2. Switch to a different provider temporarily +3. Check your API plan limits + +### `JSON decode error` from LLM + +**Causes:** +- LLM returned non-JSON response +- Response was truncated + +**Solutions:** +1. Increase `max_tokens` in config +2. Check LLM response in logs +3. 
Improve prompt to enforce JSON output + +--- + +## Workflow Issues + +### Workflow doesn't trigger + +**Causes:** +- Workflow file not in correct location +- Event type not configured + +**Solutions:** +1. Ensure workflow is in `.gitea/workflows/` +2. Check event types match your needs: + ```yaml + on: + pull_request: + types: [opened, synchronize] + ``` +3. Verify Gitea Actions is enabled for the repository + +### `review.py not found` + +**Causes:** +- Central repo checkout failed +- Path is incorrect + +**Solutions:** +1. Verify the checkout step has correct repository and path +2. Check bot token has access to the AI tooling repo +3. Ensure path matches: `.ai-review/tools/ai-review/main.py` + +### PR comments not appearing + +**Causes:** +- Token lacks issue write permission +- API URL is incorrect + +**Solutions:** +1. Check `AI_REVIEW_API_URL` is correct +2. Verify token has `issue:write` permission +3. Check workflow logs for API errors + +### @ai-bot edits the issue instead of replying + +**Causes:** +- Workflow is using the wrong CLI command for comments +- `event_type` is incorrectly set to "issues" + +**Solutions:** +1. Ensure your workflow uses the `comment` command for mentions: + ```yaml + python main.py comment owner/repo 123 "@ai-bot ..." + ``` +2. Verify you have separate jobs for `issues` vs `issue_comment` events (see [Workflows](workflows.md)) + +--- + +## Label Issues + +### Labels not being applied + +**Causes:** +- Labels don't exist in repository +- Label names don't match config + +**Solutions:** +1. Create labels matching your config: + - `priority: high` + - `type: bug` + - `ai-approved` +2. Or update config to match existing labels: + ```yaml + labels: + priority: + high: "P0" # Your label name + ``` + +### `label not found` error + +The agent gracefully handles missing labels. 
Create labels manually or disable auto-labeling: + +```yaml +agents: + issue: + auto_label: false +``` + +--- + +## Performance Issues + +### Reviews are slow + +**Causes:** +- Large diffs taking long to process +- LLM response time + +**Solutions:** +1. Reduce max diff lines: + ```yaml + review: + max_diff_lines: 500 + ``` +2. Use a faster model: + ```yaml + model: + openai: gpt-4.1-mini # Faster than gpt-4 + ``` +3. Consider Ollama for local, faster inference + +### Timeout errors + +Increase timeout in API calls or use async processing: + +```python +client = GiteaClient(timeout=60) # Increase from default 30 +``` + +--- + +## Debugging + +### Enable verbose logging + +```bash +python main.py -v pr owner/repo 123 +``` + +### Check workflow logs + +1. Go to repository -> Actions +2. Click on the failed workflow run +3. Expand job steps to see output + +### Test locally + +```bash +# Set environment variables +export AI_REVIEW_TOKEN="your_token" +export AI_REVIEW_API_URL="https://your-gitea/api/v1" +export OPENAI_API_KEY="sk-..." + +# Run locally +cd tools/ai-review +python main.py pr owner/repo 123 +``` + +### Validate Python syntax + +```bash +python -m py_compile main.py +``` + +--- + +## Getting Help + +1. Check the [documentation](README.md) +2. Search existing issues in the repository +3. Create a new issue with: + - Steps to reproduce + - Error messages + - Environment details (Gitea version, Python version) diff --git a/docs/workflows.md b/docs/workflows.md new file mode 100644 index 0000000..1a21fce --- /dev/null +++ b/docs/workflows.md @@ -0,0 +1,389 @@ +# Workflows + +This document provides ready-to-use workflow files for integrating AI code review into your repositories. Workflows are provided for both **GitHub Actions** and **Gitea Actions**. 
+ +--- + +## Platform Comparison + +| Feature | GitHub | Gitea | +|---------|--------|-------| +| Context variable | `github.*` | `gitea.*` | +| Default token | `GITHUB_TOKEN` | `AI_REVIEW_TOKEN` (custom) | +| API URL | `https://api.github.com` | Your Gitea instance URL | +| Tools location | Same repo (`tools/ai-review`) | Checkout from central repo | + +--- + +## GitHub Workflows + +### PR Review Workflow + +```yaml +# .github/workflows/ai-review.yml +name: AI Code Review + +on: + pull_request: + types: [opened, synchronize] + +jobs: + ai-review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Review + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd tools/ai-review + python main.py pr ${{ github.repository }} ${{ github.event.pull_request.number }} +``` + +### Issue Triage Workflow + +```yaml +# .github/workflows/ai-issue-triage.yml +name: AI Issue Triage + +on: + issues: + types: [opened, labeled] + +jobs: + ai-triage: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Issue Triage + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd tools/ai-review + python main.py issue ${{ github.repository }} ${{ github.event.issue.number }} \ + --title "${{ github.event.issue.title }}" +``` + +### Comment Reply Workflow (includes Bartender Chat) + +```yaml +# .github/workflows/ai-comment-reply.yml +name: AI Comment Reply + +on: + issue_comment: + types: [created] + +jobs: + 
ai-reply: + runs-on: ubuntu-latest + if: contains(github.event.comment.body, '@ai-bot') + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Comment Response + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SEARXNG_URL: ${{ secrets.SEARXNG_URL }} + run: | + cd tools/ai-review + python main.py comment ${{ github.repository }} ${{ github.event.issue.number }} \ + "${{ github.event.comment.body }}" +``` + +### Codebase Analysis Workflow + +```yaml +# .github/workflows/ai-codebase-review.yml +name: AI Codebase Analysis + +on: + schedule: + - cron: "0 0 * * 0" # Weekly on Sunday + workflow_dispatch: # Manual trigger + +jobs: + ai-codebase: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run Codebase Analysis + env: + AI_REVIEW_TOKEN: ${{ secrets.GITHUB_TOKEN }} + AI_REVIEW_REPO: ${{ github.repository }} + AI_REVIEW_API_URL: https://api.github.com + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd tools/ai-review + python main.py codebase ${{ github.repository }} +``` + +--- + +## Gitea Workflows + +### PR Review Workflow + +```yaml +# .gitea/workflows/enterprise-ai-review.yml +name: AI Code Review + +on: + pull_request: + types: [opened, synchronize] + +jobs: + ai-review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/checkout@v4 + with: + repository: YourOrg/OpenRabbit + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Review + env: + AI_REVIEW_TOKEN: ${{ 
secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd .ai-review/tools/ai-review + python main.py pr ${{ gitea.repository }} ${{ gitea.event.pull_request.number }} +``` + +### Issue Triage Workflow + +```yaml +# .gitea/workflows/ai-issue-triage.yml +name: AI Issue Triage + +on: + issues: + types: [opened, labeled] + +jobs: + ai-triage: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/checkout@v4 + with: + repository: YourOrg/OpenRabbit + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Issue Triage + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd .ai-review/tools/ai-review + python main.py issue ${{ gitea.repository }} ${{ gitea.event.issue.number }} \ + --title "${{ gitea.event.issue.title }}" +``` + +### Comment Reply Workflow (includes Bartender Chat) + +```yaml +# .gitea/workflows/ai-comment-reply.yml +name: AI Comment Reply + +on: + issue_comment: + types: [created] + +jobs: + ai-reply: + runs-on: ubuntu-latest + if: contains(github.event.comment.body, '@ai-bot') + steps: + - uses: actions/checkout@v4 + + - uses: actions/checkout@v4 + with: + repository: YourOrg/OpenRabbit + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run AI Comment Response + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SEARXNG_URL: ${{ 
secrets.SEARXNG_URL }} + run: | + cd .ai-review/tools/ai-review + python main.py comment ${{ gitea.repository }} ${{ gitea.event.issue.number }} \ + "${{ gitea.event.comment.body }}" +``` + +### Codebase Analysis Workflow + +```yaml +# .gitea/workflows/ai-codebase-review.yml +name: AI Codebase Analysis + +on: + schedule: + - cron: "0 0 * * 0" # Weekly on Sunday + workflow_dispatch: # Manual trigger + +jobs: + ai-codebase: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/checkout@v4 + with: + repository: YourOrg/OpenRabbit + path: .ai-review + token: ${{ secrets.AI_REVIEW_TOKEN }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - run: pip install requests pyyaml + + - name: Run Codebase Analysis + env: + AI_REVIEW_TOKEN: ${{ secrets.AI_REVIEW_TOKEN }} + AI_REVIEW_REPO: ${{ gitea.repository }} + AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1 + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + cd .ai-review/tools/ai-review + python main.py codebase ${{ gitea.repository }} +``` + +--- + +## Required Secrets + +### GitHub + +| Secret | Required | Description | +|--------|----------|-------------| +| `GITHUB_TOKEN` | Auto | Built-in token (automatic) | +| `OPENAI_API_KEY` | Choose one | OpenAI API key | +| `OPENROUTER_API_KEY` | Choose one | OpenRouter API key | +| `OLLAMA_HOST` | Choose one | Ollama server URL | +| `SEARXNG_URL` | Optional | SearXNG instance for web search | + +### Gitea + +| Secret | Required | Description | +|--------|----------|-------------| +| `AI_REVIEW_TOKEN` | Yes | Gitea bot access token | +| `OPENAI_API_KEY` | Choose one | OpenAI API key | +| `OPENROUTER_API_KEY` | Choose one | OpenRouter API key | +| `OLLAMA_HOST` | Choose one | Ollama server URL | +| `SEARXNG_URL` | Optional | SearXNG instance for web search | + +--- + +## Customization + +### For GitHub + +The tools are included in the same repository under `tools/ai-review`, so no 
additional checkout is needed.
+
+### For Gitea
+
+Replace the repository reference with your OpenRabbit fork:
+
+```yaml
+repository: YourOrg/OpenRabbit
+```
+
+Replace the API URL with your Gitea instance:
+
+```yaml
+AI_REVIEW_API_URL: https://your-gitea.example.com/api/v1
+```
+
+---
+
+## Chat/Bartender Workflow
+
+Both platforms support the Bartender chat agent through the comment reply workflow. When `@ai-bot` is mentioned with a question (not a specific command like `summarize`), the Chat Agent handles it with tool calling capabilities.
+
+To enable web search, set the `SEARXNG_URL` secret to your SearXNG instance URL.
+
+**Example usage:**
+```
+@ai-bot How do I configure rate limiting?
+@ai-bot Find all authentication-related files
+@ai-bot What does the dispatcher module do?
+```
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..a107bff
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+testpaths = tests
+pythonpath = tools/ai-review
+addopts = -v --tb=short
diff --git a/tests/test_ai_review.py b/tests/test_ai_review.py
new file mode 100644
index 0000000..ba85612
--- /dev/null
+++ b/tests/test_ai_review.py
@@ -0,0 +1,257 @@
+"""Test Suite for AI Code Review Workflow
+
+Tests for verifying prompt formatting, agent logic, and core functionality. 
+Run with: pytest tests/ -v +""" + +import os +import sys + +# Add the tools directory to path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "ai-review")) + +import pytest + + +class TestPromptFormatting: + """Test that all prompts can be formatted without errors.""" + + def get_prompt_path(self, name: str) -> str: + """Get the full path to a prompt file.""" + return os.path.join( + os.path.dirname(__file__), + "..", "tools", "ai-review", "prompts", f"{name}.md" + ) + + def load_prompt(self, name: str) -> str: + """Load a prompt file.""" + path = self.get_prompt_path(name) + with open(path) as f: + return f.read() + + def test_issue_triage_prompt_formatting(self): + """Test that issue_triage.md can be formatted with placeholders.""" + prompt = self.load_prompt("issue_triage") + + # This should NOT raise a KeyError + formatted = prompt.format( + title="Test Issue Title", + body="This is the issue body content", + author="testuser", + existing_labels="bug, urgent" + ) + + assert "Test Issue Title" in formatted + assert "This is the issue body content" in formatted + assert "testuser" in formatted + assert "bug, urgent" in formatted + # JSON example should still be present (curly braces escaped) + assert '"type"' in formatted + assert '"priority"' in formatted + + def test_issue_response_prompt_formatting(self): + """Test that issue_response.md can be formatted with placeholders.""" + prompt = self.load_prompt("issue_response") + + formatted = prompt.format( + issue_type="bug", + priority="high", + title="Bug Report", + body="Description of the bug", + triage_analysis="This is a high priority bug" + ) + + assert "bug" in formatted + assert "high" in formatted + assert "Bug Report" in formatted + # JSON example should still be present + assert '"comment"' in formatted + + def test_base_prompt_no_placeholders(self): + """Test that base.md loads correctly (no placeholders needed).""" + prompt = self.load_prompt("base") + + # Should contain 
key elements + assert "security" in prompt.lower() + assert "JSON" in prompt + assert "severity" in prompt.lower() + + def test_prompts_have_escaped_json(self): + """Verify JSON examples use double curly braces.""" + for prompt_name in ["issue_triage", "issue_response"]: + prompt = self.load_prompt(prompt_name) + + # Check that format() doesn't fail + try: + # Try with minimal placeholders + if prompt_name == "issue_triage": + prompt.format(title="t", body="b", author="a", existing_labels="l") + elif prompt_name == "issue_response": + prompt.format(issue_type="t", priority="p", title="t", body="b", triage_analysis="a") + except KeyError as e: + pytest.fail(f"Prompt {prompt_name} has unescaped curly braces: {e}") + + +class TestImports: + """Test that all modules can be imported correctly.""" + + def test_import_agents(self): + """Test importing agent classes.""" + from agents.base_agent import BaseAgent, AgentContext, AgentResult + from agents.issue_agent import IssueAgent + from agents.pr_agent import PRAgent + from agents.codebase_agent import CodebaseAgent + + assert BaseAgent is not None + assert IssueAgent is not None + assert PRAgent is not None + assert CodebaseAgent is not None + + def test_import_clients(self): + """Test importing client classes.""" + from clients.gitea_client import GiteaClient + from clients.llm_client import LLMClient + + assert GiteaClient is not None + assert LLMClient is not None + + def test_import_security(self): + """Test importing security scanner.""" + from security.security_scanner import SecurityScanner + + assert SecurityScanner is not None + + def test_import_enterprise(self): + """Test importing enterprise features.""" + from enterprise.audit_logger import AuditLogger + from enterprise.metrics import MetricsCollector + + assert AuditLogger is not None + assert MetricsCollector is not None + + def test_import_dispatcher(self): + """Test importing dispatcher.""" + from dispatcher import Dispatcher + + assert Dispatcher is not 
None + + +class TestSecurityScanner: + """Test security scanner pattern detection.""" + + def test_detects_hardcoded_secret(self): + """Test detection of hardcoded secrets.""" + from security.security_scanner import SecurityScanner + + scanner = SecurityScanner() + code = ''' +API_KEY = "sk-1234567890abcdef" +''' + findings = list(scanner.scan_content(code, "test.py")) + assert len(findings) >= 1 + assert any(f.severity == "HIGH" for f in findings) + + def test_detects_eval(self): + """Test detection of eval usage.""" + from security.security_scanner import SecurityScanner + + scanner = SecurityScanner() + code = ''' +result = eval(user_input) +''' + findings = list(scanner.scan_content(code, "test.py")) + assert len(findings) >= 1 + assert any("eval" in f.rule_name.lower() for f in findings) + + def test_no_false_positives_on_clean_code(self): + """Test that clean code doesn't trigger false positives.""" + from security.security_scanner import SecurityScanner + + scanner = SecurityScanner() + code = ''' +def hello(): + print("Hello, world!") + return 42 +''' + findings = list(scanner.scan_content(code, "test.py")) + # Should have no HIGH severity issues for clean code + high_findings = [f for f in findings if f.severity == "HIGH"] + assert len(high_findings) == 0 + + +class TestAgentContext: + """Test agent context and result dataclasses.""" + + def test_agent_context_creation(self): + """Test creating AgentContext.""" + from agents.base_agent import AgentContext + + context = AgentContext( + owner="testowner", + repo="testrepo", + event_type="issues", + event_data={"action": "opened"}, + config={} + ) + + assert context.owner == "testowner" + assert context.repo == "testrepo" + assert context.event_type == "issues" + + def test_agent_result_creation(self): + """Test creating AgentResult.""" + from agents.base_agent import AgentResult + + result = AgentResult( + success=True, + message="Test passed", + data={"key": "value"}, + actions_taken=["action1", "action2"] 
+ ) + + assert result.success is True + assert result.message == "Test passed" + assert len(result.actions_taken) == 2 + + +class TestMetrics: + """Test metrics collection.""" + + def test_counter_increment(self): + """Test counter metrics.""" + from enterprise.metrics import Counter + + counter = Counter("test_counter") + assert counter.value == 0 + counter.inc() + assert counter.value == 1 + counter.inc(5) + assert counter.value == 6 + + def test_histogram_observation(self): + """Test histogram metrics.""" + from enterprise.metrics import Histogram + + hist = Histogram("test_histogram") + hist.observe(0.1) + hist.observe(0.5) + hist.observe(1.0) + + assert hist.count == 3 + assert hist.sum == 1.6 + + def test_metrics_collector_summary(self): + """Test metrics collector summary.""" + from enterprise.metrics import MetricsCollector + + collector = MetricsCollector() + collector.record_request_start("TestAgent") + collector.record_request_end("TestAgent", success=True, duration_seconds=0.5) + + summary = collector.get_summary() + assert summary["requests"]["total"] == 1 + assert summary["requests"]["success"] == 1 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tools/ai-review/agents/__init__.py b/tools/ai-review/agents/__init__.py new file mode 100644 index 0000000..5403b16 --- /dev/null +++ b/tools/ai-review/agents/__init__.py @@ -0,0 +1,19 @@ +"""AI Review Agents Package + +This package contains the modular agent implementations for the +enterprise AI code review system. 
+""" + +from agents.base_agent import BaseAgent, AgentContext, AgentResult +from agents.issue_agent import IssueAgent +from agents.pr_agent import PRAgent +from agents.codebase_agent import CodebaseAgent + +__all__ = [ + "BaseAgent", + "AgentContext", + "AgentResult", + "IssueAgent", + "PRAgent", + "CodebaseAgent", +] diff --git a/tools/ai-review/agents/base_agent.py b/tools/ai-review/agents/base_agent.py new file mode 100644 index 0000000..d635e09 --- /dev/null +++ b/tools/ai-review/agents/base_agent.py @@ -0,0 +1,257 @@ +"""Base Agent + +Abstract base class for all AI agents. Provides common functionality +for Gitea API interaction, LLM calls, logging, and rate limiting. +""" + +import logging +import os +import time +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + +import yaml + +from clients.gitea_client import GiteaClient +from clients.llm_client import LLMClient, LLMResponse + + +@dataclass +class AgentContext: + """Context passed to agent during execution.""" + + owner: str + repo: str + event_type: str + event_data: dict + config: dict = field(default_factory=dict) + + +@dataclass +class AgentResult: + """Result from agent execution.""" + + success: bool + message: str + data: dict = field(default_factory=dict) + actions_taken: list[str] = field(default_factory=list) + error: str | None = None + + +class BaseAgent(ABC): + """Abstract base class for AI agents.""" + + # Marker to identify AI-generated comments + AI_MARKER = "" + + # Disclaimer text + AI_DISCLAIMER = ( + "**Note:** This review was generated by an AI assistant. " + "While it aims to be accurate and helpful, it may contain mistakes " + "or miss important issues. Please verify all findings before taking action." + ) + + def __init__( + self, + config: dict | None = None, + gitea_client: GiteaClient | None = None, + llm_client: LLMClient | None = None, + ): + """Initialize the base agent. + + Args: + config: Agent configuration dictionary. 
+ gitea_client: Optional pre-configured Gitea client. + llm_client: Optional pre-configured LLM client. + """ + self.config = config or self._load_config() + self.gitea = gitea_client or GiteaClient() + self.llm = llm_client or LLMClient.from_config(self.config) + self.logger = logging.getLogger(self.__class__.__name__) + + # Rate limiting + self._last_request_time = 0.0 + self._min_request_interval = 1.0 # seconds + + @staticmethod + def _load_config() -> dict: + """Load configuration from config.yml.""" + config_path = os.path.join(os.path.dirname(__file__), "..", "config.yml") + if os.path.exists(config_path): + with open(config_path) as f: + return yaml.safe_load(f) + return {} + + def _rate_limit(self): + """Apply rate limiting between requests.""" + elapsed = time.time() - self._last_request_time + if elapsed < self._min_request_interval: + time.sleep(self._min_request_interval - elapsed) + self._last_request_time = time.time() + + def load_prompt(self, prompt_name: str) -> str: + """Load a prompt template from the prompts directory. + + Args: + prompt_name: Name of the prompt file (without extension). + + Returns: + Prompt template content. + """ + prompt_path = os.path.join( + os.path.dirname(__file__), "..", "prompts", f"{prompt_name}.md" + ) + if not os.path.exists(prompt_path): + raise FileNotFoundError(f"Prompt not found: {prompt_path}") + with open(prompt_path) as f: + return f.read() + + def call_llm(self, prompt: str, **kwargs) -> LLMResponse: + """Make a rate-limited call to the LLM. + + Args: + prompt: The prompt to send. + **kwargs: Additional LLM options. + + Returns: + LLM response. + """ + self._rate_limit() + return self.llm.call(prompt, **kwargs) + + def call_llm_json(self, prompt: str, **kwargs) -> dict: + """Make a rate-limited call and parse JSON response. + + Args: + prompt: The prompt to send. + **kwargs: Additional LLM options. + + Returns: + Parsed JSON response. 
+ """ + self._rate_limit() + return self.llm.call_json(prompt, **kwargs) + + def find_ai_comment( + self, + owner: str, + repo: str, + issue_index: int, + marker: str | None = None, + ) -> int | None: + """Find an existing AI comment by marker. + + Args: + owner: Repository owner. + repo: Repository name. + issue_index: Issue or PR number. + marker: Custom marker to search for. Defaults to AI_MARKER. + + Returns: + Comment ID if found, None otherwise. + """ + marker = marker or self.AI_MARKER + comments = self.gitea.list_issue_comments(owner, repo, issue_index) + + for comment in comments: + if marker in comment.get("body", ""): + return comment["id"] + + return None + + def upsert_comment( + self, + owner: str, + repo: str, + issue_index: int, + body: str, + marker: str | None = None, + ) -> dict: + """Create or update an AI comment. + + Args: + owner: Repository owner. + repo: Repository name. + issue_index: Issue or PR number. + body: Comment body (marker will be prepended if not present). + marker: Custom marker. Defaults to AI_MARKER. + + Returns: + Created or updated comment. + """ + marker = marker or self.AI_MARKER + + # Ensure marker is in the body + if marker not in body: + body = f"{marker}\n{body}" + + # Check for existing comment + existing_id = self.find_ai_comment(owner, repo, issue_index, marker) + + if existing_id: + return self.gitea.update_issue_comment(owner, repo, existing_id, body) + else: + return self.gitea.create_issue_comment(owner, repo, issue_index, body) + + def format_with_disclaimer(self, content: str) -> str: + """Add AI disclaimer to content. + + Args: + content: The main content. + + Returns: + Content with disclaimer prepended. + """ + return f"{self.AI_DISCLAIMER}\n\n{self.AI_MARKER}\n{content}" + + @abstractmethod + def execute(self, context: AgentContext) -> AgentResult: + """Execute the agent's main task. + + Args: + context: Execution context with event data. + + Returns: + Result of the agent execution. 
+ """ + pass + + @abstractmethod + def can_handle(self, event_type: str, event_data: dict) -> bool: + """Check if this agent can handle the given event. + + Args: + event_type: Type of event (issue, pull_request, etc). + event_data: Event payload data. + + Returns: + True if this agent can handle the event. + """ + pass + + def run(self, context: AgentContext) -> AgentResult: + """Run the agent with error handling. + + Args: + context: Execution context. + + Returns: + Agent result, including any errors. + """ + try: + self.logger.info( + f"Running {self.__class__.__name__} for {context.owner}/{context.repo}" + ) + result = self.execute(context) + self.logger.info( + f"Completed with success={result.success}: {result.message}" + ) + return result + except Exception as e: + self.logger.exception(f"Agent execution failed: {e}") + return AgentResult( + success=False, + message="Agent execution failed", + error=str(e), + ) diff --git a/tools/ai-review/agents/chat_agent.py b/tools/ai-review/agents/chat_agent.py new file mode 100644 index 0000000..b7b6f04 --- /dev/null +++ b/tools/ai-review/agents/chat_agent.py @@ -0,0 +1,470 @@ +"""Chat Agent (Bartender) + +Interactive AI chat agent with tool use capabilities. +Can search the codebase and web to answer user questions. 
+""" + +import base64 +import logging +import os +import re +from dataclasses import dataclass + +import requests + +from agents.base_agent import AgentContext, AgentResult, BaseAgent +from clients.llm_client import ToolCall + + +@dataclass +class ChatMessage: + """A message in the chat conversation.""" + + role: str # 'user', 'assistant', or 'tool' + content: str + tool_call_id: str | None = None + name: str | None = None # Tool name for tool responses + + +class ChatAgent(BaseAgent): + """Interactive chat agent with tool capabilities.""" + + # Marker for chat responses + CHAT_AI_MARKER = "" + + # Tool definitions in OpenAI format + TOOLS = [ + { + "type": "function", + "function": { + "name": "search_codebase", + "description": "Search the repository codebase for files, functions, classes, or patterns. Use this to find relevant code.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query - can be a filename, function name, class name, or code pattern", + }, + "file_pattern": { + "type": "string", + "description": "Optional file pattern to filter results (e.g., '*.py', 'src/*.js')", + }, + }, + "required": ["query"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "read_file", + "description": "Read the contents of a specific file from the repository.", + "parameters": { + "type": "object", + "properties": { + "filepath": { + "type": "string", + "description": "Path to the file to read", + }, + }, + "required": ["filepath"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "search_web", + "description": "Search the web for information using SearXNG. 
Use this for external documentation, tutorials, or general knowledge.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query", + }, + "categories": { + "type": "string", + "description": "Optional: comma-separated categories (general, images, videos, news, science, it)", + }, + }, + "required": ["query"], + }, + }, + }, + ] + + # System prompt for the chat agent + SYSTEM_PROMPT = """You are Bartender, a helpful AI assistant for code review and development tasks. + +You have access to tools to help answer questions: +- search_codebase: Search the repository for code, files, functions, or patterns +- read_file: Read specific files from the repository +- search_web: Search the web for documentation, tutorials, or external information + +When helping users: +1. Use tools to gather information before answering questions about code +2. Be concise but thorough in your explanations +3. Provide code examples when helpful +4. If you're unsure, say so and suggest alternatives + +Repository context: {owner}/{repo} +""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._searxng_url = self.config.get("agents", {}).get("chat", {}).get( + "searxng_url", os.environ.get("SEARXNG_URL", "") + ) + + def can_handle(self, event_type: str, event_data: dict) -> bool: + """Check if this agent handles the given event.""" + agent_config = self.config.get("agents", {}).get("chat", {}) + if not agent_config.get("enabled", True): + return False + + # Handle issue comment with @ai-bot chat or just @ai-bot + if event_type == "issue_comment": + comment_body = event_data.get("comment", {}).get("body", "") + mention_prefix = self.config.get("interaction", {}).get( + "mention_prefix", "@ai-bot" + ) + # Check if this is a chat request (any @ai-bot mention that isn't a specific command) + if mention_prefix in comment_body: + # Check it's not another specific command + specific_commands = ["summarize", 
"explain", "suggest", "security", "codebase"] + body_lower = comment_body.lower() + for cmd in specific_commands: + if f"{mention_prefix} {cmd}" in body_lower: + return False + return True + + # Handle direct chat command + if event_type == "chat": + return True + + return False + + def execute(self, context: AgentContext) -> AgentResult: + """Execute the chat agent.""" + self.logger.info(f"Starting chat for {context.owner}/{context.repo}") + + # Extract user message + if context.event_type == "issue_comment": + user_message = context.event_data.get("comment", {}).get("body", "") + issue_index = context.event_data.get("issue", {}).get("number") + # Remove the @ai-bot prefix + mention_prefix = self.config.get("interaction", {}).get( + "mention_prefix", "@ai-bot" + ) + user_message = user_message.replace(mention_prefix, "").strip() + else: + user_message = context.event_data.get("message", "") + issue_index = context.event_data.get("issue_number") + + if not user_message: + return AgentResult( + success=False, + message="No message provided", + ) + + # Build conversation + system_prompt = self.SYSTEM_PROMPT.format( + owner=context.owner, + repo=context.repo, + ) + + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ] + + # Run the chat loop with tool execution + response_content, tools_used = self._run_chat_loop( + context, messages, max_iterations=5 + ) + + actions_taken = [] + if tools_used: + actions_taken.append(f"Used tools: {', '.join(tools_used)}") + + # Post response if this is an issue comment + if issue_index: + comment_body = self._format_response(response_content) + self.upsert_comment( + context.owner, + context.repo, + issue_index, + comment_body, + marker=self.CHAT_AI_MARKER, + ) + actions_taken.append("Posted chat response") + + return AgentResult( + success=True, + message="Chat completed", + data={"response": response_content, "tools_used": tools_used}, + actions_taken=actions_taken, + ) + + 
def _run_chat_loop( + self, + context: AgentContext, + messages: list[dict], + max_iterations: int = 5, + ) -> tuple[str, list[str]]: + """Run the chat loop with tool execution. + + Returns: + Tuple of (final response content, list of tools used) + """ + tools_used = [] + + for _ in range(max_iterations): + self._rate_limit() + response = self.llm.call_with_tools(messages, tools=self.TOOLS) + + # If no tool calls, we're done + if not response.tool_calls: + return response.content, tools_used + + # Add assistant message with tool calls + messages.append({ + "role": "assistant", + "content": response.content or "", + "tool_calls": [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": str(tc.arguments), + }, + } + for tc in response.tool_calls + ], + }) + + # Execute each tool call + for tool_call in response.tool_calls: + tool_result = self._execute_tool(context, tool_call) + tools_used.append(tool_call.name) + + # Add tool result to messages + messages.append({ + "role": "tool", + "tool_call_id": tool_call.id, + "content": tool_result, + }) + + # If we hit max iterations, make one final call without tools + self._rate_limit() + final_response = self.llm.call_with_tools( + messages, tools=None, tool_choice="none" + ) + return final_response.content, tools_used + + def _execute_tool(self, context: AgentContext, tool_call: ToolCall) -> str: + """Execute a tool call and return the result.""" + self.logger.info(f"Executing tool: {tool_call.name}") + + try: + if tool_call.name == "search_codebase": + return self._tool_search_codebase( + context, + tool_call.arguments.get("query", ""), + tool_call.arguments.get("file_pattern"), + ) + elif tool_call.name == "read_file": + return self._tool_read_file( + context, + tool_call.arguments.get("filepath", ""), + ) + elif tool_call.name == "search_web": + return self._tool_search_web( + tool_call.arguments.get("query", ""), + tool_call.arguments.get("categories"), + ) + else: + return 
f"Unknown tool: {tool_call.name}" + except Exception as e: + self.logger.error(f"Tool execution failed: {e}") + return f"Error executing tool: {e}" + + def _tool_search_codebase( + self, + context: AgentContext, + query: str, + file_pattern: str | None = None, + ) -> str: + """Search the codebase for files matching a query.""" + results = [] + + # Get repository file list + try: + files = self._collect_files(context.owner, context.repo, file_pattern) + except Exception as e: + return f"Error listing files: {e}" + + query_lower = query.lower() + + # Search through files + for file_info in files[:50]: # Limit to prevent API exhaustion + filepath = file_info.get("path", "") + + # Check filename match + if query_lower in filepath.lower(): + results.append(f"File: {filepath}") + continue + + # Check content for code patterns + try: + content_data = self.gitea.get_file_contents( + context.owner, context.repo, filepath + ) + if content_data.get("content"): + content = base64.b64decode(content_data["content"]).decode( + "utf-8", errors="ignore" + ) + + # Search for query in content + lines = content.splitlines() + matching_lines = [] + for i, line in enumerate(lines, 1): + if query_lower in line.lower(): + matching_lines.append(f" L{i}: {line.strip()[:100]}") + + if matching_lines: + results.append(f"File: {filepath}") + results.extend(matching_lines[:5]) # Max 5 matches per file + except Exception: + pass + + if not results: + return f"No results found for '{query}'" + + return "\n".join(results[:30]) # Limit total results + + def _collect_files( + self, + owner: str, + repo: str, + file_pattern: str | None = None, + ) -> list[dict]: + """Collect files from the repository.""" + files = [] + + # Code extensions to search + code_extensions = { + ".py", ".js", ".ts", ".go", ".rs", ".java", ".rb", + ".php", ".c", ".cpp", ".h", ".cs", ".swift", ".kt", + ".md", ".yml", ".yaml", ".json", ".toml", + } + + # Patterns to ignore + ignore_patterns = [ + "node_modules/", "vendor/", 
".git/", "__pycache__/", + ".venv/", "dist/", "build/", ".min.js", ".min.css", + ] + + def traverse(path: str = ""): + try: + contents = self.gitea.get_file_contents(owner, repo, path or ".") + if isinstance(contents, list): + for item in contents: + item_path = item.get("path", "") + + if any(p in item_path for p in ignore_patterns): + continue + + if item.get("type") == "file": + ext = os.path.splitext(item_path)[1] + if ext in code_extensions: + # Check file pattern if provided + if file_pattern: + if not self._match_pattern(item_path, file_pattern): + continue + files.append(item) + elif item.get("type") == "dir": + traverse(item_path) + except Exception as e: + self.logger.warning(f"Failed to list {path}: {e}") + + traverse() + return files[:100] # Limit to prevent API exhaustion + + def _match_pattern(self, filepath: str, pattern: str) -> bool: + """Check if filepath matches a simple glob pattern.""" + import fnmatch + return fnmatch.fnmatch(filepath, pattern) + + def _tool_read_file(self, context: AgentContext, filepath: str) -> str: + """Read a file from the repository.""" + try: + content_data = self.gitea.get_file_contents( + context.owner, context.repo, filepath + ) + if content_data.get("content"): + content = base64.b64decode(content_data["content"]).decode( + "utf-8", errors="ignore" + ) + # Truncate if too long + if len(content) > 8000: + content = content[:8000] + "\n... (truncated)" + return f"File: {filepath}\n\n```\n{content}\n```" + return f"File not found: {filepath}" + except Exception as e: + return f"Error reading file: {e}" + + def _tool_search_web( + self, + query: str, + categories: str | None = None, + ) -> str: + """Search the web using SearXNG.""" + if not self._searxng_url: + return "Web search is not configured. Set SEARXNG_URL environment variable." 
+ + try: + params = { + "q": query, + "format": "json", + } + if categories: + params["categories"] = categories + + response = requests.get( + f"{self._searxng_url}/search", + params=params, + timeout=30, + ) + response.raise_for_status() + data = response.json() + + results = data.get("results", []) + if not results: + return f"No web results found for '{query}'" + + # Format results + output = [] + for i, result in enumerate(results[:5], 1): # Top 5 results + title = result.get("title", "No title") + url = result.get("url", "") + content = result.get("content", "")[:200] + output.append(f"{i}. **{title}**\n {url}\n {content}") + + return "\n\n".join(output) + except requests.exceptions.RequestException as e: + return f"Web search failed: {e}" + + def _format_response(self, content: str) -> str: + """Format the chat response with disclaimer.""" + lines = [ + f"{self.AI_DISCLAIMER}", + "", + "---", + "", + content, + ] + return "\n".join(lines) diff --git a/tools/ai-review/agents/codebase_agent.py b/tools/ai-review/agents/codebase_agent.py new file mode 100644 index 0000000..dde7486 --- /dev/null +++ b/tools/ai-review/agents/codebase_agent.py @@ -0,0 +1,457 @@ +"""Codebase Quality Agent + +AI agent for analyzing overall codebase health, architecture, +technical debt, and documentation coverage. 
+""" + +import base64 +import os +from dataclasses import dataclass, field + +from agents.base_agent import AgentContext, AgentResult, BaseAgent + + +@dataclass +class CodebaseMetrics: + """Metrics collected from codebase analysis.""" + + total_files: int = 0 + total_lines: int = 0 + languages: dict = field(default_factory=dict) + todo_count: int = 0 + fixme_count: int = 0 + deprecated_count: int = 0 + missing_docstrings: int = 0 + + +@dataclass +class CodebaseReport: + """Complete codebase analysis report.""" + + summary: str + health_score: float # 0-100 + metrics: CodebaseMetrics + issues: list[dict] + recommendations: list[str] + architecture_notes: list[str] + + +class CodebaseAgent(BaseAgent): + """Agent for codebase quality analysis.""" + + # Marker for codebase reports + CODEBASE_AI_MARKER = "" + + # File extensions to analyze + CODE_EXTENSIONS = { + ".py": "Python", + ".js": "JavaScript", + ".ts": "TypeScript", + ".go": "Go", + ".rs": "Rust", + ".java": "Java", + ".rb": "Ruby", + ".php": "PHP", + ".c": "C", + ".cpp": "C++", + ".h": "C/C++ Header", + ".cs": "C#", + ".swift": "Swift", + ".kt": "Kotlin", + } + + # Files to ignore + IGNORE_PATTERNS = [ + "node_modules/", + "vendor/", + ".git/", + "__pycache__/", + ".venv/", + "dist/", + "build/", + ".min.js", + ".min.css", + ] + + def can_handle(self, event_type: str, event_data: dict) -> bool: + """Check if this agent handles the given event.""" + agent_config = self.config.get("agents", {}).get("codebase", {}) + if not agent_config.get("enabled", True): + return False + + # Handle manual trigger via workflow_dispatch or schedule + if event_type in ("workflow_dispatch", "schedule"): + return True + + # Handle special issue command + if event_type == "issue_comment": + comment_body = event_data.get("comment", {}).get("body", "") + mention_prefix = self.config.get("interaction", {}).get( + "mention_prefix", "@ai-bot" + ) + if f"{mention_prefix} codebase" in comment_body.lower(): + return True + + return False + 
    def execute(self, context: AgentContext) -> AgentResult:
        """Execute codebase analysis.

        Pipeline: list repo files -> compute raw metrics -> LLM analysis ->
        publish (or refresh) the report issue.
        """
        self.logger.info(f"Starting codebase analysis for {context.owner}/{context.repo}")

        actions_taken = []

        # Step 1: Collect file list from repository
        files = self._collect_files(context.owner, context.repo)
        self.logger.info(f"Found {len(files)} files to analyze")

        # Step 2: Analyze metrics
        metrics = self._analyze_metrics(context.owner, context.repo, files)
        actions_taken.append(f"Analyzed {metrics.total_files} files")

        # Step 3: Run AI analysis on key files
        report = self._run_ai_analysis(context, files, metrics)
        actions_taken.append("Generated AI analysis report")

        # Step 4: Create or update report issue
        issue_number = self._create_report_issue(context, report)
        actions_taken.append(f"Created/updated report issue #{issue_number}")

        return AgentResult(
            success=True,
            message=f"Codebase analysis complete - Health Score: {report.health_score:.0f}/100",
            data={
                "health_score": report.health_score,
                "total_files": metrics.total_files,
                "issues_found": len(report.issues),
            },
            actions_taken=actions_taken,
        )

    def _collect_files(self, owner: str, repo: str) -> list[dict]:
        """Collect list of files from the repository.

        Recursively walks the repository tree via the Gitea contents API,
        keeping only files whose extension is in CODE_EXTENSIONS and whose
        path does not contain an IGNORE_PATTERNS fragment.
        """
        files: list[dict] = []

        def traverse(path: str = ""):
            try:
                contents = self.gitea.get_file_contents(owner, repo, path or ".")
                # The contents endpoint returns a list when `path` is a
                # directory; anything else is ignored here.
                if isinstance(contents, list):
                    for item in contents:
                        item_path = item.get("path", "")

                        # Skip ignored patterns
                        if any(p in item_path for p in self.IGNORE_PATTERNS):
                            continue

                        if item.get("type") == "file":
                            ext = os.path.splitext(item_path)[1]
                            if ext in self.CODE_EXTENSIONS:
                                files.append(item)
                        elif item.get("type") == "dir":
                            traverse(item_path)
            except Exception as e:
                # Best effort: an unreadable directory should not abort the
                # whole analysis run.
                self.logger.warning(f"Failed to list {path}: {e}")

        traverse()
        return files[:100]  # Limit to prevent API exhaustion

    def _analyze_metrics(
        self,
        owner: str,
        repo: str,
        files: list[dict],
) -> CodebaseMetrics: + """Analyze metrics from files.""" + metrics = CodebaseMetrics() + metrics.total_files = len(files) + + for file_info in files[:50]: # Analyze top 50 files + filepath = file_info.get("path", "") + ext = os.path.splitext(filepath)[1] + lang = self.CODE_EXTENSIONS.get(ext, "Unknown") + + metrics.languages[lang] = metrics.languages.get(lang, 0) + 1 + + try: + content_data = self.gitea.get_file_contents(owner, repo, filepath) + if content_data.get("content"): + content = base64.b64decode(content_data["content"]).decode( + "utf-8", errors="ignore" + ) + lines = content.splitlines() + metrics.total_lines += len(lines) + + # Count markers + for line in lines: + line_upper = line.upper() + if "TODO" in line_upper: + metrics.todo_count += 1 + if "FIXME" in line_upper: + metrics.fixme_count += 1 + if "DEPRECATED" in line_upper: + metrics.deprecated_count += 1 + + # Check for docstrings (Python) + if ext == ".py": + if 'def ' in content and '"""' not in content: + metrics.missing_docstrings += 1 + + except Exception as e: + self.logger.debug(f"Could not analyze {filepath}: {e}") + + return metrics + + def _run_ai_analysis( + self, + context: AgentContext, + files: list[dict], + metrics: CodebaseMetrics, + ) -> CodebaseReport: + """Run AI analysis on the codebase.""" + # Prepare context for AI + file_list = "\n".join([f"- {f.get('path', '')}" for f in files[:30]]) + language_breakdown = "\n".join( + [f"- {lang}: {count} files" for lang, count in metrics.languages.items()] + ) + + # Sample some key files for deeper analysis + key_files_content = self._get_key_files_content( + context.owner, context.repo, files + ) + + prompt = f"""Analyze this codebase and provide a comprehensive quality assessment. 
+ +## Repository: {context.owner}/{context.repo} + +## Metrics +- Total Files: {metrics.total_files} +- Total Lines: {metrics.total_lines} +- TODO Comments: {metrics.todo_count} +- FIXME Comments: {metrics.fixme_count} +- Deprecated Markers: {metrics.deprecated_count} + +## Language Breakdown +{language_breakdown} + +## File Structure (sample) +{file_list} + +## Key Files Content +{key_files_content} + +## Analysis Required + +Provide your analysis as JSON with this structure: +```json +{{ + "summary": "Overall assessment in 2-3 sentences", + "health_score": 0-100, + "issues": [ + {{ + "severity": "HIGH|MEDIUM|LOW", + "category": "Architecture|Code Quality|Security|Testing|Documentation", + "description": "Issue description", + "recommendation": "How to fix" + }} + ], + "recommendations": ["Top 3-5 actionable recommendations"], + "architecture_notes": ["Observations about code structure and patterns"] +}} +``` + +Be constructive and actionable. Focus on the most impactful improvements. +""" + + try: + result = self.call_llm_json(prompt) + return CodebaseReport( + summary=result.get("summary", "Analysis complete"), + health_score=float(result.get("health_score", 50)), + metrics=metrics, + issues=result.get("issues", []), + recommendations=result.get("recommendations", []), + architecture_notes=result.get("architecture_notes", []), + ) + except Exception as e: + self.logger.error(f"AI analysis failed: {e}") + # Try to log the raw response if possible (requires accessing the last response) + # Since we don't have direct access here, we rely on having good logging in LLMClient if needed. + # But let's add a note to the summary. 
+ # Calculate basic health score from metrics + health_score = 70 + if metrics.todo_count > 10: + health_score -= 10 + if metrics.fixme_count > 5: + health_score -= 10 + + return CodebaseReport( + summary=f"Basic analysis complete (AI unavailable: {e})", + health_score=health_score, + metrics=metrics, + issues=[], + recommendations=["Manual review recommended"], + architecture_notes=[], + ) + + def _get_key_files_content( + self, + owner: str, + repo: str, + files: list[dict], + ) -> str: + """Get content of key files for AI analysis.""" + key_file_names = [ + "README.md", + "setup.py", + "pyproject.toml", + "package.json", + "Cargo.toml", + "go.mod", + "Makefile", + "Dockerfile", + ] + + content_parts = [] + + for file_info in files: + filepath = file_info.get("path", "") + filename = os.path.basename(filepath) + + if filename in key_file_names: + try: + content_data = self.gitea.get_file_contents(owner, repo, filepath) + if content_data.get("content"): + content = base64.b64decode(content_data["content"]).decode( + "utf-8", errors="ignore" + ) + # Truncate long files + if len(content) > 2000: + content = content[:2000] + "\n... (truncated)" + content_parts.append(f"### {filepath}\n```\n{content}\n```") + except Exception: + pass + + return "\n\n".join(content_parts[:5]) or "No key configuration files found." 
+ + def _create_report_issue( + self, + context: AgentContext, + report: CodebaseReport, + ) -> int: + """Create or update a report issue.""" + # Generate issue body + body = self._generate_report_body(report) + + # Look for existing report issue + try: + issues = self.gitea.list_issues( + context.owner, context.repo, state="open", labels=["ai-codebase-report"] + ) + for issue in issues: + if self.CODEBASE_AI_MARKER in issue.get("body", ""): + # Update existing issue body + self.gitea.update_issue( + context.owner, + context.repo, + issue["number"], + body=body, + ) + return issue["number"] + except Exception as e: + self.logger.warning(f"Failed to check for existing report: {e}") + + # Create new issue + try: + # Check for label ID + labels = [] + try: + repo_labels = self.gitea.get_repo_labels(context.owner, context.repo) + for label in repo_labels: + if label["name"] == "ai-codebase-report": + labels.append(label["id"]) + break + except Exception: + pass + + issue = self.gitea.create_issue( + context.owner, + context.repo, + title=f"AI Codebase Report - {context.repo}", + body=body, + labels=labels, + ) + return issue["number"] + except Exception as e: + self.logger.error(f"Failed to create report issue: {e}") + return 0 + + def _generate_report_body(self, report: CodebaseReport) -> str: + """Generate the report issue body.""" + health_emoji = "🟢" if report.health_score >= 80 else ("🟡" if report.health_score >= 60 else "🔴") + + lines = [ + f"{self.AI_DISCLAIMER}", + "", + "# AI Codebase Quality Report", + "", + f"## Health Score: {report.health_score:.0f}/100", + "", + report.summary, + "", + "---", + "", + "## Metrics", + "", + "| Metric | Value |", + "|--------|-------|", + f"| Total Files | {report.metrics.total_files} |", + f"| Total Lines | {report.metrics.total_lines:,} |", + f"| TODO Comments | {report.metrics.todo_count} |", + f"| FIXME Comments | {report.metrics.fixme_count} |", + f"| Deprecated | {report.metrics.deprecated_count} |", + "", + ] + + # 
Languages + if report.metrics.languages: + lines.append("### Languages") + lines.append("") + for lang, count in sorted( + report.metrics.languages.items(), key=lambda x: -x[1] + ): + lines.append(f"- **{lang}**: {count} files") + lines.append("") + + # Issues + if report.issues: + lines.append("## Issues Found") + lines.append("") + for issue in report.issues[:10]: + severity = issue.get("severity", "MEDIUM") + emoji = "🔴" if severity == "HIGH" else ("🟡" if severity == "MEDIUM" else "🟢") + lines.append(f"### [{severity}] {issue.get('category', 'General')}") + lines.append("") + lines.append(issue.get("description", "")) + lines.append("") + lines.append(f"**Recommendation:** {issue.get('recommendation', '')}") + lines.append("") + + # Recommendations + if report.recommendations: + lines.append("## Recommendations") + lines.append("") + for i, rec in enumerate(report.recommendations[:5], 1): + lines.append(f"{i}. {rec}") + lines.append("") + + # Architecture notes + if report.architecture_notes: + lines.append("## Architecture Notes") + lines.append("") + for note in report.architecture_notes[:5]: + lines.append(f"- {note}") + lines.append("") + + lines.append("---") + lines.append(f"*Generated by AI Codebase Agent*") + + return "\n".join(lines) diff --git a/tools/ai-review/agents/issue_agent.py b/tools/ai-review/agents/issue_agent.py new file mode 100644 index 0000000..361ba0c --- /dev/null +++ b/tools/ai-review/agents/issue_agent.py @@ -0,0 +1,392 @@ +"""Issue Review Agent + +AI agent for triaging, labeling, and responding to issues. +Handles issue.opened, issue.labeled, and issue_comment events. 
+""" + +import logging +from dataclasses import dataclass + +from agents.base_agent import AgentContext, AgentResult, BaseAgent + + +@dataclass +class TriageResult: + """Result of issue triage analysis.""" + + issue_type: str + priority: str + confidence: float + summary: str + suggested_labels: list[str] + is_duplicate: bool + duplicate_of: int | None + needs_more_info: bool + missing_info: list[str] + components: list[str] + reasoning: str + + +class IssueAgent(BaseAgent): + """Agent for handling issue events.""" + + # Marker specific to issue comments + ISSUE_AI_MARKER = "" + + def can_handle(self, event_type: str, event_data: dict) -> bool: + """Check if this agent handles the given event.""" + # Check if agent is enabled + agent_config = self.config.get("agents", {}).get("issue", {}) + if not agent_config.get("enabled", True): + return False + + # Handle issue events + if event_type == "issues": + action = event_data.get("action", "") + allowed_events = agent_config.get("events", ["opened", "labeled"]) + if action not in allowed_events: + return False + + # Ignore our own codebase reports to prevent double-commenting + issue = event_data.get("issue", {}) + title = issue.get("title", "") + labels = [l.get("name") for l in issue.get("labels", [])] + if "AI Codebase Report" in title or "ai-codebase-report" in labels: + return False + + return True + + # Handle issue comment events (for @mentions) + if event_type == "issue_comment": + action = event_data.get("action", "") + if action == "created": + comment_body = event_data.get("comment", {}).get("body", "") + mention_prefix = self.config.get("interaction", {}).get( + "mention_prefix", "@ai-bot" + ) + return mention_prefix in comment_body + + return False + + def execute(self, context: AgentContext) -> AgentResult: + """Execute the issue agent.""" + event_data = context.event_data + action = event_data.get("action", "") + + if context.event_type == "issues": + if action == "opened": + return 
self._handle_issue_opened(context) + elif action == "labeled": + return self._handle_issue_labeled(context) + + if context.event_type == "issue_comment": + return self._handle_issue_comment(context) + + return AgentResult( + success=False, + message=f"Unknown action: {action}", + ) + + def _handle_issue_opened(self, context: AgentContext) -> AgentResult: + """Handle a newly opened issue.""" + issue = context.event_data.get("issue", {}) + issue_index = issue.get("number") + title = issue.get("title", "") + body = issue.get("body", "") + author = issue.get("user", {}).get("login", "unknown") + existing_labels = [l.get("name", "") for l in issue.get("labels", [])] + + self.logger.info(f"Triaging issue #{issue_index}: {title}") + + # Step 1: Triage the issue + triage = self._triage_issue(title, body, author, existing_labels) + + actions_taken = [] + + # Step 2: Apply labels if auto-label is enabled + agent_config = self.config.get("agents", {}).get("issue", {}) + if agent_config.get("auto_label", True): + labels_applied = self._apply_labels( + context.owner, context.repo, issue_index, triage + ) + if labels_applied: + actions_taken.append(f"Applied labels: {labels_applied}") + + # Step 3: Post triage comment + comment = self._generate_triage_comment(triage, issue) + self.upsert_comment( + context.owner, + context.repo, + issue_index, + comment, + marker=self.ISSUE_AI_MARKER, + ) + actions_taken.append("Posted triage comment") + + return AgentResult( + success=True, + message=f"Triaged issue #{issue_index} as {triage.issue_type} ({triage.priority} priority)", + data={ + "triage": { + "type": triage.issue_type, + "priority": triage.priority, + "confidence": triage.confidence, + } + }, + actions_taken=actions_taken, + ) + + def _handle_issue_labeled(self, context: AgentContext) -> AgentResult: + """Handle label addition to an issue.""" + # Could be used for specific label-triggered actions + issue = context.event_data.get("issue", {}) + label = 
context.event_data.get("label", {}) + + return AgentResult( + success=True, + message=f"Noted label '{label.get('name')}' added to issue #{issue.get('number')}", + ) + + def _handle_issue_comment(self, context: AgentContext) -> AgentResult: + """Handle @mention in issue comment.""" + issue = context.event_data.get("issue", {}) + comment = context.event_data.get("comment", {}) + issue_index = issue.get("number") + comment_body = comment.get("body", "") + + # Parse command from mention + command = self._parse_command(comment_body) + + if command: + response = self._handle_command(context, issue, command) + self.gitea.create_issue_comment( + context.owner, context.repo, issue_index, response + ) + return AgentResult( + success=True, + message=f"Responded to command: {command}", + actions_taken=["Posted command response"], + ) + + return AgentResult( + success=True, + message="No actionable command found in mention", + ) + + def _triage_issue( + self, + title: str, + body: str, + author: str, + existing_labels: list[str], + ) -> TriageResult: + """Use LLM to triage the issue.""" + prompt_template = self.load_prompt("issue_triage") + prompt = prompt_template.format( + title=title, + body=body or "(no description provided)", + author=author, + existing_labels=", ".join(existing_labels) if existing_labels else "none", + ) + + try: + result = self.call_llm_json(prompt) + return TriageResult( + issue_type=result.get("type", "question"), + priority=result.get("priority", "medium"), + confidence=result.get("confidence", 0.5), + summary=result.get("summary", title), + suggested_labels=result.get("suggested_labels", []), + is_duplicate=result.get("is_duplicate", False), + duplicate_of=result.get("duplicate_of"), + needs_more_info=result.get("needs_more_info", False), + missing_info=result.get("missing_info", []), + components=result.get("components", []), + reasoning=result.get("reasoning", ""), + ) + except Exception as e: + self.logger.warning(f"LLM triage failed: {e}") + # 
Return default triage on failure + return TriageResult( + issue_type="question", + priority="medium", + confidence=0.3, + summary=title, + suggested_labels=[], + is_duplicate=False, + duplicate_of=None, + needs_more_info=True, + missing_info=["Unable to parse issue automatically"], + components=[], + reasoning="Automatic triage failed, needs human review", + ) + + def _apply_labels( + self, + owner: str, + repo: str, + issue_index: int, + triage: TriageResult, + ) -> list[str]: + """Apply labels based on triage result.""" + labels_config = self.config.get("labels", {}) + + # Get all repo labels + try: + repo_labels = self.gitea.get_repo_labels(owner, repo) + label_map = {l["name"]: l["id"] for l in repo_labels} + except Exception as e: + self.logger.warning(f"Failed to get repo labels: {e}") + return [] + + labels_to_add = [] + + # Map priority + priority_labels = labels_config.get("priority", {}) + priority_label = priority_labels.get(triage.priority) + if priority_label and priority_label in label_map: + labels_to_add.append(label_map[priority_label]) + + # Map type + type_labels = labels_config.get("type", {}) + type_label = type_labels.get(triage.issue_type) + if type_label and type_label in label_map: + labels_to_add.append(label_map[type_label]) + + # Add AI reviewed label + status_labels = labels_config.get("status", {}) + reviewed_label = status_labels.get("ai_reviewed") + if reviewed_label and reviewed_label in label_map: + labels_to_add.append(label_map[reviewed_label]) + + if labels_to_add: + try: + self.gitea.add_issue_labels(owner, repo, issue_index, labels_to_add) + return [ + name for name, id in label_map.items() if id in labels_to_add + ] + except Exception as e: + self.logger.warning(f"Failed to add labels: {e}") + + return [] + + def _generate_triage_comment(self, triage: TriageResult, issue: dict) -> str: + """Generate a triage summary comment.""" + lines = [ + f"{self.AI_DISCLAIMER}", + "", + "## AI Issue Triage", + "", + f"| Field | Value |", 
+ f"|-------|--------|", + f"| **Type** | {triage.issue_type.capitalize()} |", + f"| **Priority** | {triage.priority.capitalize()} |", + f"| **Confidence** | {triage.confidence:.0%} |", + "", + ] + + if triage.summary != issue.get("title"): + lines.append(f"**Summary:** {triage.summary}") + lines.append("") + + if triage.components: + lines.append(f"**Components:** {', '.join(triage.components)}") + lines.append("") + + if triage.needs_more_info and triage.missing_info: + lines.append("### Additional Information Needed") + lines.append("") + for info in triage.missing_info: + lines.append(f"- {info}") + lines.append("") + + if triage.is_duplicate and triage.duplicate_of: + lines.append(f"### Possible Duplicate") + lines.append(f"This issue may be a duplicate of #{triage.duplicate_of}") + lines.append("") + + lines.append("---") + lines.append(f"*{triage.reasoning}*") + + return "\n".join(lines) + + def _parse_command(self, body: str) -> str | None: + """Parse a command from a comment body.""" + mention_prefix = self.config.get("interaction", {}).get( + "mention_prefix", "@ai-bot" + ) + commands = self.config.get("interaction", {}).get( + "commands", ["explain", "suggest", "security", "summarize"] + ) + + for command in commands: + if f"{mention_prefix} {command}" in body.lower(): + return command + + return None + + def _handle_command(self, context: AgentContext, issue: dict, command: str) -> str: + """Handle a command from an @mention.""" + title = issue.get("title", "") + body = issue.get("body", "") + + if command == "summarize": + return self._command_summarize(title, body) + elif command == "explain": + return self._command_explain(title, body) + elif command == "suggest": + return self._command_suggest(title, body) + + return f"{self.AI_DISCLAIMER}\n\nSorry, I don't understand the command `{command}`." 
+ + def _command_summarize(self, title: str, body: str) -> str: + """Generate a summary of the issue.""" + prompt = f"""Summarize the following issue in 2-3 concise sentences: + +Title: {title} +Body: {body} + +Provide only the summary, no additional formatting.""" + + try: + response = self.call_llm(prompt) + return f"{self.AI_DISCLAIMER}\n\n**Summary:**\n{response.content}" + except Exception as e: + return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate a summary. Error: {e}" + + def _command_explain(self, title: str, body: str) -> str: + """Explain the issue in more detail.""" + prompt = f"""Analyze this issue and provide a clear explanation of what the user is asking for or reporting: + +Title: {title} +Body: {body} + +Provide: +1. What the issue is about +2. What the user expects +3. Any technical context that might be relevant + +Be concise and helpful.""" + + try: + response = self.call_llm(prompt) + return f"{self.AI_DISCLAIMER}\n\n**Explanation:**\n{response.content}" + except Exception as e: + return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to explain this issue. Error: {e}" + + def _command_suggest(self, title: str, body: str) -> str: + """Suggest solutions for the issue.""" + prompt = f"""Based on this issue, suggest potential solutions or next steps: + +Title: {title} +Body: {body} + +Provide 2-3 actionable suggestions. If this is a bug, suggest debugging steps. If this is a feature request, suggest implementation approaches. + +Be practical and concise.""" + + try: + response = self.call_llm(prompt) + return f"{self.AI_DISCLAIMER}\n\n**Suggestions:**\n{response.content}" + except Exception as e: + return f"{self.AI_DISCLAIMER}\n\nSorry, I was unable to generate suggestions. 
Error: {e}" diff --git a/tools/ai-review/agents/pr_agent.py b/tools/ai-review/agents/pr_agent.py new file mode 100644 index 0000000..a2b1f43 --- /dev/null +++ b/tools/ai-review/agents/pr_agent.py @@ -0,0 +1,436 @@ +"""Pull Request Review Agent + +Enhanced AI agent for comprehensive PR reviews with inline comments, +security scanning, and automatic label management. +""" + +import re +from dataclasses import dataclass, field + +from agents.base_agent import AgentContext, AgentResult, BaseAgent + + +@dataclass +class ReviewIssue: + """A single issue found in the PR.""" + + file: str + line: int | None + severity: str # HIGH, MEDIUM, LOW + category: str # Security, Correctness, Performance, etc. + description: str + recommendation: str + code_snippet: str | None = None + + +@dataclass +class PRReviewResult: + """Result of a PR review.""" + + summary: str + issues: list[ReviewIssue] + overall_severity: str + approval: bool + security_issues: list[ReviewIssue] = field(default_factory=list) + + +class PRAgent(BaseAgent): + """Agent for handling pull request reviews.""" + + # Marker specific to PR reviews + PR_AI_MARKER = "" + + def can_handle(self, event_type: str, event_data: dict) -> bool: + """Check if this agent handles the given event.""" + # Check if agent is enabled + agent_config = self.config.get("agents", {}).get("pr", {}) + if not agent_config.get("enabled", True): + return False + + if event_type == "pull_request": + action = event_data.get("action", "") + allowed_events = agent_config.get("events", ["opened", "synchronize"]) + return action in allowed_events + + return False + + def execute(self, context: AgentContext) -> AgentResult: + """Execute the PR review agent.""" + pr = context.event_data.get("pull_request", {}) + pr_number = pr.get("number") + + self.logger.info(f"Reviewing PR #{pr_number}: {pr.get('title')}") + + actions_taken = [] + + # Step 1: Get PR diff + diff = self._get_diff(context.owner, context.repo, pr_number) + if not diff.strip(): + 
return AgentResult( + success=True, + message="PR has no changes to review", + ) + + # Step 2: Parse changed files + changed_files = self._parse_diff_files(diff) + + # Step 3: Run security scan if enabled + security_issues = [] + agent_config = self.config.get("agents", {}).get("pr", {}) + if agent_config.get("security_scan", True): + security_issues = self._run_security_scan(changed_files, diff) + if security_issues: + actions_taken.append(f"Found {len(security_issues)} security issues") + + # Step 4: Run AI review + review_result = self._run_ai_review(diff, context, security_issues) + + # Step 5: Post inline comments if enabled + if agent_config.get("inline_comments", True) and review_result.issues: + inline_count = self._post_inline_comments( + context.owner, context.repo, pr_number, review_result + ) + actions_taken.append(f"Posted {inline_count} inline comments") + + # Step 6: Post summary comment + summary_comment = self._generate_summary_comment(review_result) + self.upsert_comment( + context.owner, + context.repo, + pr_number, + summary_comment, + marker=self.PR_AI_MARKER, + ) + actions_taken.append("Posted summary comment") + + # Step 7: Apply labels + labels_applied = self._apply_review_labels( + context.owner, context.repo, pr_number, review_result + ) + if labels_applied: + actions_taken.append(f"Applied labels: {labels_applied}") + + return AgentResult( + success=True, + message=f"Reviewed PR #{pr_number}: {review_result.overall_severity} severity", + data={ + "severity": review_result.overall_severity, + "approval": review_result.approval, + "issues_count": len(review_result.issues), + "security_issues_count": len(review_result.security_issues), + }, + actions_taken=actions_taken, + ) + + def _get_diff(self, owner: str, repo: str, pr_number: int) -> str: + """Get the PR diff, truncated if necessary.""" + max_lines = self.config.get("review", {}).get("max_diff_lines", 800) + + try: + diff = self.gitea.get_pull_request_diff(owner, repo, pr_number) + 
lines = diff.splitlines() + if len(lines) > max_lines: + return "\n".join(lines[:max_lines]) + return diff + except Exception as e: + self.logger.error(f"Failed to get diff: {e}") + return "" + + def _parse_diff_files(self, diff: str) -> dict[str, str]: + """Parse diff into file -> content mapping.""" + files = {} + current_file = None + current_content = [] + + for line in diff.splitlines(): + if line.startswith("diff --git"): + if current_file: + files[current_file] = "\n".join(current_content) + # Extract file path from "diff --git a/path b/path" + match = re.search(r"b/(.+)$", line) + if match: + current_file = match.group(1) + current_content = [] + elif current_file: + current_content.append(line) + + if current_file: + files[current_file] = "\n".join(current_content) + + return files + + def _run_security_scan( + self, changed_files: dict[str, str], diff: str + ) -> list[ReviewIssue]: + """Run security pattern scanning on the diff.""" + issues = [] + + # Security patterns to detect + patterns = [ + { + "name": "Hardcoded Secrets", + "pattern": r'(?i)(api_key|apikey|secret|password|token|auth)\s*[=:]\s*["\'][^"\']{8,}["\']', + "severity": "HIGH", + "category": "Security", + "description": "Potential hardcoded secret or API key detected", + "recommendation": "Move secrets to environment variables or a secrets manager", + }, + { + "name": "SQL Injection", + "pattern": r'(?i)(execute|query)\s*\([^)]*\+[^)]*\)|f["\'].*\{.*\}.*(?:SELECT|INSERT|UPDATE|DELETE)', + "severity": "HIGH", + "category": "Security", + "description": "Potential SQL injection vulnerability - string concatenation in query", + "recommendation": "Use parameterized queries or prepared statements", + }, + { + "name": "Hardcoded IP", + "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', + "severity": "LOW", + "category": "Security", + "description": "Hardcoded IP 
address detected", + "recommendation": "Consider using configuration or DNS names instead", + }, + { + "name": "Eval Usage", + "pattern": r'\beval\s*\(', + "severity": "HIGH", + "category": "Security", + "description": "Use of eval() detected - potential code injection risk", + "recommendation": "Avoid eval() - use safer alternatives like ast.literal_eval() for Python", + }, + { + "name": "Shell Injection", + "pattern": r'(?i)(?:subprocess\.call|os\.system|shell\s*=\s*True)', + "severity": "MEDIUM", + "category": "Security", + "description": "Potential shell command execution - verify input is sanitized", + "recommendation": "Use subprocess with shell=False and pass arguments as a list", + }, + ] + + for filename, content in changed_files.items(): + # Only check added lines (starting with +) + added_lines = [] + line_numbers = [] + current_line = 0 + + for line in content.splitlines(): + if line.startswith("@@"): + # Parse line number from @@ -x,y +a,b @@ + match = re.search(r"\+(\d+)", line) + if match: + current_line = int(match.group(1)) - 1 + elif line.startswith("+") and not line.startswith("+++"): + current_line += 1 + added_lines.append((current_line, line[1:])) + elif not line.startswith("-"): + current_line += 1 + + # Check patterns on added lines + for line_num, line_content in added_lines: + for pattern_def in patterns: + if re.search(pattern_def["pattern"], line_content): + issues.append( + ReviewIssue( + file=filename, + line=line_num, + severity=pattern_def["severity"], + category=pattern_def["category"], + description=pattern_def["description"], + recommendation=pattern_def["recommendation"], + code_snippet=line_content.strip()[:100], + ) + ) + + return issues + + def _run_ai_review( + self, + diff: str, + context: AgentContext, + security_issues: list[ReviewIssue], + ) -> PRReviewResult: + """Run AI-based code review.""" + prompt_template = self.load_prompt("base") + + # Add security context if issues were found + security_context = "" + if 
security_issues: + security_context = "\n\nSECURITY SCAN RESULTS (already detected):\n" + for issue in security_issues[:5]: # Limit to first 5 + security_context += f"- [{issue.severity}] {issue.file}:{issue.line} - {issue.description}\n" + + prompt = f"{prompt_template}\n{security_context}\nDIFF:\n{diff}" + + try: + result = self.call_llm_json(prompt) + + issues = [] + for issue_data in result.get("issues", []): + issues.append( + ReviewIssue( + file=issue_data.get("file", "unknown"), + line=issue_data.get("line"), + severity=issue_data.get("severity", "MEDIUM"), + category=issue_data.get("category", "General"), + description=issue_data.get("description", ""), + recommendation=issue_data.get("recommendation", ""), + code_snippet=issue_data.get("code_snippet"), + ) + ) + + return PRReviewResult( + summary=result.get("summary", "Review completed"), + issues=issues, + overall_severity=result.get("overall_severity", "LOW"), + approval=result.get("approval", True), + security_issues=security_issues, + ) + + except Exception as e: + self.logger.error(f"AI review failed: {e}") + return PRReviewResult( + summary=f"AI review encountered an error: {e}", + issues=[], + overall_severity="UNKNOWN", + approval=False, + security_issues=security_issues, + ) + + def _post_inline_comments( + self, + owner: str, + repo: str, + pr_number: int, + review: PRReviewResult, + ) -> int: + """Post inline comments for issues with line numbers.""" + comments = [] + + all_issues = review.issues + review.security_issues + for issue in all_issues: + if issue.line and issue.file: + comment_body = ( + f"**[{issue.severity}] {issue.category}**\n\n" + f"{issue.description}\n\n" + f"**Recommendation:** {issue.recommendation}" + ) + comments.append( + { + "path": issue.file, + "line": issue.line, + "body": comment_body, + } + ) + + if not comments: + return 0 + + try: + # Use Gitea's pull request review API for inline comments + self.gitea.create_pull_request_review( + owner=owner, + repo=repo, + 
index=pr_number, + body="AI Code Review - Inline Comments", + event="COMMENT", + comments=comments[:10], # Limit to 10 inline comments + ) + return min(len(comments), 10) + except Exception as e: + self.logger.warning(f"Failed to post inline comments: {e}") + return 0 + + def _generate_summary_comment(self, review: PRReviewResult) -> str: + """Generate the summary comment for the PR.""" + lines = [ + f"{self.AI_DISCLAIMER}", + "", + "## AI Code Review", + "", + review.summary, + "", + ] + + # Statistics + all_issues = review.issues + review.security_issues + high = sum(1 for i in all_issues if i.severity == "HIGH") + medium = sum(1 for i in all_issues if i.severity == "MEDIUM") + low = sum(1 for i in all_issues if i.severity == "LOW") + + lines.append("### Summary") + lines.append("") + lines.append(f"| Severity | Count |") + lines.append(f"|----------|-------|") + lines.append(f"| HIGH | {high} |") + lines.append(f"| MEDIUM | {medium} |") + lines.append(f"| LOW | {low} |") + lines.append("") + + # Security issues section + if review.security_issues: + lines.append("### Security Issues") + lines.append("") + for issue in review.security_issues[:5]: + lines.append(f"- **[{issue.severity}]** `{issue.file}:{issue.line}` - {issue.description}") + lines.append("") + + # Other issues (limit display) + other_issues = [i for i in review.issues if i not in review.security_issues] + if other_issues: + lines.append("### Review Findings") + lines.append("") + for issue in other_issues[:10]: + loc = f"`{issue.file}:{issue.line}`" if issue.line else f"`{issue.file}`" + lines.append(f"- **[{issue.severity}]** {loc} - {issue.description}") + if len(other_issues) > 10: + lines.append(f"- ...and {len(other_issues) - 10} more issues") + lines.append("") + + # Verdict + lines.append("---") + lines.append(f"**Overall Severity:** `{review.overall_severity}`") + if review.approval: + lines.append("**AI Recommendation:** Approve") + else: + lines.append("**AI Recommendation:** Changes 
Requested") + + return "\n".join(lines) + + def _apply_review_labels( + self, + owner: str, + repo: str, + pr_number: int, + review: PRReviewResult, + ) -> list[str]: + """Apply labels based on review result.""" + labels_config = self.config.get("labels", {}).get("status", {}) + + try: + repo_labels = self.gitea.get_repo_labels(owner, repo) + label_map = {l["name"]: l["id"] for l in repo_labels} + except Exception as e: + self.logger.warning(f"Failed to get repo labels: {e}") + return [] + + labels_to_add = [] + + # Add approval/changes required label + if review.approval: + label_name = labels_config.get("ai_approved", "ai-approved") + else: + label_name = labels_config.get("ai_changes_required", "ai-changes-required") + + if label_name in label_map: + labels_to_add.append(label_map[label_name]) + + if labels_to_add: + try: + self.gitea.add_issue_labels(owner, repo, pr_number, labels_to_add) + return [name for name, id in label_map.items() if id in labels_to_add] + except Exception as e: + self.logger.warning(f"Failed to add labels: {e}") + + return [] diff --git a/tools/ai-review/clients/__init__.py b/tools/ai-review/clients/__init__.py new file mode 100644 index 0000000..2ac02ed --- /dev/null +++ b/tools/ai-review/clients/__init__.py @@ -0,0 +1,10 @@ +"""API Clients Package + +This package contains client wrappers for external services +like Gitea API and LLM providers. +""" + +from clients.gitea_client import GiteaClient +from clients.llm_client import LLMClient + +__all__ = ["GiteaClient", "LLMClient"] diff --git a/tools/ai-review/clients/gitea_client.py b/tools/ai-review/clients/gitea_client.py new file mode 100644 index 0000000..14c6219 --- /dev/null +++ b/tools/ai-review/clients/gitea_client.py @@ -0,0 +1,447 @@ +"""Gitea API Client + +A unified client for interacting with the Gitea REST API. +Provides methods for issues, pull requests, comments, and repository operations. 
+""" + +import os +from typing import Any + +import requests + + +class GiteaClient: + """Client for Gitea API operations.""" + + def __init__( + self, + api_url: str | None = None, + token: str | None = None, + timeout: int = 30, + ): + """Initialize the Gitea client. + + Args: + api_url: Gitea API base URL. Defaults to AI_REVIEW_API_URL env var. + token: API token. Defaults to AI_REVIEW_TOKEN env var. + timeout: Request timeout in seconds. + """ + self.api_url = api_url or os.environ.get("AI_REVIEW_API_URL", "") + self.token = token or os.environ.get("AI_REVIEW_TOKEN", "") + self.timeout = timeout + + if not self.api_url: + raise ValueError("Gitea API URL is required") + if not self.token: + raise ValueError("Gitea API token is required") + + self.headers = { + "Authorization": f"token {self.token}", + "Content-Type": "application/json", + "Accept": "application/json", + } + + def _request( + self, + method: str, + endpoint: str, + json: dict | None = None, + params: dict | None = None, + ) -> dict | list: + """Make an API request. + + Args: + method: HTTP method (GET, POST, PATCH, DELETE). + endpoint: API endpoint (without base URL). + json: Request body for POST/PATCH. + params: Query parameters. + + Returns: + Response JSON data. + + Raises: + requests.HTTPError: If the request fails. + """ + url = f"{self.api_url}{endpoint}" + response = requests.request( + method=method, + url=url, + headers=self.headers, + json=json, + params=params, + timeout=self.timeout, + ) + response.raise_for_status() + + if response.status_code == 204: + return {} + return response.json() + + # ------------------------------------------------------------------------- + # Issue Operations + # ------------------------------------------------------------------------- + + def create_issue( + self, + owner: str, + repo: str, + title: str, + body: str, + labels: list[int] | None = None, + ) -> dict: + """Create a new issue. + + Args: + owner: Repository owner. + repo: Repository name. 
+ title: Issue title. + body: Issue body. + labels: Optional list of label IDs. + + Returns: + Created issue object. + """ + payload = { + "title": title, + "body": body, + } + if labels: + payload["labels"] = labels + + return self._request( + "POST", + f"/repos/{owner}/{repo}/issues", + json=payload, + ) + + def update_issue( + self, + owner: str, + repo: str, + index: int, + title: str | None = None, + body: str | None = None, + state: str | None = None, + ) -> dict: + """Update an existing issue. + + Args: + owner: Repository owner. + repo: Repository name. + index: Issue number. + title: New title. + body: New body. + state: New state (open, closed). + + Returns: + Updated issue object. + """ + payload = {} + if title: + payload["title"] = title + if body: + payload["body"] = body + if state: + payload["state"] = state + + return self._request( + "PATCH", + f"/repos/{owner}/{repo}/issues/{index}", + json=payload, + ) + + def list_issues( + self, + owner: str, + repo: str, + state: str = "open", + labels: list[str] | None = None, + page: int = 1, + limit: int = 30, + ) -> list[dict]: + """List issues in a repository. + + Args: + owner: Repository owner. + repo: Repository name. + state: Issue state (open, closed, all). + labels: Filter by labels. + page: Page number. + limit: Items per page. + + Returns: + List of issue objects. + """ + params = { + "state": state, + "page": page, + "limit": limit, + } + if labels: + params["labels"] = ",".join(labels) + + return self._request("GET", f"/repos/{owner}/{repo}/issues", params=params) + + def get_issue(self, owner: str, repo: str, index: int) -> dict: + """Get a single issue. + + Args: + owner: Repository owner. + repo: Repository name. + index: Issue number. + + Returns: + Issue object. + """ + return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}") + + def create_issue_comment( + self, + owner: str, + repo: str, + index: int, + body: str, + ) -> dict: + """Create a comment on an issue. 
+ + Args: + owner: Repository owner. + repo: Repository name. + index: Issue number. + body: Comment body. + + Returns: + Created comment object. + """ + return self._request( + "POST", + f"/repos/{owner}/{repo}/issues/{index}/comments", + json={"body": body}, + ) + + def update_issue_comment( + self, + owner: str, + repo: str, + comment_id: int, + body: str, + ) -> dict: + """Update an existing comment. + + Args: + owner: Repository owner. + repo: Repository name. + comment_id: Comment ID. + body: Updated comment body. + + Returns: + Updated comment object. + """ + return self._request( + "PATCH", + f"/repos/{owner}/{repo}/issues/comments/{comment_id}", + json={"body": body}, + ) + + def list_issue_comments( + self, + owner: str, + repo: str, + index: int, + ) -> list[dict]: + """List comments on an issue. + + Args: + owner: Repository owner. + repo: Repository name. + index: Issue number. + + Returns: + List of comment objects. + """ + return self._request("GET", f"/repos/{owner}/{repo}/issues/{index}/comments") + + def add_issue_labels( + self, + owner: str, + repo: str, + index: int, + labels: list[int], + ) -> list[dict]: + """Add labels to an issue. + + Args: + owner: Repository owner. + repo: Repository name. + index: Issue number. + labels: List of label IDs to add. + + Returns: + List of label objects. + """ + return self._request( + "POST", + f"/repos/{owner}/{repo}/issues/{index}/labels", + json={"labels": labels}, + ) + + def get_repo_labels(self, owner: str, repo: str) -> list[dict]: + """Get all labels for a repository. + + Args: + owner: Repository owner. + repo: Repository name. + + Returns: + List of label objects. 
+ """ + return self._request("GET", f"/repos/{owner}/{repo}/labels") + + # ------------------------------------------------------------------------- + # Pull Request Operations + # ------------------------------------------------------------------------- + + def get_pull_request(self, owner: str, repo: str, index: int) -> dict: + """Get a pull request. + + Args: + owner: Repository owner. + repo: Repository name. + index: PR number. + + Returns: + Pull request object. + """ + return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}") + + def get_pull_request_diff(self, owner: str, repo: str, index: int) -> str: + """Get the diff for a pull request. + + Args: + owner: Repository owner. + repo: Repository name. + index: PR number. + + Returns: + Diff text. + """ + url = f"{self.api_url}/repos/{owner}/{repo}/pulls/{index}.diff" + response = requests.get( + url, + headers={ + "Authorization": f"token {self.token}", + "Accept": "text/plain", + }, + timeout=self.timeout, + ) + response.raise_for_status() + return response.text + + def list_pull_request_files( + self, + owner: str, + repo: str, + index: int, + ) -> list[dict]: + """List files changed in a pull request. + + Args: + owner: Repository owner. + repo: Repository name. + index: PR number. + + Returns: + List of changed file objects. + """ + return self._request("GET", f"/repos/{owner}/{repo}/pulls/{index}/files") + + def create_pull_request_review( + self, + owner: str, + repo: str, + index: int, + body: str, + event: str = "COMMENT", + comments: list[dict] | None = None, + ) -> dict: + """Create a review on a pull request. + + Args: + owner: Repository owner. + repo: Repository name. + index: PR number. + body: Review body. + event: Review event (APPROVE, REQUEST_CHANGES, COMMENT). + comments: List of inline comments. + + Returns: + Created review object. 
+ """ + payload: dict[str, Any] = { + "body": body, + "event": event, + } + if comments: + payload["comments"] = comments + + return self._request( + "POST", + f"/repos/{owner}/{repo}/pulls/{index}/reviews", + json=payload, + ) + + # ------------------------------------------------------------------------- + # Repository Operations + # ------------------------------------------------------------------------- + + def get_repository(self, owner: str, repo: str) -> dict: + """Get repository information. + + Args: + owner: Repository owner. + repo: Repository name. + + Returns: + Repository object. + """ + return self._request("GET", f"/repos/{owner}/{repo}") + + def get_file_contents( + self, + owner: str, + repo: str, + filepath: str, + ref: str | None = None, + ) -> dict: + """Get file contents from a repository. + + Args: + owner: Repository owner. + repo: Repository name. + filepath: Path to file. + ref: Git ref (branch, tag, commit). + + Returns: + File content object with base64-encoded content. + """ + params = {} + if ref: + params["ref"] = ref + return self._request( + "GET", + f"/repos/{owner}/{repo}/contents/{filepath}", + params=params, + ) + + def get_branch(self, owner: str, repo: str, branch: str) -> dict: + """Get branch information. + + Args: + owner: Repository owner. + repo: Repository name. + branch: Branch name. + + Returns: + Branch object. + """ + return self._request("GET", f"/repos/{owner}/{repo}/branches/{branch}") diff --git a/tools/ai-review/clients/llm_client.py b/tools/ai-review/clients/llm_client.py new file mode 100644 index 0000000..d220be7 --- /dev/null +++ b/tools/ai-review/clients/llm_client.py @@ -0,0 +1,482 @@ +"""LLM Client + +A unified client for interacting with multiple LLM providers. +Supports OpenAI, OpenRouter, Ollama, and extensible for more providers. 
+""" + +import json +import os +from abc import ABC, abstractmethod +from dataclasses import dataclass + +import requests + + +@dataclass +class ToolCall: + """Represents a tool call from the LLM.""" + + id: str + name: str + arguments: dict + + +@dataclass +class LLMResponse: + """Response from an LLM call.""" + + content: str + model: str + provider: str + tokens_used: int | None = None + finish_reason: str | None = None + tool_calls: list[ToolCall] | None = None + + +class BaseLLMProvider(ABC): + """Abstract base class for LLM providers.""" + + @abstractmethod + def call(self, prompt: str, **kwargs) -> LLMResponse: + """Make a call to the LLM. + + Args: + prompt: The prompt to send. + **kwargs: Provider-specific options. + + Returns: + LLMResponse with the generated content. + """ + pass + + def call_with_tools( + self, + messages: list[dict], + tools: list[dict] | None = None, + **kwargs, + ) -> LLMResponse: + """Make a call to the LLM with tool/function calling support. + + Args: + messages: List of message dicts with 'role' and 'content'. + tools: List of tool definitions in OpenAI format. + **kwargs: Provider-specific options. + + Returns: + LLMResponse with content and/or tool_calls. 
+ """ + raise NotImplementedError("Tool calling not supported by this provider") + + +class OpenAIProvider(BaseLLMProvider): + """OpenAI API provider.""" + + def __init__( + self, + api_key: str | None = None, + model: str = "gpt-4o-mini", + temperature: float = 0, + max_tokens: int = 4096, + ): + self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + self.model = model + self.temperature = temperature + self.max_tokens = max_tokens + self.api_url = "https://api.openai.com/v1/chat/completions" + + def call(self, prompt: str, **kwargs) -> LLMResponse: + """Call OpenAI API.""" + if not self.api_key: + raise ValueError("OpenAI API key is required") + + response = requests.post( + self.api_url, + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json={ + "model": kwargs.get("model", self.model), + "temperature": kwargs.get("temperature", self.temperature), + "max_tokens": kwargs.get("max_tokens", self.max_tokens), + "messages": [{"role": "user", "content": prompt}], + }, + timeout=120, + ) + response.raise_for_status() + data = response.json() + + choice = data["choices"][0] + usage = data.get("usage", {}) + + return LLMResponse( + content=choice["message"]["content"], + model=data["model"], + provider="openai", + tokens_used=usage.get("total_tokens"), + finish_reason=choice.get("finish_reason"), + ) + + def call_with_tools( + self, + messages: list[dict], + tools: list[dict] | None = None, + **kwargs, + ) -> LLMResponse: + """Call OpenAI API with tool support.""" + if not self.api_key: + raise ValueError("OpenAI API key is required") + + request_body = { + "model": kwargs.get("model", self.model), + "temperature": kwargs.get("temperature", self.temperature), + "max_tokens": kwargs.get("max_tokens", self.max_tokens), + "messages": messages, + } + + if tools: + request_body["tools"] = tools + request_body["tool_choice"] = kwargs.get("tool_choice", "auto") + + response = requests.post( + self.api_url, + 
headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json=request_body, + timeout=120, + ) + response.raise_for_status() + data = response.json() + + choice = data["choices"][0] + usage = data.get("usage", {}) + message = choice["message"] + + # Parse tool calls if present + tool_calls = None + if message.get("tool_calls"): + tool_calls = [] + for tc in message["tool_calls"]: + tool_calls.append( + ToolCall( + id=tc["id"], + name=tc["function"]["name"], + arguments=json.loads(tc["function"]["arguments"]), + ) + ) + + return LLMResponse( + content=message.get("content") or "", + model=data["model"], + provider="openai", + tokens_used=usage.get("total_tokens"), + finish_reason=choice.get("finish_reason"), + tool_calls=tool_calls, + ) + + +class OpenRouterProvider(BaseLLMProvider): + """OpenRouter API provider.""" + + def __init__( + self, + api_key: str | None = None, + model: str = "anthropic/claude-3.5-sonnet", + temperature: float = 0, + max_tokens: int = 4096, + ): + self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY", "") + self.model = model + self.temperature = temperature + self.max_tokens = max_tokens + self.api_url = "https://openrouter.ai/api/v1/chat/completions" + + def call(self, prompt: str, **kwargs) -> LLMResponse: + """Call OpenRouter API.""" + if not self.api_key: + raise ValueError("OpenRouter API key is required") + + response = requests.post( + self.api_url, + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json={ + "model": kwargs.get("model", self.model), + "temperature": kwargs.get("temperature", self.temperature), + "max_tokens": kwargs.get("max_tokens", self.max_tokens), + "messages": [{"role": "user", "content": prompt}], + }, + timeout=120, + ) + response.raise_for_status() + data = response.json() + + choice = data["choices"][0] + usage = data.get("usage", {}) + + return LLMResponse( + content=choice["message"]["content"], + 
class OllamaProvider(BaseLLMProvider):
    """Ollama (self-hosted) provider.

    Talks to an Ollama server's /api/generate endpoint; the host defaults
    to the OLLAMA_HOST environment variable, then localhost.
    """

    def __init__(
        self,
        host: str | None = None,
        model: str = "codellama:13b",
        temperature: float = 0,
    ):
        self.host = host or os.environ.get("OLLAMA_HOST", "http://localhost:11434")
        self.model = model
        self.temperature = temperature

    def call(self, prompt: str, **kwargs) -> LLMResponse:
        """Call Ollama API."""
        payload = {
            "model": kwargs.get("model", self.model),
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": kwargs.get("temperature", self.temperature)},
        }
        # Local models can be slow, so allow a generous timeout.
        reply = requests.post(f"{self.host}/api/generate", json=payload, timeout=300)
        reply.raise_for_status()
        body = reply.json()

        return LLMResponse(
            content=body["response"],
            model=body.get("model", self.model),
            provider="ollama",
            tokens_used=body.get("eval_count"),
            finish_reason="stop" if body.get("done") else None,
        )
+ """ + return self._provider.call_with_tools(messages, tools, **kwargs) + + def call_json(self, prompt: str, **kwargs) -> dict: + """Make a call and parse the response as JSON. + + Args: + prompt: The prompt to send (should request JSON output). + **kwargs: Provider-specific options. + + Returns: + Parsed JSON response. + + Raises: + json.JSONDecodeError: If response is not valid JSON. + """ + response = self.call(prompt, **kwargs) + content = response.content.strip() + + return self._extract_json(content) + + def _extract_json(self, content: str) -> dict: + """Extract and parse JSON from content string. + + Handles markdown code blocks and preamble text. + """ + content = content.strip() + + # Attempt 1: direct parse + try: + return json.loads(content) + except json.JSONDecodeError: + pass + + # Attempt 2: Extract from markdown code blocks + if "```" in content: + # Find the JSON block + import re + match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content) + if match: + try: + return json.loads(match.group(1)) + except json.JSONDecodeError: + pass + + # Attempt 3: Find first { and last } + try: + start = content.find("{") + end = content.rfind("}") + if start != -1 and end != -1: + json_str = content[start : end + 1] + return json.loads(json_str) + except json.JSONDecodeError: + pass + + # Attempt 4: Fix common JSON errors (comments, trailing commas) + # This is risky but helpful for LLM output + try: + # Remove comments + import re + json_str = re.sub(r"//.*", "", content) + json_str = re.sub(r"/\*[\s\S]*?\*/", "", json_str) + return json.loads(json_str) + except json.JSONDecodeError as e: + # If all attempts fail, raise an error with the content for debugging + snippet = content[:500] + "..." if len(content) > 500 else content + raise ValueError(f"Failed to parse JSON response: {e}. Raw content snippet: {snippet!r}") + + @classmethod + def from_config(cls, config: dict) -> "LLMClient": + """Create an LLM client from a configuration dictionary. 
def to_markdown(result: dict) -> str:
    """Render an AI review result dict as a markdown comment.

    Tolerates partially-populated results (e.g. from a failed LLM call):
    missing keys fall back to sensible defaults instead of raising KeyError.

    Args:
        result: Review payload with optional keys ``summary``, ``issues``,
            ``overall_severity`` and ``approval``. Each issue may carry
            ``severity``, ``category``, ``file``, ``line``, ``description``
            and ``recommendation``.

    Returns:
        Markdown text ready to post as an issue/PR comment.
    """
    lines = []
    lines.append("## 🤖 Enterprise AI Code Review\n")
    lines.append(result.get("summary", "") + "\n")

    if not result.get("issues"):
        lines.append("✅ No issues found.\n")
    else:
        for issue in result["issues"]:
            # .get throughout: an LLM-produced issue dict may omit any field.
            lines.append(
                f"### ❗ {issue.get('severity', 'UNKNOWN')} — {issue.get('category', 'General')}"
            )
            lines.append(f"- **File:** `{issue.get('file', 'unknown')}`")
            if issue.get("line"):
                lines.append(f"- **Line:** `{issue['line']}`")
            lines.append(f"- **Issue:** {issue.get('description', '')}")
            lines.append(f"- **Recommendation:** {issue.get('recommendation', '')}\n")

    lines.append("---")
    lines.append(f"**Overall severity:** `{result.get('overall_severity', 'UNKNOWN')}`")
    lines.append(
        "✅ **AI Approval**" if result.get("approval") else "❌ **Changes required**"
    )

    return "\n".join(lines)
100644 index 0000000..083d687 --- /dev/null +++ b/tools/ai-review/config.yml @@ -0,0 +1,96 @@ +provider: openai # openai | openrouter | ollama + +model: + openai: gpt-4.1-mini + openrouter: anthropic/claude-3.5-sonnet + ollama: codellama:13b + +temperature: 0 +max_tokens: 4096 + +# Review settings +review: + fail_on_severity: HIGH + max_diff_lines: 800 + inline_comments: true + security_scan: true + +# Agent settings +agents: + issue: + enabled: true + auto_label: true + auto_triage: true + duplicate_threshold: 0.85 + events: + - opened + - labeled + pr: + enabled: true + inline_comments: true + security_scan: true + events: + - opened + - synchronize + codebase: + enabled: true + schedule: "0 0 * * 0" # Weekly on Sunday + chat: + enabled: true + name: "Bartender" + max_iterations: 5 # Max tool call iterations per chat + tools: + - search_codebase + - read_file + - search_web + searxng_url: "" # Set via SEARXNG_URL env var or here + +# Interaction settings +# CUSTOMIZE YOUR BOT NAME HERE! +# Change mention_prefix to your preferred bot name: +# "@ai-bot" - Default +# "@bartender" - Friendly bar theme +# "@uni" - Short and simple +# "@joey" - Personal assistant name +# "@codebot" - Code-focused name +# NOTE: Also update the workflow files (.github/workflows/ or .gitea/workflows/) +# to match this prefix in the 'if: contains(...)' condition +interaction: + respond_to_mentions: true + mention_prefix: "@ai-bot" # Change this to customize your bot's name! 
@dataclass
class DispatchResult:
    """Result of dispatching an event."""

    event_type: str
    agents_run: list[str]
    results: list[AgentResult]
    errors: list[str]


class Dispatcher:
    """Event dispatcher that routes events to appropriate agents."""

    def __init__(
        self,
        config: dict | None = None,
        max_workers: int = 4,
    ):
        """Initialize the dispatcher.

        Args:
            config: Configuration dictionary; loaded from config.yml when
                not supplied.
            max_workers: Maximum concurrent agent executions.
        """
        self.config = config or self._load_config()
        self.max_workers = max_workers
        self.logger = logging.getLogger(__name__)
        self._agents: list[BaseAgent] = []
        self._executor = ThreadPoolExecutor(max_workers=max_workers)

    @staticmethod
    def _load_config() -> dict:
        """Load configuration from the config.yml next to this module."""
        path = os.path.join(os.path.dirname(__file__), "config.yml")
        if not os.path.exists(path):
            return {}
        with open(path) as fh:
            return yaml.safe_load(fh)

    def register_agent(self, agent: BaseAgent):
        """Register an agent instance with the dispatcher."""
        self._agents.append(agent)
        self.logger.info(f"Registered agent: {agent.__class__.__name__}")

    def register_agent_class(self, agent_class: Type[BaseAgent], **kwargs):
        """Instantiate an agent class with this dispatcher's config and register it.

        Args:
            agent_class: Agent class to instantiate and register.
            **kwargs: Extra arguments for the agent constructor.
        """
        self.register_agent(agent_class(config=self.config, **kwargs))

    def dispatch(
        self,
        event_type: str,
        event_data: dict,
        owner: str,
        repo: str,
    ) -> DispatchResult:
        """Route one event to every registered agent that can handle it.

        Agent failures are captured in the result's errors list rather than
        propagated, so one misbehaving agent cannot block the others.

        Args:
            event_type: Type of event (issue, pull_request, issue_comment, etc).
            event_data: Event payload data.
            owner: Repository owner.
            repo: Repository name.

        Returns:
            Dispatch result with all agent results.
        """
        self.logger.info(f"Dispatching event: {event_type} for {owner}/{repo}")

        handlers = [a for a in self._agents if a.can_handle(event_type, event_data)]
        if not handlers:
            self.logger.info(f"No agents registered for event: {event_type}")
            return DispatchResult(
                event_type=event_type,
                agents_run=[],
                results=[],
                errors=[],
            )

        self.logger.info(
            f"Found {len(handlers)} agent(s) for event: {[a.__class__.__name__ for a in handlers]}"
        )

        context = AgentContext(
            owner=owner,
            repo=repo,
            event_type=event_type,
            event_data=event_data,
            config=self.config,
        )

        results: list[AgentResult] = []
        errors: list[str] = []
        agents_run: list[str] = []

        for handler in handlers:
            handler_name = handler.__class__.__name__
            agents_run.append(handler_name)
            try:
                outcome = handler.run(context)
            except Exception as e:
                self.logger.exception(f"Agent {handler_name} failed: {e}")
                errors.append(f"{handler_name}: {str(e)}")
                results.append(
                    AgentResult(
                        success=False,
                        message="Unexpected error",
                        error=str(e),
                    )
                )
            else:
                results.append(outcome)
                if not outcome.success:
                    errors.append(f"{handler_name}: {outcome.error or outcome.message}")

        return DispatchResult(
            event_type=event_type,
            agents_run=agents_run,
            results=results,
            errors=errors,
        )

    def dispatch_async(
        self,
        event_type: str,
        event_data: dict,
        owner: str,
        repo: str,
    ):
        """Submit a dispatch to the thread pool.

        Returns:
            Future that resolves to DispatchResult.
        """
        return self._executor.submit(
            self.dispatch, event_type, event_data, owner, repo
        )

    def shutdown(self):
        """Shut down the executor, waiting for in-flight dispatches."""
        self._executor.shutdown(wait=True)


# Singleton dispatcher for easy access
_dispatcher: Dispatcher | None = None


def get_dispatcher() -> Dispatcher:
    """Return the process-wide Dispatcher, creating it on first use."""
    global _dispatcher
    if _dispatcher is None:
        _dispatcher = Dispatcher()
    return _dispatcher


def dispatch_event(
    event_type: str,
    event_data: dict,
    owner: str,
    repo: str,
) -> DispatchResult:
    """Dispatch an event using the global dispatcher.

    Args:
        event_type: Type of event.
        event_data: Event payload data.
        owner: Repository owner.
        repo: Repository name.

    Returns:
        Dispatch result.
    """
    return get_dispatcher().dispatch(event_type, event_data, owner, repo)
+""" + +import json +import logging +import os +from datetime import datetime +from pathlib import Path +from typing import Any + + +class AuditLogger: + """Audit logger for enterprise compliance.""" + + def __init__( + self, + log_path: str | None = None, + enabled: bool = True, + ): + """Initialize the audit logger. + + Args: + log_path: Directory to write audit logs. + enabled: Whether audit logging is enabled. + """ + self.enabled = enabled + self.log_path = Path( + log_path or os.environ.get("AI_AUDIT_PATH", "/var/log/ai-review/") + ) + self.logger = logging.getLogger("audit") + + if self.enabled: + self._ensure_log_dir() + + def _ensure_log_dir(self): + """Ensure the log directory exists.""" + try: + self.log_path.mkdir(parents=True, exist_ok=True) + except Exception as e: + self.logger.warning(f"Could not create audit log directory: {e}") + self.enabled = False + + def _get_log_file(self) -> Path: + """Get the current log file path (daily rotation).""" + date_str = datetime.utcnow().strftime("%Y-%m-%d") + return self.log_path / f"audit-{date_str}.jsonl" + + def log( + self, + action: str, + agent: str, + owner: str, + repo: str, + details: dict[str, Any] | None = None, + success: bool = True, + error: str | None = None, + ): + """Log an audit event. + + Args: + action: Action performed (e.g., "review_pr", "triage_issue"). + agent: Agent name that performed the action. + owner: Repository owner. + repo: Repository name. + details: Additional details about the action. + success: Whether the action succeeded. + error: Error message if failed. 
+ """ + if not self.enabled: + return + + event = { + "timestamp": datetime.utcnow().isoformat() + "Z", + "action": action, + "agent": agent, + "repository": f"{owner}/{repo}", + "success": success, + "details": details or {}, + } + + if error: + event["error"] = error + + try: + log_file = self._get_log_file() + with open(log_file, "a") as f: + f.write(json.dumps(event) + "\n") + except Exception as e: + self.logger.error(f"Failed to write audit log: {e}") + + def log_llm_call( + self, + agent: str, + owner: str, + repo: str, + provider: str, + model: str, + tokens_used: int | None = None, + duration_ms: int | None = None, + ): + """Log an LLM API call. + + Args: + agent: Agent making the call. + owner: Repository owner. + repo: Repository name. + provider: LLM provider used. + model: Model name. + tokens_used: Number of tokens consumed. + duration_ms: Call duration in milliseconds. + """ + self.log( + action="llm_call", + agent=agent, + owner=owner, + repo=repo, + details={ + "provider": provider, + "model": model, + "tokens_used": tokens_used, + "duration_ms": duration_ms, + }, + ) + + def log_comment_posted( + self, + agent: str, + owner: str, + repo: str, + issue_number: int, + comment_type: str, + ): + """Log a comment being posted. + + Args: + agent: Agent posting the comment. + owner: Repository owner. + repo: Repository name. + issue_number: Issue or PR number. + comment_type: Type of comment (triage, review, response). + """ + self.log( + action="comment_posted", + agent=agent, + owner=owner, + repo=repo, + details={ + "issue_number": issue_number, + "comment_type": comment_type, + }, + ) + + def log_labels_applied( + self, + agent: str, + owner: str, + repo: str, + issue_number: int, + labels: list[str], + ): + """Log labels being applied. + + Args: + agent: Agent applying labels. + owner: Repository owner. + repo: Repository name. + issue_number: Issue or PR number. + labels: Labels applied. 
+ """ + self.log( + action="labels_applied", + agent=agent, + owner=owner, + repo=repo, + details={ + "issue_number": issue_number, + "labels": labels, + }, + ) + + def get_logs( + self, + start_date: str | None = None, + end_date: str | None = None, + action: str | None = None, + repository: str | None = None, + ) -> list[dict]: + """Retrieve audit logs with optional filtering. + + Args: + start_date: Start date (YYYY-MM-DD). + end_date: End date (YYYY-MM-DD). + action: Filter by action type. + repository: Filter by repository (owner/repo). + + Returns: + List of audit log entries. + """ + if not self.enabled: + return [] + + logs = [] + log_files = sorted(self.log_path.glob("audit-*.jsonl")) + + for log_file in log_files: + # Date filter on filename + file_date = log_file.stem.replace("audit-", "") + if start_date and file_date < start_date: + continue + if end_date and file_date > end_date: + continue + + try: + with open(log_file) as f: + for line in f: + try: + entry = json.loads(line.strip()) + + # Apply filters + if action and entry.get("action") != action: + continue + if repository and entry.get("repository") != repository: + continue + + logs.append(entry) + except json.JSONDecodeError: + continue + except Exception: + continue + + return logs + + def generate_report( + self, + start_date: str | None = None, + end_date: str | None = None, + ) -> dict: + """Generate a summary report of audit activity. + + Args: + start_date: Report start date. + end_date: Report end date. + + Returns: + Summary report dictionary. 
+ """ + logs = self.get_logs(start_date=start_date, end_date=end_date) + + report = { + "period": { + "start": start_date or "all", + "end": end_date or "all", + }, + "total_events": len(logs), + "by_action": {}, + "by_repository": {}, + "by_agent": {}, + "success_rate": 0.0, + "llm_usage": { + "total_calls": 0, + "total_tokens": 0, + }, + } + + success_count = 0 + + for log in logs: + action = log.get("action", "unknown") + repo = log.get("repository", "unknown") + agent = log.get("agent", "unknown") + + report["by_action"][action] = report["by_action"].get(action, 0) + 1 + report["by_repository"][repo] = report["by_repository"].get(repo, 0) + 1 + report["by_agent"][agent] = report["by_agent"].get(agent, 0) + 1 + + if log.get("success"): + success_count += 1 + + if action == "llm_call": + report["llm_usage"]["total_calls"] += 1 + tokens = log.get("details", {}).get("tokens_used") + if tokens: + report["llm_usage"]["total_tokens"] += tokens + + if logs: + report["success_rate"] = success_count / len(logs) + + return report + + +# Global instance +_audit_logger: AuditLogger | None = None + + +def get_audit_logger() -> AuditLogger: + """Get the global audit logger instance.""" + global _audit_logger + if _audit_logger is None: + _audit_logger = AuditLogger() + return _audit_logger diff --git a/tools/ai-review/enterprise/metrics.py b/tools/ai-review/enterprise/metrics.py new file mode 100644 index 0000000..9fabc08 --- /dev/null +++ b/tools/ai-review/enterprise/metrics.py @@ -0,0 +1,371 @@ +"""Metrics Collector + +Observability metrics for AI agent performance monitoring. +Tracks request counts, latencies, errors, and LLM usage. 
+""" + +import time +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from threading import Lock + + +@dataclass +class MetricPoint: + """A single metric data point.""" + + timestamp: datetime + value: float + labels: dict = field(default_factory=dict) + + +class Counter: + """Thread-safe counter metric.""" + + def __init__(self, name: str, description: str = ""): + self.name = name + self.description = description + self._value = 0.0 + self._lock = Lock() + + def inc(self, value: float = 1.0): + """Increment the counter.""" + with self._lock: + self._value += value + + @property + def value(self) -> float: + """Get current counter value.""" + with self._lock: + return self._value + + +class Gauge: + """Thread-safe gauge metric.""" + + def __init__(self, name: str, description: str = ""): + self.name = name + self.description = description + self._value = 0.0 + self._lock = Lock() + + def set(self, value: float): + """Set the gauge value.""" + with self._lock: + self._value = value + + def inc(self, value: float = 1.0): + """Increment the gauge.""" + with self._lock: + self._value += value + + def dec(self, value: float = 1.0): + """Decrement the gauge.""" + with self._lock: + self._value -= value + + @property + def value(self) -> float: + """Get current gauge value.""" + with self._lock: + return self._value + + +class Histogram: + """Simple histogram for tracking distributions.""" + + def __init__( + self, + name: str, + description: str = "", + buckets: list[float] | None = None, + ): + self.name = name + self.description = description + self.buckets = buckets or [0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0] + self._values: list[float] = [] + self._lock = Lock() + + def observe(self, value: float): + """Record an observation.""" + with self._lock: + self._values.append(value) + # Keep only last 1000 observations + if len(self._values) > 1000: + self._values = self._values[-1000:] + + def get_percentile(self, percentile: 
float) -> float: + """Get a percentile value.""" + with self._lock: + if not self._values: + return 0.0 + sorted_values = sorted(self._values) + idx = int(len(sorted_values) * percentile / 100) + return sorted_values[min(idx, len(sorted_values) - 1)] + + @property + def count(self) -> int: + """Get observation count.""" + with self._lock: + return len(self._values) + + @property + def sum(self) -> float: + """Get sum of observations.""" + with self._lock: + return sum(self._values) + + +class MetricsCollector: + """Central metrics collector for AI agents.""" + + def __init__(self, enabled: bool = True): + """Initialize metrics collector. + + Args: + enabled: Whether metrics collection is enabled. + """ + self.enabled = enabled + self._start_time = time.time() + + # Counters + self.requests_total = Counter( + "ai_review_requests_total", + "Total number of review requests processed", + ) + self.requests_success = Counter( + "ai_review_requests_success", + "Number of successful review requests", + ) + self.requests_failed = Counter( + "ai_review_requests_failed", + "Number of failed review requests", + ) + self.llm_calls_total = Counter( + "ai_review_llm_calls_total", + "Total number of LLM API calls", + ) + self.llm_tokens_total = Counter( + "ai_review_llm_tokens_total", + "Total LLM tokens consumed", + ) + self.comments_posted = Counter( + "ai_review_comments_posted_total", + "Total comments posted", + ) + self.labels_applied = Counter( + "ai_review_labels_applied_total", + "Total labels applied", + ) + self.security_findings = Counter( + "ai_review_security_findings_total", + "Total security findings detected", + ) + + # Gauges + self.active_requests = Gauge( + "ai_review_active_requests", + "Currently active review requests", + ) + + # Histograms + self.request_duration = Histogram( + "ai_review_request_duration_seconds", + "Request processing duration", + ) + self.llm_duration = Histogram( + "ai_review_llm_duration_seconds", + "LLM API call duration", + ) + + # 
Per-agent metrics + self._agent_metrics: dict[str, dict] = {} + + def record_request_start(self, agent: str): + """Record the start of a request. + + Args: + agent: Name of the agent handling the request. + """ + if not self.enabled: + return + + self.requests_total.inc() + self.active_requests.inc() + + if agent not in self._agent_metrics: + self._agent_metrics[agent] = { + "total": 0, + "success": 0, + "failed": 0, + } + self._agent_metrics[agent]["total"] += 1 + + def record_request_end( + self, + agent: str, + success: bool, + duration_seconds: float, + ): + """Record the end of a request. + + Args: + agent: Name of the agent. + success: Whether the request succeeded. + duration_seconds: Request duration. + """ + if not self.enabled: + return + + self.active_requests.dec() + self.request_duration.observe(duration_seconds) + + if success: + self.requests_success.inc() + if agent in self._agent_metrics: + self._agent_metrics[agent]["success"] += 1 + else: + self.requests_failed.inc() + if agent in self._agent_metrics: + self._agent_metrics[agent]["failed"] += 1 + + def record_llm_call( + self, + provider: str, + model: str, + tokens: int | None, + duration_seconds: float, + ): + """Record an LLM API call. + + Args: + provider: LLM provider name. + model: Model used. + tokens: Tokens consumed. + duration_seconds: Call duration. + """ + if not self.enabled: + return + + self.llm_calls_total.inc() + self.llm_duration.observe(duration_seconds) + if tokens: + self.llm_tokens_total.inc(tokens) + + def record_comment_posted(self): + """Record a comment being posted.""" + if self.enabled: + self.comments_posted.inc() + + def record_labels_applied(self, count: int = 1): + """Record labels being applied.""" + if self.enabled: + self.labels_applied.inc(count) + + def record_security_finding(self, severity: str): + """Record a security finding.""" + if self.enabled: + self.security_findings.inc() + + def get_summary(self) -> dict: + """Get a summary of all metrics. 
+ + Returns: + Dictionary with metric summaries. + """ + uptime = time.time() - self._start_time + + return { + "uptime_seconds": uptime, + "requests": { + "total": self.requests_total.value, + "success": self.requests_success.value, + "failed": self.requests_failed.value, + "active": self.active_requests.value, + "success_rate": ( + self.requests_success.value / max(self.requests_total.value, 1) + ), + }, + "llm": { + "calls": self.llm_calls_total.value, + "tokens": self.llm_tokens_total.value, + "avg_duration_ms": ( + (self.llm_duration.sum / max(self.llm_duration.count, 1)) * 1000 + ), + "p50_duration_ms": self.llm_duration.get_percentile(50) * 1000, + "p95_duration_ms": self.llm_duration.get_percentile(95) * 1000, + }, + "actions": { + "comments_posted": self.comments_posted.value, + "labels_applied": self.labels_applied.value, + "security_findings": self.security_findings.value, + }, + "latency": { + "avg_ms": ( + (self.request_duration.sum / max(self.request_duration.count, 1)) + * 1000 + ), + "p50_ms": self.request_duration.get_percentile(50) * 1000, + "p95_ms": self.request_duration.get_percentile(95) * 1000, + "p99_ms": self.request_duration.get_percentile(99) * 1000, + }, + "by_agent": self._agent_metrics, + } + + def export_prometheus(self) -> str: + """Export metrics in Prometheus format. + + Returns: + Prometheus-formatted metrics string. 
+ """ + lines = [] + + def add_metric(name: str, value: float, help_text: str = ""): + if help_text: + lines.append(f"# HELP {name} {help_text}") + lines.append(f"{name} {value}") + + add_metric( + "ai_review_requests_total", + self.requests_total.value, + "Total review requests", + ) + add_metric( + "ai_review_requests_success_total", + self.requests_success.value, + "Successful requests", + ) + add_metric( + "ai_review_requests_failed_total", + self.requests_failed.value, + "Failed requests", + ) + add_metric( + "ai_review_llm_calls_total", + self.llm_calls_total.value, + "Total LLM calls", + ) + add_metric( + "ai_review_llm_tokens_total", + self.llm_tokens_total.value, + "Total LLM tokens", + ) + add_metric( + "ai_review_comments_posted_total", + self.comments_posted.value, + "Comments posted", + ) + + return "\n".join(lines) + + +# Global instance +_metrics: MetricsCollector | None = None + + +def get_metrics() -> MetricsCollector: + """Get the global metrics collector instance.""" + global _metrics + if _metrics is None: + _metrics = MetricsCollector() + return _metrics diff --git a/tools/ai-review/main.py b/tools/ai-review/main.py new file mode 100644 index 0000000..9318fc5 --- /dev/null +++ b/tools/ai-review/main.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +"""AI Code Review Agent - Main Entry Point + +This is the main CLI for running AI code review agents. +Can be invoked directly or through CI/CD workflows. 
+""" + +import argparse +import json +import logging +import os +import sys + +import yaml + +# Add the package to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from agents.issue_agent import IssueAgent +from agents.pr_agent import PRAgent +from agents.codebase_agent import CodebaseAgent +from agents.chat_agent import ChatAgent +from dispatcher import Dispatcher, get_dispatcher + + +def setup_logging(verbose: bool = False): + """Configure logging.""" + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig( + level=level, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + +def load_config(config_path: str | None = None) -> dict: + """Load configuration from file.""" + if config_path and os.path.exists(config_path): + with open(config_path) as f: + return yaml.safe_load(f) + + default_path = os.path.join(os.path.dirname(__file__), "config.yml") + if os.path.exists(default_path): + with open(default_path) as f: + return yaml.safe_load(f) + + return {} + + +def run_pr_review(args, config: dict): + """Run PR review agent.""" + from agents.base_agent import AgentContext + + agent = PRAgent(config=config) + + # Build context from environment or arguments + owner, repo = args.repo.split("/") + pr_number = args.pr_number + + context = AgentContext( + owner=owner, + repo=repo, + event_type="pull_request", + event_data={ + "action": "opened", + "pull_request": { + "number": pr_number, + "title": args.title or f"PR #{pr_number}", + }, + }, + config=config, + ) + + result = agent.run(context) + + if result.success: + print(f"✅ PR Review Complete: {result.message}") + print(f" Actions: {', '.join(result.actions_taken)}") + else: + print(f"❌ PR Review Failed: {result.message}") + if result.error: + print(f" Error: {result.error}") + sys.exit(1) + + +def run_issue_triage(args, config: dict): + """Run issue triage agent.""" + from agents.base_agent import AgentContext + from clients.gitea_client 
import GiteaClient + + agent = IssueAgent(config=config) + + owner, repo = args.repo.split("/") + issue_number = args.issue_number + + # Fetch the actual issue data from Gitea API to get the complete body + gitea = GiteaClient() + + try: + issue_data = gitea.get_issue(owner, repo, issue_number) + except Exception as e: + print(f"❌ Failed to fetch issue: {e}") + sys.exit(1) + + context = AgentContext( + owner=owner, + repo=repo, + event_type="issues", + event_data={ + "action": "opened", + "issue": issue_data, + }, + config=config, + ) + + result = agent.run(context) + + if result.success: + print(f"✅ Issue Triage Complete: {result.message}") + print(f" Actions: {', '.join(result.actions_taken)}") + else: + print(f"❌ Issue Triage Failed: {result.message}") + if result.error: + print(f" Error: {result.error}") + sys.exit(1) + + +def run_issue_comment(args, config: dict): + """Handle @ai-bot command in issue comment.""" + from agents.base_agent import AgentContext + + agent = IssueAgent(config=config) + + owner, repo = args.repo.split("/") + issue_number = args.issue_number + + # Fetch the actual issue data from Gitea API + from clients.gitea_client import GiteaClient + gitea = GiteaClient() + + try: + issue_data = gitea.get_issue(owner, repo, issue_number) + except Exception as e: + print(f"❌ Failed to fetch issue: {e}") + sys.exit(1) + + context = AgentContext( + owner=owner, + repo=repo, + event_type="issue_comment", + event_data={ + "action": "created", + "issue": issue_data, + "comment": { + "body": args.comment_body, + }, + }, + config=config, + ) + + result = agent.run(context) + + if result.success: + print(f"✅ Comment Response Complete: {result.message}") + print(f" Actions: {', '.join(result.actions_taken)}") + else: + print(f"❌ Comment Response Failed: {result.message}") + if result.error: + print(f" Error: {result.error}") + sys.exit(1) + + +def run_codebase_analysis(args, config: dict): + """Run codebase analysis agent.""" + from agents.base_agent import 
AgentContext + + agent = CodebaseAgent(config=config) + + owner, repo = args.repo.split("/") + + context = AgentContext( + owner=owner, + repo=repo, + event_type="workflow_dispatch", + event_data={}, + config=config, + ) + + result = agent.run(context) + + if result.success: + print(f"✅ Codebase Analysis Complete: {result.message}") + print(f" Health Score: {result.data.get('health_score', 'N/A')}") + print(f" Actions: {', '.join(result.actions_taken)}") + else: + print(f"❌ Codebase Analysis Failed: {result.message}") + if result.error: + print(f" Error: {result.error}") + sys.exit(1) + + +def run_chat(args, config: dict): + """Run interactive chat with the Bartender bot.""" + from agents.base_agent import AgentContext + from clients.gitea_client import GiteaClient + + agent = ChatAgent(config=config) + + owner, repo = args.repo.split("/") + + # Build context + event_data = {"message": args.message} + + # If issue number provided, add issue context + if args.issue_number: + gitea = GiteaClient() + try: + issue_data = gitea.get_issue(owner, repo, args.issue_number) + event_data["issue"] = issue_data + event_data["issue_number"] = args.issue_number + except Exception as e: + print(f"Warning: Could not fetch issue #{args.issue_number}: {e}") + + context = AgentContext( + owner=owner, + repo=repo, + event_type="chat", + event_data=event_data, + config=config, + ) + + result = agent.run(context) + + if result.success: + print(f"\n🍸 Bartender says:\n") + print(result.data.get("response", "")) + print() + if result.data.get("tools_used"): + print(f" [Tools used: {', '.join(result.data['tools_used'])}]") + else: + print(f"❌ Chat Failed: {result.message}") + if result.error: + print(f" Error: {result.error}") + sys.exit(1) + + +def run_webhook_dispatch(args, config: dict): + """Dispatch a webhook event.""" + dispatcher = get_dispatcher() + + # Register all agents + dispatcher.register_agent(IssueAgent(config=config)) + dispatcher.register_agent(PRAgent(config=config)) + 
dispatcher.register_agent(CodebaseAgent(config=config)) + dispatcher.register_agent(ChatAgent(config=config)) + + # Parse event data + event_data = json.loads(args.event_data) + owner, repo = args.repo.split("/") + + result = dispatcher.dispatch( + event_type=args.event_type, + event_data=event_data, + owner=owner, + repo=repo, + ) + + print(f"Dispatched event: {result.event_type}") + print(f"Agents run: {result.agents_run}") + for i, agent_result in enumerate(result.results): + status = "✅" if agent_result.success else "❌" + print(f" {status} {result.agents_run[i]}: {agent_result.message}") + + if result.errors: + sys.exit(1) + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="AI Code Review Agent", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") + parser.add_argument("-c", "--config", help="Path to config file") + + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # PR review command + pr_parser = subparsers.add_parser("pr", help="Review a pull request") + pr_parser.add_argument("repo", help="Repository (owner/repo)") + pr_parser.add_argument("pr_number", type=int, help="PR number") + pr_parser.add_argument("--title", help="PR title (optional)") + + # Issue triage command + issue_parser = subparsers.add_parser("issue", help="Triage an issue") + issue_parser.add_argument("repo", help="Repository (owner/repo)") + issue_parser.add_argument("issue_number", type=int, help="Issue number") + issue_parser.add_argument("--title", help="Issue title") + issue_parser.add_argument("--body", help="Issue body") + + # Issue comment command (for @ai-bot mentions) + comment_parser = subparsers.add_parser("comment", help="Respond to @ai-bot command") + comment_parser.add_argument("repo", help="Repository (owner/repo)") + comment_parser.add_argument("issue_number", type=int, help="Issue number") + 
comment_parser.add_argument("comment_body", help="Comment body with @ai-bot command") + + # Codebase analysis command + codebase_parser = subparsers.add_parser("codebase", help="Analyze codebase") + codebase_parser.add_argument("repo", help="Repository (owner/repo)") + + # Chat command (Bartender) + chat_parser = subparsers.add_parser("chat", help="Chat with Bartender bot") + chat_parser.add_argument("repo", help="Repository (owner/repo)") + chat_parser.add_argument("message", help="Message to send to Bartender") + chat_parser.add_argument( + "--issue", dest="issue_number", type=int, + help="Optional issue number to post response to" + ) + + # Webhook dispatch command + webhook_parser = subparsers.add_parser("dispatch", help="Dispatch webhook event") + webhook_parser.add_argument("repo", help="Repository (owner/repo)") + webhook_parser.add_argument("event_type", help="Event type") + webhook_parser.add_argument("event_data", help="Event data (JSON)") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + sys.exit(1) + + setup_logging(args.verbose) + config = load_config(args.config) + + if args.command == "pr": + run_pr_review(args, config) + elif args.command == "issue": + run_issue_triage(args, config) + elif args.command == "comment": + run_issue_comment(args, config) + elif args.command == "codebase": + run_codebase_analysis(args, config) + elif args.command == "chat": + run_chat(args, config) + elif args.command == "dispatch": + run_webhook_dispatch(args, config) + + +if __name__ == "__main__": + main() diff --git a/tools/ai-review/prompts/base.md b/tools/ai-review/prompts/base.md new file mode 100644 index 0000000..060f1c6 --- /dev/null +++ b/tools/ai-review/prompts/base.md @@ -0,0 +1,64 @@ +You are an experienced senior software engineer with deep expertise in: +- Secure coding and security analysis +- System design and architecture +- Performance optimization +- Maintainable, readable code +- Test coverage and documentation +- CI/CD 
pipeline best practices + +You are reviewing the following **pull request diff**. Your goal is to provide a **comprehensive, actionable, and clear review** as a structured JSON response. + +--- + +## Requirements + +Review the diff and identify issues in these categories: +- **Security**: Vulnerabilities, hardcoded secrets, injection risks +- **Correctness**: Logic errors, edge cases, bugs +- **Performance**: Inefficiencies, N+1 queries, memory issues +- **Maintainability**: Code complexity, duplication, unclear logic +- **Readability**: Naming, formatting, documentation +- **Testing**: Missing tests, untested paths +- **Architecture**: Design issues, coupling, separation of concerns + +--- + +## Output Format + +Return a JSON object with this structure: + +```json +{{ + "summary": "Brief overall assessment of the PR", + "overall_severity": "HIGH" | "MEDIUM" | "LOW", + "approval": true | false, + "issues": [ + {{ + "file": "path/to/file.py", + "line": 42, + "severity": "HIGH" | "MEDIUM" | "LOW", + "category": "Security" | "Correctness" | "Performance" | "Maintainability" | "Readability" | "Testing" | "Architecture", + "description": "Clear description of the issue", + "recommendation": "Specific fix or improvement", + "code_snippet": "relevant code if applicable" + }} + ] +}} +``` + +--- + +## Rules + +1. **Be specific**: Include file paths and line numbers when possible +2. **Be actionable**: Every issue must have a clear recommendation +3. **Prioritize**: HIGH severity for security/data-loss issues, MEDIUM for bugs, LOW for style +4. **Be honest**: If uncertain, note it in the description +5. **Stay focused**: Only report real issues, not style preferences +6. Set `approval: false` if any HIGH severity issues exist +7. 
Output ONLY valid JSON, no additional text + +--- + +## Diff to Review + diff --git a/tools/ai-review/prompts/issue_response.md b/tools/ai-review/prompts/issue_response.md new file mode 100644 index 0000000..c7d2a79 --- /dev/null +++ b/tools/ai-review/prompts/issue_response.md @@ -0,0 +1,63 @@ +You are a helpful AI assistant responding to a GitHub/Gitea issue. Your goal is to provide a helpful, professional response that assists the issue author. + +## Context + +**Issue Type:** {issue_type} +**Priority:** {priority} +**Title:** {title} +**Body:** +{body} + +## Triage Analysis +{triage_analysis} + +## Your Task + +Generate a helpful comment response based on the issue type: + +### For Bug Reports: +1. Acknowledge the issue +2. If missing info, politely request specific details needed +3. Suggest any immediate workarounds if obvious +4. Indicate next steps (investigation, need reproduction, etc.) + +### For Feature Requests: +1. Thank the user for the suggestion +2. Summarize understanding of the request +3. Ask clarifying questions if needed +4. Note any related existing features + +### For Questions: +1. Directly answer the question if possible +2. Link to relevant documentation +3. Provide code examples if helpful +4. Suggest alternatives if applicable + +### For Documentation Issues: +1. Acknowledge the gap/issue +2. Clarify the correct information if known +3. Note what documentation updates are needed + +## Response Guidelines + +1. Be concise but thorough +2. Use a friendly, professional tone +3. Format with Markdown appropriately +4. Include code blocks where relevant +5. DO NOT promise timelines or fixes +6. DO NOT make up information - say "I'm not certain" if unsure +7. 
Always end with an offer to help further + +## Output Format + +Return a JSON object: +```json +{{ + "comment": "Your markdown-formatted response here", + "needs_human_review": true/false, + "suggested_assignee": null or "username", + "follow_up_questions": ["question1", "question2"] +}} +``` + +Generate your response: diff --git a/tools/ai-review/prompts/issue_triage.md b/tools/ai-review/prompts/issue_triage.md new file mode 100644 index 0000000..aefc4af --- /dev/null +++ b/tools/ai-review/prompts/issue_triage.md @@ -0,0 +1,69 @@ +You are an expert issue triage specialist. Analyze the following GitHub/Gitea issue and provide a structured classification. + +## Your Task + +Analyze the issue and return a JSON object with the following structure: + +```json +{{ + "type": "bug" | "feature" | "question" | "documentation" | "support" | "enhancement", + "priority": "high" | "medium" | "low", + "confidence": 0.0-1.0, + "summary": "Brief one-line summary of the issue", + "suggested_labels": ["label1", "label2"], + "is_duplicate": false, + "duplicate_of": null, + "needs_more_info": false, + "missing_info": [], + "components": ["component1", "component2"], + "reasoning": "Brief explanation of your classification" +}} +``` + +## Classification Guidelines + +### Type Classification +- **bug**: Something is broken, not working as expected, error messages, crashes +- **feature**: Request for new functionality that doesn't exist +- **enhancement**: Improvement to existing functionality +- **question**: User asking how to do something, seeking clarification +- **documentation**: Issues with docs, missing docs, unclear docs +- **support**: General help request, troubleshooting + +### Priority Classification +- **high**: Security issues, data loss, complete feature broken, blocking issues +- **medium**: Significant functionality impacted, workaround exists +- **low**: Minor issues, cosmetic, nice-to-have improvements + +### Missing Information Indicators +Look for missing: +- Steps 
import json
import os
import re
import subprocess
import sys

import requests
import yaml
from comment import to_markdown

ROOT = os.path.dirname(__file__)

# Load configuration once at import time (this is a single-shot CLI script).
with open(f"{ROOT}/config.yml") as _cfg_file:
    CFG = yaml.safe_load(_cfg_file)

# Hidden HTML comment used to recognise our own PR comment on later runs.
# It MUST be non-empty: "" is a substring of every string, so an empty
# marker would make find_existing_comment() match the first comment on
# the PR and upsert_pr_comment() would then overwrite a human's comment.
AI_MARKER = "<!-- ai-review-comment -->"

# Disclaimer text to prepend
AI_DISCLAIMER = (
    "**Note:** This review was generated by an AI assistant. "
    "While it aims to be accurate and helpful, it may contain mistakes "
    "or miss important issues. Please verify all findings before taking action."
)

# -------------------------------
# Helper functions
# -------------------------------


def get_diff() -> str:
    """Return the git diff of HEAD against origin/main.

    The diff is truncated to ``review.max_diff_lines`` lines (from
    config.yml) so very large PRs do not blow the LLM context window.
    """
    diff = subprocess.check_output(["git", "diff", "origin/main...HEAD"], text=True)
    lines = diff.splitlines()
    max_lines = CFG["review"]["max_diff_lines"]
    if len(lines) > max_lines:
        return "\n".join(lines[:max_lines])
    return diff


def build_prompt(diff: str) -> str:
    """Combine the base prompt template with the diff to review."""
    with open(f"{ROOT}/prompts/base.md") as f:
        base = f.read()
    return f"{base}\n\nDIFF:\n{diff}"


def call_llm(prompt: str) -> str:
    """Send *prompt* to the configured LLM provider and return its reply.

    Raises:
        requests.HTTPError: if the provider returns a non-2xx status.
        RuntimeError: if the configured provider is unknown.
    """
    provider = CFG["provider"]

    if provider == "openai":
        r = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openai"],
                "temperature": CFG["temperature"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        # Fail loudly on API errors instead of a confusing KeyError below.
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    if provider == "openrouter":
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
                "Content-Type": "application/json",
            },
            json={
                "model": CFG["model"]["openrouter"],
                # Honour the configured temperature, consistent with the
                # openai branch above.
                "temperature": CFG["temperature"],
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=60,
        )
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]

    if provider == "ollama":
        r = requests.post(
            f"{os.environ['OLLAMA_HOST']}/api/generate",
            json={
                "model": CFG["model"]["ollama"],
                "prompt": prompt,
                "stream": False,
            },
            timeout=120,  # local models can be slow
        )
        r.raise_for_status()
        return r.json()["response"]

    raise RuntimeError(f"Unknown provider: {provider!r}")


# -------------------------------
# Gitea PR comment functions
# -------------------------------


def find_existing_comment() -> int | None:
    """Return the id of our previous AI review comment on the PR, if any.

    Identification is done via the hidden AI_MARKER embedded in the
    comment body; returns None when no AI comment exists yet.
    """
    url = (
        f"{os.environ['AI_REVIEW_API_URL']}/repos/"
        f"{os.environ['AI_REVIEW_REPO']}/issues/"
        f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
    )

    r = requests.get(
        url,
        headers={"Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}"},
        timeout=15,
    )
    r.raise_for_status()

    for c in r.json():
        # .get guards against comments with no "body" field.
        if AI_MARKER in c.get("body", ""):
            return c["id"]

    return None


def upsert_pr_comment(markdown: str):
    """Create the AI review comment, or update it if one already exists."""
    comment_id = find_existing_comment()
    headers = {
        "Authorization": f"token {os.environ['AI_REVIEW_TOKEN']}",
        "Content-Type": "application/json",
    }

    if comment_id:
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/comments/{comment_id}"
        )
        r = requests.patch(url, headers=headers, json={"body": markdown}, timeout=15)
    else:
        url = (
            f"{os.environ['AI_REVIEW_API_URL']}/repos/"
            f"{os.environ['AI_REVIEW_REPO']}/issues/"
            f"{os.environ['AI_REVIEW_PR_NUMBER']}/comments"
        )
        r = requests.post(url, headers=headers, json={"body": markdown}, timeout=15)

    if r.status_code not in (200, 201):
        raise RuntimeError(f"Failed to upsert PR comment: {r.text}")


def _parse_review(raw: str) -> dict:
    """Parse the LLM reply as JSON, tolerating a markdown code fence.

    LLMs frequently wrap JSON in ```json ... ``` even when told not to;
    strip a leading/trailing fence before handing off to json.loads.
    """
    text = raw.strip()
    if text.startswith("```"):
        text = re.sub(r"^```[\w-]*\s*", "", text)
        text = re.sub(r"\s*```$", "", text)
    return json.loads(text)


# -------------------------------
# Main workflow
# -------------------------------


def main():
    diff = get_diff()
    if not diff.strip():
        sys.exit(0)  # nothing changed, nothing to review

    raw = call_llm(build_prompt(diff))
    result = _parse_review(raw)

    # Convert JSON review to Markdown
    markdown = to_markdown(result)

    # Prepend AI disclaimer and hidden marker (used by later upserts)
    full_comment = AI_DISCLAIMER + "\n\n" + AI_MARKER + "\n" + markdown

    upsert_pr_comment(full_comment)

    # Fail CI when the review severity matches the configured threshold
    # and the AI did not explicitly approve. .get avoids a KeyError on a
    # malformed review payload.
    if result.get("overall_severity") == CFG["review"][
        "fail_on_severity"
    ] and not result.get("approval", False):
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ +"""Security Scanning Package + +This package contains security scanning utilities for +detecting vulnerabilities in code. +""" + +from security.security_scanner import SecurityScanner + +__all__ = ["SecurityScanner"] diff --git a/tools/ai-review/security/security_scanner.py b/tools/ai-review/security/security_scanner.py new file mode 100644 index 0000000..b2b3e4d --- /dev/null +++ b/tools/ai-review/security/security_scanner.py @@ -0,0 +1,335 @@ +"""Security Scanner + +Pattern-based security vulnerability detection for code analysis. +Covers OWASP Top 10 and common security anti-patterns. +""" + +import re +from dataclasses import dataclass +from typing import Iterator + +import yaml +import os + + +@dataclass +class SecurityFinding: + """A single security finding.""" + + rule_id: str + rule_name: str + severity: str # HIGH, MEDIUM, LOW + category: str # OWASP category + file: str + line: int + code_snippet: str + description: str + recommendation: str + cwe: str | None = None # CWE reference + + +class SecurityScanner: + """Security scanner using pattern matching and rules.""" + + # Default rules covering OWASP Top 10 + DEFAULT_RULES = [ + # A01:2021 – Broken Access Control + { + "id": "SEC001", + "name": "Hardcoded Credentials", + "pattern": r'(?i)(password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[=:]\s*["\'][^"\']{4,}["\']', + "severity": "HIGH", + "category": "A01:2021 Broken Access Control", + "cwe": "CWE-798", + "description": "Hardcoded credentials detected in source code", + "recommendation": "Use environment variables or a secrets management system", + }, + { + "id": "SEC002", + "name": "Exposed Private Key", + "pattern": r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----", + "severity": "HIGH", + "category": "A01:2021 Broken Access Control", + "cwe": "CWE-321", + "description": "Private key embedded in source code", + "recommendation": "Never commit private keys. 
Use secure key management", + }, + # A02:2021 – Cryptographic Failures + { + "id": "SEC003", + "name": "Weak Crypto Algorithm", + "pattern": r"(?i)\b(md5|sha1)\s*\(", + "severity": "MEDIUM", + "category": "A02:2021 Cryptographic Failures", + "cwe": "CWE-328", + "description": "Use of weak cryptographic hash function", + "recommendation": "Use SHA-256 or stronger hashing algorithms", + }, + { + "id": "SEC004", + "name": "Insecure Random", + "pattern": r"(?i)\brandom\.(random|randint|choice|randrange)\s*\(", + "severity": "MEDIUM", + "category": "A02:2021 Cryptographic Failures", + "cwe": "CWE-330", + "description": "Use of non-cryptographic random number generator for security purposes", + "recommendation": "Use secrets module or os.urandom() for security-critical randomness", + }, + # A03:2021 – Injection + { + "id": "SEC005", + "name": "SQL Injection", + "pattern": r'(?i)(execute|query|cursor\.execute)\s*\([^)]*(%s|%d|\{|\+)[^)]*\)', + "severity": "HIGH", + "category": "A03:2021 Injection", + "cwe": "CWE-89", + "description": "Potential SQL injection through string formatting", + "recommendation": "Use parameterized queries with placeholders", + }, + { + "id": "SEC006", + "name": "Command Injection", + "pattern": r"(?i)(os\.system|subprocess\.call|subprocess\.run)\s*\([^)]*(\+|format|%)[^)]*\)", + "severity": "HIGH", + "category": "A03:2021 Injection", + "cwe": "CWE-78", + "description": "Potential command injection through string concatenation", + "recommendation": "Use subprocess with shell=False and pass arguments as list", + }, + { + "id": "SEC007", + "name": "Eval Usage", + "pattern": r"\beval\s*\(", + "severity": "HIGH", + "category": "A03:2021 Injection", + "cwe": "CWE-95", + "description": "Use of eval() can lead to code injection", + "recommendation": "Avoid eval(). 
Use ast.literal_eval() for data or safer alternatives", + }, + { + "id": "SEC008", + "name": "XSS Risk", + "pattern": r'(?i)(innerHTML|outerHTML|document\.write)\s*=', + "severity": "MEDIUM", + "category": "A03:2021 Injection", + "cwe": "CWE-79", + "description": "Direct DOM manipulation may allow XSS", + "recommendation": "Use textContent or proper sanitization libraries", + }, + # A04:2021 – Insecure Design + { + "id": "SEC009", + "name": "Debug Mode", + "pattern": r"(?i)(debug\s*=\s*true|DEBUG\s*=\s*True|\.setLevel\(.*DEBUG\))", + "severity": "MEDIUM", + "category": "A04:2021 Insecure Design", + "cwe": "CWE-489", + "description": "Debug mode enabled in code", + "recommendation": "Ensure debug mode is disabled in production", + }, + # A05:2021 – Security Misconfiguration + { + "id": "SEC010", + "name": "CORS Wildcard", + "pattern": r'(?i)(access-control-allow-origin|cors.*origin)\s*[=:]\s*["\']?\*', + "severity": "MEDIUM", + "category": "A05:2021 Security Misconfiguration", + "cwe": "CWE-942", + "description": "CORS configured to allow all origins", + "recommendation": "Specify allowed origins explicitly", + }, + { + "id": "SEC011", + "name": "SSL Verification Disabled", + "pattern": r"(?i)(verify\s*=\s*False|CERT_NONE|ssl\._create_unverified_context)", + "severity": "HIGH", + "category": "A05:2021 Security Misconfiguration", + "cwe": "CWE-295", + "description": "SSL certificate verification disabled", + "recommendation": "Always verify SSL certificates in production", + }, + # A07:2021 – Identification and Authentication Failures + { + "id": "SEC012", + "name": "Hardcoded JWT Secret", + "pattern": r'(?i)(jwt|token).*secret\s*[=:]\s*["\'][^"\']+["\']', + "severity": "HIGH", + "category": "A07:2021 Authentication Failures", + "cwe": "CWE-798", + "description": "JWT secret hardcoded in source code", + "recommendation": "Use environment variables for JWT secrets", + }, + # A08:2021 – Software and Data Integrity Failures + { + "id": "SEC013", + "name": "Pickle 
Usage", + "pattern": r"(?i)pickle\.(loads?|dumps?)\s*\(", + "severity": "MEDIUM", + "category": "A08:2021 Integrity Failures", + "cwe": "CWE-502", + "description": "Pickle can execute arbitrary code during deserialization", + "recommendation": "Use JSON or other safe serialization formats", + }, + # A09:2021 – Security Logging and Monitoring Failures + { + "id": "SEC014", + "name": "Sensitive Data Logging", + "pattern": r'(?i)(log|print|console\.log)\s*\([^)]*\b(password|token|secret|key)\b', + "severity": "MEDIUM", + "category": "A09:2021 Logging Failures", + "cwe": "CWE-532", + "description": "Potentially logging sensitive information", + "recommendation": "Never log passwords, tokens, or secrets", + }, + # A10:2021 – Server-Side Request Forgery + { + "id": "SEC015", + "name": "SSRF Risk", + "pattern": r'(?i)(requests\.(get|post|put)|urllib\.request\.urlopen|fetch)\s*\([^)]*\+', + "severity": "MEDIUM", + "category": "A10:2021 SSRF", + "cwe": "CWE-918", + "description": "URL constructed from user input may allow SSRF", + "recommendation": "Validate and sanitize URLs, use allowlists", + }, + # Additional common issues + { + "id": "SEC016", + "name": "Hardcoded IP Address", + "pattern": r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b', + "severity": "LOW", + "category": "Configuration", + "cwe": "CWE-547", + "description": "Hardcoded IP address found", + "recommendation": "Use configuration files or environment variables for IP addresses", + }, + { + "id": "SEC017", + "name": "TODO/FIXME Security", + "pattern": r"(?i)(TODO|FIXME).*\b(security|auth|password|token|secret|vulnerable)\b", + "severity": "MEDIUM", + "category": "Code Quality", + "cwe": None, + "description": "Security-related TODO/FIXME comment found", + "recommendation": "Address security-related TODO items before deployment", + }, + ] + + def __init__(self, rules_file: str | None 
= None): + """Initialize scanner with rules. + + Args: + rules_file: Optional path to custom rules YAML file. + """ + self.rules = self.DEFAULT_RULES.copy() + + if rules_file and os.path.exists(rules_file): + try: + with open(rules_file) as f: + custom_rules = yaml.safe_load(f) + if custom_rules and "rules" in custom_rules: + self.rules.extend(custom_rules["rules"]) + except Exception: + pass # Use defaults if custom rules fail to load + + # Compile patterns for efficiency + self._compiled_rules = [] + for rule in self.rules: + try: + self._compiled_rules.append( + {**rule, "_pattern": re.compile(rule["pattern"])} + ) + except re.error: + pass # Skip invalid patterns + + def scan_content( + self, + content: str, + filename: str, + ) -> Iterator[SecurityFinding]: + """Scan content for security issues. + + Args: + content: File content to scan. + filename: Name of the file (for reporting). + + Yields: + SecurityFinding for each detected issue. + """ + lines = content.splitlines() + + for line_num, line in enumerate(lines, 1): + for rule in self._compiled_rules: + if rule["_pattern"].search(line): + yield SecurityFinding( + rule_id=rule["id"], + rule_name=rule["name"], + severity=rule["severity"], + category=rule["category"], + file=filename, + line=line_num, + code_snippet=line.strip()[:120], + description=rule["description"], + recommendation=rule["recommendation"], + cwe=rule.get("cwe"), + ) + + def scan_diff(self, diff: str) -> Iterator[SecurityFinding]: + """Scan a git diff for security issues. + + Only scans added lines (lines starting with +). + + Args: + diff: Git diff content. + + Yields: + SecurityFinding for each detected issue. 
+ """ + current_file = None + current_line = 0 + + for line in diff.splitlines(): + # Track current file + if line.startswith("diff --git"): + match = re.search(r"b/(.+)$", line) + if match: + current_file = match.group(1) + current_line = 0 + # Track line numbers + elif line.startswith("@@"): + match = re.search(r"\+(\d+)", line) + if match: + current_line = int(match.group(1)) - 1 + # Check added lines + elif line.startswith("+") and not line.startswith("+++"): + current_line += 1 + for finding in self.scan_content(line[1:], current_file or "unknown"): + finding.line = current_line + yield finding + elif not line.startswith("-"): + current_line += 1 + + def get_summary(self, findings: list[SecurityFinding]) -> dict: + """Get summary statistics for findings. + + Args: + findings: List of security findings. + + Returns: + Summary dictionary with counts by severity and category. + """ + summary = { + "total": len(findings), + "by_severity": {"HIGH": 0, "MEDIUM": 0, "LOW": 0}, + "by_category": {}, + } + + for finding in findings: + summary["by_severity"][finding.severity] = ( + summary["by_severity"].get(finding.severity, 0) + 1 + ) + summary["by_category"][finding.category] = ( + summary["by_category"].get(finding.category, 0) + 1 + ) + + return summary