Implement GuardDen Discord moderation bot

Features:
- Core moderation: warn, kick, ban, timeout, strike system
- Automod: banned words filter, scam detection, anti-spam, link filtering
- AI moderation: Claude/OpenAI integration, NSFW detection, phishing analysis
- Verification system: button, captcha, math, emoji challenges
- Rate limiting system with configurable scopes
- Event logging: joins, leaves, message edits/deletes, voice activity
- Per-guild configuration with caching
- Docker deployment support

Bug fixes applied:
- Fixed await on session.delete() in guild_config.py
- Fixed memory leak in AI moderation message tracking (use deque)
- Added error handling to bot shutdown
- Added error handling to timeout command
- Removed unused Literal import
- Added prefix validation
- Added image analysis limit (3 per message)
- Fixed test mock for SQLAlchemy model
This commit is contained in:
2026-01-16 19:27:48 +01:00
parent ffe42b6d51
commit 4e16777f25
45 changed files with 5802 additions and 1 deletions

View File

@@ -0,0 +1,149 @@
"""Base classes for AI providers."""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Literal
class ContentCategory(str, Enum):
    """Categories of problematic content.

    Inherits from ``str`` so members compare equal to, and serialize as,
    their plain string values (useful for JSON payloads and DB storage).
    ``severity`` on :class:`ModerationResult` weights these categories
    when computing a score.
    """

    SAFE = "safe"  # no violation detected
    HARASSMENT = "harassment"
    HATE_SPEECH = "hate_speech"
    SEXUAL = "sexual"
    VIOLENCE = "violence"
    SELF_HARM = "self_harm"
    SPAM = "spam"
    SCAM = "scam"
    MISINFORMATION = "misinformation"
@dataclass
class ModerationResult:
    """Outcome of an AI text-moderation pass.

    Attributes:
        is_flagged: Whether the content was judged to violate policy.
        confidence: Provider confidence in the verdict, 0.0 to 1.0.
        categories: Violation categories that apply to the content.
        explanation: Human-readable reasoning from the provider.
        suggested_action: Moderation action the provider recommends.
    """

    is_flagged: bool = False
    confidence: float = 0.0  # 0.0 to 1.0
    categories: list[ContentCategory] = field(default_factory=list)
    explanation: str = ""
    suggested_action: Literal["none", "warn", "delete", "timeout", "ban"] = "none"

    @property
    def severity(self) -> int:
        """Severity score 0-100: confidence contributes up to 50 points,
        then each flagged category adds a weighted bump; capped at 100."""
        if not self.is_flagged:
            return 0

        def category_weight(category: ContentCategory) -> int:
            # Hate speech / self-harm / scams are the most actionable signals.
            if category in (
                ContentCategory.HATE_SPEECH,
                ContentCategory.SELF_HARM,
                ContentCategory.SCAM,
            ):
                return 30
            if category in (
                ContentCategory.HARASSMENT,
                ContentCategory.VIOLENCE,
                ContentCategory.SEXUAL,
            ):
                return 20
            return 10  # everything else (spam, misinformation, ...)

        score = int(self.confidence * 50)
        score += sum(category_weight(c) for c in self.categories)
        return min(score, 100)
@dataclass
class ImageAnalysisResult:
    """Result of AI image analysis.

    Returned by :meth:`AIProvider.analyze_image`.
    """

    is_nsfw: bool = False  # flagged as NSFW / sexually explicit
    is_violent: bool = False  # flagged as violent imagery
    is_disturbing: bool = False  # flagged as otherwise disturbing content
    confidence: float = 0.0  # provider confidence, 0.0 to 1.0
    description: str = ""  # human-readable description of what was found
    categories: list[str] = field(default_factory=list)  # free-form category labels
@dataclass
class PhishingAnalysisResult:
    """Result of AI phishing/scam analysis.

    Returned by :meth:`AIProvider.analyze_phishing`.
    """

    is_phishing: bool = False  # URL judged to be phishing/scam
    confidence: float = 0.0  # provider confidence, 0.0 to 1.0
    risk_factors: list[str] = field(default_factory=list)  # indicators that triggered the verdict
    explanation: str = ""  # human-readable reasoning from the provider
class AIProvider(ABC):
    """Interface that every concrete AI backend must implement.

    A provider supplies three async analysis capabilities — text
    moderation, image analysis, and phishing/URL analysis — plus a
    ``close`` hook for releasing whatever resources it holds.
    """

    @abstractmethod
    async def moderate_text(
        self,
        content: str,
        context: str | None = None,
        sensitivity: int = 50,
    ) -> ModerationResult:
        """Check text content against moderation policy.

        Args:
            content: Text to be analyzed.
            context: Optional conversation/server context for the model.
            sensitivity: 0-100 strictness knob; higher flags more.

        Returns:
            A ModerationResult describing the verdict.
        """

    @abstractmethod
    async def analyze_image(
        self,
        image_url: str,
        sensitivity: int = 50,
    ) -> ImageAnalysisResult:
        """Check an image for NSFW or otherwise inappropriate content.

        Args:
            image_url: Location of the image to analyze.
            sensitivity: 0-100 strictness knob; higher flags more.

        Returns:
            An ImageAnalysisResult describing the verdict.
        """

    @abstractmethod
    async def analyze_phishing(
        self,
        url: str,
        message_content: str | None = None,
    ) -> PhishingAnalysisResult:
        """Check a URL for phishing/scam indicators.

        Args:
            url: The URL under suspicion.
            message_content: Optional surrounding message for context.

        Returns:
            A PhishingAnalysisResult describing the verdict.
        """

    @abstractmethod
    async def close(self) -> None:
        """Release any resources held by the provider."""