Implement GuardDen Discord moderation bot
Features: - Core moderation: warn, kick, ban, timeout, strike system - Automod: banned words filter, scam detection, anti-spam, link filtering - AI moderation: Claude/OpenAI integration, NSFW detection, phishing analysis - Verification system: button, captcha, math, emoji challenges - Rate limiting system with configurable scopes - Event logging: joins, leaves, message edits/deletes, voice activity - Per-guild configuration with caching - Docker deployment support Bug fixes applied: - Fixed await on session.delete() in guild_config.py - Fixed memory leak in AI moderation message tracking (use deque) - Added error handling to bot shutdown - Added error handling to timeout command - Removed unused Literal import - Added prefix validation - Added image analysis limit (3 per message) - Fixed test mock for SQLAlchemy model
This commit is contained in:
149
src/guardden/services/ai/base.py
Normal file
149
src/guardden/services/ai/base.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""Base classes for AI providers."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
|
||||
|
||||
class ContentCategory(str, Enum):
    """Categories of problematic content.

    Subclasses ``str`` so members compare equal to their raw string
    values and serialize cleanly (e.g. in JSON payloads or DB columns).
    """

    # SAFE is the explicit "no violation" category; providers may return it
    # alongside is_flagged=False rather than an empty category list.
    SAFE = "safe"
    HARASSMENT = "harassment"
    HATE_SPEECH = "hate_speech"
    SEXUAL = "sexual"
    VIOLENCE = "violence"
    SELF_HARM = "self_harm"
    SPAM = "spam"
    SCAM = "scam"
    MISINFORMATION = "misinformation"
|
||||
|
||||
|
||||
@dataclass
class ModerationResult:
    """Outcome of an AI text-moderation pass.

    Produced by :meth:`AIProvider.moderate_text`; defaults describe a
    clean, unflagged message.
    """

    is_flagged: bool = False
    confidence: float = 0.0  # provider confidence in the verdict, 0.0 to 1.0
    categories: list[ContentCategory] = field(default_factory=list)
    explanation: str = ""
    suggested_action: Literal["none", "warn", "delete", "timeout", "ban"] = "none"

    @property
    def severity(self) -> int:
        """Severity score 0-100 derived from confidence plus category weights.

        Unflagged results are always 0. Otherwise the confidence supplies
        up to 50 points and each flagged category adds a fixed bump
        (30 high / 20 medium / 10 otherwise), capped at 100.
        """
        if not self.is_flagged:
            return 0

        high = {
            ContentCategory.HATE_SPEECH,
            ContentCategory.SELF_HARM,
            ContentCategory.SCAM,
        }
        medium = {
            ContentCategory.HARASSMENT,
            ContentCategory.VIOLENCE,
            ContentCategory.SEXUAL,
        }

        def _weight(category: ContentCategory) -> int:
            # Per-category contribution to the overall score.
            if category in high:
                return 30
            if category in medium:
                return 20
            return 10

        score = int(self.confidence * 50) + sum(_weight(c) for c in self.categories)
        return min(score, 100)
|
||||
|
||||
|
||||
@dataclass
class ImageAnalysisResult:
    """Result of AI image analysis.

    Returned by :meth:`AIProvider.analyze_image`; defaults describe an
    image with no detected issues.
    """

    # Classification flags set by the provider.
    is_nsfw: bool = False
    is_violent: bool = False
    is_disturbing: bool = False
    # Provider confidence in the flags above — presumably 0.0 to 1.0,
    # matching ModerationResult.confidence; TODO confirm per provider.
    confidence: float = 0.0
    # Free-text description of the image content from the provider.
    description: str = ""
    # Provider-specific category labels (plain strings, not ContentCategory).
    categories: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class PhishingAnalysisResult:
    """Result of AI phishing/scam analysis.

    Returned by :meth:`AIProvider.analyze_phishing`; defaults describe a
    URL with no detected risk.
    """

    is_phishing: bool = False
    # Provider confidence in the verdict — presumably 0.0 to 1.0, matching
    # the other result types; TODO confirm per provider.
    confidence: float = 0.0
    # Human-readable indicators that contributed to the verdict.
    risk_factors: list[str] = field(default_factory=list)
    explanation: str = ""
|
||||
|
||||
|
||||
class AIProvider(ABC):
    """Abstract base class for AI providers.

    Concrete backends (e.g. Claude or OpenAI integrations) implement the
    three analysis entry points plus :meth:`close` for resource cleanup.
    """

    @abstractmethod
    async def moderate_text(
        self,
        content: str,
        context: str | None = None,
        sensitivity: int = 50,
    ) -> ModerationResult:
        """Analyze text content for policy violations.

        Args:
            content: The text to analyze.
            context: Optional context about the conversation/server.
            sensitivity: 0-100; higher means more strict.

        Returns:
            ModerationResult with analysis.
        """
        ...

    @abstractmethod
    async def analyze_image(
        self,
        image_url: str,
        sensitivity: int = 50,
    ) -> ImageAnalysisResult:
        """Analyze an image for NSFW or inappropriate content.

        Args:
            image_url: URL of the image to analyze.
            sensitivity: 0-100; higher means more strict.

        Returns:
            ImageAnalysisResult with analysis.
        """
        ...

    @abstractmethod
    async def analyze_phishing(
        self,
        url: str,
        message_content: str | None = None,
    ) -> PhishingAnalysisResult:
        """Analyze a URL for phishing/scam indicators.

        Args:
            url: The URL to analyze.
            message_content: Optional full message for context.

        Returns:
            PhishingAnalysisResult with analysis.
        """
        ...

    @abstractmethod
    async def close(self) -> None:
        """Release any resources held by the provider (HTTP sessions, etc.)."""
        ...
|
||||
Reference in New Issue
Block a user