Add monitoring, metrics tracking, and enhanced logging
- Add BotMonitor class for tracking requests, errors, and health status
- Track request success/failure, response times, error rates
- Add !status command (admin) for detailed metrics with embed
- Add !health command (anyone) for quick health check
- Add rotating file logging support (LOG_FILE config)
- Auto-detect unhealthy status when errors exceed threshold
- Record errors with timestamps and context for debugging
This commit is contained in:
147
src/daemon_boyfriend/cogs/status.py
Normal file
147
src/daemon_boyfriend/cogs/status.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Status cog - provides bot health and metrics commands."""
|
||||
|
||||
import logging
|
||||
|
||||
import discord
|
||||
from discord.ext import commands
|
||||
|
||||
from daemon_boyfriend.utils import HealthStatus, get_monitor
|
||||
|
||||
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StatusCog(commands.Cog):
    """Bot status and monitoring commands.

    Exposes two commands backed by the shared monitor returned from
    ``get_monitor()``:

    * ``status`` - detailed metrics rendered as an embed (administrators only).
    * ``health`` - a one-line health summary (no permission check).
    """

    # Emoji shown next to each health state; unknown states fall back to "⚪".
    _STATUS_EMOJI = {
        HealthStatus.HEALTHY: "🟢",
        HealthStatus.DEGRADED: "🟡",
        HealthStatus.UNHEALTHY: "🔴",
    }

    # How many of the most recent errors the status embed lists.
    _RECENT_ERROR_COUNT = 3

    # Maximum number of characters of an error message shown in the embed.
    _ERROR_PREVIEW_CHARS = 50

    def __init__(self, bot: commands.Bot) -> None:
        """Store the bot instance this cog is attached to."""
        self.bot = bot

    @staticmethod
    def _preview(message: str, limit: int) -> str:
        """Return *message* cut to *limit* characters.

        An ellipsis is appended only when text was actually removed, so short
        messages are not displayed as if they had been truncated.
        """
        if len(message) <= limit:
            return message
        return f"{message[:limit]}..."

    @commands.command(name="status")
    @commands.has_permissions(administrator=True)
    async def status_command(self, ctx: commands.Context) -> None:
        """Show bot status and metrics (admin only).

        Sends a single embed containing health state, uptime, guild count,
        request counters, performance figures, the timestamps of the last
        success/error and, when present, the most recent errors.
        """
        monitor = get_monitor()
        metrics = monitor.get_metrics()
        health = metrics.health_status

        # Choose embed color based on health status
        color_map = {
            HealthStatus.HEALTHY: discord.Color.green(),
            HealthStatus.DEGRADED: discord.Color.yellow(),
            HealthStatus.UNHEALTHY: discord.Color.red(),
        }
        color = color_map.get(health, discord.Color.grey())

        # Format uptime using the largest unit that yields a value >= 1
        uptime_hours = metrics.uptime_seconds / 3600
        if uptime_hours >= 24:
            uptime_str = f"{uptime_hours / 24:.1f} days"
        elif uptime_hours >= 1:
            uptime_str = f"{uptime_hours:.1f} hours"
        else:
            uptime_str = f"{metrics.uptime_seconds / 60:.1f} minutes"

        # Create embed
        embed = discord.Embed(
            title="Bot Status Report",
            color=color,
        )

        # Status field with emoji
        embed.add_field(
            name="Status",
            value=f"{self._STATUS_EMOJI.get(health, '⚪')} {health.value.upper()}",
            inline=True,
        )
        embed.add_field(name="Uptime", value=uptime_str, inline=True)
        embed.add_field(name="Guilds", value=str(len(self.bot.guilds)), inline=True)

        # Request metrics
        embed.add_field(
            name="Requests",
            value=f"Total: {metrics.total_requests}\n"
            f"Success: {metrics.successful_requests}\n"
            f"Failed: {metrics.failed_requests}",
            inline=True,
        )
        embed.add_field(
            name="Performance",
            value=f"Avg response: {metrics.avg_response_time_ms:.0f}ms\n"
            f"Req/min: {metrics.requests_per_minute:.1f}\n"
            f"Error rate: {metrics.error_rate_percent:.1f}%",
            inline=True,
        )

        # Last activity; placeholders are used when no timestamp is recorded
        last_success = "Never"
        if metrics.last_successful_request:
            last_success = metrics.last_successful_request.strftime("%Y-%m-%d %H:%M:%S UTC")

        last_error = "None"
        if metrics.last_error:
            last_error = metrics.last_error.strftime("%Y-%m-%d %H:%M:%S UTC")

        embed.add_field(
            name="Last Activity",
            value=f"Success: {last_success}\nError: {last_error}",
            inline=False,
        )

        # Recent errors (if any): one line per error, newest entries only
        if metrics.recent_errors:
            error_lines = []
            for error in metrics.recent_errors[-self._RECENT_ERROR_COUNT:]:
                time_str = error.timestamp.strftime("%H:%M:%S")
                preview = self._preview(error.message, self._ERROR_PREVIEW_CHARS)
                error_lines.append(f"`{time_str}` **{error.error_type}**: {preview}")
            embed.add_field(
                name="Recent Errors",
                value="\n".join(error_lines),
                inline=False,
            )

        await ctx.send(embed=embed)

    @commands.command(name="health")
    async def health_command(self, ctx: commands.Context) -> None:
        """Quick health check (anyone can use).

        Sends a one-line message with the health state, a compact uptime
        (hours once at least one hour has elapsed, otherwise minutes) and the
        total request count.
        """
        monitor = get_monitor()
        metrics = monitor.get_metrics()

        emoji = self._STATUS_EMOJI.get(metrics.health_status, "⚪")
        status = metrics.health_status.value

        # Format uptime
        uptime_hours = metrics.uptime_seconds / 3600
        if uptime_hours >= 1:
            uptime_str = f"{uptime_hours:.1f}h"
        else:
            uptime_str = f"{metrics.uptime_seconds / 60:.0f}m"

        await ctx.send(
            f"{emoji} **{status.upper()}** | Uptime: {uptime_str} | Requests: {metrics.total_requests}"
        )

    @status_command.error
    async def status_error(self, ctx: commands.Context, error: commands.CommandError) -> None:
        """Handle status command errors.

        Missing administrator permission is reported to the invoking channel;
        any other error is logged and not reported to the user.
        """
        if isinstance(error, commands.MissingPermissions):
            await ctx.send("You need administrator permissions to view detailed status.")
        else:
            # exc_info keeps the traceback, which the message text alone loses.
            logger.error("Status command error: %s", error, exc_info=error)
|
||||
|
||||
|
||||
async def setup(bot: commands.Bot) -> None:
    """Create a ``StatusCog`` bound to *bot* and add it to the bot."""
    status_cog = StatusCog(bot)
    await bot.add_cog(status_cog)
|
||||
Reference in New Issue
Block a user