Add monitoring, metrics tracking, and enhanced logging
- Add BotMonitor class for tracking requests, errors, and health status
- Track request success/failure, response times, error rates
- Add !status command (admin) for detailed metrics with embed
- Add !health command (anyone) for quick health check
- Add rotating file logging support (LOG_FILE config)
- Auto-detect unhealthy status when errors exceed threshold
- Record errors with timestamps and context for debugging
This commit is contained in:
@@ -14,6 +14,7 @@ from daemon_boyfriend.services import (
|
||||
Message,
|
||||
SearXNGService,
|
||||
)
|
||||
from daemon_boyfriend.utils import get_monitor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -101,6 +102,9 @@ class AIChatCog(commands.Cog):
|
||||
return
|
||||
|
||||
# Show typing indicator while generating response
|
||||
monitor = get_monitor()
|
||||
start_time = monitor.record_request_start()
|
||||
|
||||
async with message.channel.typing():
|
||||
try:
|
||||
response_text = await self._generate_response(message, content)
|
||||
@@ -132,7 +136,12 @@ class AIChatCog(commands.Cog):
|
||||
for img_url in remaining_images:
|
||||
await message.channel.send(embed=self._create_image_embed(img_url))
|
||||
|
||||
# Record successful request
|
||||
monitor.record_request_success(start_time)
|
||||
|
||||
except Exception as e:
|
||||
# Record failed request
|
||||
monitor.record_request_failure(start_time, e, context="on_message")
|
||||
logger.error(f"Mention response error: {e}", exc_info=True)
|
||||
error_message = self._get_error_message(e)
|
||||
await message.reply(error_message)
|
||||
|
||||
147
src/daemon_boyfriend/cogs/status.py
Normal file
147
src/daemon_boyfriend/cogs/status.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Status cog - provides bot health and metrics commands."""
|
||||
|
||||
import logging
|
||||
|
||||
import discord
|
||||
from discord.ext import commands
|
||||
|
||||
from daemon_boyfriend.utils import HealthStatus, get_monitor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StatusCog(commands.Cog):
    """Bot status and monitoring commands.

    Exposes two commands that both read a metrics snapshot from the
    monitor returned by ``get_monitor()``:

    * ``status`` - detailed embed, restricted to administrators.
    * ``health`` - one-line summary, usable by anyone.
    """

    # Emoji shown next to each health state; shared by both commands so the
    # two views cannot drift apart.
    _STATUS_EMOJI = {
        HealthStatus.HEALTHY: "🟢",
        HealthStatus.DEGRADED: "🟡",
        HealthStatus.UNHEALTHY: "🔴",
    }
    # Fallback for a health state that has no entry in _STATUS_EMOJI.
    _UNKNOWN_EMOJI = "⚪"
    # How many of the most recent errors the status embed lists.
    _RECENT_ERRORS_SHOWN = 3
    # Maximum number of characters of an error message shown in the embed.
    _ERROR_PREVIEW_CHARS = 50

    def __init__(self, bot: commands.Bot) -> None:
        """Store the bot instance so commands can read its guild list."""
        self.bot = bot

    @staticmethod
    def _format_uptime_long(uptime_seconds: float) -> str:
        """Return uptime as days, hours, or minutes with one decimal."""
        uptime_hours = uptime_seconds / 3600
        if uptime_hours >= 24:
            return f"{uptime_hours / 24:.1f} days"
        if uptime_hours >= 1:
            return f"{uptime_hours:.1f} hours"
        return f"{uptime_seconds / 60:.1f} minutes"

    @staticmethod
    def _format_uptime_short(uptime_seconds: float) -> str:
        """Return uptime in compact form: hours (``1.5h``) or minutes (``12m``)."""
        uptime_hours = uptime_seconds / 3600
        if uptime_hours >= 1:
            return f"{uptime_hours:.1f}h"
        return f"{uptime_seconds / 60:.0f}m"

    @staticmethod
    def _preview(text: str, limit: int) -> str:
        """Return ``text`` cut to ``limit`` characters.

        The ellipsis is appended only when something was actually cut off,
        so short messages are not shown as if they had been truncated.
        """
        if len(text) <= limit:
            return text
        return f"{text[:limit]}..."

    @commands.command(name="status")
    @commands.has_permissions(administrator=True)
    async def status_command(self, ctx: commands.Context) -> None:
        """Show bot status and metrics (admin only)."""
        metrics = get_monitor().get_metrics()

        # Choose embed color based on health status
        color_map = {
            HealthStatus.HEALTHY: discord.Color.green(),
            HealthStatus.DEGRADED: discord.Color.yellow(),
            HealthStatus.UNHEALTHY: discord.Color.red(),
        }
        color = color_map.get(metrics.health_status, discord.Color.grey())

        embed = discord.Embed(
            title="Bot Status Report",
            color=color,
        )

        # Headline row: health state, uptime, guild count
        emoji = self._STATUS_EMOJI.get(metrics.health_status, self._UNKNOWN_EMOJI)
        embed.add_field(
            name="Status",
            value=f"{emoji} {metrics.health_status.value.upper()}",
            inline=True,
        )
        embed.add_field(
            name="Uptime",
            value=self._format_uptime_long(metrics.uptime_seconds),
            inline=True,
        )
        embed.add_field(name="Guilds", value=str(len(self.bot.guilds)), inline=True)

        # Request metrics
        embed.add_field(
            name="Requests",
            value=f"Total: {metrics.total_requests}\n"
            f"Success: {metrics.successful_requests}\n"
            f"Failed: {metrics.failed_requests}",
            inline=True,
        )

        embed.add_field(
            name="Performance",
            value=f"Avg response: {metrics.avg_response_time_ms:.0f}ms\n"
            f"Req/min: {metrics.requests_per_minute:.1f}\n"
            f"Error rate: {metrics.error_rate_percent:.1f}%",
            inline=True,
        )

        # Last activity; placeholders are shown when nothing was recorded yet
        last_success = "Never"
        if metrics.last_successful_request:
            last_success = metrics.last_successful_request.strftime("%Y-%m-%d %H:%M:%S UTC")

        last_error = "None"
        if metrics.last_error:
            last_error = metrics.last_error.strftime("%Y-%m-%d %H:%M:%S UTC")

        embed.add_field(
            name="Last Activity",
            value=f"Success: {last_success}\nError: {last_error}",
            inline=False,
        )

        # Recent errors (if any): only the newest few, one line each
        if metrics.recent_errors:
            error_lines = []
            for error in metrics.recent_errors[-self._RECENT_ERRORS_SHOWN:]:
                time_str = error.timestamp.strftime("%H:%M:%S")
                preview = self._preview(error.message, self._ERROR_PREVIEW_CHARS)
                error_lines.append(f"`{time_str}` **{error.error_type}**: {preview}")
            embed.add_field(
                name="Recent Errors",
                value="\n".join(error_lines),
                inline=False,
            )

        await ctx.send(embed=embed)

    @commands.command(name="health")
    async def health_command(self, ctx: commands.Context) -> None:
        """Quick health check (anyone can use)."""
        metrics = get_monitor().get_metrics()

        emoji = self._STATUS_EMOJI.get(metrics.health_status, self._UNKNOWN_EMOJI)
        status = metrics.health_status.value
        uptime_str = self._format_uptime_short(metrics.uptime_seconds)

        await ctx.send(
            f"{emoji} **{status.upper()}** | Uptime: {uptime_str} | Requests: {metrics.total_requests}"
        )

    @status_command.error
    async def status_error(self, ctx: commands.Context, error: commands.CommandError) -> None:
        """Handle status command errors.

        A missing-permission failure is reported back to the invoking user;
        any other error is only logged.
        """
        if isinstance(error, commands.MissingPermissions):
            await ctx.send("You need administrator permissions to view detailed status.")
        else:
            # This handler is not running inside an ``except`` block, so the
            # exception object is passed explicitly to keep its traceback.
            logger.error(f"Status command error: {error}", exc_info=error)
|
||||
|
||||
|
||||
async def setup(bot: commands.Bot) -> None:
    """Create a StatusCog for ``bot`` and register it on that bot."""
    status_cog = StatusCog(bot)
    await bot.add_cog(status_cog)
|
||||
Reference in New Issue
Block a user