Add NSFW-only filtering mode for content moderation

- Add nsfw_only_filtering field to GuildSettings model
- Create database migration for new field (20260124_add_nsfw_only_filtering)
- Update AI moderation logic to respect NSFW-only mode
- Add Discord command `!ai nsfwonly <true/false>` for toggling mode
- Implement filtering logic in image analysis for both attachments and embeds
- Add comprehensive test suite for new functionality
- Update documentation with usage examples and feature description
- Create dedicated CI workflow for testing NSFW-only filtering feature

When enabled, only sexual/nude content is filtered, while the following are allowed:

- Violence and gore
- Harassment and bullying
- Hate speech
- Self-harm content
- Other content categories

This mode is useful for gaming communities and mature discussion servers whose content policies allow violence but prohibit sexual material.
2026-01-24 23:51:10 +01:00
parent 824dd681f7
commit 1250b5573c
6 changed files with 748 additions and 14 deletions
--- a/src/guardden/cogs/ai_moderation.py
+++ b/src/guardden/cogs/ai_moderation.py
@@ -93,6 +93,16 @@ class AIModeration(commands.Cog):
        if not config:
            return

+        # Check NSFW-only filtering mode
+        if config.nsfw_only_filtering:
+            # Only process SEXUAL content when NSFW-only mode is enabled
+            if ContentCategory.SEXUAL not in result.categories:
+                logger.debug(
+                    "NSFW-only mode enabled, ignoring non-sexual content: categories=%s",
+                    [cat.value for cat in result.categories],
+                )
+                return
+
        # Check if severity meets threshold based on sensitivity
        # Higher sensitivity = lower threshold needed to trigger
        threshold = 100 - config.ai_sensitivity  # e.g., sensitivity 70 = threshold 30
@@ -315,17 +325,27 @@ class AIModeration(commands.Cog):
                        f"severity={image_result.nsfw_severity}, violent={image_result.is_violent}, conf={image_result.confidence}"
                    )

-                    if (
-                        image_result.is_nsfw
-                        or image_result.is_violent
-                        or image_result.is_disturbing
-                    ):
-                        # Convert to ModerationResult format
-                        categories = []
+                    # Filter based on NSFW-only mode setting
+                    should_flag_image = False
+                    categories = []
+                    
+                    if config.nsfw_only_filtering:
+                        # In NSFW-only mode, only flag sexual content
                        if image_result.is_nsfw:
+                            should_flag_image = True
+                            categories.append(ContentCategory.SEXUAL)
+                    else:
+                        # Normal mode: flag all inappropriate content
+                        if image_result.is_nsfw:
+                            should_flag_image = True
                            categories.append(ContentCategory.SEXUAL)
                        if image_result.is_violent:
+                            should_flag_image = True
                            categories.append(ContentCategory.VIOLENCE)
+                        if image_result.is_disturbing:
+                            should_flag_image = True
+
+                    if should_flag_image:

                        # Use nsfw_severity if available, otherwise use None for default calculation
                        severity_override = (
@@ -373,16 +393,27 @@ class AIModeration(commands.Cog):
                        f"severity={image_result.nsfw_severity}, violent={image_result.is_violent}, conf={image_result.confidence}"
                    )

-                    if (
-                        image_result.is_nsfw
-                        or image_result.is_violent
-                        or image_result.is_disturbing
-                    ):
-                        categories = []
+                    # Filter based on NSFW-only mode setting
+                    should_flag_image = False
+                    categories = []
+                    
+                    if config.nsfw_only_filtering:
+                        # In NSFW-only mode, only flag sexual content
                        if image_result.is_nsfw:
+                            should_flag_image = True
+                            categories.append(ContentCategory.SEXUAL)
+                    else:
+                        # Normal mode: flag all inappropriate content
+                        if image_result.is_nsfw:
+                            should_flag_image = True
                            categories.append(ContentCategory.SEXUAL)
                        if image_result.is_violent:
+                            should_flag_image = True
                            categories.append(ContentCategory.VIOLENCE)
+                        if image_result.is_disturbing:
+                            should_flag_image = True
+
+                    if should_flag_image:

                        # Use nsfw_severity if available, otherwise use None for default calculation
                        severity_override = (
@@ -465,6 +496,11 @@ class AIModeration(commands.Cog):
            value="✅ Enabled" if config and config.ai_log_only else "❌ Disabled",
            inline=True,
        )
+        embed.add_field(
+            name="NSFW-Only Mode",
+            value="✅ Enabled" if config and config.nsfw_only_filtering else "❌ Disabled",
+            inline=True,
+        )
        embed.add_field(
            name="AI Provider",
            value=self.bot.settings.ai_provider.capitalize(),
@@ -537,6 +573,46 @@ class AIModeration(commands.Cog):
        status = "enabled" if enabled else "disabled"
        await ctx.send(f"NSFW detection {status}.")

+    @ai_cmd.command(name="nsfwonly")
+    @commands.has_permissions(administrator=True)
+    @commands.guild_only()
+    async def ai_nsfw_only(self, ctx: commands.Context, enabled: bool) -> None:
+        """Enable or disable NSFW-only filtering mode.
+
+        When enabled, only sexual/nude content will be filtered.
+        Violence, harassment, and other content types will be allowed.
+        """
+        await self.bot.guild_config.update_settings(ctx.guild.id, nsfw_only_filtering=enabled)
+
+        # Enabling relaxes moderation, so that branch replies with a warning-styled (orange) embed.
+        if enabled:
+            embed = discord.Embed(
+                title="NSFW-Only Mode Enabled",
+                description="⚠️ **Important:** Only sexual and nude content will now be filtered.\n"
+                           "Violence, harassment, hate speech, and other content types will be **allowed**.",
+                color=discord.Color.orange(),
+            )
+            embed.add_field(
+                name="What will be filtered:",
+                value="• Sexual content\n• Nude images\n• Explicit material",
+                inline=True,
+            )
+            embed.add_field(
+                name="What will be allowed:",
+                value="• Violence and gore\n• Harassment\n• Hate speech\n• Self-harm content",
+                inline=True,
+            )
+            embed.set_footer(text="Use '!ai nsfwonly false' to return to normal filtering")
+        else:
+            embed = discord.Embed(
+                title="NSFW-Only Mode Disabled",
+                description="✅ Normal content filtering restored.\n"
+                           "All inappropriate content types will now be filtered.",
+                color=discord.Color.green(),
+            )
+
+        await ctx.send(embed=embed)
+
    @ai_cmd.command(name="analyze")
    @commands.has_permissions(administrator=True)
    @commands.guild_only()
--- a/src/guardden/models/guild.py
+++ b/src/guardden/models/guild.py
@@ -97,6 +97,7 @@ class GuildSettings(Base, TimestampMixin):
    ai_confidence_threshold: Mapped[float] = mapped_column(Float, default=0.7, nullable=False)
    ai_log_only: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    nsfw_detection_enabled: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
+    nsfw_only_filtering: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)

    # Verification settings
    verification_enabled: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)