Add NSFW-only filtering mode for content moderation

- Add nsfw_only_filtering field to GuildSettings model
- Create database migration for new field (20260124_add_nsfw_only_filtering)
- Update AI moderation logic to respect NSFW-only mode
- Add Discord command !ai nsfwonly <true/false> for toggling mode
- Implement filtering logic in image analysis for both attachments and embeds
- Add comprehensive test suite for new functionality
- Update documentation with usage examples and feature description
- Create dedicated CI workflow for testing NSFW-only filtering feature

When enabled, only sexual/nude content is filtered while allowing:

- Violence and gore
- Harassment and bullying
- Hate speech
- Self-harm content
- Other content categories

This mode is useful for gaming communities and mature discussion servers that have specific content policies allowing violence but prohibiting sexual material.
2026-01-24 23:51:10 +01:00
parent 824dd681f7
commit 1250b5573c
6 changed files with 748 additions and 14 deletions
--- a/tests/test_nsfw_only_filtering.py
+++ b/tests/test_nsfw_only_filtering.py
@@ -0,0 +1,403 @@
+"""Tests for NSFW-only filtering functionality."""
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock
+
+from guardden.models.guild import GuildSettings
+from guardden.services.ai.base import ContentCategory, ModerationResult, ImageAnalysisResult
+from guardden.cogs.ai_moderation import AIModeration
+
+
+class TestNSFWOnlyFiltering:
+    """Tests for NSFW-only filtering mode."""
+
+    @pytest.fixture
+    def mock_bot(self):
+        """Create a mock bot instance."""
+        bot = MagicMock()
+        bot.user.id = 123456789
+        bot.user.__str__ = MagicMock(return_value="GuardDen")
+        bot.database.session.return_value.__aenter__ = AsyncMock()
+        bot.database.session.return_value.__aexit__ = AsyncMock()
+        bot.database.session.return_value.add = MagicMock()
+        return bot
+
+    @pytest.fixture
+    def ai_moderation(self, mock_bot):
+        """Create an AIModeration instance with mocked bot."""
+        return AIModeration(mock_bot)
+
+    @pytest.fixture
+    def mock_message(self):
+        """Create a mock Discord message."""
+        message = MagicMock()
+        message.id = 987654321
+        message.content = "Test message content"
+        message.guild.id = 111222333
+        message.guild.name = "Test Guild"
+        message.channel.id = 444555666
+        message.channel.name = "general"
+        message.author.id = 777888999
+        message.author.__str__ = MagicMock(return_value="TestUser")
+        message.author.display_avatar.url = "https://example.com/avatar.png"
+        message.delete = AsyncMock()
+        message.author.send = AsyncMock()
+        return message
+
+    @pytest.fixture
+    def guild_config_normal(self):
+        """Guild config with normal filtering (NSFW-only disabled)."""
+        config = MagicMock()
+        config.ai_moderation_enabled = True
+        config.nsfw_detection_enabled = True
+        config.nsfw_only_filtering = False
+        config.ai_sensitivity = 80
+        config.ai_confidence_threshold = 0.7
+        config.ai_log_only = False
+        config.mod_log_channel_id = None
+        return config
+
+    @pytest.fixture
+    def guild_config_nsfw_only(self):
+        """Guild config with NSFW-only filtering enabled."""
+        config = MagicMock()
+        config.ai_moderation_enabled = True
+        config.nsfw_detection_enabled = True
+        config.nsfw_only_filtering = True
+        config.ai_sensitivity = 80
+        config.ai_confidence_threshold = 0.7
+        config.ai_log_only = False
+        config.mod_log_channel_id = None
+        return config
+
+    @pytest.mark.asyncio
+    async def test_normal_mode_blocks_violence(self, ai_moderation, mock_message, guild_config_normal, mock_bot):
+        """Test that normal mode blocks violence content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_normal  # NOTE(review): get_config is a plain MagicMock here (same in every test of this class); if production awaits it, this needs AsyncMock - confirm
+        
+        # Create a moderation result with violence category
+        result = ModerationResult(
+            is_flagged=True,
+            confidence=0.9,
+            categories=[ContentCategory.VIOLENCE],
+            explanation="Violence detected",
+            suggested_action="delete"
+        )
+
+        # Run the handler; mock_message.delete is an AsyncMock, so any call is recorded
+        await ai_moderation._handle_ai_result(mock_message, result, "Text Analysis")
+        
+        # In normal mode, violent content should be deleted
+        mock_message.delete.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_mode_ignores_violence(self, ai_moderation, mock_message, guild_config_nsfw_only, mock_bot):
+        """Test that NSFW-only mode ignores violence content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Create a moderation result with violence category
+        result = ModerationResult(
+            is_flagged=True,
+            confidence=0.9,
+            categories=[ContentCategory.VIOLENCE],
+            explanation="Violence detected",
+            suggested_action="delete"
+        )
+
+        # Run the handler; mock_message.delete is an AsyncMock, so any call is recorded
+        await ai_moderation._handle_ai_result(mock_message, result, "Text Analysis")
+        
+        # In NSFW-only mode, violent content should NOT be deleted
+        mock_message.delete.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_mode_blocks_sexual_content(self, ai_moderation, mock_message, guild_config_nsfw_only, mock_bot):
+        """Test that NSFW-only mode still blocks sexual content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Create a moderation result with sexual category
+        result = ModerationResult(
+            is_flagged=True,
+            confidence=0.9,
+            categories=[ContentCategory.SEXUAL],
+            explanation="Sexual content detected",
+            suggested_action="delete"
+        )
+
+        await ai_moderation._handle_ai_result(mock_message, result, "Text Analysis")
+        
+        # In NSFW-only mode, sexual content should still be deleted
+        mock_message.delete.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_mode_ignores_harassment(self, ai_moderation, mock_message, guild_config_nsfw_only, mock_bot):
+        """Test that NSFW-only mode ignores harassment content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Create a moderation result with harassment category
+        result = ModerationResult(
+            is_flagged=True,
+            confidence=0.9,
+            categories=[ContentCategory.HARASSMENT],
+            explanation="Harassment detected",
+            suggested_action="warn"
+        )
+
+        await ai_moderation._handle_ai_result(mock_message, result, "Text Analysis")
+        
+        # In NSFW-only mode, harassment content should be ignored
+        mock_message.delete.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_mode_ignores_hate_speech(self, ai_moderation, mock_message, guild_config_nsfw_only, mock_bot):
+        """Test that NSFW-only mode ignores hate speech content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Create a moderation result with hate speech category
+        result = ModerationResult(
+            is_flagged=True,
+            confidence=0.9,
+            categories=[ContentCategory.HATE_SPEECH],
+            explanation="Hate speech detected",
+            suggested_action="delete"
+        )
+
+        await ai_moderation._handle_ai_result(mock_message, result, "Text Analysis")
+        
+        # In NSFW-only mode, hate speech content should be ignored
+        mock_message.delete.assert_not_called()
+
+    @pytest.mark.asyncio 
+    async def test_nsfw_only_mode_ignores_self_harm(self, ai_moderation, mock_message, guild_config_nsfw_only, mock_bot):
+        """Test that NSFW-only mode ignores self-harm content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Create a moderation result with self-harm category
+        result = ModerationResult(
+            is_flagged=True,
+            confidence=0.9,
+            categories=[ContentCategory.SELF_HARM],
+            explanation="Self-harm content detected",
+            suggested_action="delete"
+        )
+
+        await ai_moderation._handle_ai_result(mock_message, result, "Text Analysis")
+        
+        # In NSFW-only mode, self-harm content should be ignored
+        mock_message.delete.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_mode_mixed_categories_blocks_only_sexual(self, ai_moderation, mock_message, guild_config_nsfw_only, mock_bot):
+        """Test that NSFW-only mode with mixed categories only blocks if sexual content is present."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Create a moderation result with both sexual and violence categories
+        result = ModerationResult(
+            is_flagged=True,
+            confidence=0.9,
+            categories=[ContentCategory.SEXUAL, ContentCategory.VIOLENCE],
+            explanation="Sexual and violent content detected",
+            suggested_action="delete"
+        )
+
+        await ai_moderation._handle_ai_result(mock_message, result, "Text Analysis")
+        
+        # Should still be deleted because sexual content is present
+        mock_message.delete.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_mode_image_analysis_nsfw_flagged(self, ai_moderation, mock_bot, guild_config_nsfw_only):
+        """Test that NSFW-only mode flags NSFW images."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Build an image analysis result flagged as both NSFW and violent
+        image_result = ImageAnalysisResult(
+            is_nsfw=True,
+            is_violent=True,  # Also violent, but should be ignored in NSFW-only mode
+            is_disturbing=False,
+            confidence=0.9,
+            description="NSFW image with violence",
+            categories=["sexual", "violence"],
+            nsfw_category="explicit",
+            nsfw_severity=85
+        )
+
+        # NOTE(review): the branch below re-implements the filtering rule inline; AIModeration itself is never called here
+        should_flag_image = False
+        categories = []
+        
+        if guild_config_nsfw_only.nsfw_only_filtering:
+            # In NSFW-only mode, only flag sexual content
+            if image_result.is_nsfw:
+                should_flag_image = True
+                categories.append(ContentCategory.SEXUAL)
+        
+        assert should_flag_image is True
+        assert ContentCategory.SEXUAL in categories
+        assert ContentCategory.VIOLENCE not in categories
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_mode_image_analysis_violence_ignored(self, ai_moderation, mock_bot, guild_config_nsfw_only):
+        """Test that NSFW-only mode ignores violent images without sexual content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_nsfw_only
+        
+        # Build an image analysis result with only violence (no NSFW)
+        image_result = ImageAnalysisResult(
+            is_nsfw=False,
+            is_violent=True,
+            is_disturbing=True,
+            confidence=0.9,
+            description="Violent image without sexual content",
+            categories=["violence"],
+            nsfw_category="none",
+            nsfw_severity=0
+        )
+
+        # NOTE(review): inline copy of the NSFW-only rule; does not exercise AIModeration
+        should_flag_image = False
+        categories = []
+        
+        if guild_config_nsfw_only.nsfw_only_filtering:
+            # In NSFW-only mode, only flag sexual content
+            if image_result.is_nsfw:
+                should_flag_image = True
+                categories.append(ContentCategory.SEXUAL)
+        
+        assert should_flag_image is False
+        assert categories == []
+
+    @pytest.mark.asyncio
+    async def test_normal_mode_image_analysis_flags_all(self, ai_moderation, mock_bot, guild_config_normal):
+        """Test that normal mode flags all inappropriate image content."""
+        mock_bot.guild_config.get_config.return_value = guild_config_normal
+        
+        # Build an image analysis result with violence only
+        image_result = ImageAnalysisResult(
+            is_nsfw=False,
+            is_violent=True,
+            is_disturbing=True,
+            confidence=0.9,
+            description="Violent image",
+            categories=["violence"],
+            nsfw_category="none",
+            nsfw_severity=0
+        )
+
+        # NOTE(review): inline copy of the normal-mode rule; does not exercise AIModeration
+        should_flag_image = False
+        categories = []
+        
+        if not guild_config_normal.nsfw_only_filtering:
+            # Normal mode: flag all inappropriate content
+            if image_result.is_nsfw:
+                should_flag_image = True
+                categories.append(ContentCategory.SEXUAL)
+            if image_result.is_violent:
+                should_flag_image = True
+                categories.append(ContentCategory.VIOLENCE)
+            if image_result.is_disturbing:
+                should_flag_image = True  # no category is appended for disturbing content
+        
+        assert should_flag_image is True
+        assert ContentCategory.VIOLENCE in categories
+
+    def test_guild_settings_model_has_nsfw_only_filtering(self):
+        """Test that GuildSettings model includes the nsfw_only_filtering field."""
+        # This test ensures the database model was updated correctly
+        # Note: This is a basic check, more comprehensive DB tests would require actual DB setup
+        assert hasattr(GuildSettings, "nsfw_only_filtering")
+
+
+class TestNSFWOnlyFilteringCommands:
+    """Tests for NSFW-only filtering Discord commands."""
+
+    @pytest.fixture
+    def mock_ctx(self):
+        """Create a mock Discord context."""
+        ctx = MagicMock()
+        ctx.guild.id = 111222333
+        ctx.guild.name = "Test Guild"
+        ctx.send = AsyncMock()
+        return ctx
+
+    @pytest.fixture
+    def mock_bot_with_config(self):
+        """Create a mock bot with guild config service."""
+        bot = MagicMock()
+        bot.guild_config.update_settings = AsyncMock()
+        return bot
+
+    @pytest.fixture
+    def ai_moderation_with_bot(self, mock_bot_with_config):
+        """Create an AIModeration instance with mocked bot."""
+        return AIModeration(mock_bot_with_config)
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_command_enable(self, ai_moderation_with_bot, mock_ctx, mock_bot_with_config):
+        """Test the !ai nsfwonly true command."""
+        await ai_moderation_with_bot.ai_nsfw_only(mock_ctx, True)
+        
+        # Verify the setting was updated
+        mock_bot_with_config.guild_config.update_settings.assert_called_once_with(
+            mock_ctx.guild.id, 
+            nsfw_only_filtering=True
+        )
+        
+        # Verify response was sent
+        mock_ctx.send.assert_called_once()
+        
+        # Check that the embed title announces that NSFW-only mode was enabled
+        call_args = mock_ctx.send.call_args[0][0]  # First positional arg of ctx.send; assumes the embed is not passed as embed=... - confirm
+        assert "NSFW-Only Mode Enabled" in str(call_args.title)
+
+    @pytest.mark.asyncio
+    async def test_nsfw_only_command_disable(self, ai_moderation_with_bot, mock_ctx, mock_bot_with_config):
+        """Test the !ai nsfwonly false command."""
+        await ai_moderation_with_bot.ai_nsfw_only(mock_ctx, False)
+        
+        # Verify the setting was updated
+        mock_bot_with_config.guild_config.update_settings.assert_called_once_with(
+            mock_ctx.guild.id, 
+            nsfw_only_filtering=False
+        )
+        
+        # Verify response was sent
+        mock_ctx.send.assert_called_once()
+        
+        # Check that the embed title announces that NSFW-only mode was disabled
+        call_args = mock_ctx.send.call_args[0][0]  # First positional arg of ctx.send; assumes the embed is not passed as embed=... - confirm
+        assert "NSFW-Only Mode Disabled" in str(call_args.title)
+
+    @pytest.mark.asyncio
+    async def test_ai_settings_display_includes_nsfw_only_mode(self, ai_moderation_with_bot, mock_ctx, mock_bot_with_config):
+        """Test that !ai command shows NSFW-only mode status."""
+        # Build the guild config object that get_config will return
+        config = MagicMock()
+        config.ai_moderation_enabled = True
+        config.nsfw_detection_enabled = True
+        config.nsfw_only_filtering = True  # Enable NSFW-only mode
+        config.ai_sensitivity = 80
+        config.ai_confidence_threshold = 0.7
+        config.ai_log_only = False
+        
+        mock_bot_with_config.guild_config.get_config.return_value = config  # NOTE(review): get_config is a plain MagicMock; if production awaits it, this needs AsyncMock - confirm
+        mock_bot_with_config.settings.ai_provider = "openai"
+        
+        await ai_moderation_with_bot.ai_cmd(mock_ctx)
+        
+        # Verify that the embed was sent
+        mock_ctx.send.assert_called_once()
+        
+        # Check that NSFW-Only Mode field is in the embed
+        call_args = mock_ctx.send.call_args[0][0]  # First positional arg of ctx.send; assumes the embed is not passed as embed=... - confirm
+        embed_dict = call_args.to_dict()
+        
+        # Look for the NSFW-Only Mode field
+        nsfw_only_field = None
+        for field in embed_dict.get("fields", []):
+            if field.get("name") == "NSFW-Only Mode":
+                nsfw_only_field = field
+                break
+        
+        assert nsfw_only_field is not None
+        assert "✅ Enabled" in nsfw_only_field.get("value", "")