Add image and GIF vision support

- Add ImageAttachment dataclass for image metadata
- Update Message to support list of image attachments
- Update all providers (OpenAI, Anthropic, Gemini, OpenRouter) for vision
- Extract images from Discord attachments and embeds in ai_chat.py
- Support PNG, JPEG, GIF, and WebP formats
This commit is contained in:
2026-01-11 20:56:50 +01:00
parent 8f521b869b
commit 4ac123be9c
8 changed files with 187 additions and 12 deletions

View File

@@ -7,7 +7,13 @@ import discord
from discord.ext import commands
from daemon_boyfriend.config import settings
from daemon_boyfriend.services import AIService, ConversationManager, Message, SearXNGService
from daemon_boyfriend.services import (
AIService,
ConversationManager,
ImageAttachment,
Message,
SearXNGService,
)
logger = logging.getLogger(__name__)
@@ -125,6 +131,65 @@ class AIChatCog(commands.Cog):
return content.strip()
def _extract_image_attachments(self, message: discord.Message) -> list[ImageAttachment]:
    """Extract image attachments from a Discord message.

    Images are collected from two places, in this order: the message's
    uploaded attachments, then the ``image`` of each embed.

    An attachment is accepted when its content type is a supported image
    type; otherwise the filename extension is used as a fallback. Embed
    images are always accepted, with the media type guessed from the
    extension of the URL *path* (query string and host are ignored) and
    defaulting to ``image/png`` when it cannot be determined.

    Args:
        message: The Discord message

    Returns:
        List of ImageAttachment objects
    """
    # Function-scope import: only this method needs URL parsing.
    from urllib.parse import urlsplit

    # Supported MIME types, normalised ("image/jpg" is a common
    # non-standard alias for "image/jpeg").
    mime_types = {
        "image/png": "image/png",
        "image/jpeg": "image/jpeg",
        "image/jpg": "image/jpeg",
        "image/gif": "image/gif",
        "image/webp": "image/webp",
    }
    # Supported file extensions mapped to their media type.
    extension_types = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "gif": "image/gif",
        "webp": "image/webp",
    }

    def media_type_from_name(name: str) -> str | None:
        """Return the media type for *name*'s extension, or None."""
        _, dot, ext = name.lower().rpartition(".")
        # Without a dot there is no extension at all; a file literally
        # named "png" must not be treated as an image.
        return extension_types.get(ext) if dot else None

    images = []

    # Check message attachments
    for attachment in message.attachments:
        # Drop any "; param=value" suffix and normalise case before lookup.
        content_type = (attachment.content_type or "").split(";", 1)[0].strip().lower()
        media_type = mime_types.get(content_type)
        # Fall back to the file extension when the content type is missing
        # or is not one of the supported image types.
        if media_type is None and attachment.filename:
            media_type = media_type_from_name(attachment.filename)
        if media_type is not None:
            images.append(ImageAttachment(url=attachment.url, media_type=media_type))

    # Check embeds for images
    for embed in message.embeds:
        if not (embed.image and embed.image.url):
            continue
        # Guess the media type from the URL path only, so text in the
        # query string or host (e.g. "?src=a.jpg") cannot mislead us.
        try:
            path = urlsplit(embed.image.url).path
        except ValueError:
            # Malformed URL: keep the image and use the default type below.
            path = ""
        media_type = media_type_from_name(path) or "image/png"  # default
        images.append(ImageAttachment(url=embed.image.url, media_type=media_type))

    logger.debug("Extracted %d images from message", len(images))
    return images
def _get_mentioned_users_context(self, message: discord.Message) -> str | None:
"""Get context about mentioned users (excluding the bot).
@@ -178,8 +243,12 @@ class AIChatCog(commands.Cog):
# Get conversation history
history = self.conversations.get_history(user_id)
# Add current message to history for the API call
messages = history + [Message(role="user", content=user_message)]
# Extract any image attachments from the message
images = self._extract_image_attachments(message)
# Add current message to history for the API call (with images if any)
current_message = Message(role="user", content=user_message, images=images)
messages = history + [current_message]
# Check if we should search the web
search_context = await self._maybe_search(user_message)