Add image and GIF vision support
- Add ImageAttachment dataclass for image metadata
- Update Message to support a list of image attachments
- Update all providers (OpenAI, Anthropic, Gemini, OpenRouter) for vision
- Extract images from Discord attachments and embeds in ai_chat.py
- Support PNG, JPEG, GIF, and WebP formats
This commit is contained in:
@@ -7,7 +7,13 @@ import discord
|
||||
from discord.ext import commands
|
||||
|
||||
from daemon_boyfriend.config import settings
|
||||
from daemon_boyfriend.services import AIService, ConversationManager, Message, SearXNGService
|
||||
from daemon_boyfriend.services import (
|
||||
AIService,
|
||||
ConversationManager,
|
||||
ImageAttachment,
|
||||
Message,
|
||||
SearXNGService,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -125,6 +131,65 @@ class AIChatCog(commands.Cog):
|
||||
|
||||
return content.strip()
|
||||
|
||||
def _extract_image_attachments(self, message: discord.Message) -> list[ImageAttachment]:
    """Extract image attachments from a Discord message.

    Uploaded attachments are recognised by their reported content type,
    falling back to the file extension when the content type is missing or
    is not a supported image type. Embed images are always included; their
    media type is guessed from the URL path and defaults to ``image/png``.

    Args:
        message: The Discord message

    Returns:
        List of ImageAttachment objects (PNG, JPEG, GIF and WebP only for
        uploaded attachments)
    """
    # Function-scope imports: only this method needs them.
    from posixpath import splitext
    from urllib.parse import urlsplit

    # Accepted content types mapped to the canonical media type we report.
    image_types = {
        "image/png": "image/png",
        "image/jpeg": "image/jpeg",
        "image/jpg": "image/jpeg",
        "image/gif": "image/gif",
        "image/webp": "image/webp",
    }
    # File extensions (without the dot) mapped to the same media types.
    extension_types = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "gif": "image/gif",
        "webp": "image/webp",
    }

    images: list[ImageAttachment] = []

    # Check message attachments
    for attachment in message.attachments:
        # Drop any parameters ("image/jpeg; charset=...") and normalise case
        # so an otherwise valid image type is not missed.
        content_type = (attachment.content_type or "").split(";", 1)[0].strip().lower()
        media_type = image_types.get(content_type)

        # Fall back to the file extension; require a real "." so a file
        # literally named "png" is not mistaken for an image.
        if media_type is None and attachment.filename:
            _, dot, ext = attachment.filename.lower().rpartition(".")
            if dot:
                media_type = extension_types.get(ext)

        if media_type is not None:
            images.append(ImageAttachment(url=attachment.url, media_type=media_type))

    # Check embeds for images
    for embed in message.embeds:
        if not (embed.image and embed.image.url):
            continue

        # Only look at the URL path: host names and query strings can
        # contain ".gif"/".jpg" without describing the actual image.
        try:
            path = urlsplit(embed.image.url).path.lower()
        except ValueError:
            # Malformed URL: keep the best-effort guess on the raw string.
            path = embed.image.url.lower()

        media_type = extension_types.get(splitext(path)[1].lstrip("."))
        if media_type is None:
            # Decorated suffixes such as ".jpg:large" defeat splitext, so
            # scan the path as a last resort before defaulting to PNG.
            media_type = next(
                (
                    extension_types[ext]
                    for ext in ("jpg", "jpeg", "gif", "webp")
                    if f".{ext}" in path
                ),
                "image/png",
            )
        images.append(ImageAttachment(url=embed.image.url, media_type=media_type))

    logger.debug("Extracted %d images from message", len(images))
    return images
|
||||
|
||||
def _get_mentioned_users_context(self, message: discord.Message) -> str | None:
|
||||
"""Get context about mentioned users (excluding the bot).
|
||||
|
||||
@@ -178,8 +243,12 @@ class AIChatCog(commands.Cog):
|
||||
# Get conversation history
|
||||
history = self.conversations.get_history(user_id)
|
||||
|
||||
# Add current message to history for the API call
|
||||
messages = history + [Message(role="user", content=user_message)]
|
||||
# Extract any image attachments from the message
|
||||
images = self._extract_image_attachments(message)
|
||||
|
||||
# Add current message to history for the API call (with images if any)
|
||||
current_message = Message(role="user", content=user_message, images=images)
|
||||
messages = history + [current_message]
|
||||
|
||||
# Check if we should search the web
|
||||
search_context = await self._maybe_search(user_message)
|
||||
|
||||
Reference in New Issue
Block a user